Diffstat (limited to 'drivers/gpu/drm/i915')
151 files changed, 11394 insertions, 5463 deletions
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 091aef281963..4eee91a3a236 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -17,6 +17,7 @@ subdir-ccflags-y += $(call cc-disable-warning, unused-parameter) subdir-ccflags-y += $(call cc-disable-warning, type-limits) subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers) subdir-ccflags-y += $(call cc-disable-warning, implicit-fallthrough) +subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable) subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror # Fine grained warnings disable @@ -62,13 +63,14 @@ i915-y += i915_cmd_parser.o \ i915_gem.o \ i915_gem_object.o \ i915_gem_render_state.o \ - i915_gem_request.o \ i915_gem_shrinker.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ i915_gem_timeline.o \ i915_gem_userptr.o \ i915_gemfs.o \ + i915_query.o \ + i915_request.o \ i915_trace_points.o \ i915_vma.o \ intel_breadcrumbs.o \ @@ -83,11 +85,13 @@ i915-y += i915_cmd_parser.o \ i915-y += intel_uc.o \ intel_uc_fw.o \ intel_guc.o \ + intel_guc_ads.o \ intel_guc_ct.o \ intel_guc_fw.o \ intel_guc_log.o \ intel_guc_submission.o \ - intel_huc.o + intel_huc.o \ + intel_huc_fw.o # autogenerated null render state i915-y += intel_renderstate_gen6.o \ @@ -108,6 +112,7 @@ i915-y += intel_audio.o \ intel_fbc.o \ intel_fifo_underrun.o \ intel_frontbuffer.o \ + intel_hdcp.o \ intel_hotplug.o \ intel_modes.o \ intel_overlay.o \ diff --git a/drivers/gpu/drm/i915/dvo_ivch.c b/drivers/gpu/drm/i915/dvo_ivch.c index 4950b82f5b49..c73aff163908 100644 --- a/drivers/gpu/drm/i915/dvo_ivch.c +++ b/drivers/gpu/drm/i915/dvo_ivch.c @@ -59,28 +59,28 @@ * This must not be set while VR01_DVO_BYPASS_ENABLE is set. */ # define VR01_LCD_ENABLE (1 << 2) -/** Enables the DVO repeater. */ +/* Enables the DVO repeater. */ # define VR01_DVO_BYPASS_ENABLE (1 << 1) -/** Enables the DVO clock */ +/* Enables the DVO clock */ # define VR01_DVO_ENABLE (1 << 0) -/** Enable dithering for 18bpp panels. Not documented. */ +/* Enable dithering for 18bpp panels. Not documented. */ # define VR01_DITHER_ENABLE (1 << 4) /* * LCD Interface Format */ #define VR10 0x10 -/** Enables LVDS output instead of CMOS */ +/* Enables LVDS output instead of CMOS */ # define VR10_LVDS_ENABLE (1 << 4) -/** Enables 18-bit LVDS output. */ +/* Enables 18-bit LVDS output. */ # define VR10_INTERFACE_1X18 (0 << 2) -/** Enables 24-bit LVDS or CMOS output */ +/* Enables 24-bit LVDS or CMOS output */ # define VR10_INTERFACE_1X24 (1 << 2) -/** Enables 2x18-bit LVDS or CMOS output. */ +/* Enables 2x18-bit LVDS or CMOS output. */ # define VR10_INTERFACE_2X18 (2 << 2) -/** Enables 2x24-bit LVDS output */ +/* Enables 2x24-bit LVDS output */ # define VR10_INTERFACE_2X24 (3 << 2) -/** Mask that defines the depth of the pipeline */ +/* Mask that defines the depth of the pipeline */ # define VR10_INTERFACE_DEPTH_MASK (3 << 2) /* @@ -97,7 +97,7 @@ * Panel power down status */ #define VR30 0x30 -/** Read only bit indicating that the panel is not in a safe poweroff state. */ +/* Read only bit indicating that the panel is not in a safe poweroff state. */ # define VR30_PANEL_ON (1 << 15) #define VR40 0x40 @@ -183,7 +183,7 @@ struct ivch_priv { static void ivch_dump_regs(struct intel_dvo_device *dvo); -/** +/* * Reads a register on the ivch. * * Each of the 256 registers are 16 bits long. 
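/*
 * A minimal sketch of how the VR10 bits defined above decode, using the masks
 * from this header. The readback value is made up for the demo; real values
 * come from the ivch over i2c. Compiles as plain user-space C.
 */
#include <stdio.h>
#include <stdint.h>

#define VR10_LVDS_ENABLE          (1 << 4)
#define VR10_INTERFACE_2X24       (3 << 2)
#define VR10_INTERFACE_DEPTH_MASK (3 << 2)

int main(void)
{
	uint16_t vr10 = VR10_LVDS_ENABLE | VR10_INTERFACE_2X24; /* hypothetical readback */

	printf("LVDS %s, interface depth code %u\n",
	       (vr10 & VR10_LVDS_ENABLE) ? "on" : "off",
	       (unsigned)((vr10 & VR10_INTERFACE_DEPTH_MASK) >> 2));
	return 0;
}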
@@ -230,7 +230,7 @@ static bool ivch_read(struct intel_dvo_device *dvo, int addr, uint16_t *data) return false; } -/** Writes a 16-bit register on the ivch */ +/* Writes a 16-bit register on the ivch */ static bool ivch_write(struct intel_dvo_device *dvo, int addr, uint16_t data) { struct ivch_priv *priv = dvo->dev_priv; @@ -258,7 +258,7 @@ static bool ivch_write(struct intel_dvo_device *dvo, int addr, uint16_t data) return false; } -/** Probes the given bus and slave address for an ivch */ +/* Probes the given bus and slave address for an ivch */ static bool ivch_init(struct intel_dvo_device *dvo, struct i2c_adapter *adapter) { @@ -338,7 +338,7 @@ static void ivch_reset(struct intel_dvo_device *dvo) ivch_write(dvo, backup_addresses[i], priv->reg_backup[i]); } -/** Sets the power state of the panel connected to the ivch */ +/* Sets the power state of the panel connected to the ivch */ static void ivch_dpms(struct intel_dvo_device *dvo, bool enable) { int i; diff --git a/drivers/gpu/drm/i915/gvt/Makefile b/drivers/gpu/drm/i915/gvt/Makefile index 347116faa558..b016dc753db9 100644 --- a/drivers/gpu/drm/i915/gvt/Makefile +++ b/drivers/gpu/drm/i915/gvt/Makefile @@ -3,7 +3,7 @@ GVT_DIR := gvt GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \ interrupt.o gtt.o cfg_space.o opregion.o mmio.o display.o edid.o \ execlist.o scheduler.o sched_policy.o mmio_context.o cmd_parser.o debugfs.o \ - fb_decoder.o dmabuf.o + fb_decoder.o dmabuf.o page_track.o ccflags-y += -I$(src) -I$(src)/$(GVT_DIR) i915-y += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE)) diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 2fb7b34ef561..b555eb26f9ce 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -162,8 +162,8 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, info->size << PAGE_SHIFT); i915_gem_object_init(obj, &intel_vgpu_gem_ops); - obj->base.read_domains = I915_GEM_DOMAIN_GTT; - obj->base.write_domain = 0; + obj->read_domains = I915_GEM_DOMAIN_GTT; + obj->write_domain = 0; if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { unsigned int tiling_mode = 0; unsigned int stride = 0; @@ -459,7 +459,7 @@ int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id) obj = vgpu_create_gem(dev, dmabuf_obj->info); if (obj == NULL) { - gvt_vgpu_err("create gvt gem obj failed:%d\n", vgpu->id); + gvt_vgpu_err("create gvt gem obj failed\n"); ret = -ENOMEM; goto out; } diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 8d5317d0122d..d29281231507 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -38,6 +38,12 @@ #include "i915_pvinfo.h" #include "trace.h" +#if defined(VERBOSE_DEBUG) +#define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args) +#else +#define gvt_vdbg_mm(fmt, args...) 
+#endif + static bool enable_out_of_sync = false; static int preallocated_oos_pages = 8192; @@ -264,7 +270,7 @@ static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index) return readq(addr); } -static void gtt_invalidate(struct drm_i915_private *dev_priv) +static void ggtt_invalidate(struct drm_i915_private *dev_priv) { mmio_hw_access_pre(dev_priv); I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); @@ -331,20 +337,20 @@ static inline int gtt_set_entry64(void *pt, #define GTT_HAW 46 -#define ADDR_1G_MASK (((1UL << (GTT_HAW - 30)) - 1) << 30) -#define ADDR_2M_MASK (((1UL << (GTT_HAW - 21)) - 1) << 21) -#define ADDR_4K_MASK (((1UL << (GTT_HAW - 12)) - 1) << 12) +#define ADDR_1G_MASK GENMASK_ULL(GTT_HAW - 1, 30) +#define ADDR_2M_MASK GENMASK_ULL(GTT_HAW - 1, 21) +#define ADDR_4K_MASK GENMASK_ULL(GTT_HAW - 1, 12) static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e) { unsigned long pfn; if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) - pfn = (e->val64 & ADDR_1G_MASK) >> 12; + pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT; else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) - pfn = (e->val64 & ADDR_2M_MASK) >> 12; + pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT; else - pfn = (e->val64 & ADDR_4K_MASK) >> 12; + pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT; return pfn; } @@ -352,16 +358,16 @@ static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn) { if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) { e->val64 &= ~ADDR_1G_MASK; - pfn &= (ADDR_1G_MASK >> 12); + pfn &= (ADDR_1G_MASK >> PAGE_SHIFT); } else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) { e->val64 &= ~ADDR_2M_MASK; - pfn &= (ADDR_2M_MASK >> 12); + pfn &= (ADDR_2M_MASK >> PAGE_SHIFT); } else { e->val64 &= ~ADDR_4K_MASK; - pfn &= (ADDR_4K_MASK >> 12); + pfn &= (ADDR_4K_MASK >> PAGE_SHIFT); } - e->val64 |= (pfn << 12); + e->val64 |= (pfn << PAGE_SHIFT); } static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e) @@ -371,7 +377,7 @@ static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e) return false; e->type = get_entry_type(e->type); - if (!(e->val64 & BIT(7))) + if (!(e->val64 & _PAGE_PSE)) return false; e->type = get_pse_type(e->type); @@ -389,17 +395,17 @@ static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e) || e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) return (e->val64 != 0); else - return (e->val64 & BIT(0)); + return (e->val64 & _PAGE_PRESENT); } static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e) { - e->val64 &= ~BIT(0); + e->val64 &= ~_PAGE_PRESENT; } static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e) { - e->val64 |= BIT(0); + e->val64 |= _PAGE_PRESENT; } /* @@ -447,58 +453,91 @@ static struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = { .gma_to_pml4_index = gen8_gma_to_pml4_index, }; -static int gtt_entry_p2m(struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *p, - struct intel_gvt_gtt_entry *m) +/* + * MM helpers. + */ +static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index, + bool guest) { - struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; - unsigned long gfn, mfn; + struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; - *m = *p; + GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT); - if (!ops->test_present(p)) - return 0; + entry->type = mm->ppgtt_mm.root_entry_type; + pte_ops->get_entry(guest ? 
mm->ppgtt_mm.guest_pdps : + mm->ppgtt_mm.shadow_pdps, + entry, index, false, 0, mm->vgpu); - gfn = ops->get_pfn(p); + pte_ops->test_pse(entry); +} - mfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gfn); - if (mfn == INTEL_GVT_INVALID_ADDR) { - gvt_vgpu_err("fail to translate gfn: 0x%lx\n", gfn); - return -ENXIO; - } +static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index) +{ + _ppgtt_get_root_entry(mm, entry, index, true); +} - ops->set_pfn(m, mfn); - return 0; +static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index) +{ + _ppgtt_get_root_entry(mm, entry, index, false); } -/* - * MM helpers. - */ -int intel_vgpu_mm_get_entry(struct intel_vgpu_mm *mm, - void *page_table, struct intel_gvt_gtt_entry *e, - unsigned long index) +static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index, + bool guest) { - struct intel_gvt *gvt = mm->vgpu->gvt; - struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; - int ret; + struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; - e->type = mm->page_table_entry_type; + pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps : + mm->ppgtt_mm.shadow_pdps, + entry, index, false, 0, mm->vgpu); +} - ret = ops->get_entry(page_table, e, index, false, 0, mm->vgpu); - if (ret) - return ret; +static inline void ppgtt_set_guest_root_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index) +{ + _ppgtt_set_root_entry(mm, entry, index, true); +} - ops->test_pse(e); - return 0; +static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index) +{ + _ppgtt_set_root_entry(mm, entry, index, false); } -int intel_vgpu_mm_set_entry(struct intel_vgpu_mm *mm, - void *page_table, struct intel_gvt_gtt_entry *e, - unsigned long index) +static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index) { - struct intel_gvt *gvt = mm->vgpu->gvt; - struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; + struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; + + GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); + + entry->type = GTT_TYPE_GGTT_PTE; + pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index, + false, 0, mm->vgpu); +} + +static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index) +{ + struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; - return ops->set_entry(page_table, e, index, false, 0, mm->vgpu); + GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); + + pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index, + false, 0, mm->vgpu); +} + +static void ggtt_set_host_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index) +{ + struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; + + GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); + + pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu); } /* @@ -520,12 +559,15 @@ static inline int ppgtt_spt_get_entry( return -EINVAL; ret = ops->get_entry(page_table, e, index, guest, - spt->guest_page.track.gfn << I915_GTT_PAGE_SHIFT, + spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, spt->vgpu); if (ret) return ret; ops->test_pse(e); + + gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n", + type, e->type, index, e->val64); return 0; } @@ 
-541,18 +583,21 @@ static inline int ppgtt_spt_set_entry( if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n")) return -EINVAL; + gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n", + type, e->type, index, e->val64); + return ops->set_entry(page_table, e, index, guest, - spt->guest_page.track.gfn << I915_GTT_PAGE_SHIFT, + spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, spt->vgpu); } #define ppgtt_get_guest_entry(spt, e, index) \ ppgtt_spt_get_entry(spt, NULL, \ - spt->guest_page_type, e, index, true) + spt->guest_page.type, e, index, true) #define ppgtt_set_guest_entry(spt, e, index) \ ppgtt_spt_set_entry(spt, NULL, \ - spt->guest_page_type, e, index, true) + spt->guest_page.type, e, index, true) #define ppgtt_get_shadow_entry(spt, e, index) \ ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \ @@ -562,159 +607,6 @@ static inline int ppgtt_spt_set_entry( ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \ spt->shadow_page.type, e, index, false) -/** - * intel_vgpu_init_page_track - init a page track data structure - * @vgpu: a vGPU - * @t: a page track data structure - * @gfn: guest memory page frame number - * @handler: the function will be called when target guest memory page has - * been modified. - * - * This function is called when a user wants to prepare a page track data - * structure to track a guest memory page. - * - * Returns: - * Zero on success, negative error code if failed. - */ -int intel_vgpu_init_page_track(struct intel_vgpu *vgpu, - struct intel_vgpu_page_track *t, - unsigned long gfn, - int (*handler)(void *, u64, void *, int), - void *data) -{ - INIT_HLIST_NODE(&t->node); - - t->tracked = false; - t->gfn = gfn; - t->handler = handler; - t->data = data; - - hash_add(vgpu->gtt.tracked_guest_page_hash_table, &t->node, t->gfn); - return 0; -} - -/** - * intel_vgpu_clean_page_track - release a page track data structure - * @vgpu: a vGPU - * @t: a page track data structure - * - * This function is called before a user frees a page track data structure. - */ -void intel_vgpu_clean_page_track(struct intel_vgpu *vgpu, - struct intel_vgpu_page_track *t) -{ - if (!hlist_unhashed(&t->node)) - hash_del(&t->node); - - if (t->tracked) - intel_gvt_hypervisor_disable_page_track(vgpu, t); -} - -/** - * intel_vgpu_find_tracked_page - find a tracked guest page - * @vgpu: a vGPU - * @gfn: guest memory page frame number - * - * This function is called when the emulation layer wants to figure out if a - * trapped GFN is a tracked guest page. - * - * Returns: - * Pointer to page track data structure, NULL if not found. 
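/*
 * The page-track helpers removed in this hunk (and re-homed into the new
 * page_track.c this patch adds to the GVT Makefile) implement a registry:
 * a guest page frame number maps to a handler that runs whenever the
 * write-protected page is written. A minimal user-space model of that idea;
 * the fixed-size table is a stand-in for the kernel hash table and is not
 * GVT-g API.
 */
#include <stdio.h>
#include <stdint.h>

#define MAX_TRACKED 16

typedef int (*track_handler_t)(uint64_t gpa, const void *data, int bytes);

struct page_track {
	uint64_t gfn;
	track_handler_t handler;
	int used;
};

static struct page_track tracked[MAX_TRACKED];

static int register_track(uint64_t gfn, track_handler_t handler)
{
	for (int i = 0; i < MAX_TRACKED; i++) {
		if (!tracked[i].used) {
			tracked[i] = (struct page_track){ gfn, handler, 1 };
			return 0;
		}
	}
	return -1; /* table full */
}

static int handle_guest_write(uint64_t gpa, const void *data, int bytes)
{
	uint64_t gfn = gpa >> 12;

	for (int i = 0; i < MAX_TRACKED; i++)
		if (tracked[i].used && tracked[i].gfn == gfn)
			return tracked[i].handler(gpa, data, bytes);
	return 0; /* not tracked: ordinary memory write */
}

static int ppgtt_handler(uint64_t gpa, const void *data, int bytes)
{
	(void)data;
	printf("ppgtt table touched at gpa %#llx (%d bytes)\n",
	       (unsigned long long)gpa, bytes);
	return 0;
}

int main(void)
{
	uint64_t pte = 0;

	register_track(0x1234, ppgtt_handler);
	handle_guest_write((0x1234ULL << 12) | 0x18, &pte, 8); /* hits the handler */
	return 0;
}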
- */ -struct intel_vgpu_page_track *intel_vgpu_find_tracked_page( - struct intel_vgpu *vgpu, unsigned long gfn) -{ - struct intel_vgpu_page_track *t; - - hash_for_each_possible(vgpu->gtt.tracked_guest_page_hash_table, - t, node, gfn) { - if (t->gfn == gfn) - return t; - } - return NULL; -} - -static int init_guest_page(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *p, - unsigned long gfn, - int (*handler)(void *, u64, void *, int), - void *data) -{ - p->oos_page = NULL; - p->write_cnt = 0; - - return intel_vgpu_init_page_track(vgpu, &p->track, gfn, handler, data); -} - -static int detach_oos_page(struct intel_vgpu *vgpu, - struct intel_vgpu_oos_page *oos_page); - -static void clean_guest_page(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *p) -{ - if (p->oos_page) - detach_oos_page(vgpu, p->oos_page); - - intel_vgpu_clean_page_track(vgpu, &p->track); -} - -static inline int init_shadow_page(struct intel_vgpu *vgpu, - struct intel_vgpu_shadow_page *p, int type, bool hash) -{ - struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev; - dma_addr_t daddr; - - daddr = dma_map_page(kdev, p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL); - if (dma_mapping_error(kdev, daddr)) { - gvt_vgpu_err("fail to map dma addr\n"); - return -EINVAL; - } - - p->vaddr = page_address(p->page); - p->type = type; - - INIT_HLIST_NODE(&p->node); - - p->mfn = daddr >> I915_GTT_PAGE_SHIFT; - if (hash) - hash_add(vgpu->gtt.shadow_page_hash_table, &p->node, p->mfn); - return 0; -} - -static inline void clean_shadow_page(struct intel_vgpu *vgpu, - struct intel_vgpu_shadow_page *p) -{ - struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev; - - dma_unmap_page(kdev, p->mfn << I915_GTT_PAGE_SHIFT, 4096, - PCI_DMA_BIDIRECTIONAL); - - if (!hlist_unhashed(&p->node)) - hash_del(&p->node); -} - -static inline struct intel_vgpu_shadow_page *find_shadow_page( - struct intel_vgpu *vgpu, unsigned long mfn) -{ - struct intel_vgpu_shadow_page *p; - - hash_for_each_possible(vgpu->gtt.shadow_page_hash_table, - p, node, mfn) { - if (p->mfn == mfn) - return p; - } - return NULL; -} - -#define page_track_to_guest_page(ptr) \ - container_of(ptr, struct intel_vgpu_guest_page, track) - -#define guest_page_to_ppgtt_spt(ptr) \ - container_of(ptr, struct intel_vgpu_ppgtt_spt, guest_page) - -#define shadow_page_to_ppgtt_spt(ptr) \ - container_of(ptr, struct intel_vgpu_ppgtt_spt, shadow_page) - static void *alloc_spt(gfp_t gfp_mask) { struct intel_vgpu_ppgtt_spt *spt; @@ -737,63 +629,96 @@ static void free_spt(struct intel_vgpu_ppgtt_spt *spt) kfree(spt); } -static void ppgtt_free_shadow_page(struct intel_vgpu_ppgtt_spt *spt) +static int detach_oos_page(struct intel_vgpu *vgpu, + struct intel_vgpu_oos_page *oos_page); + +static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt) { - trace_spt_free(spt->vgpu->id, spt, spt->shadow_page.type); + struct device *kdev = &spt->vgpu->gvt->dev_priv->drm.pdev->dev; - clean_shadow_page(spt->vgpu, &spt->shadow_page); - clean_guest_page(spt->vgpu, &spt->guest_page); - list_del_init(&spt->post_shadow_list); + trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type); + + dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096, + PCI_DMA_BIDIRECTIONAL); + + radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn); + + if (spt->guest_page.oos_page) + detach_oos_page(spt->vgpu, spt->guest_page.oos_page); + + intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn); + list_del_init(&spt->post_shadow_list); free_spt(spt); } -static void 
ppgtt_free_all_shadow_page(struct intel_vgpu *vgpu) +static void ppgtt_free_all_spt(struct intel_vgpu *vgpu) { - struct hlist_node *n; - struct intel_vgpu_shadow_page *sp; - int i; + struct intel_vgpu_ppgtt_spt *spt; + struct radix_tree_iter iter; + void **slot; - hash_for_each_safe(vgpu->gtt.shadow_page_hash_table, i, n, sp, node) - ppgtt_free_shadow_page(shadow_page_to_ppgtt_spt(sp)); + radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) { + spt = radix_tree_deref_slot(slot); + ppgtt_free_spt(spt); + } } static int ppgtt_handle_guest_write_page_table_bytes( - struct intel_vgpu_guest_page *gpt, + struct intel_vgpu_ppgtt_spt *spt, u64 pa, void *p_data, int bytes); -static int ppgtt_write_protection_handler(void *data, u64 pa, - void *p_data, int bytes) +static int ppgtt_write_protection_handler( + struct intel_vgpu_page_track *page_track, + u64 gpa, void *data, int bytes) { - struct intel_vgpu_page_track *t = data; - struct intel_vgpu_guest_page *p = page_track_to_guest_page(t); + struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data; + int ret; if (bytes != 4 && bytes != 8) return -EINVAL; - if (!t->tracked) - return -EINVAL; - - ret = ppgtt_handle_guest_write_page_table_bytes(p, - pa, p_data, bytes); + ret = ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes); if (ret) return ret; return ret; } -static int reclaim_one_mm(struct intel_gvt *gvt); +/* Find a spt by guest gfn. */ +static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn( + struct intel_vgpu *vgpu, unsigned long gfn) +{ + struct intel_vgpu_page_track *track; + + track = intel_vgpu_find_page_track(vgpu, gfn); + if (track && track->handler == ppgtt_write_protection_handler) + return track->priv_data; + + return NULL; +} + +/* Find the spt by shadow page mfn. */ +static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn( + struct intel_vgpu *vgpu, unsigned long mfn) +{ + return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn); +} -static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_shadow_page( +static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt); + +static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt( struct intel_vgpu *vgpu, int type, unsigned long gfn) { + struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev; struct intel_vgpu_ppgtt_spt *spt = NULL; + dma_addr_t daddr; int ret; retry: spt = alloc_spt(GFP_KERNEL | __GFP_ZERO); if (!spt) { - if (reclaim_one_mm(vgpu->gvt)) + if (reclaim_one_ppgtt_mm(vgpu->gvt)) goto retry; gvt_vgpu_err("fail to allocate ppgtt shadow page\n"); @@ -801,44 +726,48 @@ retry: } spt->vgpu = vgpu; - spt->guest_page_type = type; atomic_set(&spt->refcount, 1); INIT_LIST_HEAD(&spt->post_shadow_list); /* - * TODO: guest page type may be different with shadow page type, - * when we support PSE page in future. + * Init shadow_page. */ - ret = init_shadow_page(vgpu, &spt->shadow_page, type, true); - if (ret) { - gvt_vgpu_err("fail to initialize shadow page for spt\n"); - goto err; + spt->shadow_page.type = type; + daddr = dma_map_page(kdev, spt->shadow_page.page, + 0, 4096, PCI_DMA_BIDIRECTIONAL); + if (dma_mapping_error(kdev, daddr)) { + gvt_vgpu_err("fail to map dma addr\n"); + ret = -EINVAL; + goto err_free_spt; } + spt->shadow_page.vaddr = page_address(spt->shadow_page.page); + spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT; - ret = init_guest_page(vgpu, &spt->guest_page, - gfn, ppgtt_write_protection_handler, NULL); - if (ret) { - gvt_vgpu_err("fail to initialize guest page for spt\n"); - goto err; - } + /* + * Init guest_page. 
+ */ + spt->guest_page.type = type; + spt->guest_page.gfn = gfn; - trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn); - return spt; -err: - ppgtt_free_shadow_page(spt); - return ERR_PTR(ret); -} + ret = intel_vgpu_register_page_track(vgpu, spt->guest_page.gfn, + ppgtt_write_protection_handler, spt); + if (ret) + goto err_unmap_dma; -static struct intel_vgpu_ppgtt_spt *ppgtt_find_shadow_page( - struct intel_vgpu *vgpu, unsigned long mfn) -{ - struct intel_vgpu_shadow_page *p = find_shadow_page(vgpu, mfn); + ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt); + if (ret) + goto err_unreg_page_track; - if (p) - return shadow_page_to_ppgtt_spt(p); + trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn); + return spt; - gvt_vgpu_err("fail to find ppgtt shadow page: 0x%lx\n", mfn); - return NULL; +err_unreg_page_track: + intel_vgpu_unregister_page_track(vgpu, spt->guest_page.gfn); +err_unmap_dma: + dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); +err_free_spt: + free_spt(spt); + return ERR_PTR(ret); } #define pt_entry_size_shift(spt) \ @@ -857,7 +786,7 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_find_shadow_page( if (!ppgtt_get_shadow_entry(spt, e, i) && \ spt->vgpu->gvt->gtt.pte_ops->test_present(e)) -static void ppgtt_get_shadow_page(struct intel_vgpu_ppgtt_spt *spt) +static void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt) { int v = atomic_read(&spt->refcount); @@ -866,17 +795,16 @@ static void ppgtt_get_shadow_page(struct intel_vgpu_ppgtt_spt *spt) atomic_inc(&spt->refcount); } -static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt); +static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt); -static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu, +static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *e) { struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_vgpu_ppgtt_spt *s; intel_gvt_gtt_type_t cur_pt_type; - if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(e->type)))) - return -EINVAL; + GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type))); if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY && e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { @@ -885,16 +813,33 @@ static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu, vgpu->gtt.scratch_pt[cur_pt_type].page_mfn) return 0; } - s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e)); + s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e)); if (!s) { gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n", ops->get_pfn(e)); return -ENXIO; } - return ppgtt_invalidate_shadow_page(s); + return ppgtt_invalidate_spt(s); +} + +static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt, + struct intel_gvt_gtt_entry *entry) +{ + struct intel_vgpu *vgpu = spt->vgpu; + struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; + unsigned long pfn; + int type; + + pfn = ops->get_pfn(entry); + type = spt->shadow_page.type; + + if (pfn == vgpu->gtt.scratch_pt[type].page_mfn) + return; + + intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT); } -static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) +static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt) { struct intel_vgpu *vgpu = spt->vgpu; struct intel_gvt_gtt_entry e; @@ -903,30 +848,40 @@ static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) int v = atomic_read(&spt->refcount); trace_spt_change(spt->vgpu->id, "die", spt, - 
spt->guest_page.track.gfn, spt->shadow_page.type); + spt->guest_page.gfn, spt->shadow_page.type); trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1)); if (atomic_dec_return(&spt->refcount) > 0) return 0; - if (gtt_type_is_pte_pt(spt->shadow_page.type)) - goto release; - for_each_present_shadow_entry(spt, &e, index) { - if (!gtt_type_is_pt(get_next_pt_type(e.type))) { - gvt_vgpu_err("GVT doesn't support pse bit for now\n"); - return -EINVAL; + switch (e.type) { + case GTT_TYPE_PPGTT_PTE_4K_ENTRY: + gvt_vdbg_mm("invalidate 4K entry\n"); + ppgtt_invalidate_pte(spt, &e); + break; + case GTT_TYPE_PPGTT_PTE_2M_ENTRY: + case GTT_TYPE_PPGTT_PTE_1G_ENTRY: + WARN(1, "GVT doesn't support 2M/1GB page\n"); + continue; + case GTT_TYPE_PPGTT_PML4_ENTRY: + case GTT_TYPE_PPGTT_PDP_ENTRY: + case GTT_TYPE_PPGTT_PDE_ENTRY: + gvt_vdbg_mm("invalidate PMUL4/PDP/PDE entry\n"); + ret = ppgtt_invalidate_spt_by_shadow_entry( + spt->vgpu, &e); + if (ret) + goto fail; + break; + default: + GEM_BUG_ON(1); } - ret = ppgtt_invalidate_shadow_page_by_shadow_entry( - spt->vgpu, &e); - if (ret) - goto fail; } -release: + trace_spt_change(spt->vgpu->id, "release", spt, - spt->guest_page.track.gfn, spt->shadow_page.type); - ppgtt_free_shadow_page(spt); + spt->guest_page.gfn, spt->shadow_page.type); + ppgtt_free_spt(spt); return 0; fail: gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n", @@ -934,52 +889,44 @@ fail: return ret; } -static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt); +static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt); -static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry( +static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry( struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we) { struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; - struct intel_vgpu_ppgtt_spt *s = NULL; - struct intel_vgpu_guest_page *g; - struct intel_vgpu_page_track *t; + struct intel_vgpu_ppgtt_spt *spt = NULL; int ret; - if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(we->type)))) { - ret = -EINVAL; - goto fail; - } + GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type))); - t = intel_vgpu_find_tracked_page(vgpu, ops->get_pfn(we)); - if (t) { - g = page_track_to_guest_page(t); - s = guest_page_to_ppgtt_spt(g); - ppgtt_get_shadow_page(s); - } else { + spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we)); + if (spt) + ppgtt_get_spt(spt); + else { int type = get_next_pt_type(we->type); - s = ppgtt_alloc_shadow_page(vgpu, type, ops->get_pfn(we)); - if (IS_ERR(s)) { - ret = PTR_ERR(s); + spt = ppgtt_alloc_spt(vgpu, type, ops->get_pfn(we)); + if (IS_ERR(spt)) { + ret = PTR_ERR(spt); goto fail; } - ret = intel_gvt_hypervisor_enable_page_track(vgpu, - &s->guest_page.track); + ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn); if (ret) goto fail; - ret = ppgtt_populate_shadow_page(s); + ret = ppgtt_populate_spt(spt); if (ret) goto fail; - trace_spt_change(vgpu->id, "new", s, s->guest_page.track.gfn, - s->shadow_page.type); + trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn, + spt->shadow_page.type); } - return s; + return spt; fail: gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", - s, we->val64, we->type); + spt, we->val64, we->type); return ERR_PTR(ret); } @@ -994,7 +941,44 @@ static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se, ops->set_pfn(se, s->shadow_page.mfn); } -static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) +static int 
ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu, + struct intel_vgpu_ppgtt_spt *spt, unsigned long index, + struct intel_gvt_gtt_entry *ge) +{ + struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; + struct intel_gvt_gtt_entry se = *ge; + unsigned long gfn; + dma_addr_t dma_addr; + int ret; + + if (!pte_ops->test_present(ge)) + return 0; + + gfn = pte_ops->get_pfn(ge); + + switch (ge->type) { + case GTT_TYPE_PPGTT_PTE_4K_ENTRY: + gvt_vdbg_mm("shadow 4K gtt entry\n"); + break; + case GTT_TYPE_PPGTT_PTE_2M_ENTRY: + case GTT_TYPE_PPGTT_PTE_1G_ENTRY: + gvt_vgpu_err("GVT doesn't support 2M/1GB entry\n"); + return -EINVAL; + default: + GEM_BUG_ON(1); + }; + + /* direct shadow */ + ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, &dma_addr); + if (ret) + return -ENXIO; + + pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT); + ppgtt_set_shadow_entry(spt, &se, index); + return 0; +} + +static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt) { struct intel_vgpu *vgpu = spt->vgpu; struct intel_gvt *gvt = vgpu->gvt; @@ -1005,34 +989,30 @@ static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) int ret; trace_spt_change(spt->vgpu->id, "born", spt, - spt->guest_page.track.gfn, spt->shadow_page.type); + spt->guest_page.gfn, spt->shadow_page.type); - if (gtt_type_is_pte_pt(spt->shadow_page.type)) { - for_each_present_guest_entry(spt, &ge, i) { + for_each_present_guest_entry(spt, &ge, i) { + if (gtt_type_is_pt(get_next_pt_type(ge.type))) { + s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge); + if (IS_ERR(s)) { + ret = PTR_ERR(s); + goto fail; + } + ppgtt_get_shadow_entry(spt, &se, i); + ppgtt_generate_shadow_entry(&se, s, &ge); + ppgtt_set_shadow_entry(spt, &se, i); + } else { gfn = ops->get_pfn(&ge); - if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn) || - gtt_entry_p2m(vgpu, &ge, &se)) + if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) { ops->set_pfn(&se, gvt->gtt.scratch_mfn); - ppgtt_set_shadow_entry(spt, &se, i); - } - return 0; - } - - for_each_present_guest_entry(spt, &ge, i) { - if (!gtt_type_is_pt(get_next_pt_type(ge.type))) { - gvt_vgpu_err("GVT doesn't support pse bit now\n"); - ret = -EINVAL; - goto fail; - } + ppgtt_set_shadow_entry(spt, &se, i); + continue; + } - s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge); - if (IS_ERR(s)) { - ret = PTR_ERR(s); - goto fail; + ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge); + if (ret) + goto fail; } - ppgtt_get_shadow_entry(spt, &se, i); - ppgtt_generate_shadow_entry(&se, s, &ge); - ppgtt_set_shadow_entry(spt, &se, i); } return 0; fail: @@ -1041,36 +1021,40 @@ fail: return ret; } -static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt, +static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt, struct intel_gvt_gtt_entry *se, unsigned long index) { - struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt); - struct intel_vgpu_shadow_page *sp = &spt->shadow_page; struct intel_vgpu *vgpu = spt->vgpu; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; int ret; - trace_gpt_change(spt->vgpu->id, "remove", spt, sp->type, se->val64, - index); + trace_spt_guest_change(spt->vgpu->id, "remove", spt, + spt->shadow_page.type, se->val64, index); + + gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n", + se->type, index, se->val64); if (!ops->test_present(se)) return 0; - if (ops->get_pfn(se) == vgpu->gtt.scratch_pt[sp->type].page_mfn) + if (ops->get_pfn(se) == + vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn) return 0; if 
(gtt_type_is_pt(get_next_pt_type(se->type))) { struct intel_vgpu_ppgtt_spt *s = - ppgtt_find_shadow_page(vgpu, ops->get_pfn(se)); + intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se)); if (!s) { gvt_vgpu_err("fail to find guest page\n"); ret = -ENXIO; goto fail; } - ret = ppgtt_invalidate_shadow_page(s); + ret = ppgtt_invalidate_spt(s); if (ret) goto fail; - } + } else + ppgtt_invalidate_pte(spt, se); + return 0; fail: gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", @@ -1078,21 +1062,22 @@ fail: return ret; } -static int ppgtt_handle_guest_entry_add(struct intel_vgpu_guest_page *gpt, +static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt, struct intel_gvt_gtt_entry *we, unsigned long index) { - struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt); - struct intel_vgpu_shadow_page *sp = &spt->shadow_page; struct intel_vgpu *vgpu = spt->vgpu; struct intel_gvt_gtt_entry m; struct intel_vgpu_ppgtt_spt *s; int ret; - trace_gpt_change(spt->vgpu->id, "add", spt, sp->type, - we->val64, index); + trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type, + we->val64, index); + + gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n", + we->type, index, we->val64); if (gtt_type_is_pt(get_next_pt_type(we->type))) { - s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, we); + s = ppgtt_populate_spt_by_guest_entry(vgpu, we); if (IS_ERR(s)) { ret = PTR_ERR(s); goto fail; @@ -1101,10 +1086,9 @@ static int ppgtt_handle_guest_entry_add(struct intel_vgpu_guest_page *gpt, ppgtt_generate_shadow_entry(&m, s, we); ppgtt_set_shadow_entry(spt, &m, index); } else { - ret = gtt_entry_p2m(vgpu, we, &m); + ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we); if (ret) goto fail; - ppgtt_set_shadow_entry(spt, &m, index); } return 0; fail: @@ -1119,41 +1103,39 @@ static int sync_oos_page(struct intel_vgpu *vgpu, const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; struct intel_gvt *gvt = vgpu->gvt; struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; - struct intel_vgpu_ppgtt_spt *spt = - guest_page_to_ppgtt_spt(oos_page->guest_page); - struct intel_gvt_gtt_entry old, new, m; + struct intel_vgpu_ppgtt_spt *spt = oos_page->spt; + struct intel_gvt_gtt_entry old, new; int index; int ret; trace_oos_change(vgpu->id, "sync", oos_page->id, - oos_page->guest_page, spt->guest_page_type); + spt, spt->guest_page.type); - old.type = new.type = get_entry_type(spt->guest_page_type); + old.type = new.type = get_entry_type(spt->guest_page.type); old.val64 = new.val64 = 0; for (index = 0; index < (I915_GTT_PAGE_SIZE >> info->gtt_entry_size_shift); index++) { ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu); ops->get_entry(NULL, &new, index, true, - oos_page->guest_page->track.gfn << PAGE_SHIFT, vgpu); + spt->guest_page.gfn << PAGE_SHIFT, vgpu); if (old.val64 == new.val64 && !test_and_clear_bit(index, spt->post_shadow_bitmap)) continue; trace_oos_sync(vgpu->id, oos_page->id, - oos_page->guest_page, spt->guest_page_type, + spt, spt->guest_page.type, new.val64, index); - ret = gtt_entry_p2m(vgpu, &new, &m); + ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new); if (ret) return ret; ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu); - ppgtt_set_shadow_entry(spt, &m, index); } - oos_page->guest_page->write_cnt = 0; + spt->guest_page.write_cnt = 0; list_del_init(&spt->post_shadow_list); return 0; } @@ -1162,15 +1144,14 @@ static int detach_oos_page(struct intel_vgpu *vgpu, struct intel_vgpu_oos_page *oos_page) { struct intel_gvt 
*gvt = vgpu->gvt; - struct intel_vgpu_ppgtt_spt *spt = - guest_page_to_ppgtt_spt(oos_page->guest_page); + struct intel_vgpu_ppgtt_spt *spt = oos_page->spt; trace_oos_change(vgpu->id, "detach", oos_page->id, - oos_page->guest_page, spt->guest_page_type); + spt, spt->guest_page.type); - oos_page->guest_page->write_cnt = 0; - oos_page->guest_page->oos_page = NULL; - oos_page->guest_page = NULL; + spt->guest_page.write_cnt = 0; + spt->guest_page.oos_page = NULL; + oos_page->spt = NULL; list_del_init(&oos_page->vm_list); list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head); @@ -1178,51 +1159,49 @@ static int detach_oos_page(struct intel_vgpu *vgpu, return 0; } -static int attach_oos_page(struct intel_vgpu *vgpu, - struct intel_vgpu_oos_page *oos_page, - struct intel_vgpu_guest_page *gpt) +static int attach_oos_page(struct intel_vgpu_oos_page *oos_page, + struct intel_vgpu_ppgtt_spt *spt) { - struct intel_gvt *gvt = vgpu->gvt; + struct intel_gvt *gvt = spt->vgpu->gvt; int ret; - ret = intel_gvt_hypervisor_read_gpa(vgpu, - gpt->track.gfn << I915_GTT_PAGE_SHIFT, + ret = intel_gvt_hypervisor_read_gpa(spt->vgpu, + spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, oos_page->mem, I915_GTT_PAGE_SIZE); if (ret) return ret; - oos_page->guest_page = gpt; - gpt->oos_page = oos_page; + oos_page->spt = spt; + spt->guest_page.oos_page = oos_page; list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head); - trace_oos_change(vgpu->id, "attach", gpt->oos_page->id, - gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type); + trace_oos_change(spt->vgpu->id, "attach", oos_page->id, + spt, spt->guest_page.type); return 0; } -static int ppgtt_set_guest_page_sync(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *gpt) +static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt) { + struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; int ret; - ret = intel_gvt_hypervisor_enable_page_track(vgpu, &gpt->track); + ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn); if (ret) return ret; - trace_oos_change(vgpu->id, "set page sync", gpt->oos_page->id, - gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type); + trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id, + spt, spt->guest_page.type); - list_del_init(&gpt->oos_page->vm_list); - return sync_oos_page(vgpu, gpt->oos_page); + list_del_init(&oos_page->vm_list); + return sync_oos_page(spt->vgpu, oos_page); } -static int ppgtt_allocate_oos_page(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *gpt) +static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt) { - struct intel_gvt *gvt = vgpu->gvt; + struct intel_gvt *gvt = spt->vgpu->gvt; struct intel_gvt_gtt *gtt = &gvt->gtt; - struct intel_vgpu_oos_page *oos_page = gpt->oos_page; + struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; int ret; WARN(oos_page, "shadow PPGTT page has already has a oos page\n"); @@ -1230,31 +1209,30 @@ static int ppgtt_allocate_oos_page(struct intel_vgpu *vgpu, if (list_empty(&gtt->oos_page_free_list_head)) { oos_page = container_of(gtt->oos_page_use_list_head.next, struct intel_vgpu_oos_page, list); - ret = ppgtt_set_guest_page_sync(vgpu, oos_page->guest_page); + ret = ppgtt_set_guest_page_sync(oos_page->spt); if (ret) return ret; - ret = detach_oos_page(vgpu, oos_page); + ret = detach_oos_page(spt->vgpu, oos_page); if (ret) return ret; } else oos_page = container_of(gtt->oos_page_free_list_head.next, struct intel_vgpu_oos_page, list); - return attach_oos_page(vgpu, oos_page, gpt); + return
attach_oos_page(oos_page, spt); } -static int ppgtt_set_guest_page_oos(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *gpt) +static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt) { - struct intel_vgpu_oos_page *oos_page = gpt->oos_page; + struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; if (WARN(!oos_page, "shadow PPGTT page should have a oos page\n")) return -EINVAL; - trace_oos_change(vgpu->id, "set page out of sync", gpt->oos_page->id, - gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type); + trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id, + spt, spt->guest_page.type); - list_add_tail(&oos_page->vm_list, &vgpu->gtt.oos_page_list_head); - return intel_gvt_hypervisor_disable_page_track(vgpu, &gpt->track); + list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head); + return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn); } /** @@ -1279,7 +1257,7 @@ int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu) list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) { oos_page = container_of(pos, struct intel_vgpu_oos_page, vm_list); - ret = ppgtt_set_guest_page_sync(vgpu, oos_page->guest_page); + ret = ppgtt_set_guest_page_sync(oos_page->spt); if (ret) return ret; } @@ -1290,17 +1268,15 @@ int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu) * The heart of PPGTT shadow page table. */ static int ppgtt_handle_guest_write_page_table( - struct intel_vgpu_guest_page *gpt, + struct intel_vgpu_ppgtt_spt *spt, struct intel_gvt_gtt_entry *we, unsigned long index) { - struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt); struct intel_vgpu *vgpu = spt->vgpu; int type = spt->shadow_page.type; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; - struct intel_gvt_gtt_entry se; - - int ret; + struct intel_gvt_gtt_entry old_se; int new_present; + int ret; new_present = ops->test_present(we); @@ -1309,21 +1285,21 @@ static int ppgtt_handle_guest_write_page_table( * guarantee the ppgtt table is validated during the window between * adding and removal. 
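/*
 * ppgtt_handle_guest_write_page_table() above orders a guest PTE update as
 * "shadow the new mapping first, then remove the old one, and fall back to
 * the scratch page when the entry was cleared", so the shadow table stays
 * valid in the window between add and removal. A condensed user-space model
 * of that ordering; the entry encoding and SCRATCH_PFN are stand-ins, not
 * the GVT-g types.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define PAGE_PRESENT 0x1ULL
#define SCRATCH_PFN  0xdeadULL

static bool test_present(uint64_t e) { return e & PAGE_PRESENT; }

static void release_mapping(uint64_t e)
{
	if (test_present(e))
		printf("release old pfn %#llx\n", (unsigned long long)(e >> 12));
}

static void handle_write(uint64_t *shadow, uint64_t guest_we)
{
	uint64_t old_se = *shadow;

	if (test_present(guest_we))
		*shadow = guest_we;        /* add the new mapping first ... */

	release_mapping(old_se);           /* ... then drop the old one */

	if (!test_present(guest_we))       /* cleared: point at scratch */
		*shadow = (SCRATCH_PFN << 12) | PAGE_PRESENT;
}

int main(void)
{
	uint64_t shadow = (0x1000ULL << 12) | PAGE_PRESENT;

	handle_write(&shadow, 0);          /* guest zeroes its PTE */
	printf("shadow entry now %#llx\n", (unsigned long long)shadow);
	return 0;
}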
*/ - ppgtt_get_shadow_entry(spt, &se, index); + ppgtt_get_shadow_entry(spt, &old_se, index); if (new_present) { - ret = ppgtt_handle_guest_entry_add(gpt, we, index); + ret = ppgtt_handle_guest_entry_add(spt, we, index); if (ret) goto fail; } - ret = ppgtt_handle_guest_entry_removal(gpt, &se, index); + ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index); if (ret) goto fail; if (!new_present) { - ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn); - ppgtt_set_shadow_entry(spt, &se, index); + ops->set_pfn(&old_se, vgpu->gtt.scratch_pt[type].page_mfn); + ppgtt_set_shadow_entry(spt, &old_se, index); } return 0; @@ -1333,12 +1309,13 @@ fail: return ret; } -static inline bool can_do_out_of_sync(struct intel_vgpu_guest_page *gpt) + + +static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt) { return enable_out_of_sync - && gtt_type_is_pte_pt( - guest_page_to_ppgtt_spt(gpt)->guest_page_type) - && gpt->write_cnt >= 2; + && gtt_type_is_pte_pt(spt->guest_page.type) + && spt->guest_page.write_cnt >= 2; } static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt, @@ -1378,8 +1355,8 @@ int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu) GTT_ENTRY_NUM_IN_ONE_PAGE) { ppgtt_get_guest_entry(spt, &ge, index); - ret = ppgtt_handle_guest_write_page_table( - &spt->guest_page, &ge, index); + ret = ppgtt_handle_guest_write_page_table(spt, + &ge, index); if (ret) return ret; clear_bit(index, spt->post_shadow_bitmap); @@ -1390,10 +1367,9 @@ int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu) } static int ppgtt_handle_guest_write_page_table_bytes( - struct intel_vgpu_guest_page *gpt, + struct intel_vgpu_ppgtt_spt *spt, u64 pa, void *p_data, int bytes) { - struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt); struct intel_vgpu *vgpu = spt->vgpu; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; @@ -1408,7 +1384,7 @@ static int ppgtt_handle_guest_write_page_table_bytes( ops->test_pse(&we); if (bytes == info->gtt_entry_size) { - ret = ppgtt_handle_guest_write_page_table(gpt, &we, index); + ret = ppgtt_handle_guest_write_page_table(spt, &we, index); if (ret) return ret; } else { @@ -1416,7 +1392,7 @@ static int ppgtt_handle_guest_write_page_table_bytes( int type = spt->shadow_page.type; ppgtt_get_shadow_entry(spt, &se, index); - ret = ppgtt_handle_guest_entry_removal(gpt, &se, index); + ret = ppgtt_handle_guest_entry_removal(spt, &se, index); if (ret) return ret; ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn); @@ -1428,128 +1404,54 @@ static int ppgtt_handle_guest_write_page_table_bytes( if (!enable_out_of_sync) return 0; - gpt->write_cnt++; + spt->guest_page.write_cnt++; - if (gpt->oos_page) - ops->set_entry(gpt->oos_page->mem, &we, index, + if (spt->guest_page.oos_page) + ops->set_entry(spt->guest_page.oos_page->mem, &we, index, false, 0, vgpu); - if (can_do_out_of_sync(gpt)) { - if (!gpt->oos_page) - ppgtt_allocate_oos_page(vgpu, gpt); + if (can_do_out_of_sync(spt)) { + if (!spt->guest_page.oos_page) + ppgtt_allocate_oos_page(spt); - ret = ppgtt_set_guest_page_oos(vgpu, gpt); + ret = ppgtt_set_guest_page_oos(spt); if (ret < 0) return ret; } return 0; } -/* - * mm page table allocation policy for bdw+ - * - for ggtt, only virtual page table will be allocated. - * - for ppgtt, dedicated virtual/shadow page table will be allocated. 
- */ -static int gen8_mm_alloc_page_table(struct intel_vgpu_mm *mm) -{ - struct intel_vgpu *vgpu = mm->vgpu; - struct intel_gvt *gvt = vgpu->gvt; - const struct intel_gvt_device_info *info = &gvt->device_info; - void *mem; - - if (mm->type == INTEL_GVT_MM_PPGTT) { - mm->page_table_entry_cnt = 4; - mm->page_table_entry_size = mm->page_table_entry_cnt * - info->gtt_entry_size; - mem = kzalloc(mm->has_shadow_page_table ? - mm->page_table_entry_size * 2 - : mm->page_table_entry_size, GFP_KERNEL); - if (!mem) - return -ENOMEM; - mm->virtual_page_table = mem; - if (!mm->has_shadow_page_table) - return 0; - mm->shadow_page_table = mem + mm->page_table_entry_size; - } else if (mm->type == INTEL_GVT_MM_GGTT) { - mm->page_table_entry_cnt = - (gvt_ggtt_gm_sz(gvt) >> I915_GTT_PAGE_SHIFT); - mm->page_table_entry_size = mm->page_table_entry_cnt * - info->gtt_entry_size; - mem = vzalloc(mm->page_table_entry_size); - if (!mem) - return -ENOMEM; - mm->virtual_page_table = mem; - } - return 0; -} - -static void gen8_mm_free_page_table(struct intel_vgpu_mm *mm) -{ - if (mm->type == INTEL_GVT_MM_PPGTT) { - kfree(mm->virtual_page_table); - } else if (mm->type == INTEL_GVT_MM_GGTT) { - if (mm->virtual_page_table) - vfree(mm->virtual_page_table); - } - mm->virtual_page_table = mm->shadow_page_table = NULL; -} - -static void invalidate_mm(struct intel_vgpu_mm *mm) +static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm) { struct intel_vgpu *vgpu = mm->vgpu; struct intel_gvt *gvt = vgpu->gvt; struct intel_gvt_gtt *gtt = &gvt->gtt; struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops; struct intel_gvt_gtt_entry se; - int i; + int index; - if (WARN_ON(!mm->has_shadow_page_table || !mm->shadowed)) + if (!mm->ppgtt_mm.shadowed) return; - for (i = 0; i < mm->page_table_entry_cnt; i++) { - ppgtt_get_shadow_root_entry(mm, &se, i); + for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) { + ppgtt_get_shadow_root_entry(mm, &se, index); + if (!ops->test_present(&se)) continue; - ppgtt_invalidate_shadow_page_by_shadow_entry( - vgpu, &se); + + ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se); se.val64 = 0; - ppgtt_set_shadow_root_entry(mm, &se, i); + ppgtt_set_shadow_root_entry(mm, &se, index); - trace_gpt_change(vgpu->id, "destroy root pointer", - NULL, se.type, se.val64, i); + trace_spt_guest_change(vgpu->id, "destroy root pointer", + NULL, se.type, se.val64, index); } - mm->shadowed = false; -} -/** - * intel_vgpu_destroy_mm - destroy a mm object - * @mm: a kref object - * - * This function is used to destroy a mm object for vGPU - * - */ -void intel_vgpu_destroy_mm(struct kref *mm_ref) -{ - struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref); - struct intel_vgpu *vgpu = mm->vgpu; - struct intel_gvt *gvt = vgpu->gvt; - struct intel_gvt_gtt *gtt = &gvt->gtt; - - if (!mm->initialized) - goto out; - - list_del(&mm->list); - list_del(&mm->lru_list); - - if (mm->has_shadow_page_table) - invalidate_mm(mm); - - gtt->mm_free_page_table(mm); -out: - kfree(mm); + mm->ppgtt_mm.shadowed = false; } -static int shadow_mm(struct intel_vgpu_mm *mm) + +static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm) { struct intel_vgpu *vgpu = mm->vgpu; struct intel_gvt *gvt = vgpu->gvt; @@ -1557,119 +1459,155 @@ static int shadow_mm(struct intel_vgpu_mm *mm) struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops; struct intel_vgpu_ppgtt_spt *spt; struct intel_gvt_gtt_entry ge, se; - int i; - int ret; + int index, ret; - if (WARN_ON(!mm->has_shadow_page_table || mm->shadowed)) + if (mm->ppgtt_mm.shadowed) return 0; - mm->shadowed 
= true; + mm->ppgtt_mm.shadowed = true; + + for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) { + ppgtt_get_guest_root_entry(mm, &ge, index); - for (i = 0; i < mm->page_table_entry_cnt; i++) { - ppgtt_get_guest_root_entry(mm, &ge, i); if (!ops->test_present(&ge)) continue; - trace_gpt_change(vgpu->id, __func__, NULL, - ge.type, ge.val64, i); + trace_spt_guest_change(vgpu->id, __func__, NULL, + ge.type, ge.val64, index); - spt = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge); + spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge); if (IS_ERR(spt)) { gvt_vgpu_err("fail to populate guest root pointer\n"); ret = PTR_ERR(spt); goto fail; } ppgtt_generate_shadow_entry(&se, spt, &ge); - ppgtt_set_shadow_root_entry(mm, &se, i); + ppgtt_set_shadow_root_entry(mm, &se, index); - trace_gpt_change(vgpu->id, "populate root pointer", - NULL, se.type, se.val64, i); + trace_spt_guest_change(vgpu->id, "populate root pointer", + NULL, se.type, se.val64, index); } + return 0; fail: - invalidate_mm(mm); + invalidate_ppgtt_mm(mm); return ret; } +static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu) +{ + struct intel_vgpu_mm *mm; + + mm = kzalloc(sizeof(*mm), GFP_KERNEL); + if (!mm) + return NULL; + + mm->vgpu = vgpu; + kref_init(&mm->ref); + atomic_set(&mm->pincount, 0); + + return mm; +} + +static void vgpu_free_mm(struct intel_vgpu_mm *mm) +{ + kfree(mm); +} + /** - * intel_vgpu_create_mm - create a mm object for a vGPU + * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU * @vgpu: a vGPU - * @mm_type: mm object type, should be PPGTT or GGTT - * @virtual_page_table: page table root pointers. Could be NULL if user wants - * to populate shadow later. - * @page_table_level: describe the page table level of the mm object - * @pde_base_index: pde root pointer base in GGTT MMIO. + * @root_entry_type: ppgtt root entry type + * @pdps: guest pdps. * - * This function is used to create a mm object for a vGPU. + * This function is used to create a ppgtt mm object for a vGPU. * * Returns: * Zero on success, negative error code in pointer if failed. 
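/*
 * The mm objects reworked below are reference-counted: vgpu_alloc_mm() does
 * kref_init(), and _intel_vgpu_mm_release() runs only when the last reference
 * is put. A user-space sketch of the same get/put pattern, with a C11 atomic
 * counter as a stand-in for struct kref.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdatomic.h>

struct mm {
	atomic_int ref;
};

static struct mm *mm_create(void)
{
	struct mm *mm = calloc(1, sizeof(*mm));

	if (mm)
		atomic_store(&mm->ref, 1); /* like kref_init() */
	return mm;
}

static void mm_get(struct mm *mm) { atomic_fetch_add(&mm->ref, 1); }

static void mm_put(struct mm *mm)
{
	if (atomic_fetch_sub(&mm->ref, 1) == 1) { /* last ref: release */
		printf("releasing mm\n");
		free(mm);
	}
}

int main(void)
{
	struct mm *mm = mm_create();

	mm_get(mm);  /* e.g. pinned for a workload */
	mm_put(mm);
	mm_put(mm);  /* final put triggers the release */
	return 0;
}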
*/ -struct intel_vgpu_mm *intel_vgpu_create_mm(struct intel_vgpu *vgpu, - int mm_type, void *virtual_page_table, int page_table_level, - u32 pde_base_index) +struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu, + intel_gvt_gtt_type_t root_entry_type, u64 pdps[]) { struct intel_gvt *gvt = vgpu->gvt; - struct intel_gvt_gtt *gtt = &gvt->gtt; struct intel_vgpu_mm *mm; int ret; - mm = kzalloc(sizeof(*mm), GFP_KERNEL); - if (!mm) { - ret = -ENOMEM; - goto fail; - } - - mm->type = mm_type; + mm = vgpu_alloc_mm(vgpu); + if (!mm) + return ERR_PTR(-ENOMEM); - if (page_table_level == 1) - mm->page_table_entry_type = GTT_TYPE_GGTT_PTE; - else if (page_table_level == 3) - mm->page_table_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY; - else if (page_table_level == 4) - mm->page_table_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY; - else { - WARN_ON(1); - ret = -EINVAL; - goto fail; - } + mm->type = INTEL_GVT_MM_PPGTT; - mm->page_table_level = page_table_level; - mm->pde_base_index = pde_base_index; + GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY && + root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY); + mm->ppgtt_mm.root_entry_type = root_entry_type; - mm->vgpu = vgpu; - mm->has_shadow_page_table = !!(mm_type == INTEL_GVT_MM_PPGTT); + INIT_LIST_HEAD(&mm->ppgtt_mm.list); + INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list); - kref_init(&mm->ref); - atomic_set(&mm->pincount, 0); - INIT_LIST_HEAD(&mm->list); - INIT_LIST_HEAD(&mm->lru_list); - list_add_tail(&mm->list, &vgpu->gtt.mm_list_head); + if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) + mm->ppgtt_mm.guest_pdps[0] = pdps[0]; + else + memcpy(mm->ppgtt_mm.guest_pdps, pdps, + sizeof(mm->ppgtt_mm.guest_pdps)); - ret = gtt->mm_alloc_page_table(mm); + ret = shadow_ppgtt_mm(mm); if (ret) { - gvt_vgpu_err("fail to allocate page table for mm\n"); - goto fail; + gvt_vgpu_err("failed to shadow ppgtt mm\n"); + vgpu_free_mm(mm); + return ERR_PTR(ret); } - mm->initialized = true; + list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head); + list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head); + return mm; +} + +static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu) +{ + struct intel_vgpu_mm *mm; + unsigned long nr_entries; - if (virtual_page_table) - memcpy(mm->virtual_page_table, virtual_page_table, - mm->page_table_entry_size); + mm = vgpu_alloc_mm(vgpu); + if (!mm) + return ERR_PTR(-ENOMEM); - if (mm->has_shadow_page_table) { - ret = shadow_mm(mm); - if (ret) - goto fail; - list_add_tail(&mm->lru_list, &gvt->gtt.mm_lru_list_head); + mm->type = INTEL_GVT_MM_GGTT; + + nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT; + mm->ggtt_mm.virtual_ggtt = vzalloc(nr_entries * + vgpu->gvt->device_info.gtt_entry_size); + if (!mm->ggtt_mm.virtual_ggtt) { + vgpu_free_mm(mm); + return ERR_PTR(-ENOMEM); } + return mm; -fail: - gvt_vgpu_err("fail to create mm\n"); - if (mm) - intel_gvt_mm_unreference(mm); - return ERR_PTR(ret); +} + +/** + * _intel_vgpu_mm_release - destroy a mm object + * @mm_ref: a kref object + * + * This function is used to destroy a mm object for vGPU + * + */ +void _intel_vgpu_mm_release(struct kref *mm_ref) +{ + struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref); + + if (GEM_WARN_ON(atomic_read(&mm->pincount))) + gvt_err("vgpu mm pin count bug detected\n"); + + if (mm->type == INTEL_GVT_MM_PPGTT) { + list_del(&mm->ppgtt_mm.list); + list_del(&mm->ppgtt_mm.lru_list); + invalidate_ppgtt_mm(mm); + } else { + vfree(mm->ggtt_mm.virtual_ggtt); + } + + 
vgpu_free_mm(mm); } /** @@ -1680,9 +1618,6 @@ fail: */ void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm) { - if (WARN_ON(mm->type != INTEL_GVT_MM_PPGTT)) - return; - atomic_dec(&mm->pincount); } @@ -1701,36 +1636,34 @@ int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm) { int ret; - if (WARN_ON(mm->type != INTEL_GVT_MM_PPGTT)) - return 0; + atomic_inc(&mm->pincount); - if (!mm->shadowed) { - ret = shadow_mm(mm); + if (mm->type == INTEL_GVT_MM_PPGTT) { + ret = shadow_ppgtt_mm(mm); if (ret) return ret; + + list_move_tail(&mm->ppgtt_mm.lru_list, + &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head); + } - atomic_inc(&mm->pincount); - list_del_init(&mm->lru_list); - list_add_tail(&mm->lru_list, &mm->vgpu->gvt->gtt.mm_lru_list_head); return 0; } -static int reclaim_one_mm(struct intel_gvt *gvt) +static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt) { struct intel_vgpu_mm *mm; struct list_head *pos, *n; - list_for_each_safe(pos, n, &gvt->gtt.mm_lru_list_head) { - mm = container_of(pos, struct intel_vgpu_mm, lru_list); + list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) { + mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list); - if (mm->type != INTEL_GVT_MM_PPGTT) - continue; if (atomic_read(&mm->pincount)) continue; - list_del_init(&mm->lru_list); - invalidate_mm(mm); + list_del_init(&mm->ppgtt_mm.lru_list); + invalidate_ppgtt_mm(mm); return 1; } return 0; @@ -1746,10 +1679,7 @@ static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm, struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_vgpu_ppgtt_spt *s; - if (WARN_ON(!mm->has_shadow_page_table)) - return -EINVAL; - - s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e)); + s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e)); if (!s) return -ENXIO; @@ -1780,85 +1710,72 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma) unsigned long gpa = INTEL_GVT_INVALID_ADDR; unsigned long gma_index[4]; struct intel_gvt_gtt_entry e; - int i, index; + int i, levels = 0; int ret; - if (mm->type != INTEL_GVT_MM_GGTT && mm->type != INTEL_GVT_MM_PPGTT) - return INTEL_GVT_INVALID_ADDR; + GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT && + mm->type != INTEL_GVT_MM_PPGTT); if (mm->type == INTEL_GVT_MM_GGTT) { if (!vgpu_gmadr_is_valid(vgpu, gma)) goto err; - ret = ggtt_get_guest_entry(mm, &e, - gma_ops->gma_to_ggtt_pte_index(gma)); - if (ret) - goto err; + ggtt_get_guest_entry(mm, &e, + gma_ops->gma_to_ggtt_pte_index(gma)); + gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) + (gma & ~I915_GTT_PAGE_MASK); trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa); - return gpa; - } - - switch (mm->page_table_level) { - case 4: - ret = ppgtt_get_shadow_root_entry(mm, &e, 0); - if (ret) - goto err; - gma_index[0] = gma_ops->gma_to_pml4_index(gma); - gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma); - gma_index[2] = gma_ops->gma_to_pde_index(gma); - gma_index[3] = gma_ops->gma_to_pte_index(gma); - index = 4; - break; - case 3: - ret = ppgtt_get_shadow_root_entry(mm, &e, - gma_ops->gma_to_l3_pdp_index(gma)); - if (ret) - goto err; - gma_index[0] = gma_ops->gma_to_pde_index(gma); - gma_index[1] = gma_ops->gma_to_pte_index(gma); - index = 2; - break; - case 2: - ret = ppgtt_get_shadow_root_entry(mm, &e, - gma_ops->gma_to_pde_index(gma)); - if (ret) - goto err; - gma_index[0] = gma_ops->gma_to_pte_index(gma); - index = 1; - break; - default: - WARN_ON(1); - goto err; - } + } else { + switch (mm->ppgtt_mm.root_entry_type) { + case GTT_TYPE_PPGTT_ROOT_L4_ENTRY: + ppgtt_get_shadow_root_entry(mm, &e, 0); + + 
gma_index[0] = gma_ops->gma_to_pml4_index(gma); + gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma); + gma_index[2] = gma_ops->gma_to_pde_index(gma); + gma_index[3] = gma_ops->gma_to_pte_index(gma); + levels = 4; + break; + case GTT_TYPE_PPGTT_ROOT_L3_ENTRY: + ppgtt_get_shadow_root_entry(mm, &e, + gma_ops->gma_to_l3_pdp_index(gma)); + + gma_index[0] = gma_ops->gma_to_pde_index(gma); + gma_index[1] = gma_ops->gma_to_pte_index(gma); + levels = 2; + break; + default: + GEM_BUG_ON(1); + } - /* walk into the shadow page table and get gpa from guest entry */ - for (i = 0; i < index; i++) { - ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i], - (i == index - 1)); - if (ret) - goto err; + /* walk the shadow page table and get gpa from guest entry */ + for (i = 0; i < levels; i++) { + ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i], + (i == levels - 1)); + if (ret) + goto err; - if (!pte_ops->test_present(&e)) { - gvt_dbg_core("GMA 0x%lx is not present\n", gma); - goto err; + if (!pte_ops->test_present(&e)) { + gvt_dbg_core("GMA 0x%lx is not present\n", gma); + goto err; + } } - } - gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) - + (gma & ~I915_GTT_PAGE_MASK); + gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) + + (gma & ~I915_GTT_PAGE_MASK); + trace_gma_translate(vgpu->id, "ppgtt", 0, + mm->ppgtt_mm.root_entry_type, gma, gpa); + } - trace_gma_translate(vgpu->id, "ppgtt", 0, - mm->page_table_level, gma, gpa); return gpa; err: gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma); return INTEL_GVT_INVALID_ADDR; } -static int emulate_gtt_mmio_read(struct intel_vgpu *vgpu, +static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes) { struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm; @@ -1887,7 +1804,7 @@ static int emulate_gtt_mmio_read(struct intel_vgpu *vgpu, * Returns: * Zero on success, error code if failed. 
*/ -int intel_vgpu_emulate_gtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off, +int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes) { const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; @@ -1897,11 +1814,11 @@ int intel_vgpu_emulate_gtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off, return -EINVAL; off -= info->gtt_start_offset; - ret = emulate_gtt_mmio_read(vgpu, off, p_data, bytes); + ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes); return ret; } -static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, +static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes) { struct intel_gvt *gvt = vgpu->gvt; @@ -1911,6 +1828,7 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, unsigned long g_gtt_index = off >> info->gtt_entry_size_shift; unsigned long gma, gfn; struct intel_gvt_gtt_entry e, m; + dma_addr_t dma_addr; int ret; if (bytes != 4 && bytes != 8) @@ -1926,6 +1844,7 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data, bytes); + m = e; if (ops->test_present(&e)) { gfn = ops->get_pfn(&e); @@ -1938,29 +1857,29 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, goto out; } - ret = gtt_entry_p2m(vgpu, &e, &m); + ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, + &dma_addr); if (ret) { - gvt_vgpu_err("fail to translate guest gtt entry\n"); + gvt_vgpu_err("fail to populate guest ggtt entry\n"); /* guest driver may read/write the entry when it is only * partially updated; in that situation p2m will fail, so * set the shadow entry to point to a scratch page */ ops->set_pfn(&m, gvt->gtt.scratch_mfn); - } - } else { - m = e; + } else + ops->set_pfn(&m, dma_addr >> PAGE_SHIFT); + } else ops->set_pfn(&m, gvt->gtt.scratch_mfn); - } out: - ggtt_set_shadow_entry(ggtt_mm, &m, g_gtt_index); - gtt_invalidate(gvt->dev_priv); + ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index); + ggtt_invalidate(gvt->dev_priv); ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index); return 0; } /* - * intel_vgpu_emulate_gtt_mmio_write - emulate GTT MMIO register write + * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write * @vgpu: a vGPU * @off: register offset * @p_data: data from guest write @@ -1971,8 +1890,8 @@ out: * Returns: * Zero on success, error code if failed. 
*/ -int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, - void *p_data, unsigned int bytes) +int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, + unsigned int off, void *p_data, unsigned int bytes) { const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; int ret; @@ -1981,43 +1900,10 @@ int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, return -EINVAL; off -= info->gtt_start_offset; - ret = emulate_gtt_mmio_write(vgpu, off, p_data, bytes); - return ret; -} - -int intel_vgpu_write_protect_handler(struct intel_vgpu *vgpu, u64 pa, - void *p_data, unsigned int bytes) -{ - struct intel_gvt *gvt = vgpu->gvt; - int ret = 0; - - if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) { - struct intel_vgpu_page_track *t; - - mutex_lock(&gvt->lock); - - t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT); - if (t) { - if (unlikely(vgpu->failsafe)) { - /* remove write protection to prevent furture traps */ - intel_vgpu_clean_page_track(vgpu, t); - } else { - ret = t->handler(t, pa, p_data, bytes); - if (ret) { - gvt_err("guest page write error %d, " - "gfn 0x%lx, pa 0x%llx, " - "var 0x%x, len %d\n", - ret, t->gfn, pa, - *(u32 *)p_data, bytes); - } - } - } - mutex_unlock(&gvt->lock); - } + ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes); return ret; } - static int alloc_scratch_pages(struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type) { @@ -2131,43 +2017,47 @@ err: int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) { struct intel_vgpu_gtt *gtt = &vgpu->gtt; - struct intel_vgpu_mm *ggtt_mm; - hash_init(gtt->tracked_guest_page_hash_table); - hash_init(gtt->shadow_page_hash_table); + INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL); - INIT_LIST_HEAD(&gtt->mm_list_head); + INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head); INIT_LIST_HEAD(&gtt->oos_page_list_head); INIT_LIST_HEAD(&gtt->post_shadow_list_head); - intel_vgpu_reset_ggtt(vgpu); - - ggtt_mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_GGTT, - NULL, 1, 0); - if (IS_ERR(ggtt_mm)) { + gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu); + if (IS_ERR(gtt->ggtt_mm)) { gvt_vgpu_err("fail to create mm for ggtt.\n"); - return PTR_ERR(ggtt_mm); + return PTR_ERR(gtt->ggtt_mm); } - gtt->ggtt_mm = ggtt_mm; + intel_vgpu_reset_ggtt(vgpu); return create_scratch_page_tree(vgpu); } -static void intel_vgpu_free_mm(struct intel_vgpu *vgpu, int type) +static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu) { struct list_head *pos, *n; struct intel_vgpu_mm *mm; - list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) { - mm = container_of(pos, struct intel_vgpu_mm, list); - if (mm->type == type) { - vgpu->gvt->gtt.mm_free_page_table(mm); - list_del(&mm->list); - list_del(&mm->lru_list); - kfree(mm); - } + list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) { + mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); + intel_vgpu_destroy_mm(mm); } + + if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head))) + gvt_err("vgpu ppgtt mm is not fully destroyed\n"); + + if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) { + gvt_err("Why do we still have spt not freed?\n"); + ppgtt_free_all_spt(vgpu); + } +} + +static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu) +{ + intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm); + vgpu->gtt.ggtt_mm = NULL; } /** @@ -2182,11 +2072,9 @@ static void intel_vgpu_free_mm(struct intel_vgpu *vgpu, int type) */ void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu) { - ppgtt_free_all_shadow_page(vgpu); + intel_vgpu_destroy_all_ppgtt_mm(vgpu); + 
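/* + * PPGTT instances are torn down first, then the GGTT mm; the scratch + * page tree is released only after both are gone. + */ +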
intel_vgpu_destroy_ggtt_mm(vgpu); release_scratch_page_tree(vgpu); - - intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT); - intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_GGTT); } static void clean_spt_oos(struct intel_gvt *gvt) @@ -2248,99 +2136,78 @@ fail: * pointer to mm object on success, NULL if failed. */ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu, - int page_table_level, void *root_entry) + u64 pdps[]) { - struct list_head *pos; struct intel_vgpu_mm *mm; - u64 *src, *dst; - - list_for_each(pos, &vgpu->gtt.mm_list_head) { - mm = container_of(pos, struct intel_vgpu_mm, list); - if (mm->type != INTEL_GVT_MM_PPGTT) - continue; - - if (mm->page_table_level != page_table_level) - continue; + struct list_head *pos; - src = root_entry; - dst = mm->virtual_page_table; + list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) { + mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); - if (page_table_level == 3) { - if (src[0] == dst[0] - && src[1] == dst[1] - && src[2] == dst[2] - && src[3] == dst[3]) + switch (mm->ppgtt_mm.root_entry_type) { + case GTT_TYPE_PPGTT_ROOT_L4_ENTRY: + if (pdps[0] == mm->ppgtt_mm.guest_pdps[0]) return mm; - } else { - if (src[0] == dst[0]) + break; + case GTT_TYPE_PPGTT_ROOT_L3_ENTRY: + if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps, + sizeof(mm->ppgtt_mm.guest_pdps))) return mm; + break; + default: + GEM_BUG_ON(1); } } return NULL; } /** - * intel_vgpu_g2v_create_ppgtt_mm - create a PPGTT mm object from - * g2v notification + * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object. * @vgpu: a vGPU - * @page_table_level: PPGTT page table level + * @root_entry_type: ppgtt root entry type + * @pdps: guest pdps * - * This function is used to create a PPGTT mm object from a guest to GVT-g - * notification. + * This function is used to find or create a PPGTT mm object from a guest. * * Returns: * Zero on success, negative error code if failed. */ -int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu, - int page_table_level) +struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu, + intel_gvt_gtt_type_t root_entry_type, u64 pdps[]) { - u64 *pdp = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0])); struct intel_vgpu_mm *mm; - if (WARN_ON((page_table_level != 4) && (page_table_level != 3))) - return -EINVAL; - - mm = intel_vgpu_find_ppgtt_mm(vgpu, page_table_level, pdp); + mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps); if (mm) { - intel_gvt_mm_reference(mm); + intel_vgpu_mm_get(mm); } else { - mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_PPGTT, - pdp, page_table_level, 0); - if (IS_ERR(mm)) { + mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps); + if (IS_ERR(mm)) gvt_vgpu_err("fail to create mm\n"); - return PTR_ERR(mm); - } } - return 0; + return mm; } /** - * intel_vgpu_g2v_destroy_ppgtt_mm - destroy a PPGTT mm object from - * g2v notification + * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object. * @vgpu: a vGPU - * @page_table_level: PPGTT page table level + * @pdps: guest pdps * - * This function is used to create a PPGTT mm object from a guest to GVT-g - * notification. + * This function is used to find a PPGTT mm object from a guest and destroy it. * * Returns: * Zero on success, negative error code if failed. 
*/ -int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu, - int page_table_level) +int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]) { - u64 *pdp = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0])); struct intel_vgpu_mm *mm; - if (WARN_ON((page_table_level != 4) && (page_table_level != 3))) - return -EINVAL; - - mm = intel_vgpu_find_ppgtt_mm(vgpu, page_table_level, pdp); + mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps); if (!mm) { gvt_vgpu_err("fail to find ppgtt instance.\n"); return -EINVAL; } - intel_gvt_mm_unreference(mm); + intel_vgpu_mm_put(mm); return 0; } @@ -2367,8 +2234,6 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt) || IS_KABYLAKE(gvt->dev_priv)) { gvt->gtt.pte_ops = &gen8_gtt_pte_ops; gvt->gtt.gma_ops = &gen8_gtt_gma_ops; - gvt->gtt.mm_alloc_page_table = gen8_mm_alloc_page_table; - gvt->gtt.mm_free_page_table = gen8_mm_free_page_table; } else { return -ENODEV; } @@ -2399,7 +2264,7 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt) return ret; } } - INIT_LIST_HEAD(&gvt->gtt.mm_lru_list_head); + INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head); return 0; } @@ -2426,6 +2291,28 @@ void intel_gvt_clean_gtt(struct intel_gvt *gvt) } /** + * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances + * @vgpu: a vGPU + * + * This function is called to invalidate all PPGTT instances of a vGPU. + * + */ +void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu) +{ + struct list_head *pos, *n; + struct intel_vgpu_mm *mm; + + list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) { + mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); + if (mm->type == INTEL_GVT_MM_PPGTT) { + list_del_init(&mm->ppgtt_mm.lru_list); + if (mm->ppgtt_mm.shadowed) + invalidate_ppgtt_mm(mm); + } + } +} + +/** * intel_vgpu_reset_ggtt - reset the GGTT entry * @vgpu: a vGPU * @@ -2437,28 +2324,25 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) { struct intel_gvt *gvt = vgpu->gvt; struct drm_i915_private *dev_priv = gvt->dev_priv; - struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; + struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; + struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE}; u32 index; - u32 offset; u32 num_entries; - struct intel_gvt_gtt_entry e; - memset(&e, 0, sizeof(struct intel_gvt_gtt_entry)); - e.type = GTT_TYPE_GGTT_PTE; - ops->set_pfn(&e, gvt->gtt.scratch_mfn); - e.val64 |= _PAGE_PRESENT; + pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn); + pte_ops->set_present(&entry); index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT; - for (offset = 0; offset < num_entries; offset++) - ops->set_entry(NULL, &e, index + offset, false, 0, vgpu); + while (num_entries--) + ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT; num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT; - for (offset = 0; offset < num_entries; offset++) - ops->set_entry(NULL, &e, index + offset, false, 0, vgpu); + while (num_entries--) + ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); - gtt_invalidate(dev_priv); + ggtt_invalidate(dev_priv); } /** @@ -2471,13 +2355,10 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) */ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu) { - ppgtt_free_all_shadow_page(vgpu); - /* Shadow pages are only created when there is no page * table tracking data, so remove page tracking data after * removing the shadow pages. 
*/ - intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT); - + intel_vgpu_destroy_all_ppgtt_mm(vgpu); intel_vgpu_reset_ggtt(vgpu); } diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 4cc13b5934f1..a8b369cd352b 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -39,7 +39,6 @@ struct intel_vgpu_mm; -#define INTEL_GVT_GTT_HASH_BITS 8 #define INTEL_GVT_INVALID_ADDR (~0UL) struct intel_gvt_gtt_entry { @@ -84,17 +83,12 @@ struct intel_gvt_gtt { void (*mm_free_page_table)(struct intel_vgpu_mm *mm); struct list_head oos_page_use_list_head; struct list_head oos_page_free_list_head; - struct list_head mm_lru_list_head; + struct list_head ppgtt_mm_lru_list_head; struct page *scratch_page; unsigned long scratch_mfn; }; -enum { - INTEL_GVT_MM_GGTT = 0, - INTEL_GVT_MM_PPGTT, -}; - typedef enum { GTT_TYPE_INVALID = -1, @@ -125,66 +119,60 @@ typedef enum { GTT_TYPE_MAX, } intel_gvt_gtt_type_t; -struct intel_vgpu_mm { - int type; - bool initialized; - bool shadowed; +enum intel_gvt_mm_type { + INTEL_GVT_MM_GGTT, + INTEL_GVT_MM_PPGTT, +}; - int page_table_entry_type; - u32 page_table_entry_size; - u32 page_table_entry_cnt; - void *virtual_page_table; - void *shadow_page_table; +#define GVT_RING_CTX_NR_PDPS GEN8_3LVL_PDPES - int page_table_level; - bool has_shadow_page_table; - u32 pde_base_index; +struct intel_vgpu_mm { + enum intel_gvt_mm_type type; + struct intel_vgpu *vgpu; - struct list_head list; struct kref ref; atomic_t pincount; - struct list_head lru_list; - struct intel_vgpu *vgpu; -}; - -extern int intel_vgpu_mm_get_entry( - struct intel_vgpu_mm *mm, - void *page_table, struct intel_gvt_gtt_entry *e, - unsigned long index); -extern int intel_vgpu_mm_set_entry( - struct intel_vgpu_mm *mm, - void *page_table, struct intel_gvt_gtt_entry *e, - unsigned long index); - -#define ggtt_get_guest_entry(mm, e, index) \ - intel_vgpu_mm_get_entry(mm, mm->virtual_page_table, e, index) - -#define ggtt_set_guest_entry(mm, e, index) \ - intel_vgpu_mm_set_entry(mm, mm->virtual_page_table, e, index) - -#define ggtt_get_shadow_entry(mm, e, index) \ - intel_vgpu_mm_get_entry(mm, mm->shadow_page_table, e, index) - -#define ggtt_set_shadow_entry(mm, e, index) \ - intel_vgpu_mm_set_entry(mm, mm->shadow_page_table, e, index) + union { + struct { + intel_gvt_gtt_type_t root_entry_type; + /* + * The 4 PDPs in ring context. For 48bit addressing, + * only PDP0 is valid and points to the PML4. For 32bit + * addressing, all 4 are used as true PDPs. 
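+ * guest_pdps hold the root entries as written by the guest, while + * shadow_pdps hold the host-maintained shadow counterparts.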
+ */ + u64 guest_pdps[GVT_RING_CTX_NR_PDPS]; + u64 shadow_pdps[GVT_RING_CTX_NR_PDPS]; + bool shadowed; + + struct list_head list; + struct list_head lru_list; + } ppgtt_mm; + struct { + void *virtual_ggtt; + } ggtt_mm; + }; +}; -#define ppgtt_get_guest_root_entry(mm, e, index) \ - intel_vgpu_mm_get_entry(mm, mm->virtual_page_table, e, index) +struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu, + intel_gvt_gtt_type_t root_entry_type, u64 pdps[]); -#define ppgtt_set_guest_root_entry(mm, e, index) \ - intel_vgpu_mm_set_entry(mm, mm->virtual_page_table, e, index) +static inline void intel_vgpu_mm_get(struct intel_vgpu_mm *mm) +{ + kref_get(&mm->ref); +} -#define ppgtt_get_shadow_root_entry(mm, e, index) \ - intel_vgpu_mm_get_entry(mm, mm->shadow_page_table, e, index) +void _intel_vgpu_mm_release(struct kref *mm_ref); -#define ppgtt_set_shadow_root_entry(mm, e, index) \ - intel_vgpu_mm_set_entry(mm, mm->shadow_page_table, e, index) +static inline void intel_vgpu_mm_put(struct intel_vgpu_mm *mm) +{ + kref_put(&mm->ref, _intel_vgpu_mm_release); +} -extern struct intel_vgpu_mm *intel_vgpu_create_mm(struct intel_vgpu *vgpu, - int mm_type, void *virtual_page_table, int page_table_level, - u32 pde_base_index); -extern void intel_vgpu_destroy_mm(struct kref *mm_ref); +static inline void intel_vgpu_destroy_mm(struct intel_vgpu_mm *mm) +{ + intel_vgpu_mm_put(mm); +} struct intel_vgpu_guest_page; @@ -196,10 +184,8 @@ struct intel_vgpu_scratch_pt { struct intel_vgpu_gtt { struct intel_vgpu_mm *ggtt_mm; unsigned long active_ppgtt_mm_bitmap; - struct list_head mm_list_head; - DECLARE_HASHTABLE(shadow_page_hash_table, INTEL_GVT_GTT_HASH_BITS); - DECLARE_HASHTABLE(tracked_guest_page_hash_table, INTEL_GVT_GTT_HASH_BITS); - atomic_t n_tracked_guest_page; + struct list_head ppgtt_mm_list_head; + struct radix_tree_root spt_tree; struct list_head oos_page_list_head; struct list_head post_shadow_list_head; struct intel_vgpu_scratch_pt scratch_pt[GTT_TYPE_MAX]; @@ -208,6 +194,7 @@ struct intel_vgpu_gtt { extern int intel_vgpu_init_gtt(struct intel_vgpu *vgpu); extern void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu); void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu); +void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu); extern int intel_gvt_init_gtt(struct intel_gvt *gvt); void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu); @@ -216,32 +203,8 @@ extern void intel_gvt_clean_gtt(struct intel_gvt *gvt); extern struct intel_vgpu_mm *intel_gvt_find_ppgtt_mm(struct intel_vgpu *vgpu, int page_table_level, void *root_entry); -struct intel_vgpu_oos_page; - -struct intel_vgpu_shadow_page { - void *vaddr; - struct page *page; - int type; - struct hlist_node node; - unsigned long mfn; -}; - -struct intel_vgpu_page_track { - struct hlist_node node; - bool tracked; - unsigned long gfn; - int (*handler)(void *, u64, void *, int); - void *data; -}; - -struct intel_vgpu_guest_page { - struct intel_vgpu_page_track track; - unsigned long write_cnt; - struct intel_vgpu_oos_page *oos_page; -}; - struct intel_vgpu_oos_page { - struct intel_vgpu_guest_page *guest_page; + struct intel_vgpu_ppgtt_spt *spt; struct list_head list; struct list_head vm_list; int id; @@ -250,42 +213,33 @@ struct intel_vgpu_oos_page { #define GTT_ENTRY_NUM_IN_ONE_PAGE 512 +/* Represents a vGPU shadow page table. 
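+ * Each SPT shadows one guest page-table page: shadow_page holds the + * host-side copy used by hardware, while guest_page records the tracked + * guest page (gfn, write count and out-of-sync state).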
*/ struct intel_vgpu_ppgtt_spt { - struct intel_vgpu_shadow_page shadow_page; - struct intel_vgpu_guest_page guest_page; - int guest_page_type; atomic_t refcount; struct intel_vgpu *vgpu; - DECLARE_BITMAP(post_shadow_bitmap, GTT_ENTRY_NUM_IN_ONE_PAGE); - struct list_head post_shadow_list; -}; -int intel_vgpu_init_page_track(struct intel_vgpu *vgpu, - struct intel_vgpu_page_track *t, - unsigned long gfn, - int (*handler)(void *gp, u64, void *, int), - void *data); + struct { + intel_gvt_gtt_type_t type; + void *vaddr; + struct page *page; + unsigned long mfn; + } shadow_page; -void intel_vgpu_clean_page_track(struct intel_vgpu *vgpu, - struct intel_vgpu_page_track *t); + struct { + intel_gvt_gtt_type_t type; + unsigned long gfn; + unsigned long write_cnt; + struct intel_vgpu_oos_page *oos_page; + } guest_page; -struct intel_vgpu_page_track *intel_vgpu_find_tracked_page( - struct intel_vgpu *vgpu, unsigned long gfn); + DECLARE_BITMAP(post_shadow_bitmap, GTT_ENTRY_NUM_IN_ONE_PAGE); + struct list_head post_shadow_list; +}; int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu); int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu); -static inline void intel_gvt_mm_reference(struct intel_vgpu_mm *mm) -{ - kref_get(&mm->ref); -} - -static inline void intel_gvt_mm_unreference(struct intel_vgpu_mm *mm) -{ - kref_put(&mm->ref, intel_vgpu_destroy_mm); -} - int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm); void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm); @@ -294,21 +248,17 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma); struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu, - int page_table_level, void *root_entry); + u64 pdps[]); -int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu, - int page_table_level); +struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu, + intel_gvt_gtt_type_t root_entry_type, u64 pdps[]); -int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu, - int page_table_level); +int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]); -int intel_vgpu_emulate_gtt_mmio_read(struct intel_vgpu *vgpu, +int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes); -int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, +int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes); -int intel_vgpu_write_protect_handler(struct intel_vgpu *vgpu, u64 pa, - void *p_data, unsigned int bytes); - #endif /* _GVT_GTT_H_ */ diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index fac54f32d33f..61bd14fcb649 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -183,7 +183,7 @@ static const struct intel_gvt_ops intel_gvt_ops = { .get_gvt_attrs = intel_get_gvt_attrs, .vgpu_query_plane = intel_vgpu_query_plane, .vgpu_get_dmabuf = intel_vgpu_get_dmabuf, - .write_protect_handler = intel_vgpu_write_protect_handler, + .write_protect_handler = intel_vgpu_page_track_handler, }; /** diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index c6197d990818..efacd8abbedc 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -48,6 +48,7 @@ #include "cmd_parser.h" #include "fb_decoder.h" #include "dmabuf.h" +#include "page_track.h" #define GVT_MAX_VGPU 8 @@ -131,11 +132,9 @@ struct intel_vgpu_opregion { #define vgpu_opregion(vgpu) (&(vgpu->opregion)) -#define INTEL_GVT_MAX_PORT 5 - struct 
intel_vgpu_display { struct intel_vgpu_i2c_edid i2c_edid; - struct intel_vgpu_port ports[INTEL_GVT_MAX_PORT]; + struct intel_vgpu_port ports[I915_MAX_PORTS]; struct intel_vgpu_sbi sbi; }; @@ -190,6 +189,7 @@ struct intel_vgpu { struct intel_vgpu_opregion opregion; struct intel_vgpu_display display; struct intel_vgpu_submission submission; + struct radix_tree_root page_track_tree; u32 hws_pga[I915_NUM_ENGINES]; struct dentry *debugfs; @@ -201,8 +201,16 @@ struct intel_vgpu { int num_regions; struct eventfd_ctx *intx_trigger; struct eventfd_ctx *msi_trigger; - struct rb_root cache; + + /* + * Two caches are used to avoid mapping duplicated pages (e.g. + * scratch pages). This helps to reduce dma setup overhead. + */ + struct rb_root gfn_cache; + struct rb_root dma_addr_cache; + unsigned long nr_cache_entries; struct mutex cache_lock; + struct notifier_block iommu_notifier; struct notifier_block group_notifier; struct kvm *kvm; @@ -308,7 +316,10 @@ struct intel_gvt { wait_queue_head_t service_thread_wq; unsigned long service_request; - struct engine_mmio *engine_mmio_list; + struct { + struct engine_mmio *mmio; + int ctx_mmio_count[I915_NUM_ENGINES]; + } engine_mmio_list; struct dentry *debugfs_root; }; diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 9be639aa3b55..8c5d5d005854 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -188,7 +188,9 @@ void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason) static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, unsigned int fence_num, void *p_data, unsigned int bytes) { - if (fence_num >= vgpu_fence_sz(vgpu)) { + unsigned int max_fence = vgpu_fence_sz(vgpu); + + if (fence_num >= max_fence) { /* When guest access oob fence regs without access * pv_info first, we treat guest not supporting GVT, @@ -201,7 +203,7 @@ static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, if (!vgpu->mmio.disable_warn_untrack) { gvt_vgpu_err("found oob fence register access\n"); gvt_vgpu_err("total fence %d, access fence %d\n", - vgpu_fence_sz(vgpu), fence_num); + max_fence, fence_num); } memset(p_data, 0, bytes); return -EINVAL; @@ -320,7 +322,7 @@ static int gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, intel_gvt_reset_vgpu_locked(vgpu, false, engine_mask); /* sw will wait for the device to ack the reset request */ - vgpu_vreg(vgpu, offset) = 0; + vgpu_vreg(vgpu, offset) = 0; return 0; } @@ -1139,21 +1141,21 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification) { - int ret = 0; + intel_gvt_gtt_type_t root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY; + struct intel_vgpu_mm *mm; + u64 *pdps; + + pdps = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0])); switch (notification) { case VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE: - ret = intel_vgpu_g2v_create_ppgtt_mm(vgpu, 3); - break; - case VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY: - ret = intel_vgpu_g2v_destroy_ppgtt_mm(vgpu, 3); - break; + root_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY; case VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE: - ret = intel_vgpu_g2v_create_ppgtt_mm(vgpu, 4); - break; + mm = intel_vgpu_get_ppgtt_mm(vgpu, root_entry_type, pdps); + return PTR_ERR_OR_ZERO(mm); + case VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY: case VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY: - ret = intel_vgpu_g2v_destroy_ppgtt_mm(vgpu, 4); - break; + return intel_vgpu_put_ppgtt_mm(vgpu, pdps); case VGT_G2V_EXECLIST_CONTEXT_CREATE: case 
VGT_G2V_EXECLIST_CONTEXT_DESTROY: case 1: /* Remove this in guest driver. */ @@ -1161,7 +1163,7 @@ static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification) default: gvt_vgpu_err("Invalid PV notification %d\n", notification); } - return ret; + return 0; } static int send_display_ready_uevent(struct intel_vgpu *vgpu, int ready) @@ -1389,8 +1391,8 @@ static int hws_pga_write(struct intel_vgpu *vgpu, unsigned int offset, int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset); if (!intel_gvt_ggtt_validate_range(vgpu, value, I915_GTT_PAGE_SIZE)) { - gvt_vgpu_err("VM(%d) write invalid HWSP address, reg:0x%x, value:0x%x\n", - vgpu->id, offset, value); + gvt_vgpu_err("write invalid HWSP address, reg:0x%x, value:0x%x\n", + offset, value); return -EINVAL; } /* @@ -1399,8 +1401,8 @@ static int hws_pga_write(struct intel_vgpu *vgpu, unsigned int offset, * support BDW, SKL or other platforms with same HWSP registers. */ if (unlikely(ring_id < 0 || ring_id >= I915_NUM_ENGINES)) { - gvt_vgpu_err("VM(%d) access unknown hardware status page register:0x%x\n", - vgpu->id, offset); + gvt_vgpu_err("access unknown hardware status page register:0x%x\n", + offset); return -EINVAL; } vgpu->hws_pga[ring_id] = value; @@ -1765,6 +1767,10 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(CURBASE(PIPE_B), D_ALL); MMIO_D(CURBASE(PIPE_C), D_ALL); + MMIO_D(CUR_FBC_CTL(PIPE_A), D_ALL); + MMIO_D(CUR_FBC_CTL(PIPE_B), D_ALL); + MMIO_D(CUR_FBC_CTL(PIPE_C), D_ALL); + MMIO_D(_MMIO(0x700ac), D_ALL); MMIO_D(_MMIO(0x710ac), D_ALL); MMIO_D(_MMIO(0x720ac), D_ALL); @@ -2226,6 +2232,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(HSW_AUD_CFG(PIPE_A), D_ALL); MMIO_D(HSW_AUD_PIN_ELD_CP_VLD, D_ALL); + MMIO_D(HSW_AUD_MISC_CTRL(PIPE_A), D_ALL); MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_A), D_ALL, NULL, NULL); MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_B), D_ALL, NULL, NULL); @@ -2557,6 +2564,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(WM_MISC, D_BDW); MMIO_D(_MMIO(BDW_EDP_PSR_BASE), D_BDW); + MMIO_D(_MMIO(0x6671c), D_BDW_PLUS); MMIO_D(_MMIO(0x66c00), D_BDW_PLUS); MMIO_D(_MMIO(0x66c04), D_BDW_PLUS); @@ -2785,6 +2793,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(_MMIO(0x70380), D_SKL_PLUS); MMIO_D(_MMIO(0x71380), D_SKL_PLUS); MMIO_D(_MMIO(0x72380), D_SKL_PLUS); + MMIO_D(_MMIO(0x7239c), D_SKL_PLUS); MMIO_D(_MMIO(0x7039c), D_SKL_PLUS); MMIO_D(_MMIO(0x8f074), D_SKL | D_KBL); @@ -2799,7 +2808,9 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_F(_MMIO(0xc800), 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); MMIO_F(_MMIO(0xb020), 0x80, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); + MMIO_D(RPM_CONFIG0, D_SKL_PLUS); MMIO_D(_MMIO(0xd08), D_SKL_PLUS); + MMIO_D(RC6_LOCATION, D_SKL_PLUS); MMIO_DFH(_MMIO(0x20e0), D_SKL_PLUS, F_MODE_MASK, NULL, NULL); MMIO_DFH(_MMIO(0x20ec), D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index f8e77e166246..f6dd9f717888 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -44,13 +44,18 @@ struct intel_gvt_mpt { void (*detach_vgpu)(unsigned long handle); int (*inject_msi)(unsigned long handle, u32 addr, u16 data); unsigned long (*from_virt_to_mfn)(void *p); - int (*set_wp_page)(unsigned long handle, u64 gfn); - int (*unset_wp_page)(unsigned long handle, u64 gfn); + int (*enable_page_track)(unsigned long handle, u64 gfn); + int (*disable_page_track)(unsigned long handle, 
u64 gfn); int (*read_gpa)(unsigned long handle, unsigned long gpa, void *buf, unsigned long len); int (*write_gpa)(unsigned long handle, unsigned long gpa, void *buf, unsigned long len); unsigned long (*gfn_to_mfn)(unsigned long handle, unsigned long gfn); + + int (*dma_map_guest_page)(unsigned long handle, unsigned long gfn, + dma_addr_t *dma_addr); + void (*dma_unmap_guest_page)(unsigned long handle, dma_addr_t dma_addr); + int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn, unsigned long mfn, unsigned int nr, bool map); int (*set_trap_area)(unsigned long handle, u64 start, u64 end, diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 021f722e2481..c16a492449d7 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -41,6 +41,7 @@ #include <linux/kvm_host.h> #include <linux/vfio.h> #include <linux/mdev.h> +#include <linux/debugfs.h> #include "i915_drv.h" #include "gvt.h" @@ -84,12 +85,16 @@ struct kvmgt_guest_info { #define NR_BKT (1 << 18) struct hlist_head ptable[NR_BKT]; #undef NR_BKT + struct dentry *debugfs_cache_entries; }; struct gvt_dma { - struct rb_node node; + struct intel_vgpu *vgpu; + struct rb_node gfn_node; + struct rb_node dma_addr_node; gfn_t gfn; - unsigned long iova; + dma_addr_t dma_addr; + struct kref ref; }; static inline bool handle_valid(unsigned long handle) @@ -101,165 +106,168 @@ static int kvmgt_guest_init(struct mdev_device *mdev); static void intel_vgpu_release_work(struct work_struct *work); static bool kvmgt_guest_exit(struct kvmgt_guest_info *info); -static int gvt_dma_map_iova(struct intel_vgpu *vgpu, kvm_pfn_t pfn, - unsigned long *iova) +static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn, + dma_addr_t *dma_addr) { - struct page *page; struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; - dma_addr_t daddr; + struct page *page; + unsigned long pfn; + int ret; - if (unlikely(!pfn_valid(pfn))) - return -EFAULT; + /* Pin the page first. */ + ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1, + IOMMU_READ | IOMMU_WRITE, &pfn); + if (ret != 1) { + gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", + gfn, ret); + return -EINVAL; + } + /* Setup DMA mapping. 
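+ * The pinned pfn is mapped for bidirectional DMA; if the mapping + * fails, the page is unpinned again before returning an error.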
*/ page = pfn_to_page(pfn); - daddr = dma_map_page(dev, page, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, daddr)) + *dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, *dma_addr)) { + gvt_vgpu_err("DMA mapping failed for gfn 0x%lx\n", gfn); + vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1); return -ENOMEM; + } - *iova = (unsigned long)(daddr >> PAGE_SHIFT); return 0; } -static void gvt_dma_unmap_iova(struct intel_vgpu *vgpu, unsigned long iova) +static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn, + dma_addr_t dma_addr) { struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; - dma_addr_t daddr; + int ret; - daddr = (dma_addr_t)(iova << PAGE_SHIFT); - dma_unmap_page(dev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + dma_unmap_page(dev, dma_addr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1); + WARN_ON(ret != 1); } -static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn) +static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu, + dma_addr_t dma_addr) { - struct rb_node *node = vgpu->vdev.cache.rb_node; - struct gvt_dma *ret = NULL; + struct rb_node *node = vgpu->vdev.dma_addr_cache.rb_node; + struct gvt_dma *itr; while (node) { - struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node); + itr = rb_entry(node, struct gvt_dma, dma_addr_node); - if (gfn < itr->gfn) + if (dma_addr < itr->dma_addr) node = node->rb_left; - else if (gfn > itr->gfn) + else if (dma_addr > itr->dma_addr) node = node->rb_right; - else { - ret = itr; - goto out; - } + else + return itr; } - -out: - return ret; + return NULL; } -static unsigned long gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn) +static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn) { - struct gvt_dma *entry; - unsigned long iova; - - mutex_lock(&vgpu->vdev.cache_lock); + struct rb_node *node = vgpu->vdev.gfn_cache.rb_node; + struct gvt_dma *itr; - entry = __gvt_cache_find(vgpu, gfn); - iova = (entry == NULL) ? INTEL_GVT_INVALID_ADDR : entry->iova; + while (node) { + itr = rb_entry(node, struct gvt_dma, gfn_node); - mutex_unlock(&vgpu->vdev.cache_lock); - return iova; + if (gfn < itr->gfn) + node = node->rb_left; + else if (gfn > itr->gfn) + node = node->rb_right; + else + return itr; + } + return NULL; } -static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, - unsigned long iova) +static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, + dma_addr_t dma_addr) { struct gvt_dma *new, *itr; - struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL; + struct rb_node **link, *parent = NULL; new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL); if (!new) - return; + return -ENOMEM; + new->vgpu = vgpu; new->gfn = gfn; - new->iova = iova; + new->dma_addr = dma_addr; + kref_init(&new->ref); - mutex_lock(&vgpu->vdev.cache_lock); + /* gfn_cache maps gfn to struct gvt_dma. 
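+ * The same entry is also inserted into dma_addr_cache below, so a + * mapping can be looked up either by gfn or by dma address.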
*/ + link = &vgpu->vdev.gfn_cache.rb_node; while (*link) { parent = *link; - itr = rb_entry(parent, struct gvt_dma, node); + itr = rb_entry(parent, struct gvt_dma, gfn_node); - if (gfn == itr->gfn) - goto out; - else if (gfn < itr->gfn) + if (gfn < itr->gfn) link = &parent->rb_left; else link = &parent->rb_right; } + rb_link_node(&new->gfn_node, parent, link); + rb_insert_color(&new->gfn_node, &vgpu->vdev.gfn_cache); - rb_link_node(&new->node, parent, link); - rb_insert_color(&new->node, &vgpu->vdev.cache); - mutex_unlock(&vgpu->vdev.cache_lock); - return; + /* dma_addr_cache maps dma addr to struct gvt_dma. */ + parent = NULL; + link = &vgpu->vdev.dma_addr_cache.rb_node; + while (*link) { + parent = *link; + itr = rb_entry(parent, struct gvt_dma, dma_addr_node); -out: - mutex_unlock(&vgpu->vdev.cache_lock); - kfree(new); + if (dma_addr < itr->dma_addr) + link = &parent->rb_left; + else + link = &parent->rb_right; + } + rb_link_node(&new->dma_addr_node, parent, link); + rb_insert_color(&new->dma_addr_node, &vgpu->vdev.dma_addr_cache); + + vgpu->vdev.nr_cache_entries++; + return 0; } static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu, struct gvt_dma *entry) { - rb_erase(&entry->node, &vgpu->vdev.cache); + rb_erase(&entry->gfn_node, &vgpu->vdev.gfn_cache); + rb_erase(&entry->dma_addr_node, &vgpu->vdev.dma_addr_cache); kfree(entry); -} - -static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn) -{ - struct device *dev = mdev_dev(vgpu->vdev.mdev); - struct gvt_dma *this; - unsigned long g1; - int rc; - - mutex_lock(&vgpu->vdev.cache_lock); - this = __gvt_cache_find(vgpu, gfn); - if (!this) { - mutex_unlock(&vgpu->vdev.cache_lock); - return; - } - - g1 = gfn; - gvt_dma_unmap_iova(vgpu, this->iova); - rc = vfio_unpin_pages(dev, &g1, 1); - WARN_ON(rc != 1); - __gvt_cache_remove_entry(vgpu, this); - mutex_unlock(&vgpu->vdev.cache_lock); -} - -static void gvt_cache_init(struct intel_vgpu *vgpu) -{ - vgpu->vdev.cache = RB_ROOT; - mutex_init(&vgpu->vdev.cache_lock); + vgpu->vdev.nr_cache_entries--; } static void gvt_cache_destroy(struct intel_vgpu *vgpu) { struct gvt_dma *dma; struct rb_node *node = NULL; - struct device *dev = mdev_dev(vgpu->vdev.mdev); - unsigned long gfn; for (;;) { mutex_lock(&vgpu->vdev.cache_lock); - node = rb_first(&vgpu->vdev.cache); + node = rb_first(&vgpu->vdev.gfn_cache); if (!node) { mutex_unlock(&vgpu->vdev.cache_lock); break; } - dma = rb_entry(node, struct gvt_dma, node); - gvt_dma_unmap_iova(vgpu, dma->iova); - gfn = dma->gfn; + dma = rb_entry(node, struct gvt_dma, gfn_node); + gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr); __gvt_cache_remove_entry(vgpu, dma); mutex_unlock(&vgpu->vdev.cache_lock); - vfio_unpin_pages(dev, &gfn, 1); } } +static void gvt_cache_init(struct intel_vgpu *vgpu) +{ + vgpu->vdev.gfn_cache = RB_ROOT; + vgpu->vdev.dma_addr_cache = RB_ROOT; + vgpu->vdev.nr_cache_entries = 0; + mutex_init(&vgpu->vdev.cache_lock); +} + static void kvmgt_protect_table_init(struct kvmgt_guest_info *info) { hash_init(info->ptable); @@ -452,7 +460,7 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) vgpu = intel_gvt_ops->vgpu_create(gvt, type); if (IS_ERR_OR_NULL(vgpu)) { ret = vgpu == NULL ? 
-EFAULT : PTR_ERR(vgpu); - gvt_vgpu_err("failed to create intel vgpu: %d\n", ret); + gvt_err("failed to create intel vgpu: %d\n", ret); goto out; } @@ -489,13 +497,22 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb, if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { struct vfio_iommu_type1_dma_unmap *unmap = data; - unsigned long gfn, end_gfn; + struct gvt_dma *entry; + unsigned long iov_pfn, end_iov_pfn; + + iov_pfn = unmap->iova >> PAGE_SHIFT; + end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE; - gfn = unmap->iova >> PAGE_SHIFT; - end_gfn = gfn + unmap->size / PAGE_SIZE; + mutex_lock(&vgpu->vdev.cache_lock); + for (; iov_pfn < end_iov_pfn; iov_pfn++) { + entry = __gvt_cache_find_gfn(vgpu, iov_pfn); + if (!entry) + continue; - while (gfn < end_gfn) - gvt_cache_remove(vgpu, gfn++); + gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr); + __gvt_cache_remove_entry(vgpu, entry); + } + mutex_unlock(&vgpu->vdev.cache_lock); } return NOTIFY_OK; @@ -1368,7 +1385,7 @@ static void kvmgt_host_exit(struct device *dev, void *gvt) mdev_unregister_device(dev); } -static int kvmgt_write_protect_add(unsigned long handle, u64 gfn) +static int kvmgt_page_track_add(unsigned long handle, u64 gfn) { struct kvmgt_guest_info *info; struct kvm *kvm; @@ -1402,7 +1419,7 @@ out: return 0; } -static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn) +static int kvmgt_page_track_remove(unsigned long handle, u64 gfn) { struct kvmgt_guest_info *info; struct kvm *kvm; @@ -1530,11 +1547,20 @@ static int kvmgt_guest_init(struct mdev_device *mdev) info->track_node.track_flush_slot = kvmgt_page_track_flush_slot; kvm_page_track_register_notifier(kvm, &info->track_node); + info->debugfs_cache_entries = debugfs_create_ulong( + "kvmgt_nr_cache_entries", + 0444, vgpu->debugfs, + &vgpu->vdev.nr_cache_entries); + if (!info->debugfs_cache_entries) + gvt_vgpu_err("Cannot create kvmgt debugfs entry\n"); + return 0; } static bool kvmgt_guest_exit(struct kvmgt_guest_info *info) { + debugfs_remove(info->debugfs_cache_entries); + kvm_page_track_unregister_notifier(info->kvm, &info->track_node); kvm_put_kvm(info->kvm); kvmgt_protect_table_destroy(info); @@ -1574,39 +1600,84 @@ static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data) static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn) { - unsigned long iova, pfn; struct kvmgt_guest_info *info; - struct device *dev; - struct intel_vgpu *vgpu; - int rc; + kvm_pfn_t pfn; if (!handle_valid(handle)) return INTEL_GVT_INVALID_ADDR; info = (struct kvmgt_guest_info *)handle; - vgpu = info->vgpu; - iova = gvt_cache_find(info->vgpu, gfn); - if (iova != INTEL_GVT_INVALID_ADDR) - return iova; - - pfn = INTEL_GVT_INVALID_ADDR; - dev = mdev_dev(info->vgpu->vdev.mdev); - rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn); - if (rc != 1) { - gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", - gfn, rc); - return INTEL_GVT_INVALID_ADDR; - } - /* transfer to host iova for GFX to use DMA */ - rc = gvt_dma_map_iova(info->vgpu, pfn, &iova); - if (rc) { - gvt_vgpu_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn); - vfio_unpin_pages(dev, &gfn, 1); + + pfn = gfn_to_pfn(info->kvm, gfn); + if (is_error_noslot_pfn(pfn)) return INTEL_GVT_INVALID_ADDR; + + return pfn; +} + +int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, + dma_addr_t *dma_addr) +{ + struct kvmgt_guest_info *info; + struct intel_vgpu *vgpu; + struct gvt_dma *entry; + int ret; + + if (!handle_valid(handle)) + return -EINVAL; + + info = (struct 
kvmgt_guest_info *)handle; + vgpu = info->vgpu; + + mutex_lock(&info->vgpu->vdev.cache_lock); + + entry = __gvt_cache_find_gfn(info->vgpu, gfn); + if (!entry) { + ret = gvt_dma_map_page(vgpu, gfn, dma_addr); + if (ret) + goto err_unlock; + + ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr); + if (ret) + goto err_unmap; + } else { + kref_get(&entry->ref); + *dma_addr = entry->dma_addr; } - gvt_cache_add(info->vgpu, gfn, iova); - return iova; + mutex_unlock(&info->vgpu->vdev.cache_lock); + return 0; + +err_unmap: + gvt_dma_unmap_page(vgpu, gfn, *dma_addr); +err_unlock: + mutex_unlock(&info->vgpu->vdev.cache_lock); + return ret; +} + +static void __gvt_dma_release(struct kref *ref) +{ + struct gvt_dma *entry = container_of(ref, typeof(*entry), ref); + + gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr); + __gvt_cache_remove_entry(entry->vgpu, entry); +} + +void kvmgt_dma_unmap_guest_page(unsigned long handle, dma_addr_t dma_addr) +{ + struct kvmgt_guest_info *info; + struct gvt_dma *entry; + + if (!handle_valid(handle)) + return; + + info = (struct kvmgt_guest_info *)handle; + + mutex_lock(&info->vgpu->vdev.cache_lock); + entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr); + if (entry) + kref_put(&entry->ref, __gvt_dma_release); + mutex_unlock(&info->vgpu->vdev.cache_lock); } static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa, @@ -1676,11 +1747,13 @@ struct intel_gvt_mpt kvmgt_mpt = { .detach_vgpu = kvmgt_detach_vgpu, .inject_msi = kvmgt_inject_msi, .from_virt_to_mfn = kvmgt_virt_to_pfn, - .set_wp_page = kvmgt_write_protect_add, - .unset_wp_page = kvmgt_write_protect_remove, + .enable_page_track = kvmgt_page_track_add, + .disable_page_track = kvmgt_page_track_remove, .read_gpa = kvmgt_read_gpa, .write_gpa = kvmgt_write_gpa, .gfn_to_mfn = kvmgt_gfn_to_pfn, + .dma_map_guest_page = kvmgt_dma_map_guest_page, + .dma_unmap_guest_page = kvmgt_dma_unmap_guest_page, .set_opregion = kvmgt_set_opregion, .get_vfio_device = kvmgt_get_vfio_device, .put_vfio_device = kvmgt_put_vfio_device, diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 5c869e3fdf3b..11b71b33f1c0 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -76,10 +76,9 @@ static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa, else intel_vgpu_default_mmio_write(vgpu, offset, p_data, bytes); - } else if (reg_is_gtt(gvt, offset) && - vgpu->gtt.ggtt_mm->virtual_page_table) { + } else if (reg_is_gtt(gvt, offset)) { offset -= gvt->device_info.gtt_start_offset; - pt = vgpu->gtt.ggtt_mm->virtual_page_table + offset; + pt = vgpu->gtt.ggtt_mm->ggtt_mm.virtual_ggtt + offset; if (read) memcpy(p_data, pt, bytes); else @@ -125,7 +124,7 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, if (WARN_ON(!reg_is_gtt(gvt, offset + bytes - 1))) goto err; - ret = intel_vgpu_emulate_gtt_mmio_read(vgpu, offset, + ret = intel_vgpu_emulate_ggtt_mmio_read(vgpu, offset, p_data, bytes); if (ret) goto err; @@ -198,7 +197,7 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, if (WARN_ON(!reg_is_gtt(gvt, offset + bytes - 1))) goto err; - ret = intel_vgpu_emulate_gtt_mmio_write(vgpu, offset, + ret = intel_vgpu_emulate_ggtt_mmio_write(vgpu, offset, p_data, bytes); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 152df3d0291e..a5bac83d53a9 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -50,6 +50,8 
@@ #define RING_GFX_MODE(base) _MMIO((base) + 0x29c) #define VF_GUARDBAND _MMIO(0x83a4) +#define GEN9_MOCS_SIZE 64 + /* Raw offset is appended to each line for convenience. */ static struct engine_mmio gen8_engine_mmio_list[] __cacheline_aligned = { {RCS, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */ @@ -152,8 +154,8 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = { static struct { bool initialized; - u32 control_table[I915_NUM_ENGINES][64]; - u32 l3cc_table[32]; + u32 control_table[I915_NUM_ENGINES][GEN9_MOCS_SIZE]; + u32 l3cc_table[GEN9_MOCS_SIZE / 2]; } gen9_render_mocs; static void load_render_mocs(struct drm_i915_private *dev_priv) @@ -170,7 +172,7 @@ static void load_render_mocs(struct drm_i915_private *dev_priv) for (ring_id = 0; ring_id < ARRAY_SIZE(regs); ring_id++) { offset.reg = regs[ring_id]; - for (i = 0; i < 64; i++) { + for (i = 0; i < GEN9_MOCS_SIZE; i++) { gen9_render_mocs.control_table[ring_id][i] = I915_READ_FW(offset); offset.reg += 4; @@ -178,7 +180,7 @@ static void load_render_mocs(struct drm_i915_private *dev_priv) } offset.reg = 0xb020; - for (i = 0; i < 32; i++) { + for (i = 0; i < GEN9_MOCS_SIZE / 2; i++) { gen9_render_mocs.l3cc_table[i] = I915_READ_FW(offset); offset.reg += 4; @@ -186,6 +188,153 @@ static void load_render_mocs(struct drm_i915_private *dev_priv) gen9_render_mocs.initialized = true; } +static int +restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu, + struct i915_request *req) +{ + u32 *cs; + int ret; + struct engine_mmio *mmio; + struct intel_gvt *gvt = vgpu->gvt; + int ring_id = req->engine->id; + int count = gvt->engine_mmio_list.ctx_mmio_count[ring_id]; + + if (count == 0) + return 0; + + ret = req->engine->emit_flush(req, EMIT_BARRIER); + if (ret) + return ret; + + cs = intel_ring_begin(req, count * 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(count); + for (mmio = gvt->engine_mmio_list.mmio; + i915_mmio_reg_valid(mmio->reg); mmio++) { + if (mmio->ring_id != ring_id || + !mmio->in_context) + continue; + + *cs++ = i915_mmio_reg_offset(mmio->reg); + *cs++ = vgpu_vreg_t(vgpu, mmio->reg) | + (mmio->mask << 16); + gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, ring_id:%d\n", + *(cs-2), *(cs-1), vgpu->id, ring_id); + } + + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); + + ret = req->engine->emit_flush(req, EMIT_BARRIER); + if (ret) + return ret; + + return 0; +} + +static int +restore_render_mocs_control_for_inhibit(struct intel_vgpu *vgpu, + struct i915_request *req) +{ + unsigned int index; + u32 *cs; + + cs = intel_ring_begin(req, 2 * GEN9_MOCS_SIZE + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE); + + for (index = 0; index < GEN9_MOCS_SIZE; index++) { + *cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index)); + *cs++ = vgpu_vreg_t(vgpu, GEN9_GFX_MOCS(index)); + gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, ring_id:%d\n", + *(cs-2), *(cs-1), vgpu->id, req->engine->id); + + } + + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); + + return 0; +} + +static int +restore_render_mocs_l3cc_for_inhibit(struct intel_vgpu *vgpu, + struct i915_request *req) +{ + unsigned int index; + u32 *cs; + + cs = intel_ring_begin(req, 2 * GEN9_MOCS_SIZE / 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2); + + for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) { + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index)); + *cs++ = vgpu_vreg_t(vgpu, GEN9_LNCFCMOCS(index)); + 
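/* Each MI_LOAD_REGISTER_IMM pair is <register offset, value>, taken here from the vGPU's virtual L3CC registers. */ +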
gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n", + *(cs-2), *(cs-1), vgpu->id, req->engine->id); + + } + + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); + + return 0; +} + +/* + * Use lri command to initialize the mmio which is in context state image for + * inhibit context, it contains tracked engine mmio, render_mocs and + * render_mocs_l3cc. + */ +int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu, + struct i915_request *req) +{ + int ret; + u32 *cs; + + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); + + ret = restore_context_mmio_for_inhibit(vgpu, req); + if (ret) + goto out; + + /* no MOCS register in context except render engine */ + if (req->engine->id != RCS) + goto out; + + ret = restore_render_mocs_control_for_inhibit(vgpu, req); + if (ret) + goto out; + + ret = restore_render_mocs_l3cc_for_inhibit(vgpu, req); + if (ret) + goto out; + +out: + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); + + return ret; +} + static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; @@ -252,11 +401,14 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) return; + if (IS_KABYLAKE(dev_priv) && ring_id == RCS) + return; + if (!pre && !gen9_render_mocs.initialized) load_render_mocs(dev_priv); offset.reg = regs[ring_id]; - for (i = 0; i < 64; i++) { + for (i = 0; i < GEN9_MOCS_SIZE; i++) { if (pre) old_v = vgpu_vreg_t(pre, offset); else @@ -274,7 +426,7 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, if (ring_id == RCS) { l3_offset.reg = 0xb020; - for (i = 0; i < 32; i++) { + for (i = 0; i < GEN9_MOCS_SIZE / 2; i++) { if (pre) old_v = vgpu_vreg_t(pre, l3_offset); else @@ -294,6 +446,16 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, #define CTX_CONTEXT_CONTROL_VAL 0x03 +bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id) +{ + u32 *reg_state = ctx->engine[ring_id].lrc_reg_state; + u32 inhibit_mask = + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + + return inhibit_mask == + (reg_state[CTX_CONTEXT_CONTROL_VAL] & inhibit_mask); +} + /* Switch ring mmio values (context). */ static void switch_mmio(struct intel_vgpu *pre, struct intel_vgpu *next, @@ -301,9 +463,6 @@ static void switch_mmio(struct intel_vgpu *pre, { struct drm_i915_private *dev_priv; struct intel_vgpu_submission *s; - u32 *reg_state, ctx_ctrl; - u32 inhibit_mask = - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); struct engine_mmio *mmio; u32 old_v, new_v; @@ -311,10 +470,18 @@ static void switch_mmio(struct intel_vgpu *pre, if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) switch_mocs(pre, next, ring_id); - for (mmio = dev_priv->gvt->engine_mmio_list; + for (mmio = dev_priv->gvt->engine_mmio_list.mmio; i915_mmio_reg_valid(mmio->reg); mmio++) { if (mmio->ring_id != ring_id) continue; + /* + * No need to do save or restore of the mmio which is in context + * state image on kabylake, it's initialized by lri command and + * save or restore with context together. 
+ */ + if (IS_KABYLAKE(dev_priv) && mmio->in_context) + continue; + // save if (pre) { vgpu_vreg_t(pre, mmio->reg) = I915_READ_FW(mmio->reg); @@ -328,16 +495,13 @@ static void switch_mmio(struct intel_vgpu *pre, // restore if (next) { s = &next->submission; - reg_state = - s->shadow_ctx->engine[ring_id].lrc_reg_state; - ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL]; /* - * if it is an inhibit context, load in_context mmio - * into HW by mmio write. If it is not, skip this mmio - * write. + * No need to restore the mmio which is in the context state + * image if this is not an inhibit context; it will be restored + * together with the context. */ if (mmio->in_context && - (ctx_ctrl & inhibit_mask) != inhibit_mask) + !is_inhibit_context(s->shadow_ctx, ring_id)) continue; if (mmio->mask) @@ -408,8 +572,16 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre, */ void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt) { + struct engine_mmio *mmio; + if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) - gvt->engine_mmio_list = gen9_engine_mmio_list; + gvt->engine_mmio_list.mmio = gen9_engine_mmio_list; else - gvt->engine_mmio_list = gen8_engine_mmio_list; + gvt->engine_mmio_list.mmio = gen8_engine_mmio_list; + + for (mmio = gvt->engine_mmio_list.mmio; + i915_mmio_reg_valid(mmio->reg); mmio++) { + if (mmio->in_context) + gvt->engine_mmio_list.ctx_mmio_count[mmio->ring_id]++; + } } diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.h b/drivers/gpu/drm/i915/gvt/mmio_context.h index ca2c6a745673..0439eb8057a8 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.h +++ b/drivers/gpu/drm/i915/gvt/mmio_context.h @@ -49,4 +49,9 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre, void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt); +bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id); + +int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu, + struct i915_request *req); + #endif diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 81aff4eacbfe..32ffcd566cdd 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -154,54 +154,31 @@ static inline unsigned long intel_gvt_hypervisor_virt_to_mfn(void *p) } /** - * intel_gvt_hypervisor_enable - set a guest page to write-protected + * intel_gvt_hypervisor_enable_page_track - track a guest page * @vgpu: a vGPU - * @t: page track data structure + * @gfn: the gfn of guest * * Returns: * Zero on success, negative error code if failed. */ static inline int intel_gvt_hypervisor_enable_page_track( - struct intel_vgpu *vgpu, - struct intel_vgpu_page_track *t) + struct intel_vgpu *vgpu, unsigned long gfn) { - int ret; - - if (t->tracked) - return 0; - - ret = intel_gvt_host.mpt->set_wp_page(vgpu->handle, t->gfn); - if (ret) - return ret; - t->tracked = true; - atomic_inc(&vgpu->gtt.n_tracked_guest_page); - return 0; + return intel_gvt_host.mpt->enable_page_track(vgpu->handle, gfn); } /** - * intel_gvt_hypervisor_disable_page_track - remove the write-protection of a - * guest page + * intel_gvt_hypervisor_disable_page_track - untrack a guest page * @vgpu: a vGPU - * @t: page track data structure + * @gfn: the gfn of guest * * Returns: * Zero on success, negative error code if failed. 
*/ static inline int intel_gvt_hypervisor_disable_page_track( - struct intel_vgpu *vgpu, - struct intel_vgpu_page_track *t) + struct intel_vgpu *vgpu, unsigned long gfn) { - int ret; - - if (!t->tracked) - return 0; - - ret = intel_gvt_host.mpt->unset_wp_page(vgpu->handle, t->gfn); - if (ret) - return ret; - t->tracked = false; - atomic_dec(&vgpu->gtt.n_tracked_guest_page); - return 0; + return intel_gvt_host.mpt->disable_page_track(vgpu->handle, gfn); } /** @@ -251,6 +228,34 @@ static inline unsigned long intel_gvt_hypervisor_gfn_to_mfn( } /** + * intel_gvt_hypervisor_dma_map_guest_page - setup dma map for guest page + * @vgpu: a vGPU + * @gfn: guest pfn + * @dma_addr: retrieve allocated dma addr + * + * Returns: + * 0 on success, negative error code if failed. + */ +static inline int intel_gvt_hypervisor_dma_map_guest_page( + struct intel_vgpu *vgpu, unsigned long gfn, + dma_addr_t *dma_addr) +{ + return intel_gvt_host.mpt->dma_map_guest_page(vgpu->handle, gfn, + dma_addr); +} + +/** + * intel_gvt_hypervisor_dma_unmap_guest_page - cancel dma map for guest page + * @vgpu: a vGPU + * @dma_addr: the mapped dma addr + */ +static inline void intel_gvt_hypervisor_dma_unmap_guest_page( + struct intel_vgpu *vgpu, dma_addr_t dma_addr) +{ + intel_gvt_host.mpt->dma_unmap_guest_page(vgpu->handle, dma_addr); +} + +/** * intel_gvt_hypervisor_map_gfn_to_mfn - map a GFN region to MFN * @vgpu: a vGPU * @gfn: guest PFN diff --git a/drivers/gpu/drm/i915/gvt/page_track.c b/drivers/gpu/drm/i915/gvt/page_track.c new file mode 100644 index 000000000000..53e2bd79c97d --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/page_track.c @@ -0,0 +1,184 @@ +/* + * Copyright(c) 2011-2017 Intel Corporation. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "i915_drv.h" +#include "gvt.h" + +/** + * intel_vgpu_find_page_track - find the page track record of a guest page + * @vgpu: a vGPU + * @gfn: the gfn of guest page + * + * Returns: + * A pointer to struct intel_vgpu_page_track if found, otherwise NULL is returned. + */ +struct intel_vgpu_page_track *intel_vgpu_find_page_track( + struct intel_vgpu *vgpu, unsigned long gfn) +{ + return radix_tree_lookup(&vgpu->page_track_tree, gfn); +} + +/** + * intel_vgpu_register_page_track - register a guest page to be tracked + * @vgpu: a vGPU + * @gfn: the gfn of guest page + * + * Returns: + * zero on success, negative error code if failed. 
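+ * + * Note that registration only records the handler; the page is not + * write-protected until intel_vgpu_enable_page_track() is called for + * this gfn.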
+ +/** + * intel_vgpu_register_page_track - register a guest page to be tracked + * @vgpu: a vGPU + * @gfn: the gfn of guest page + * @handler: the callback run on a guest write to the tracked page + * @priv: private data attached to the track record + * + * Returns: + * zero on success, negative error code if failed. + */ +int intel_vgpu_register_page_track(struct intel_vgpu *vgpu, unsigned long gfn, + gvt_page_track_handler_t handler, void *priv) +{ + struct intel_vgpu_page_track *track; + int ret; + + track = intel_vgpu_find_page_track(vgpu, gfn); + if (track) + return -EEXIST; + + track = kzalloc(sizeof(*track), GFP_KERNEL); + if (!track) + return -ENOMEM; + + track->handler = handler; + track->priv_data = priv; + + ret = radix_tree_insert(&vgpu->page_track_tree, gfn, track); + if (ret) { + kfree(track); + return ret; + } + + return 0; +} + +/** + * intel_vgpu_unregister_page_track - unregister the tracked guest page + * @vgpu: a vGPU + * @gfn: the gfn of guest page + * + */ +void intel_vgpu_unregister_page_track(struct intel_vgpu *vgpu, + unsigned long gfn) +{ + struct intel_vgpu_page_track *track; + + track = radix_tree_delete(&vgpu->page_track_tree, gfn); + if (track) { + if (track->tracked) + intel_gvt_hypervisor_disable_page_track(vgpu, gfn); + kfree(track); + } +} + +/** + * intel_vgpu_enable_page_track - set write-protection on guest page + * @vgpu: a vGPU + * @gfn: the gfn of guest page + * + * Returns: + * zero on success, negative error code if failed. + */ +int intel_vgpu_enable_page_track(struct intel_vgpu *vgpu, unsigned long gfn) +{ + struct intel_vgpu_page_track *track; + int ret; + + track = intel_vgpu_find_page_track(vgpu, gfn); + if (!track) + return -ENXIO; + + if (track->tracked) + return 0; + + ret = intel_gvt_hypervisor_enable_page_track(vgpu, gfn); + if (ret) + return ret; + track->tracked = true; + return 0; +} + +/** + * intel_vgpu_disable_page_track - cancel write-protection on guest page + * @vgpu: a vGPU + * @gfn: the gfn of guest page + * + * Returns: + * zero on success, negative error code if failed. + */ +int intel_vgpu_disable_page_track(struct intel_vgpu *vgpu, unsigned long gfn) +{ + struct intel_vgpu_page_track *track; + int ret; + + track = intel_vgpu_find_page_track(vgpu, gfn); + if (!track) + return -ENXIO; + + if (!track->tracked) + return 0; + + ret = intel_gvt_hypervisor_disable_page_track(vgpu, gfn); + if (ret) + return ret; + track->tracked = false; + return 0; +} + +/** + * intel_vgpu_page_track_handler - called on a write to a write-protected page + * @vgpu: a vGPU + * @gpa: the gpa of this write + * @data: the written data + * @bytes: the length of this write + * + * Returns: + * zero on success, negative error code if failed. + */ +int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa, + void *data, unsigned int bytes) +{ + struct intel_gvt *gvt = vgpu->gvt; + struct intel_vgpu_page_track *page_track; + int ret = 0; + + mutex_lock(&gvt->lock); + + page_track = intel_vgpu_find_page_track(vgpu, gpa >> PAGE_SHIFT); + if (!page_track) { + ret = -ENXIO; + goto out; + } + + if (unlikely(vgpu->failsafe)) { + /* Remove write protection to prevent future traps. */ + intel_vgpu_disable_page_track(vgpu, gpa >> PAGE_SHIFT); + } else { + ret = page_track->handler(page_track, gpa, data, bytes); + if (ret) + gvt_err("guest page write error, gpa %llx\n", gpa); + } + +out: + mutex_unlock(&gvt->lock); + return ret; +} diff --git a/drivers/gpu/drm/i915/gvt/page_track.h b/drivers/gpu/drm/i915/gvt/page_track.h new file mode 100644 index 000000000000..fa607a71c3c0 --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/page_track.h @@ -0,0 +1,56 @@ +/* + * Copyright(c) 2011-2017 Intel Corporation. All rights reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _GVT_PAGE_TRACK_H_ +#define _GVT_PAGE_TRACK_H_ + +struct intel_vgpu_page_track; + +typedef int (*gvt_page_track_handler_t)( + struct intel_vgpu_page_track *page_track, + u64 gpa, void *data, int bytes); + +/* Track record for a write-protected guest page. */ +struct intel_vgpu_page_track { + gvt_page_track_handler_t handler; + bool tracked; + void *priv_data; +}; + +struct intel_vgpu_page_track *intel_vgpu_find_page_track( + struct intel_vgpu *vgpu, unsigned long gfn); + +int intel_vgpu_register_page_track(struct intel_vgpu *vgpu, + unsigned long gfn, gvt_page_track_handler_t handler, + void *priv); +void intel_vgpu_unregister_page_track(struct intel_vgpu *vgpu, + unsigned long gfn); + +int intel_vgpu_enable_page_track(struct intel_vgpu *vgpu, unsigned long gfn); +int intel_vgpu_disable_page_track(struct intel_vgpu *vgpu, unsigned long gfn); + +int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa, + void *data, unsigned int bytes); + +#endif diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index cc1ce361cd76..75b7bc7b344c 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -103,9 +103,8 @@ static void gvt_balance_timeslice(struct gvt_sched_data *sched_data) list_for_each(pos, &sched_data->lru_runq_head) { vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list); - fair_timeslice = ms_to_ktime(GVT_TS_BALANCE_PERIOD_MS) * - vgpu_data->sched_ctl.weight / - total_weight; + fair_timeslice = ktime_divns(ms_to_ktime(GVT_TS_BALANCE_PERIOD_MS), + total_weight) * vgpu_data->sched_ctl.weight; vgpu_data->allocated_ts = fair_timeslice; vgpu_data->left_ts = vgpu_data->allocated_ts; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index d74d6f05c62c..638abe84857c 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -52,6 +52,29 @@ static void set_context_pdp_root_pointer( pdp_pair[i].val = pdp[7 - i]; } +static void update_shadow_pdps(struct intel_vgpu_workload *workload) +{ + struct intel_vgpu *vgpu = workload->vgpu; + int ring_id = workload->ring_id; + struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; + struct drm_i915_gem_object *ctx_obj = + shadow_ctx->engine[ring_id].state->obj; + struct execlist_ring_context *shadow_ring_context; + struct page *page; + + if 
(WARN_ON(!workload->shadow_mm)) + return; + + if (WARN_ON(!atomic_read(&workload->shadow_mm->pincount))) + return; + + page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); + shadow_ring_context = kmap(page); + set_context_pdp_root_pointer(shadow_ring_context, + (void *)workload->shadow_mm->ppgtt_mm.shadow_pdps); + kunmap(page); +} + /* * when populating shadow ctx from guest, we should not overrride oa related * registers, so that they will not be overlapped by guest oa configs. Thus @@ -150,8 +173,14 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) #define COPY_REG(name) \ intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \ + RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4) +#define COPY_REG_MASKED(name) {\ + intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \ + + RING_CTX_OFF(name.val),\ + &shadow_ring_context->name.val, 4);\ + shadow_ring_context->name.val |= 0xffff << 16;\ + } - COPY_REG(ctx_ctrl); + COPY_REG_MASKED(ctx_ctrl); COPY_REG(ctx_timestamp); if (ring_id == RCS) { @@ -160,9 +189,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) COPY_REG(rcs_indirect_ctx_offset); } #undef COPY_REG - - set_context_pdp_root_pointer(shadow_ring_context, - workload->shadow_mm->shadow_page_table); +#undef COPY_REG_MASKED intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa + @@ -176,7 +203,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) return 0; } -static inline bool is_gvt_request(struct drm_i915_gem_request *req) +static inline bool is_gvt_request(struct i915_request *req) { return i915_gem_context_force_single_submission(req->ctx); } @@ -198,7 +225,7 @@ static void save_ring_hw_state(struct intel_vgpu *vgpu, int ring_id) static int shadow_context_status_change(struct notifier_block *nb, unsigned long action, void *data) { - struct drm_i915_gem_request *req = (struct drm_i915_gem_request *)data; + struct i915_request *req = data; struct intel_gvt *gvt = container_of(nb, struct intel_gvt, shadow_ctx_notifier_block[req->engine->id]); struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; @@ -275,6 +302,11 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) struct intel_vgpu *vgpu = workload->vgpu; void *shadow_ring_buffer_va; u32 *cs; + struct i915_request *req = workload->req; + + if (IS_KABYLAKE(req->i915) && + is_inhibit_context(req->ctx, req->engine->id)) + intel_vgpu_restore_inhibit_context(vgpu, req); /* allocate shadow ring buffer */ cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32)); @@ -383,13 +415,13 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload) int ring_id = workload->ring_id; struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; struct intel_engine_cs *engine = dev_priv->engine[ring_id]; - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; struct i915_gem_context *shadow_ctx = s->shadow_ctx; int ret; - rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx); + rq = i915_request_alloc(dev_priv->engine[ring_id], shadow_ctx); if (IS_ERR(rq)) { gvt_vgpu_err("fail to allocate gem request\n"); ret = PTR_ERR(rq); @@ -398,7 +430,7 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload) gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq); - workload->req = i915_gem_request_get(rq); + workload->req = 
i915_request_get(rq); ret = copy_workload_to_ring_buffer(workload); if (ret) goto err_unpin; @@ -565,6 +597,8 @@ static int prepare_workload(struct intel_vgpu_workload *workload) return ret; } + update_shadow_pdps(workload); + ret = intel_vgpu_sync_oos_pages(workload->vgpu); if (ret) { gvt_vgpu_err("fail to vgpu sync oos pages\n"); @@ -643,7 +677,7 @@ out: if (!IS_ERR_OR_NULL(workload->req)) { gvt_dbg_sched("ring id %d submit workload to i915 %p\n", ring_id, workload->req); - i915_add_request(workload->req); + i915_request_add(workload->req); workload->dispatched = true; } @@ -830,7 +864,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) workload->status = 0; } - i915_gem_request_put(fetch_and_zero(&workload->req)); + i915_request_put(fetch_and_zero(&workload->req)); if (!workload->status && !(vgpu->resetting_eng & ENGINE_MASK(ring_id))) { @@ -947,7 +981,7 @@ static int workload_thread(void *priv) gvt_dbg_sched("ring id %d wait workload %p\n", workload->ring_id, workload); - i915_wait_request(workload->req, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_wait(workload->req, 0, MAX_SCHEDULE_TIMEOUT); complete: gvt_dbg_sched("will complete workload %p, status: %d\n", @@ -1195,7 +1229,7 @@ void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload) struct intel_vgpu_submission *s = &workload->vgpu->submission; if (workload->shadow_mm) - intel_gvt_mm_unreference(workload->shadow_mm); + intel_vgpu_mm_put(workload->shadow_mm); kmem_cache_free(s->workloads, workload); } @@ -1244,32 +1278,27 @@ static int prepare_mm(struct intel_vgpu_workload *workload) struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc; struct intel_vgpu_mm *mm; struct intel_vgpu *vgpu = workload->vgpu; - int page_table_level; - u32 pdp[8]; + intel_gvt_gtt_type_t root_entry_type; + u64 pdps[GVT_RING_CTX_NR_PDPS]; - if (desc->addressing_mode == 1) { /* legacy 32-bit */ - page_table_level = 3; - } else if (desc->addressing_mode == 3) { /* legacy 64 bit */ - page_table_level = 4; - } else { + switch (desc->addressing_mode) { + case 1: /* legacy 32-bit */ + root_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY; + break; + case 3: /* legacy 64-bit */ + root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY; + break; + default: gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n"); return -EINVAL; } - read_guest_pdps(workload->vgpu, workload->ring_context_gpa, pdp); + read_guest_pdps(workload->vgpu, workload->ring_context_gpa, (void *)pdps); - mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, page_table_level, pdp); - if (mm) { - intel_gvt_mm_reference(mm); - } else { + mm = intel_vgpu_get_ppgtt_mm(workload->vgpu, root_entry_type, pdps); + if (IS_ERR(mm)) + return PTR_ERR(mm); - mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT, - pdp, page_table_level, 0); - if (IS_ERR(mm)) { - gvt_vgpu_err("fail to create mm object.\n"); - return PTR_ERR(mm); - } - } workload->shadow_mm = mm; return 0; } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index a79a4f60637e..486ed57a4ad1 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -80,7 +80,7 @@ struct intel_shadow_wa_ctx { struct intel_vgpu_workload { struct intel_vgpu *vgpu; int ring_id; - struct drm_i915_gem_request *req; + struct i915_request *req; /* if this workload has been dispatched to i915? 
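The prepare_mm() rework above decodes the context descriptor's addressing-mode field into a root-entry type rather than a raw page-table level. In sketch form (the values come from the hunk; the enum wrapper and the glosses on what each root type implies are the editor's):

/* desc->addressing_mode, as accepted by prepare_mm() above: */
enum sample_ctx_addressing_mode {
	SAMPLE_ADDR_MODE_LEGACY_32BIT = 1, /* GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
					    * 3-level walk, four PDP roots */
	SAMPLE_ADDR_MODE_LEGACY_64BIT = 3, /* GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
					    * 4-level walk, one PML4 root */
};

Carrying the type instead of a level also lets the new intel_vgpu_get_ppgtt_mm() helper fold the old find-then-create dance into a single call.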
*/ bool dispatched; bool shadowed; diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h index 736bd2bc5127..82093f1e8612 100644 --- a/drivers/gpu/drm/i915/gvt/trace.h +++ b/drivers/gpu/drm/i915/gvt/trace.h @@ -113,10 +113,10 @@ TRACE_EVENT(gma_index, ); TRACE_EVENT(gma_translate, - TP_PROTO(int id, char *type, int ring_id, int pt_level, + TP_PROTO(int id, char *type, int ring_id, int root_entry_type, unsigned long gma, unsigned long gpa), - TP_ARGS(id, type, ring_id, pt_level, gma, gpa), + TP_ARGS(id, type, ring_id, root_entry_type, gma, gpa), TP_STRUCT__entry( __array(char, buf, MAX_BUF_LEN) @@ -124,8 +124,8 @@ TRACE_EVENT(gma_translate, TP_fast_assign( snprintf(__entry->buf, MAX_BUF_LEN, - "VM%d %s ring %d pt_level %d gma 0x%lx -> gpa 0x%lx\n", - id, type, ring_id, pt_level, gma, gpa); + "VM%d %s ring %d root_entry_type %d gma 0x%lx -> gpa 0x%lx\n", + id, type, ring_id, root_entry_type, gma, gpa); ), TP_printk("%s", __entry->buf) @@ -168,7 +168,7 @@ TRACE_EVENT(spt_change, TP_printk("%s", __entry->buf) ); -TRACE_EVENT(gpt_change, +TRACE_EVENT(spt_guest_change, TP_PROTO(int id, const char *tag, void *spt, int type, u64 v, unsigned long index), diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index b87b19d8443c..2e0a02a80fe4 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -354,6 +354,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, vgpu->gvt = gvt; vgpu->sched_ctl.weight = param->weight; INIT_LIST_HEAD(&vgpu->dmabuf_obj_list_head); + INIT_RADIX_TREE(&vgpu->page_track_tree, GFP_KERNEL); idr_init(&vgpu->object_idr); intel_vgpu_init_cfg_space(vgpu, param->primary); @@ -521,6 +522,7 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, /* full GPU reset or device model level reset */ if (engine_mask == ALL_ENGINES || dmlr) { intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0); + intel_vgpu_invalidate_ppgtt(vgpu); /*fence will not be reset during virtual reset */ if (dmlr) { intel_vgpu_reset_gtt(vgpu); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e968aeae1d84..89f7ff2c652e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -49,6 +49,7 @@ static int i915_capabilities(struct seq_file *m, void *data) intel_device_info_dump_flags(info, &p); intel_device_info_dump_runtime(info, &p); + intel_driver_caps_print(&dev_priv->caps, &p); kernel_param_lock(THIS_MODULE); i915_params_dump(&i915_modparams, &p); @@ -149,8 +150,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) get_global_flag(obj), get_pin_mapped_flag(obj), obj->base.size / 1024, - obj->base.read_domains, - obj->base.write_domain, + obj->read_domains, + obj->write_domain, i915_cache_level_str(dev_priv, obj->cache_level), obj->mm.dirty ? " dirty" : "", obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : ""); @@ -518,7 +519,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) list_for_each_entry_reverse(file, &dev->filelist, lhead) { struct file_stats stats; struct drm_i915_file_private *file_priv = file->driver_priv; - struct drm_i915_gem_request *request; + struct i915_request *request; struct task_struct *task; mutex_lock(&dev->struct_mutex); @@ -535,7 +536,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) * Therefore, we need to protect this ->comm access using RCU. 
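The comment above describes the classic RCU read-side pattern for task->comm. Distilled into isolation (a sketch only; pid stands for whichever struct pid the request's context holds):

struct task_struct *task;

rcu_read_lock();
task = pid_task(pid, PIDTYPE_PID);	/* may legitimately return NULL */
/* task, and thus task->comm, is only stable inside this read section */
seq_printf(m, "client: %s\n", task ? task->comm : "<unknown>");
rcu_read_unlock();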
*/ request = list_first_entry_or_null(&file_priv->mm.request_list, - struct drm_i915_gem_request, + struct i915_request, client_link); rcu_read_lock(); task = pid_task(request && request->ctx->pid ? @@ -645,6 +646,56 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data) return 0; } +static void gen8_display_interrupt_info(struct seq_file *m) +{ + struct drm_i915_private *dev_priv = node_to_i915(m->private); + int pipe; + + for_each_pipe(dev_priv, pipe) { + enum intel_display_power_domain power_domain; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, + power_domain)) { + seq_printf(m, "Pipe %c power disabled\n", + pipe_name(pipe)); + continue; + } + seq_printf(m, "Pipe %c IMR:\t%08x\n", + pipe_name(pipe), + I915_READ(GEN8_DE_PIPE_IMR(pipe))); + seq_printf(m, "Pipe %c IIR:\t%08x\n", + pipe_name(pipe), + I915_READ(GEN8_DE_PIPE_IIR(pipe))); + seq_printf(m, "Pipe %c IER:\t%08x\n", + pipe_name(pipe), + I915_READ(GEN8_DE_PIPE_IER(pipe))); + + intel_display_power_put(dev_priv, power_domain); + } + + seq_printf(m, "Display Engine port interrupt mask:\t%08x\n", + I915_READ(GEN8_DE_PORT_IMR)); + seq_printf(m, "Display Engine port interrupt identity:\t%08x\n", + I915_READ(GEN8_DE_PORT_IIR)); + seq_printf(m, "Display Engine port interrupt enable:\t%08x\n", + I915_READ(GEN8_DE_PORT_IER)); + + seq_printf(m, "Display Engine misc interrupt mask:\t%08x\n", + I915_READ(GEN8_DE_MISC_IMR)); + seq_printf(m, "Display Engine misc interrupt identity:\t%08x\n", + I915_READ(GEN8_DE_MISC_IIR)); + seq_printf(m, "Display Engine misc interrupt enable:\t%08x\n", + I915_READ(GEN8_DE_MISC_IER)); + + seq_printf(m, "PCU interrupt mask:\t%08x\n", + I915_READ(GEN8_PCU_IMR)); + seq_printf(m, "PCU interrupt identity:\t%08x\n", + I915_READ(GEN8_PCU_IIR)); + seq_printf(m, "PCU interrupt enable:\t%08x\n", + I915_READ(GEN8_PCU_IER)); +} + static int i915_interrupt_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -708,6 +759,27 @@ static int i915_interrupt_info(struct seq_file *m, void *data) I915_READ(GEN8_PCU_IIR)); seq_printf(m, "PCU interrupt enable:\t%08x\n", I915_READ(GEN8_PCU_IER)); + } else if (INTEL_GEN(dev_priv) >= 11) { + seq_printf(m, "Master Interrupt Control: %08x\n", + I915_READ(GEN11_GFX_MSTR_IRQ)); + + seq_printf(m, "Render/Copy Intr Enable: %08x\n", + I915_READ(GEN11_RENDER_COPY_INTR_ENABLE)); + seq_printf(m, "VCS/VECS Intr Enable: %08x\n", + I915_READ(GEN11_VCS_VECS_INTR_ENABLE)); + seq_printf(m, "GUC/SG Intr Enable:\t %08x\n", + I915_READ(GEN11_GUC_SG_INTR_ENABLE)); + seq_printf(m, "GPM/WGBOXPERF Intr Enable: %08x\n", + I915_READ(GEN11_GPM_WGBOXPERF_INTR_ENABLE)); + seq_printf(m, "Crypto Intr Enable:\t %08x\n", + I915_READ(GEN11_CRYPTO_RSVD_INTR_ENABLE)); + seq_printf(m, "GUnit/CSME Intr Enable:\t %08x\n", + I915_READ(GEN11_GUNIT_CSME_INTR_ENABLE)); + + seq_printf(m, "Display Interrupt Control:\t%08x\n", + I915_READ(GEN11_DISPLAY_INT_CTL)); + + gen8_display_interrupt_info(m); } else if (INTEL_GEN(dev_priv) >= 8) { seq_printf(m, "Master Interrupt Control:\t%08x\n", I915_READ(GEN8_MASTER_IRQ)); @@ -721,49 +793,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data) i, I915_READ(GEN8_GT_IER(i))); } - for_each_pipe(dev_priv, pipe) { - enum intel_display_power_domain power_domain; - - power_domain = POWER_DOMAIN_PIPE(pipe); - if (!intel_display_power_get_if_enabled(dev_priv, - power_domain)) { - seq_printf(m, "Pipe %c power disabled\n", - pipe_name(pipe)); - continue; - } - seq_printf(m, 
"Pipe %c IMR:\t%08x\n", - pipe_name(pipe), - I915_READ(GEN8_DE_PIPE_IMR(pipe))); - seq_printf(m, "Pipe %c IIR:\t%08x\n", - pipe_name(pipe), - I915_READ(GEN8_DE_PIPE_IIR(pipe))); - seq_printf(m, "Pipe %c IER:\t%08x\n", - pipe_name(pipe), - I915_READ(GEN8_DE_PIPE_IER(pipe))); - - intel_display_power_put(dev_priv, power_domain); - } - - seq_printf(m, "Display Engine port interrupt mask:\t%08x\n", - I915_READ(GEN8_DE_PORT_IMR)); - seq_printf(m, "Display Engine port interrupt identity:\t%08x\n", - I915_READ(GEN8_DE_PORT_IIR)); - seq_printf(m, "Display Engine port interrupt enable:\t%08x\n", - I915_READ(GEN8_DE_PORT_IER)); - - seq_printf(m, "Display Engine misc interrupt mask:\t%08x\n", - I915_READ(GEN8_DE_MISC_IMR)); - seq_printf(m, "Display Engine misc interrupt identity:\t%08x\n", - I915_READ(GEN8_DE_MISC_IIR)); - seq_printf(m, "Display Engine misc interrupt enable:\t%08x\n", - I915_READ(GEN8_DE_MISC_IER)); - - seq_printf(m, "PCU interrupt mask:\t%08x\n", - I915_READ(GEN8_PCU_IMR)); - seq_printf(m, "PCU interrupt identity:\t%08x\n", - I915_READ(GEN8_PCU_IIR)); - seq_printf(m, "PCU interrupt enable:\t%08x\n", - I915_READ(GEN8_PCU_IER)); + gen8_display_interrupt_info(m); } else if (IS_VALLEYVIEW(dev_priv)) { seq_printf(m, "Display IER:\t%08x\n", I915_READ(VLV_IER)); @@ -845,13 +875,35 @@ static int i915_interrupt_info(struct seq_file *m, void *data) seq_printf(m, "Graphics Interrupt mask: %08x\n", I915_READ(GTIMR)); } - if (INTEL_GEN(dev_priv) >= 6) { + + if (INTEL_GEN(dev_priv) >= 11) { + seq_printf(m, "RCS Intr Mask:\t %08x\n", + I915_READ(GEN11_RCS0_RSVD_INTR_MASK)); + seq_printf(m, "BCS Intr Mask:\t %08x\n", + I915_READ(GEN11_BCS_RSVD_INTR_MASK)); + seq_printf(m, "VCS0/VCS1 Intr Mask:\t %08x\n", + I915_READ(GEN11_VCS0_VCS1_INTR_MASK)); + seq_printf(m, "VCS2/VCS3 Intr Mask:\t %08x\n", + I915_READ(GEN11_VCS2_VCS3_INTR_MASK)); + seq_printf(m, "VECS0/VECS1 Intr Mask:\t %08x\n", + I915_READ(GEN11_VECS0_VECS1_INTR_MASK)); + seq_printf(m, "GUC/SG Intr Mask:\t %08x\n", + I915_READ(GEN11_GUC_SG_INTR_MASK)); + seq_printf(m, "GPM/WGBOXPERF Intr Mask: %08x\n", + I915_READ(GEN11_GPM_WGBOXPERF_INTR_MASK)); + seq_printf(m, "Crypto Intr Mask:\t %08x\n", + I915_READ(GEN11_CRYPTO_RSVD_INTR_MASK)); + seq_printf(m, "Gunit/CSME Intr Mask:\t %08x\n", + I915_READ(GEN11_GUNIT_CSME_INTR_MASK)); + + } else if (INTEL_GEN(dev_priv) >= 6) { for_each_engine(engine, dev_priv, id) { seq_printf(m, "Graphics Interrupt mask (%s): %08x\n", engine->name, I915_READ_IMR(engine)); } } + intel_runtime_pm_put(dev_priv); return 0; @@ -988,7 +1040,10 @@ i915_next_seqno_set(void *data, u64 val) if (ret) return ret; + intel_runtime_pm_get(dev_priv); ret = i915_gem_set_global_seqno(dev, val); + intel_runtime_pm_put(dev_priv); + mutex_unlock(&dev->struct_mutex); return ret; @@ -1457,19 +1512,6 @@ static int gen6_drpc_info(struct seq_file *m) struct drm_i915_private *dev_priv = node_to_i915(m->private); u32 gt_core_status, rcctl1, rc6vids = 0; u32 gen9_powergate_enable = 0, gen9_powergate_status = 0; - unsigned forcewake_count; - int count = 0; - - forcewake_count = READ_ONCE(dev_priv->uncore.fw_domain[FW_DOMAIN_ID_RENDER].wake_count); - if (forcewake_count) { - seq_puts(m, "RC information inaccurate because somebody " - "holds a forcewake reference \n"); - } else { - /* NB: we cannot use forcewake, else we read the wrong values */ - while (count++ < 50 && (I915_READ_NOTRACE(FORCEWAKE_ACK) & 1)) - udelay(10); - seq_printf(m, "RC information accurate: %s\n", yesno(count < 51)); - } gt_core_status = I915_READ_FW(GEN6_GT_CORE_STATUS); 
trace_i915_reg_rw(false, GEN6_GT_CORE_STATUS, gt_core_status, 4, true); @@ -1480,9 +1522,12 @@ static int gen6_drpc_info(struct seq_file *m) gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS); } - mutex_lock(&dev_priv->pcu_lock); - sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); - mutex_unlock(&dev_priv->pcu_lock); + if (INTEL_GEN(dev_priv) <= 7) { + mutex_lock(&dev_priv->pcu_lock); + sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, + &rc6vids); + mutex_unlock(&dev_priv->pcu_lock); + } seq_printf(m, "RC1e Enabled: %s\n", yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE)); @@ -1538,12 +1583,15 @@ static int gen6_drpc_info(struct seq_file *m) print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p); print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp); - seq_printf(m, "RC6 voltage: %dmV\n", - GEN6_DECODE_RC6_VID(((rc6vids >> 0) & 0xff))); - seq_printf(m, "RC6+ voltage: %dmV\n", - GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff))); - seq_printf(m, "RC6++ voltage: %dmV\n", - GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff))); + if (INTEL_GEN(dev_priv) <= 7) { + seq_printf(m, "RC6 voltage: %dmV\n", + GEN6_DECODE_RC6_VID(((rc6vids >> 0) & 0xff))); + seq_printf(m, "RC6+ voltage: %dmV\n", + GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff))); + seq_printf(m, "RC6++ voltage: %dmV\n", + GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff))); + } + return i915_forcewake_domains(m, NULL); } @@ -1596,7 +1644,7 @@ static int i915_fbc_status(struct seq_file *m, void *unused) seq_printf(m, "FBC disabled: %s\n", fbc->no_fbc_reason); if (fbc->work.scheduled) - seq_printf(m, "FBC worker scheduled on vblank %u, now %llu\n", + seq_printf(m, "FBC worker scheduled on vblank %llu, now %llu\n", fbc->work.scheduled_vblank, drm_crtc_vblank_count(&fbc->crtc->base)); @@ -2335,7 +2383,6 @@ static int i915_guc_info(struct seq_file *m, void *data) return -ENODEV; GEM_BUG_ON(!guc->execbuf_client); - GEM_BUG_ON(!guc->preempt_client); seq_printf(m, "Doorbell map:\n"); seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap); @@ -2343,8 +2390,11 @@ static int i915_guc_info(struct seq_file *m, void *data) seq_printf(m, "\nGuC execbuf client @ %p:\n", guc->execbuf_client); i915_guc_client_info(m, dev_priv, guc->execbuf_client); - seq_printf(m, "\nGuC preempt client @ %p:\n", guc->preempt_client); - i915_guc_client_info(m, dev_priv, guc->preempt_client); + if (guc->preempt_client) { + seq_printf(m, "\nGuC preempt client @ %p:\n", + guc->preempt_client); + i915_guc_client_info(m, dev_priv, guc->preempt_client); + } i915_guc_log_info(m, dev_priv); @@ -2464,24 +2514,11 @@ static int i915_guc_log_control_get(void *data, u64 *val) static int i915_guc_log_control_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; - int ret; if (!HAS_GUC(dev_priv)) return -ENODEV; - if (!dev_priv->guc.log.vma) - return -EINVAL; - - ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); - if (ret) - return ret; - - intel_runtime_pm_get(dev_priv); - ret = i915_guc_log_control(dev_priv, val); - intel_runtime_pm_put(dev_priv); - - mutex_unlock(&dev_priv->drm.struct_mutex); - return ret; + return intel_guc_log_control(&dev_priv->guc, val); } DEFINE_SIMPLE_ATTRIBUTE(i915_guc_log_control_fops, @@ -2518,15 +2555,19 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) u32 stat[3]; enum pipe pipe; bool enabled = false; + bool sink_support; if (!HAS_PSR(dev_priv)) return -ENODEV; + sink_support = dev_priv->psr.sink_support; + seq_printf(m, "Sink_Support: %s\n", yesno(sink_support)); + if 
(!sink_support) + return 0; + intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->psr.lock); - seq_printf(m, "Sink_Support: %s\n", yesno(dev_priv->psr.sink_support)); - seq_printf(m, "Source_OK: %s\n", yesno(dev_priv->psr.source_ok)); seq_printf(m, "Enabled: %s\n", yesno((bool)dev_priv->psr.enabled)); seq_printf(m, "Active: %s\n", yesno(dev_priv->psr.active)); seq_printf(m, "Busy frontbuffer bits: 0x%03x\n", @@ -2584,9 +2625,9 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) seq_printf(m, "Performance_Counter: %u\n", psrperf); } if (dev_priv->psr.psr2_support) { - u32 psr2 = I915_READ(EDP_PSR2_STATUS_CTL); + u32 psr2 = I915_READ(EDP_PSR2_STATUS); - seq_printf(m, "EDP_PSR2_STATUS_CTL: %x [%s]\n", + seq_printf(m, "EDP_PSR2_STATUS: %x [%s]\n", psr2, psr2_live_status(psr2)); } mutex_unlock(&dev_priv->psr.lock); @@ -2710,7 +2751,8 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused) if (!HAS_RUNTIME_PM(dev_priv)) seq_puts(m, "Runtime power management not supported\n"); - seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake)); + seq_printf(m, "GPU idle: %s (epoch %u)\n", + yesno(!dev_priv->gt.awake), dev_priv->gt.epoch); seq_printf(m, "IRQs disabled: %s\n", yesno(!intel_irqs_enabled(dev_priv))); #ifdef CONFIG_PM @@ -3143,8 +3185,8 @@ static int i915_engine_info(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); - seq_printf(m, "GT awake? %s\n", - yesno(dev_priv->gt.awake)); + seq_printf(m, "GT awake? %s (epoch %u)\n", + yesno(dev_priv->gt.awake), dev_priv->gt.epoch); seq_printf(m, "Global active requests: %d\n", dev_priv->gt.active_requests); seq_printf(m, "CS timestamp frequency: %u kHz\n", @@ -3159,6 +3201,16 @@ static int i915_engine_info(struct seq_file *m, void *unused) return 0; } +static int i915_rcs_topology(struct seq_file *m, void *unused) +{ + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + intel_device_info_dump_topology(&INTEL_INFO(dev_priv)->sseu, &p); + + return 0; +} + static int i915_shrinker_info(struct seq_file *m, void *unused) { struct drm_i915_private *i915 = node_to_i915(m->private); @@ -3363,7 +3415,10 @@ static void drrs_status_per_crtc(struct seq_file *m, /* disable_drrs() will make drrs->dp NULL */ if (!drrs->dp) { - seq_puts(m, "Idleness DRRS: Disabled"); + seq_puts(m, "Idleness DRRS: Disabled\n"); + if (dev_priv->psr.enabled) + seq_puts(m, + "\tAs PSR is enabled, DRRS is not enabled\n"); mutex_unlock(&drrs->mutex); return; } @@ -3932,7 +3987,8 @@ i915_wedged_set(void *data, u64 val) engine->hangcheck.stalled = true; } - i915_handle_error(i915, val, "Manually setting wedged to %llu", val); + i915_handle_error(i915, val, "Manually set wedged engine mask = %llx", + val); wait_on_bit(&i915->gpu_error.flags, I915_RESET_HANDOFF, @@ -4066,7 +4122,7 @@ i915_drop_caches_set(void *data, u64 val) I915_WAIT_LOCKED); if (val & DROP_RETIRE) - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); mutex_unlock(&dev->struct_mutex); } @@ -4085,10 +4141,8 @@ i915_drop_caches_set(void *data, u64 val) if (val & DROP_IDLE) drain_delayed_work(&dev_priv->gt.idle_work); - if (val & DROP_FREED) { - synchronize_rcu(); + if (val & DROP_FREED) i915_gem_drain_freed_objects(dev_priv); - } return ret; } @@ -4279,7 +4333,7 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv, continue; sseu->slice_mask = BIT(0); - sseu->subslice_mask |= BIT(ss); + sseu->subslice_mask[0] |= BIT(ss); eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 
0 : 2) + ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) + ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) + @@ -4294,11 +4348,11 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, struct sseu_dev_info *sseu) { const struct intel_device_info *info = INTEL_INFO(dev_priv); - int s_max = 6, ss_max = 4; int s, ss; - u32 s_reg[s_max], eu_reg[2 * s_max], eu_mask[2]; + u32 s_reg[info->sseu.max_slices]; + u32 eu_reg[2 * info->sseu.max_subslices], eu_mask[2]; - for (s = 0; s < s_max; s++) { + for (s = 0; s < info->sseu.max_slices; s++) { /* * FIXME: Valid SS Mask respects the spec and read * only valid bits for those registers, excluding reserverd @@ -4320,15 +4374,15 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, GEN9_PGCTL_SSB_EU210_ACK | GEN9_PGCTL_SSB_EU311_ACK; - for (s = 0; s < s_max; s++) { + for (s = 0; s < info->sseu.max_slices; s++) { if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0) /* skip disabled slice */ continue; sseu->slice_mask |= BIT(s); - sseu->subslice_mask = info->sseu.subslice_mask; + sseu->subslice_mask[s] = info->sseu.subslice_mask[s]; - for (ss = 0; ss < ss_max; ss++) { + for (ss = 0; ss < info->sseu.max_subslices; ss++) { unsigned int eu_cnt; if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) @@ -4348,17 +4402,12 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, struct sseu_dev_info *sseu) { - int s_max = 3, ss_max = 4; + const struct intel_device_info *info = INTEL_INFO(dev_priv); int s, ss; - u32 s_reg[s_max], eu_reg[2*s_max], eu_mask[2]; + u32 s_reg[info->sseu.max_slices]; + u32 eu_reg[2 * info->sseu.max_subslices], eu_mask[2]; - /* BXT has a single slice and at most 3 subslices. */ - if (IS_GEN9_LP(dev_priv)) { - s_max = 1; - ss_max = 3; - } - - for (s = 0; s < s_max; s++) { + for (s = 0; s < info->sseu.max_slices; s++) { s_reg[s] = I915_READ(GEN9_SLICE_PGCTL_ACK(s)); eu_reg[2*s] = I915_READ(GEN9_SS01_EU_PGCTL_ACK(s)); eu_reg[2*s + 1] = I915_READ(GEN9_SS23_EU_PGCTL_ACK(s)); @@ -4373,7 +4422,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, GEN9_PGCTL_SSB_EU210_ACK | GEN9_PGCTL_SSB_EU311_ACK; - for (s = 0; s < s_max; s++) { + for (s = 0; s < info->sseu.max_slices; s++) { if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0) /* skip disabled slice */ continue; @@ -4381,10 +4430,10 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask |= BIT(s); if (IS_GEN9_BC(dev_priv)) - sseu->subslice_mask = - INTEL_INFO(dev_priv)->sseu.subslice_mask; + sseu->subslice_mask[s] = + INTEL_INFO(dev_priv)->sseu.subslice_mask[s]; - for (ss = 0; ss < ss_max; ss++) { + for (ss = 0; ss < info->sseu.max_subslices; ss++) { unsigned int eu_cnt; if (IS_GEN9_LP(dev_priv)) { @@ -4392,7 +4441,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, /* skip disabled subslice */ continue; - sseu->subslice_mask |= BIT(ss); + sseu->subslice_mask[s] |= BIT(ss); } eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] & @@ -4414,9 +4463,12 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK; if (sseu->slice_mask) { - sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask; sseu->eu_per_subslice = INTEL_INFO(dev_priv)->sseu.eu_per_subslice; + for (s = 0; s < fls(sseu->slice_mask); s++) { + sseu->subslice_mask[s] = + INTEL_INFO(dev_priv)->sseu.subslice_mask[s]; + } sseu->eu_total = sseu->eu_per_subslice * sseu_subslice_total(sseu); @@ -4435,6 
+4487,7 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info, { struct drm_i915_private *dev_priv = node_to_i915(m->private); const char *type = is_available_info ? "Available" : "Enabled"; + int s; seq_printf(m, " %s Slice Mask: %04x\n", type, sseu->slice_mask); @@ -4442,10 +4495,10 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info, hweight8(sseu->slice_mask)); seq_printf(m, " %s Subslice Total: %u\n", type, sseu_subslice_total(sseu)); - seq_printf(m, " %s Subslice Mask: %04x\n", type, - sseu->subslice_mask); - seq_printf(m, " %s Subslice Per Slice: %u\n", type, - hweight8(sseu->subslice_mask)); + for (s = 0; s < fls(sseu->slice_mask); s++) { + seq_printf(m, " %s Slice%i subslices: %u\n", type, + s, hweight8(sseu->subslice_mask[s])); + } seq_printf(m, " %s EU Total: %u\n", type, sseu->eu_total); seq_printf(m, " %s EU Per Subslice: %u\n", type, @@ -4479,6 +4532,10 @@ static int i915_sseu_status(struct seq_file *m, void *unused) seq_puts(m, "SSEU Device Status\n"); memset(&sseu, 0, sizeof(sseu)); + sseu.max_slices = INTEL_INFO(dev_priv)->sseu.max_slices; + sseu.max_subslices = INTEL_INFO(dev_priv)->sseu.max_subslices; + sseu.max_eus_per_subslice = + INTEL_INFO(dev_priv)->sseu.max_eus_per_subslice; intel_runtime_pm_get(dev_priv); @@ -4606,6 +4663,46 @@ static const struct file_operations i915_hpd_storm_ctl_fops = { .write = i915_hpd_storm_ctl_write }; +static int i915_drrs_ctl_set(void *data, u64 val) +{ + struct drm_i915_private *dev_priv = data; + struct drm_device *dev = &dev_priv->drm; + struct intel_crtc *intel_crtc; + struct intel_encoder *encoder; + struct intel_dp *intel_dp; + + if (INTEL_GEN(dev_priv) < 7) + return -ENODEV; + + drm_modeset_lock_all(dev); + for_each_intel_crtc(dev, intel_crtc) { + if (!intel_crtc->base.state->active || + !intel_crtc->config->has_drrs) + continue; + + for_each_encoder_on_crtc(dev, &intel_crtc->base, encoder) { + if (encoder->type != INTEL_OUTPUT_EDP) + continue; + + DRM_DEBUG_DRIVER("Manually %sabling DRRS. %llu\n", + val ? 
"en" : "dis", val); + + intel_dp = enc_to_intel_dp(&encoder->base); + if (val) + intel_edp_drrs_enable(intel_dp, + intel_crtc->config); + else + intel_edp_drrs_disable(intel_dp, + intel_crtc->config); + } + } + drm_modeset_unlock_all(dev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_drrs_ctl_fops, NULL, i915_drrs_ctl_set, "%llu\n"); + static const struct drm_info_list i915_debugfs_list[] = { {"i915_capabilities", i915_capabilities, 0}, {"i915_gem_objects", i915_gem_object_info, 0}, @@ -4646,6 +4743,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_dmc_info", i915_dmc_info, 0}, {"i915_display_info", i915_display_info, 0}, {"i915_engine_info", i915_engine_info, 0}, + {"i915_rcs_topology", i915_rcs_topology, 0}, {"i915_shrinker_info", i915_shrinker_info, 0}, {"i915_shared_dplls_info", i915_shared_dplls_info, 0}, {"i915_dp_mst_info", i915_dp_mst_info, 0}, @@ -4683,7 +4781,8 @@ static const struct i915_debugfs_files { {"i915_dp_test_active", &i915_displayport_test_active_fops}, {"i915_guc_log_control", &i915_guc_log_control_fops}, {"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops}, - {"i915_ipc_status", &i915_ipc_status_fops} + {"i915_ipc_status", &i915_ipc_status_fops}, + {"i915_drrs_ctl", &i915_drrs_ctl_fops} }; int i915_debugfs_register(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 2f5209de0391..07c07d55398b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -49,12 +49,14 @@ #include "i915_drv.h" #include "i915_trace.h" #include "i915_pmu.h" +#include "i915_query.h" #include "i915_vgpu.h" #include "intel_drv.h" #include "intel_uc.h" static struct drm_driver driver; +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) static unsigned int i915_load_fail_count; bool __i915_inject_load_failure(const char *func, int line) @@ -70,6 +72,7 @@ bool __i915_inject_load_failure(const char *func, int line) return false; } +#endif #define FDO_BUG_URL "https://bugs.freedesktop.org/enter_bug.cgi?product=DRI" #define FDO_BUG_MSG "Please file a bug at " FDO_BUG_URL " against DRM/Intel " \ @@ -107,8 +110,12 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level, static bool i915_error_injected(struct drm_i915_private *dev_priv) { +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) return i915_modparams.inject_load_failure && i915_load_fail_count == i915_modparams.inject_load_failure; +#else + return false; +#endif } #define i915_load_error(dev_priv, fmt, ...) \ @@ -116,10 +123,90 @@ static bool i915_error_injected(struct drm_i915_private *dev_priv) i915_error_injected(dev_priv) ? KERN_DEBUG : KERN_ERR, \ fmt, ##__VA_ARGS__) +/* Map PCH device id to PCH type, or PCH_NONE if unknown. 
*/ +static enum intel_pch +intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) +{ + switch (id) { + case INTEL_PCH_IBX_DEVICE_ID_TYPE: + DRM_DEBUG_KMS("Found Ibex Peak PCH\n"); + WARN_ON(!IS_GEN5(dev_priv)); + return PCH_IBX; + case INTEL_PCH_CPT_DEVICE_ID_TYPE: + DRM_DEBUG_KMS("Found CougarPoint PCH\n"); + WARN_ON(!IS_GEN6(dev_priv) && !IS_IVYBRIDGE(dev_priv)); + return PCH_CPT; + case INTEL_PCH_PPT_DEVICE_ID_TYPE: + DRM_DEBUG_KMS("Found PantherPoint PCH\n"); + WARN_ON(!IS_GEN6(dev_priv) && !IS_IVYBRIDGE(dev_priv)); + /* PantherPoint is CPT compatible */ + return PCH_CPT; + case INTEL_PCH_LPT_DEVICE_ID_TYPE: + DRM_DEBUG_KMS("Found LynxPoint PCH\n"); + WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); + WARN_ON(IS_HSW_ULT(dev_priv) || IS_BDW_ULT(dev_priv)); + return PCH_LPT; + case INTEL_PCH_LPT_LP_DEVICE_ID_TYPE: + DRM_DEBUG_KMS("Found LynxPoint LP PCH\n"); + WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); + WARN_ON(!IS_HSW_ULT(dev_priv) && !IS_BDW_ULT(dev_priv)); + return PCH_LPT; + case INTEL_PCH_WPT_DEVICE_ID_TYPE: + DRM_DEBUG_KMS("Found WildcatPoint PCH\n"); + WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); + WARN_ON(IS_HSW_ULT(dev_priv) || IS_BDW_ULT(dev_priv)); + /* WildcatPoint is LPT compatible */ + return PCH_LPT; + case INTEL_PCH_WPT_LP_DEVICE_ID_TYPE: + DRM_DEBUG_KMS("Found WildcatPoint LP PCH\n"); + WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); + WARN_ON(!IS_HSW_ULT(dev_priv) && !IS_BDW_ULT(dev_priv)); + /* WildcatPoint is LPT compatible */ + return PCH_LPT; + case INTEL_PCH_SPT_DEVICE_ID_TYPE: + DRM_DEBUG_KMS("Found SunrisePoint PCH\n"); + WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)); + return PCH_SPT; + case INTEL_PCH_SPT_LP_DEVICE_ID_TYPE: + DRM_DEBUG_KMS("Found SunrisePoint LP PCH\n"); + WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)); + return PCH_SPT; + case INTEL_PCH_KBP_DEVICE_ID_TYPE: + DRM_DEBUG_KMS("Found Kaby Lake PCH (KBP)\n"); + WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv) && + !IS_COFFEELAKE(dev_priv)); + return PCH_KBP; + case INTEL_PCH_CNP_DEVICE_ID_TYPE: + DRM_DEBUG_KMS("Found Cannon Lake PCH (CNP)\n"); + WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); + return PCH_CNP; + case INTEL_PCH_CNP_LP_DEVICE_ID_TYPE: + DRM_DEBUG_KMS("Found Cannon Lake LP PCH (CNP-LP)\n"); + WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); + return PCH_CNP; + case INTEL_PCH_ICP_DEVICE_ID_TYPE: + DRM_DEBUG_KMS("Found Ice Lake PCH\n"); + WARN_ON(!IS_ICELAKE(dev_priv)); + return PCH_ICP; + default: + return PCH_NONE; + } +} + +static bool intel_is_virt_pch(unsigned short id, + unsigned short svendor, unsigned short sdevice) +{ + return (id == INTEL_PCH_P2X_DEVICE_ID_TYPE || + id == INTEL_PCH_P3X_DEVICE_ID_TYPE || + (id == INTEL_PCH_QEMU_DEVICE_ID_TYPE && + svendor == PCI_SUBVENDOR_ID_REDHAT_QUMRANET && + sdevice == PCI_SUBDEVICE_ID_QEMU)); +} -static enum intel_pch intel_virt_detect_pch(struct drm_i915_private *dev_priv) +static unsigned short +intel_virt_detect_pch(const struct drm_i915_private *dev_priv) { - enum intel_pch ret = PCH_NOP; + unsigned short id = 0; /* * In a virtualized passthrough environment we can be in a @@ -128,28 +215,25 @@ static enum intel_pch intel_virt_detect_pch(struct drm_i915_private *dev_priv) * make an educated guess as to which PCH is really there. 
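intel_pch_type() above reduces PCH detection to a pure id-to-type mapping, which is what allows the virtualized path below to reuse it: guess a plausible device id first, then run it through the very same mapping. Purely as a design comparison (this is not what the patch does), the mapping could also have been table-driven:

static const struct {
	unsigned short id;
	enum intel_pch type;
} sample_pch_map[] = {
	{ INTEL_PCH_IBX_DEVICE_ID_TYPE, PCH_IBX },
	{ INTEL_PCH_CPT_DEVICE_ID_TYPE, PCH_CPT },
	{ INTEL_PCH_PPT_DEVICE_ID_TYPE, PCH_CPT },	/* CPT compatible */
	{ INTEL_PCH_SPT_DEVICE_ID_TYPE, PCH_SPT },
	/* ... one entry per known id ... */
};

static enum intel_pch sample_pch_type(unsigned short id)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(sample_pch_map); i++)
		if (sample_pch_map[i].id == id)
			return sample_pch_map[i].type;

	return PCH_NONE;
}

The switch wins here because every case also carries a platform-consistency WARN_ON and a debug message, which a flat table cannot express as naturally.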
*/ - if (IS_GEN5(dev_priv)) { - ret = PCH_IBX; - DRM_DEBUG_KMS("Assuming Ibex Peak PCH\n"); - } else if (IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) { - ret = PCH_CPT; - DRM_DEBUG_KMS("Assuming CougarPoint PCH\n"); - } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - ret = PCH_LPT; - if (IS_HSW_ULT(dev_priv) || IS_BDW_ULT(dev_priv)) - dev_priv->pch_id = INTEL_PCH_LPT_LP_DEVICE_ID_TYPE; - else - dev_priv->pch_id = INTEL_PCH_LPT_DEVICE_ID_TYPE; - DRM_DEBUG_KMS("Assuming LynxPoint PCH\n"); - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - ret = PCH_SPT; - DRM_DEBUG_KMS("Assuming SunrisePoint PCH\n"); - } else if (IS_COFFEELAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) { - ret = PCH_CNP; - DRM_DEBUG_KMS("Assuming CannonPoint PCH\n"); - } + if (IS_GEN5(dev_priv)) + id = INTEL_PCH_IBX_DEVICE_ID_TYPE; + else if (IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) + id = INTEL_PCH_CPT_DEVICE_ID_TYPE; + else if (IS_HSW_ULT(dev_priv) || IS_BDW_ULT(dev_priv)) + id = INTEL_PCH_LPT_LP_DEVICE_ID_TYPE; + else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + id = INTEL_PCH_LPT_DEVICE_ID_TYPE; + else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + id = INTEL_PCH_SPT_DEVICE_ID_TYPE; + else if (IS_COFFEELAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) + id = INTEL_PCH_CNP_DEVICE_ID_TYPE; + + if (id) + DRM_DEBUG_KMS("Assuming PCH ID %04x\n", id); + else + DRM_DEBUG_KMS("Assuming no PCH\n"); - return ret; + return id; } static void intel_detect_pch(struct drm_i915_private *dev_priv) @@ -176,94 +260,31 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv) * of only checking the first one. */ while ((pch = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, pch))) { - if (pch->vendor == PCI_VENDOR_ID_INTEL) { - unsigned short id = pch->device & INTEL_PCH_DEVICE_ID_MASK; + unsigned short id; + enum intel_pch pch_type; - dev_priv->pch_id = id; + if (pch->vendor != PCI_VENDOR_ID_INTEL) + continue; - if (id == INTEL_PCH_IBX_DEVICE_ID_TYPE) { - dev_priv->pch_type = PCH_IBX; - DRM_DEBUG_KMS("Found Ibex Peak PCH\n"); - WARN_ON(!IS_GEN5(dev_priv)); - } else if (id == INTEL_PCH_CPT_DEVICE_ID_TYPE) { - dev_priv->pch_type = PCH_CPT; - DRM_DEBUG_KMS("Found CougarPoint PCH\n"); - WARN_ON(!IS_GEN6(dev_priv) && - !IS_IVYBRIDGE(dev_priv)); - } else if (id == INTEL_PCH_PPT_DEVICE_ID_TYPE) { - /* PantherPoint is CPT compatible */ - dev_priv->pch_type = PCH_CPT; - DRM_DEBUG_KMS("Found PantherPoint PCH\n"); - WARN_ON(!IS_GEN6(dev_priv) && - !IS_IVYBRIDGE(dev_priv)); - } else if (id == INTEL_PCH_LPT_DEVICE_ID_TYPE) { - dev_priv->pch_type = PCH_LPT; - DRM_DEBUG_KMS("Found LynxPoint PCH\n"); - WARN_ON(!IS_HASWELL(dev_priv) && - !IS_BROADWELL(dev_priv)); - WARN_ON(IS_HSW_ULT(dev_priv) || - IS_BDW_ULT(dev_priv)); - } else if (id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) { - dev_priv->pch_type = PCH_LPT; - DRM_DEBUG_KMS("Found LynxPoint LP PCH\n"); - WARN_ON(!IS_HASWELL(dev_priv) && - !IS_BROADWELL(dev_priv)); - WARN_ON(!IS_HSW_ULT(dev_priv) && - !IS_BDW_ULT(dev_priv)); - } else if (id == INTEL_PCH_WPT_DEVICE_ID_TYPE) { - /* WildcatPoint is LPT compatible */ - dev_priv->pch_type = PCH_LPT; - DRM_DEBUG_KMS("Found WildcatPoint PCH\n"); - WARN_ON(!IS_HASWELL(dev_priv) && - !IS_BROADWELL(dev_priv)); - WARN_ON(IS_HSW_ULT(dev_priv) || - IS_BDW_ULT(dev_priv)); - } else if (id == INTEL_PCH_WPT_LP_DEVICE_ID_TYPE) { - /* WildcatPoint is LPT compatible */ - dev_priv->pch_type = PCH_LPT; - DRM_DEBUG_KMS("Found WildcatPoint LP PCH\n"); - WARN_ON(!IS_HASWELL(dev_priv) && - !IS_BROADWELL(dev_priv)); - WARN_ON(!IS_HSW_ULT(dev_priv) && - 
!IS_BDW_ULT(dev_priv)); - } else if (id == INTEL_PCH_SPT_DEVICE_ID_TYPE) { - dev_priv->pch_type = PCH_SPT; - DRM_DEBUG_KMS("Found SunrisePoint PCH\n"); - WARN_ON(!IS_SKYLAKE(dev_priv) && - !IS_KABYLAKE(dev_priv)); - } else if (id == INTEL_PCH_SPT_LP_DEVICE_ID_TYPE) { - dev_priv->pch_type = PCH_SPT; - DRM_DEBUG_KMS("Found SunrisePoint LP PCH\n"); - WARN_ON(!IS_SKYLAKE(dev_priv) && - !IS_KABYLAKE(dev_priv)); - } else if (id == INTEL_PCH_KBP_DEVICE_ID_TYPE) { - dev_priv->pch_type = PCH_KBP; - DRM_DEBUG_KMS("Found Kaby Lake PCH (KBP)\n"); - WARN_ON(!IS_SKYLAKE(dev_priv) && - !IS_KABYLAKE(dev_priv) && - !IS_COFFEELAKE(dev_priv)); - } else if (id == INTEL_PCH_CNP_DEVICE_ID_TYPE) { - dev_priv->pch_type = PCH_CNP; - DRM_DEBUG_KMS("Found Cannon Lake PCH (CNP)\n"); - WARN_ON(!IS_CANNONLAKE(dev_priv) && - !IS_COFFEELAKE(dev_priv)); - } else if (id == INTEL_PCH_CNP_LP_DEVICE_ID_TYPE) { - dev_priv->pch_type = PCH_CNP; - DRM_DEBUG_KMS("Found Cannon Lake LP PCH (CNP-LP)\n"); - WARN_ON(!IS_CANNONLAKE(dev_priv) && - !IS_COFFEELAKE(dev_priv)); - } else if (id == INTEL_PCH_P2X_DEVICE_ID_TYPE || - id == INTEL_PCH_P3X_DEVICE_ID_TYPE || - (id == INTEL_PCH_QEMU_DEVICE_ID_TYPE && - pch->subsystem_vendor == - PCI_SUBVENDOR_ID_REDHAT_QUMRANET && - pch->subsystem_device == - PCI_SUBDEVICE_ID_QEMU)) { - dev_priv->pch_type = - intel_virt_detect_pch(dev_priv); - } else - continue; + id = pch->device & INTEL_PCH_DEVICE_ID_MASK; + pch_type = intel_pch_type(dev_priv, id); + if (pch_type != PCH_NONE) { + dev_priv->pch_type = pch_type; + dev_priv->pch_id = id; + break; + } else if (intel_is_virt_pch(id, pch->subsystem_vendor, + pch->subsystem_device)) { + id = intel_virt_detect_pch(dev_priv); + if (id) { + pch_type = intel_pch_type(dev_priv, id); + if (WARN_ON(pch_type == PCH_NONE)) + pch_type = PCH_NOP; + } else { + pch_type = PCH_NOP; + } + dev_priv->pch_type = pch_type; + dev_priv->pch_id = id; break; } } @@ -273,8 +294,8 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv) pci_dev_put(pch); } -static int i915_getparam(struct drm_device *dev, void *data, - struct drm_file *file_priv) +static int i915_getparam_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) { struct drm_i915_private *dev_priv = to_i915(dev); struct pci_dev *pdev = dev_priv->drm.pdev; @@ -368,13 +389,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = i915_gem_mmap_gtt_version(); break; case I915_PARAM_HAS_SCHEDULER: - value = 0; - if (dev_priv->engine[RCS] && dev_priv->engine[RCS]->schedule) { - value |= I915_SCHEDULER_CAP_ENABLED; - value |= I915_SCHEDULER_CAP_PRIORITY; - if (HAS_LOGICAL_RING_PREEMPTION(dev_priv)) - value |= I915_SCHEDULER_CAP_PREEMPTION; - } + value = dev_priv->caps.scheduler; break; case I915_PARAM_MMAP_VERSION: @@ -414,7 +429,7 @@ static int i915_getparam(struct drm_device *dev, void *data, return -ENODEV; break; case I915_PARAM_SUBSLICE_MASK: - value = INTEL_INFO(dev_priv)->sseu.subslice_mask; + value = INTEL_INFO(dev_priv)->sseu.subslice_mask[0]; if (!value) return -ENODEV; break; @@ -622,7 +637,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv) i915_gem_contexts_fini(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); - intel_uc_fini_wq(dev_priv); + intel_uc_fini_misc(dev_priv); i915_gem_cleanup_userptr(dev_priv); i915_gem_drain_freed_objects(dev_priv); @@ -794,7 +809,7 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv) /* * The i915 workqueue is primarily used for batched retirement of * requests (and thus managing bo) once the task has 
been completed - * by the GPU. i915_gem_retire_requests() is called directly when we + * by the GPU. i915_retire_requests() is called directly when we * need high-priority retirement, such as waiting for an explicit * bo. * @@ -866,6 +881,7 @@ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) /** * i915_driver_init_early - setup state not requiring device access * @dev_priv: device private + * @ent: the matching pci_device_id * * Initialize everything that is a "SW-only" state, that is state not * requiring accessing the device or exposing the driver via kernel internal @@ -891,11 +907,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, BUILD_BUG_ON(INTEL_MAX_PLATFORMS > sizeof(device_info->platform_mask) * BITS_PER_BYTE); - device_info->platform_mask = BIT(device_info->platform); - BUG_ON(device_info->gen > sizeof(device_info->gen_mask) * BITS_PER_BYTE); - device_info->gen_mask = BIT(device_info->gen - 1); - spin_lock_init(&dev_priv->irq_lock); spin_lock_init(&dev_priv->gpu_error.lock); mutex_init(&dev_priv->backlight_lock); @@ -1599,15 +1611,12 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) { struct drm_i915_private *dev_priv = to_i915(dev); struct pci_dev *pdev = dev_priv->drm.pdev; - bool fw_csr; int ret; disable_rpm_wakeref_asserts(dev_priv); intel_display_set_init_power(dev_priv, false); - fw_csr = !IS_GEN9_LP(dev_priv) && !hibernation && - suspend_to_idle(dev_priv) && dev_priv->csr.dmc_payload; /* * In case of firmware assisted context save/restore don't manually * deinit the power domains. This also means the CSR/DMC firmware will @@ -1615,8 +1624,11 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) * also enable deeper system power states that would be blocked if the * firmware was inactive. 
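The hunk that follows replaces the precomputed fw_csr flag with its negation, De Morgan applied, and records the decision in dev_priv->power_domains_suspended so the resume path no longer has to re-derive it. Side by side (a condensed sketch of the removed and added lines):

/* old: skip the manual power-domain suspend when the DMC firmware
 * can save and restore the context for us */
bool fw_csr = !IS_GEN9_LP(dev_priv) && !hibernation &&
	      suspend_to_idle(dev_priv) && dev_priv->csr.dmc_payload;
if (!fw_csr)
	intel_power_domains_suspend(dev_priv);

/* new: the same condition inverted, plus a record of what we did */
if (IS_GEN9_LP(dev_priv) || hibernation || !suspend_to_idle(dev_priv) ||
    dev_priv->csr.dmc_payload == NULL) {
	intel_power_domains_suspend(dev_priv);
	dev_priv->power_domains_suspended = true;
}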
*/ - if (!fw_csr) + if (IS_GEN9_LP(dev_priv) || hibernation || !suspend_to_idle(dev_priv) || + dev_priv->csr.dmc_payload == NULL) { intel_power_domains_suspend(dev_priv); + dev_priv->power_domains_suspended = true; + } ret = 0; if (IS_GEN9_LP(dev_priv)) @@ -1628,8 +1640,10 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) if (ret) { DRM_ERROR("Suspend complete failed: %d\n", ret); - if (!fw_csr) + if (dev_priv->power_domains_suspended) { intel_power_domains_init_hw(dev_priv, true); + dev_priv->power_domains_suspended = false; + } goto out; } @@ -1650,8 +1664,6 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) if (!(hibernation && INTEL_GEN(dev_priv) < 6)) pci_set_power_state(pdev, PCI_D3hot); - dev_priv->suspended_to_idle = suspend_to_idle(dev_priv); - out: enable_rpm_wakeref_asserts(dev_priv); @@ -1818,8 +1830,7 @@ static int i915_drm_resume_early(struct drm_device *dev) intel_uncore_resume_early(dev_priv); if (IS_GEN9_LP(dev_priv)) { - if (!dev_priv->suspended_to_idle) - gen9_sanitize_dc_state(dev_priv); + gen9_sanitize_dc_state(dev_priv); bxt_disable_dc9(dev_priv); } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { hsw_disable_pc8(dev_priv); @@ -1827,8 +1838,7 @@ static int i915_drm_resume_early(struct drm_device *dev) intel_uncore_sanitize(dev_priv); - if (IS_GEN9_LP(dev_priv) || - !(dev_priv->suspended_to_idle && dev_priv->csr.dmc_payload)) + if (dev_priv->power_domains_suspended) intel_power_domains_init_hw(dev_priv, true); else intel_display_set_init_power(dev_priv, true); @@ -1838,7 +1848,7 @@ static int i915_drm_resume_early(struct drm_device *dev) enable_rpm_wakeref_asserts(dev_priv); out: - dev_priv->suspended_to_idle = false; + dev_priv->power_domains_suspended = false; return ret; } @@ -1900,7 +1910,6 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags) ret = i915_gem_reset_prepare(i915); if (ret) { dev_err(i915->drm.dev, "GPU recovery failed\n"); - intel_gpu_reset(i915, ALL_ENGINES); goto taint; } @@ -1932,7 +1941,8 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags) */ ret = i915_ggtt_enable_hw(i915); if (ret) { - DRM_ERROR("Failed to re-enable GGTT following reset %d\n", ret); + DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n", + ret); goto error; } @@ -1949,7 +1959,8 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags) */ ret = i915_gem_init_hw(i915); if (ret) { - DRM_ERROR("Failed hw init on reset %d\n", ret); + DRM_ERROR("Failed to initialise HW following reset (%d)\n", + ret); goto error; } @@ -1980,7 +1991,8 @@ taint: add_taint(TAINT_WARN, LOCKDEP_STILL_OK); error: i915_gem_set_wedged(i915); - i915_gem_retire_requests(i915); + i915_retire_requests(i915); + intel_gpu_reset(i915, ALL_ENGINES); goto finish; } @@ -2006,7 +2018,7 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv, int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags) { struct i915_gpu_error *error = &engine->i915->gpu_error; - struct drm_i915_gem_request *active_request; + struct i915_request *active_request; int ret; GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); @@ -2562,7 +2574,7 @@ static int intel_runtime_suspend(struct device *kdev) */ i915_gem_runtime_suspend(dev_priv); - intel_guc_suspend(dev_priv); + intel_uc_suspend(dev_priv); intel_runtime_pm_disable_interrupts(dev_priv); @@ -2584,6 +2596,11 @@ static int intel_runtime_suspend(struct device *kdev) intel_runtime_pm_enable_interrupts(dev_priv); + 
intel_uc_resume(dev_priv); + + i915_gem_init_swizzling(dev_priv); + i915_gem_restore_fences(dev_priv); + enable_rpm_wakeref_asserts(dev_priv); return ret; @@ -2649,8 +2666,6 @@ static int intel_runtime_resume(struct device *kdev) if (intel_uncore_unclaimed_mmio(dev_priv)) DRM_DEBUG_DRIVER("Unclaimed access during suspend, bios?\n"); - intel_guc_resume(dev_priv); - if (IS_GEN9_LP(dev_priv)) { bxt_disable_dc9(dev_priv); bxt_display_core_init(dev_priv, true); @@ -2665,6 +2680,10 @@ static int intel_runtime_resume(struct device *kdev) intel_uncore_runtime_resume(dev_priv); + intel_runtime_pm_enable_interrupts(dev_priv); + + intel_uc_resume(dev_priv); + /* * No point of rolling back things in case of an error, as the best * we can do is to hope that things will still work (and disable RPM). @@ -2672,8 +2691,6 @@ static int intel_runtime_resume(struct device *kdev) i915_gem_init_swizzling(dev_priv); i915_gem_restore_fences(dev_priv); - intel_runtime_pm_enable_interrupts(dev_priv); - /* * On VLV/CHV display interrupts are part of the display * power well, so hpd is reinitialized from there. For @@ -2765,7 +2782,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_BATCHBUFFER, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_IRQ_EMIT, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_IRQ_WAIT, drm_noop, DRM_AUTH), - DRM_IOCTL_DEF_DRV(I915_GETPARAM, i915_getparam, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GETPARAM, i915_getparam_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_SETPARAM, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_ALLOC, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_FREE, drm_noop, DRM_AUTH), @@ -2777,8 +2794,8 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_VBLANK_SWAP, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH), - DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer_ioctl, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), @@ -2797,11 +2814,11 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_GET_TILING, i915_gem_get_tiling_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, 0), + DRM_IOCTL_DEF_DRV(I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id_ioctl, 0), DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW), DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW), - DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey, DRM_MASTER|DRM_CONTROL_ALLOW), + DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, 
DRM_MASTER|DRM_CONTROL_ALLOW), DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_RENDER_ALLOW), @@ -2814,6 +2831,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), }; static struct drm_driver driver = { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d307429a5ae0..ce18b6cf6e68 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -71,9 +71,9 @@ #include "i915_gem_fence_reg.h" #include "i915_gem_object.h" #include "i915_gem_gtt.h" -#include "i915_gem_request.h" #include "i915_gem_timeline.h" +#include "i915_request.h" #include "i915_vma.h" #include "intel_gvt.h" @@ -83,8 +83,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20171222" -#define DRIVER_TIMESTAMP 1513971710 +#define DRIVER_DATE "20180308" +#define DRIVER_TIMESTAMP 1520513379 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions @@ -104,9 +104,13 @@ #define I915_STATE_WARN_ON(x) \ I915_STATE_WARN((x), "%s", "WARN_ON(" __stringify(x) ")") +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) bool __i915_inject_load_failure(const char *func, int line); #define i915_inject_load_failure() \ __i915_inject_load_failure(__func__, __LINE__) +#else +#define i915_inject_load_failure() false +#endif typedef struct { uint32_t val; @@ -453,9 +457,9 @@ struct intel_display_error_state; struct i915_gpu_state { struct kref ref; - struct timeval time; - struct timeval boottime; - struct timeval uptime; + ktime_t time; + ktime_t boottime; + ktime_t uptime; struct drm_i915_private *i915; @@ -468,6 +472,7 @@ struct i915_gpu_state { u32 reset_count; u32 suspend_count; struct intel_device_info device_info; + struct intel_driver_caps driver_caps; struct i915_params params; struct i915_error_uc { @@ -551,6 +556,7 @@ struct i915_gpu_state { int ban_score; int active; int guilty; + bool bannable; } context; struct drm_i915_error_object { @@ -661,6 +667,7 @@ struct intel_fbc { */ struct intel_fbc_state_cache { struct i915_vma *vma; + unsigned long flags; struct { unsigned int mode_flags; @@ -699,6 +706,7 @@ struct intel_fbc { */ struct intel_fbc_reg_params { struct i915_vma *vma; + unsigned long flags; struct { enum pipe pipe; @@ -717,7 +725,7 @@ struct intel_fbc { struct intel_fbc_work { bool scheduled; - u32 scheduled_vblank; + u64 scheduled_vblank; struct work_struct work; } work; @@ -754,7 +762,6 @@ struct i915_drrs { struct i915_psr { struct mutex lock; bool sink_support; - bool source_ok; struct intel_dp *enabled; bool active; struct delayed_work work; @@ -783,6 +790,7 @@ enum intel_pch { PCH_SPT, /* Sunrisepoint PCH */ PCH_KBP, /* Kaby Lake PCH */ PCH_CNP, /* Cannon Lake PCH */ + PCH_ICP, /* Ice Lake PCH */ PCH_NOP, }; @@ -941,6 +949,8 @@ struct intel_rps { struct intel_rc6 { bool enabled; + u64 prev_hw_residency[4]; + u64 cur_residency[4]; }; struct intel_llc_pstate { @@ -1087,6 +1097,11 @@ struct 
i915_gem_mm { struct llist_head free_list; struct work_struct free_work; spinlock_t free_lock; + /** + * Count of objects pending destruction. Used to skip needlessly + * waiting on an RCU barrier if no objects are waiting to be freed. + */ + atomic_t free_count; /** * Small stash of WC pages @@ -1216,7 +1231,7 @@ struct i915_gpu_error { * * #I915_WEDGED - If reset fails and we can no longer use the GPU, * we set the #I915_WEDGED bit. Prior to command submission, e.g. - * i915_gem_request_alloc(), this bit is checked and the sequence + * i915_request_alloc(), this bit is checked and the sequence * aborted (with -EIO reported to userspace) if set. */ unsigned long flags; @@ -1255,6 +1270,7 @@ enum modeset_restore { #define DP_AUX_B 0x10 #define DP_AUX_C 0x20 #define DP_AUX_D 0x30 +#define DP_AUX_F 0x60 #define DDC_PIN_B 0x05 #define DDC_PIN_C 0x04 @@ -1281,6 +1297,7 @@ struct ddi_vbt_port_info { uint8_t dp_boost_level; uint8_t hdmi_boost_level; + int dp_max_link_rate; /* 0 for not limited by VBT */ } enum psr_lines_to_wait { @@ -1461,6 +1478,7 @@ struct skl_wm_params { uint_fixed_16_16_t plane_blocks_per_line; uint_fixed_16_16_t y_tile_minimum; uint32_t linetime_us; + uint32_t dbuf_block_size; } /* @@ -1793,7 +1811,7 @@ struct i915_oa_ops { }; struct intel_cdclk_state { - unsigned int cdclk, vco, ref; + unsigned int cdclk, vco, ref, bypass; u8 voltage_level; }; @@ -1808,6 +1826,7 @@ struct drm_i915_private { struct kmem_cache *priorities; const struct intel_device_info info; + struct intel_driver_caps caps; /** * Data Stolen Memory - aka "i915 stolen memory" gives us the start and @@ -2084,6 +2103,7 @@ struct drm_i915_private { */ struct ida hw_ida; #define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */ +#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */ } contexts; u32 fdi_rx_config; @@ -2099,7 +2119,7 @@ struct drm_i915_private { u32 bxt_phy_grc; u32 suspend_count; - bool suspended_to_idle; + bool power_domains_suspended; struct i915_suspend_saved_registers regfile; struct vlv_s0ix_state vlv_s0ix_state; @@ -2314,6 +2334,12 @@ struct drm_i915_private { bool awake; /** + * The number of times we have woken up. + */ + unsigned int epoch; +#define I915_EPOCH_INVALID 0 + + /** * We leave the user IRQ off as much as possible, * but this means that requests will finish and never * be retired once the system goes idle. Set a timer to @@ -2405,18 +2431,17 @@ enum hdmi_force_audio { * * We have one bit per pipe and per scanout plane type.
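* Concretely, each pipe owns one byte of the mask: bit (plane_id + 8 * pipe) * tracks an individual plane and the top bit of the byte tracks the overlay, * so e.g. plane 2 on pipe B maps to bit 10 and pipe A's overlay to bit 7.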
*/ -#define INTEL_MAX_SPRITE_BITS_PER_PIPE 5 #define INTEL_FRONTBUFFER_BITS_PER_PIPE 8 -#define INTEL_FRONTBUFFER_PRIMARY(pipe) \ - (1 << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe))) -#define INTEL_FRONTBUFFER_CURSOR(pipe) \ - (1 << (1 + (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))) -#define INTEL_FRONTBUFFER_SPRITE(pipe, plane) \ - (1 << (2 + plane + (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))) +#define INTEL_FRONTBUFFER(pipe, plane_id) ({ \ + BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES > 32); \ + BUILD_BUG_ON(I915_MAX_PLANES > INTEL_FRONTBUFFER_BITS_PER_PIPE); \ + BIT((plane_id) + INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)); \ +}) #define INTEL_FRONTBUFFER_OVERLAY(pipe) \ - (1 << (2 + INTEL_MAX_SPRITE_BITS_PER_PIPE + (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))) + BIT(INTEL_FRONTBUFFER_BITS_PER_PIPE - 1 + INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)) #define INTEL_FRONTBUFFER_ALL_MASK(pipe) \ - (0xff << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe))) + GENMASK(INTEL_FRONTBUFFER_BITS_PER_PIPE * ((pipe) + 1) - 1, \ + INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)) /* * Optimised SGL iterator for GEM objects @@ -2596,6 +2621,7 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_GEMINILAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_GEMINILAKE) #define IS_COFFEELAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_COFFEELAKE) #define IS_CANNONLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_CANNONLAKE) +#define IS_ICELAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_ICELAKE) #define IS_MOBILE(dev_priv) ((dev_priv)->info.is_mobile) #define IS_HSW_EARLY_SDV(dev_priv) (IS_HASWELL(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0xFF00) == 0x0C00) @@ -2647,6 +2673,8 @@ intel_info(const struct drm_i915_private *dev_priv) (dev_priv)->info.gt == 2) #define IS_CFL_GT3(dev_priv) (IS_COFFEELAKE(dev_priv) && \ (dev_priv)->info.gt == 3) +#define IS_CNL_WITH_PORT_F(dev_priv) (IS_CANNONLAKE(dev_priv) && \ + (INTEL_DEVID(dev_priv) & 0x0004) == 0x0004) #define IS_ALPHA_SUPPORT(intel_info) ((intel_info)->is_alpha_support) @@ -2707,6 +2735,7 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_GEN8(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(7))) #define IS_GEN9(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(8))) #define IS_GEN10(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(9))) +#define IS_GEN11(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(10))) #define IS_LP(dev_priv) (INTEL_INFO(dev_priv)->is_lp) #define IS_GEN9_LP(dev_priv) (IS_GEN9(dev_priv) && IS_LP(dev_priv)) @@ -2718,6 +2747,9 @@ intel_info(const struct drm_i915_private *dev_priv) #define BLT_RING ENGINE_MASK(BCS) #define VEBOX_RING ENGINE_MASK(VECS) #define BSD2_RING ENGINE_MASK(VCS2) +#define BSD3_RING ENGINE_MASK(VCS3) +#define BSD4_RING ENGINE_MASK(VCS4) +#define VEBOX2_RING ENGINE_MASK(VECS2) #define ALL_ENGINES (~0) #define HAS_ENGINE(dev_priv, id) \ @@ -2740,6 +2772,8 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \ ((dev_priv)->info.has_logical_ring_contexts) +#define HAS_LOGICAL_RING_ELSQ(dev_priv) \ + ((dev_priv)->info.has_logical_ring_elsq) #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \ ((dev_priv)->info.has_logical_ring_preemption) @@ -2760,9 +2794,10 @@ intel_info(const struct drm_i915_private *dev_priv) /* Early gen2 have a totally busted CS tlb and require pinned batches. 
*/ #define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv)) -/* WaRsDisableCoarsePowerGating:skl,bxt */ +/* WaRsDisableCoarsePowerGating:skl,cnl */ #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ - (IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) + (IS_CANNONLAKE(dev_priv) || \ + IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) /* * dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts @@ -2787,7 +2822,7 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2) #define HAS_FBC(dev_priv) ((dev_priv)->info.has_fbc) -#define HAS_CUR_FBC(dev_priv) (!HAS_GMCH_DISPLAY(dev_priv) && INTEL_INFO(dev_priv)->gen >= 7) +#define HAS_CUR_FBC(dev_priv) (!HAS_GMCH_DISPLAY(dev_priv) && INTEL_GEN(dev_priv) >= 7) #define HAS_IPS(dev_priv) (IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv)) @@ -2844,23 +2879,26 @@ intel_info(const struct drm_i915_private *dev_priv) #define INTEL_PCH_KBP_DEVICE_ID_TYPE 0xA280 #define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300 #define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80 +#define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480 #define INTEL_PCH_P2X_DEVICE_ID_TYPE 0x7100 #define INTEL_PCH_P3X_DEVICE_ID_TYPE 0x7000 #define INTEL_PCH_QEMU_DEVICE_ID_TYPE 0x2900 /* qemu q35 has 2918 */ #define INTEL_PCH_TYPE(dev_priv) ((dev_priv)->pch_type) +#define INTEL_PCH_ID(dev_priv) ((dev_priv)->pch_id) +#define HAS_PCH_ICP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_ICP) #define HAS_PCH_CNP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_CNP) #define HAS_PCH_CNP_LP(dev_priv) \ - ((dev_priv)->pch_id == INTEL_PCH_CNP_LP_DEVICE_ID_TYPE) + (INTEL_PCH_ID(dev_priv) == INTEL_PCH_CNP_LP_DEVICE_ID_TYPE) #define HAS_PCH_KBP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_KBP) #define HAS_PCH_SPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_SPT) #define HAS_PCH_LPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_LPT) #define HAS_PCH_LPT_LP(dev_priv) \ - ((dev_priv)->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE || \ - (dev_priv)->pch_id == INTEL_PCH_WPT_LP_DEVICE_ID_TYPE) + (INTEL_PCH_ID(dev_priv) == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE || \ + INTEL_PCH_ID(dev_priv) == INTEL_PCH_WPT_LP_DEVICE_ID_TYPE) #define HAS_PCH_LPT_H(dev_priv) \ - ((dev_priv)->pch_id == INTEL_PCH_LPT_DEVICE_ID_TYPE || \ - (dev_priv)->pch_id == INTEL_PCH_WPT_DEVICE_ID_TYPE) + (INTEL_PCH_ID(dev_priv) == INTEL_PCH_LPT_DEVICE_ID_TYPE || \ + INTEL_PCH_ID(dev_priv) == INTEL_PCH_WPT_DEVICE_ID_TYPE) #define HAS_PCH_CPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_CPT) #define HAS_PCH_IBX(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_IBX) #define HAS_PCH_NOP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_NOP) @@ -2951,8 +2989,10 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, void intel_hpd_init(struct drm_i915_private *dev_priv); void intel_hpd_init_work(struct drm_i915_private *dev_priv); void intel_hpd_cancel_work(struct drm_i915_private *dev_priv); -enum port intel_hpd_pin_to_port(enum hpd_pin pin); -enum hpd_pin intel_hpd_pin(enum port port); +enum port intel_hpd_pin_to_port(struct drm_i915_private *dev_priv, + enum hpd_pin pin); +enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv, + enum port port); bool intel_hpd_disable(struct drm_i915_private *dev_priv, enum hpd_pin pin); void intel_hpd_enable(struct drm_i915_private *dev_priv, enum hpd_pin pin); @@ -3065,10 +3105,10 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); 
-int i915_gem_execbuffer(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int i915_gem_execbuffer2(struct drm_device *dev, void *data, - struct drm_file *file_priv); +int i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); int i915_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, @@ -3112,6 +3152,9 @@ void i915_gem_free_object(struct drm_gem_object *obj); static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915) { + if (!atomic_read(&i915->mm.free_count)) + return; + /* A single pass should suffice to release all the freed objects (along * most call paths), but be a little more paranoid in that freeing * the objects does take a small amount of time, during which the rcu @@ -3293,7 +3336,7 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj) int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); void i915_vma_move_to_active(struct i915_vma *vma, - struct drm_i915_gem_request *req, + struct i915_request *rq, unsigned int flags); int i915_gem_dumb_create(struct drm_file *file_priv, struct drm_device *dev, @@ -3308,11 +3351,9 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno); -struct drm_i915_gem_request * +struct i915_request * i915_gem_find_active_request(struct intel_engine_cs *engine); -void i915_gem_retire_requests(struct drm_i915_private *dev_priv); - static inline bool i915_reset_backoff(struct i915_gpu_error *error) { return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags)); } @@ -3344,7 +3385,7 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, return READ_ONCE(error->reset_engine_count[engine->id]); } -struct drm_i915_gem_request * +struct i915_request * i915_gem_reset_prepare_engine(struct intel_engine_cs *engine); int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); void i915_gem_reset(struct drm_i915_private *dev_priv); @@ -3353,7 +3394,7 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv); void i915_gem_reset_engine(struct intel_engine_cs *engine, - struct drm_i915_gem_request *request); + struct i915_request *request); void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); @@ -3383,7 +3424,8 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write); struct i915_vma * __must_check i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 alignment, - const struct i915_ggtt_view *view); + const struct i915_ggtt_view *view, + unsigned int flags); void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma); int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align); @@ -3645,6 +3687,7 @@ extern int intel_setup_gmbus(struct drm_i915_private *dev_priv); extern void intel_teardown_gmbus(struct drm_i915_private *dev_priv); extern bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv, unsigned int pin); +extern int intel_gmbus_output_aksv(struct i2c_adapter *adapter); extern struct i2c_adapter * intel_gmbus_get_adapter(struct drm_i915_private *dev_priv, unsigned int pin); @@ -3720,9
+3763,10 @@ extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e, int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val); int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, u32 mbox, - u32 val, int timeout_us); + u32 val, int fast_timeout_us, + int slow_timeout_ms); #define sandybridge_pcode_write(dev_priv, mbox, val) \ - sandybridge_pcode_write_timeout(dev_priv, mbox, val, 500) + sandybridge_pcode_write_timeout(dev_priv, mbox, val, 500, 0) int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, u32 reply_mask, u32 reply, int timeout_base_ms); @@ -3969,9 +4013,9 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) } static inline bool -__i915_request_irq_complete(const struct drm_i915_gem_request *req) +__i915_request_irq_complete(const struct i915_request *rq) { - struct intel_engine_cs *engine = req->engine; + struct intel_engine_cs *engine = rq->engine; u32 seqno; /* Note that the engine may have wrapped around the seqno, and @@ -3980,7 +4024,7 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req) * this by kicking all the waiters before resetting the seqno * in hardware, and also signal the fence. */ - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &req->fence.flags)) + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) return true; /* The request was dequeued before we were awoken. We check after @@ -3989,14 +4033,14 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req) * the request execution are sufficient to ensure that a check * after reading the value from hw matches this request. */ - seqno = i915_gem_request_global_seqno(req); + seqno = i915_request_global_seqno(rq); if (!seqno) return false; /* Before we do the heavier coherent read of the seqno, * check the value (hopefully) in the CPU cacheline. 
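* That is, we try the cheap test of the cached value first and only pay * for the coherent read that follows when the cached seqno has not yet * advanced past this request.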
*/ - if (__i915_gem_request_completed(req, seqno)) + if (__i915_request_completed(rq, seqno)) return true; /* Ensure our read of the seqno is coherent so that we @@ -4045,7 +4089,7 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req) wake_up_process(b->irq_wait->tsk); spin_unlock_irq(&b->irq_lock); - if (__i915_gem_request_completed(req, seqno)) + if (__i915_request_completed(rq, seqno)) return true; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6ff5d655c202..7b5a9d7c9593 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -240,8 +240,8 @@ err_phys: static void __start_cpu_write(struct drm_i915_gem_object *obj) { - obj->base.read_domains = I915_GEM_DOMAIN_CPU; - obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; + obj->write_domain = I915_GEM_DOMAIN_CPU; if (cpu_write_needs_clflush(obj)) obj->cache_dirty = true; } @@ -257,7 +257,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, obj->mm.dirty = false; if (needs_clflush && - (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 && + (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 && !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) drm_clflush_sg(pages); @@ -353,7 +353,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence, long timeout, struct intel_rps_client *rps_client) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1); @@ -366,10 +366,11 @@ i915_gem_object_wait_fence(struct dma_fence *fence, timeout); rq = to_request(fence); - if (i915_gem_request_completed(rq)) + if (i915_request_completed(rq)) goto out; - /* This client is about to stall waiting for the GPU. In many cases + /* + * This client is about to stall waiting for the GPU. In many cases * this is undesirable and limits the throughput of the system, as * many clients cannot continue processing user input/output whilst * blocked. RPS autotuning may take tens of milliseconds to respond @@ -384,18 +385,16 @@ i915_gem_object_wait_fence(struct dma_fence *fence, * forcing the clocks too high for the whole system, we only allow * each client to waitboost once in a busy period. 
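* Note the i915_request_started() guard below: a request that has already * begun executing cannot complete any sooner, so boosting is only * worthwhile for requests still waiting to start.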
*/ - if (rps_client) { + if (rps_client && !i915_request_started(rq)) { if (INTEL_GEN(rq->i915) >= 6) gen6_rps_boost(rq, rps_client); - else - rps_client = NULL; } - timeout = i915_wait_request(rq, flags, timeout); + timeout = i915_request_wait(rq, flags, timeout); out: - if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq)) - i915_gem_request_retire_upto(rq); + if (flags & I915_WAIT_LOCKED && i915_request_completed(rq)) + i915_request_retire_upto(rq); return timeout; } @@ -472,7 +471,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, static void __fence_set_priority(struct dma_fence *fence, int prio) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct intel_engine_cs *engine; if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence)) @@ -480,10 +479,11 @@ static void __fence_set_priority(struct dma_fence *fence, int prio) rq = to_request(fence); engine = rq->engine; - if (!engine->schedule) - return; - engine->schedule(rq, prio); + rcu_read_lock(); + if (engine->schedule) + engine->schedule(rq, prio); + rcu_read_unlock(); } static void fence_set_priority(struct dma_fence *fence, int prio) @@ -712,10 +712,10 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_vma *vma; - if (!(obj->base.write_domain & flush_domains)) + if (!(obj->write_domain & flush_domains)) return; - switch (obj->base.write_domain) { + switch (obj->write_domain) { case I915_GEM_DOMAIN_GTT: i915_gem_flush_ggtt_writes(dev_priv); @@ -740,7 +740,7 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) break; } - obj->base.write_domain = 0; + obj->write_domain = 0; } static inline int @@ -840,7 +840,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, * anyway again before the next pread happens. */ if (!obj->cache_dirty && - !(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) + !(obj->read_domains & I915_GEM_DOMAIN_CPU)) *needs_clflush = CLFLUSH_BEFORE; out: @@ -899,7 +899,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, * Same trick applies to invalidate partially written * cachelines read before writing. */ - if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) + if (!(obj->read_domains & I915_GEM_DOMAIN_CPU)) *needs_clflush |= CLFLUSH_BEFORE; } @@ -2400,8 +2400,8 @@ static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) * wasn't in the GTT, there shouldn't be any way it could have been in * a GPU cache */ - GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); - GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); + GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); + GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); st = kmalloc(sizeof(*st), GFP_KERNEL); if (st == NULL) @@ -2832,24 +2832,23 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, return 0; } -static bool ban_context(const struct i915_gem_context *ctx, - unsigned int score) -{ - return (i915_gem_context_is_bannable(ctx) && - score >= CONTEXT_SCORE_BAN_THRESHOLD); -} - static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) { - unsigned int score; bool banned; atomic_inc(&ctx->guilty_count); - score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score); - banned = ban_context(ctx, score); - DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? 
%s\n", - ctx->name, score, yesno(banned)); + banned = false; + if (i915_gem_context_is_bannable(ctx)) { + unsigned int score; + + score = atomic_add_return(CONTEXT_SCORE_GUILTY, + &ctx->ban_score); + banned = score >= CONTEXT_SCORE_BAN_THRESHOLD; + + DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n", + ctx->name, score, yesno(banned)); + } if (!banned) return; @@ -2866,10 +2865,10 @@ static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) atomic_inc(&ctx->active_count); } -struct drm_i915_gem_request * +struct i915_request * i915_gem_find_active_request(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *request, *active = NULL; + struct i915_request *request, *active = NULL; unsigned long flags; /* We are called by the error capture and reset at a random @@ -2882,8 +2881,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) */ spin_lock_irqsave(&engine->timeline->lock, flags); list_for_each_entry(request, &engine->timeline->requests, link) { - if (__i915_gem_request_completed(request, - request->global_seqno)) + if (__i915_request_completed(request, request->global_seqno)) continue; GEM_BUG_ON(request->engine != engine); @@ -2916,10 +2914,10 @@ static bool engine_stalled(struct intel_engine_cs *engine) * Ensure irq handler finishes, and not run again. * Also return the active request so that we only search for it once. */ -struct drm_i915_gem_request * +struct i915_request * i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *request = NULL; + struct i915_request *request = NULL; /* * During the reset sequence, we must prevent the engine from @@ -2950,8 +2948,16 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) * calling engine->init_hw() and also writing the ELSP. * Turning off the execlists->tasklet until the reset is over * prevents the race. + * + * Note that this needs to be a single atomic operation on the + * tasklet (flush existing tasks, prevent new tasks) to prevent + * a race between reset and set-wedged. It is not, so we do the best + * we can atm and make sure we don't lock the machine up in the more + * common case of recursively being called from set-wedged from inside + * i915_reset. 
*/ - tasklet_kill(&engine->execlists.tasklet); + if (!atomic_read(&engine->execlists.tasklet.count)) + tasklet_kill(&engine->execlists.tasklet); tasklet_disable(&engine->execlists.tasklet); /* @@ -2977,7 +2983,7 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - struct drm_i915_gem_request *request; + struct i915_request *request; enum intel_engine_id id; int err = 0; @@ -2996,7 +3002,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) return err; } -static void skip_request(struct drm_i915_gem_request *request) +static void skip_request(struct i915_request *request) { void *vaddr = request->ring->vaddr; u32 head; @@ -3015,7 +3021,7 @@ static void skip_request(struct drm_i915_gem_request *request) dma_fence_set_error(&request->fence, -EIO); } -static void engine_skip_context(struct drm_i915_gem_request *request) +static void engine_skip_context(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; struct i915_gem_context *hung_ctx = request->ctx; @@ -3039,9 +3045,9 @@ static void engine_skip_context(struct drm_i915_gem_request *request) } /* Returns the request if it was guilty of the hang */ -static struct drm_i915_gem_request * +static struct i915_request * i915_gem_reset_request(struct intel_engine_cs *engine, - struct drm_i915_gem_request *request) + struct i915_request *request) { /* The guilty request will get skipped on a hung engine. * @@ -3095,7 +3101,7 @@ i915_gem_reset_request(struct intel_engine_cs *engine, } void i915_gem_reset_engine(struct intel_engine_cs *engine, - struct drm_i915_gem_request *request) + struct i915_request *request) { /* * Make sure this write is visible before we re-enable the interrupt @@ -3123,7 +3129,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) lockdep_assert_held(&dev_priv->drm.struct_mutex); - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); for_each_engine(engine, dev_priv, id) { struct i915_gem_context *ctx; @@ -3143,13 +3149,13 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) * an incoherent read by the CS (presumably stale TLB). An * empty request appears sufficient to paper over the glitch. 
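* Note that intel_engine_is_idle() is in effect a stronger test than the * old list_empty() check on the timeline, so the nop is only submitted * once the engine has fully drained.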
*/ - if (list_empty(&engine->timeline->requests)) { - struct drm_i915_gem_request *rq; + if (intel_engine_is_idle(engine)) { + struct i915_request *rq; - rq = i915_gem_request_alloc(engine, - dev_priv->kernel_context); + rq = i915_request_alloc(engine, + dev_priv->kernel_context); if (!IS_ERR(rq)) - __i915_add_request(rq, false); + __i915_request_add(rq, false); } } @@ -3184,21 +3190,21 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) } } -static void nop_submit_request(struct drm_i915_gem_request *request) +static void nop_submit_request(struct i915_request *request) { dma_fence_set_error(&request->fence, -EIO); - i915_gem_request_submit(request); + i915_request_submit(request); } -static void nop_complete_submit_request(struct drm_i915_gem_request *request) +static void nop_complete_submit_request(struct i915_request *request) { unsigned long flags; dma_fence_set_error(&request->fence, -EIO); spin_lock_irqsave(&request->engine->timeline->lock, flags); - __i915_gem_request_submit(request); + __i915_request_submit(request); intel_engine_init_global_seqno(request->engine, request->global_seqno); spin_unlock_irqrestore(&request->engine->timeline->lock, flags); } @@ -3208,6 +3214,16 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; + if (drm_debug & DRM_UT_DRIVER) { + struct drm_printer p = drm_debug_printer(__func__); + + for_each_engine(engine, i915, id) + intel_engine_dump(engine, &p, "%s\n", engine->name); + } + + set_bit(I915_WEDGED, &i915->gpu_error.flags); + smp_mb__after_atomic(); + /* * First, stop submission to hw, but do not yet complete requests by * rolling the global seqno forward (since this would complete requests @@ -3215,8 +3231,11 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) */ for_each_engine(engine, i915, id) { i915_gem_reset_prepare_engine(engine); + engine->submit_request = nop_submit_request; + engine->schedule = NULL; } + i915->caps.scheduler = 0; /* * Make sure no one is running the old callback before we proceed with @@ -3246,7 +3265,8 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) for_each_engine(engine, i915, id) { unsigned long flags; - /* Mark all pending requests as complete so that any concurrent + /* + * Mark all pending requests as complete so that any concurrent * (lockless) lookup doesn't try and wait upon the request as we * reset it. */ @@ -3258,7 +3278,6 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) i915_gem_reset_finish_engine(engine); } - set_bit(I915_WEDGED, &i915->gpu_error.flags); wake_up_all(&i915->gpu_error.reset_queue); } @@ -3282,7 +3301,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) */ list_for_each_entry(tl, &i915->gt.timelines, link) { for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; rq = i915_gem_active_peek(&tl->engine[i].last_request, &i915->drm.struct_mutex); @@ -3331,7 +3350,7 @@ i915_gem_retire_work_handler(struct work_struct *work) /* Come back later if the device is busy... 
*/ if (mutex_trylock(&dev->struct_mutex)) { - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); mutex_unlock(&dev->struct_mutex); } @@ -3346,6 +3365,65 @@ i915_gem_retire_work_handler(struct work_struct *work) round_jiffies_up_relative(HZ)); } +static void shrink_caches(struct drm_i915_private *i915) +{ + /* + * kmem_cache_shrink() discards empty slabs and reorders partially + * filled slabs to prioritise allocating from the mostly full slabs, + * with the aim of reducing fragmentation. + */ + kmem_cache_shrink(i915->priorities); + kmem_cache_shrink(i915->dependencies); + kmem_cache_shrink(i915->requests); + kmem_cache_shrink(i915->luts); + kmem_cache_shrink(i915->vmas); + kmem_cache_shrink(i915->objects); +} + +struct sleep_rcu_work { + union { + struct rcu_head rcu; + struct work_struct work; + }; + struct drm_i915_private *i915; + unsigned int epoch; +}; + +static inline bool +same_epoch(struct drm_i915_private *i915, unsigned int epoch) +{ + /* + * There is a small chance that the epoch wrapped since we started + * sleeping. If we assume that epoch is at least a u32, then it will + * take at least 2^32 * 100ms for it to wrap, or about 13 years. + */ + return epoch == READ_ONCE(i915->gt.epoch); +} + +static void __sleep_work(struct work_struct *work) +{ + struct sleep_rcu_work *s = container_of(work, typeof(*s), work); + struct drm_i915_private *i915 = s->i915; + unsigned int epoch = s->epoch; + + kfree(s); + if (same_epoch(i915, epoch)) + shrink_caches(i915); +} + +static void __sleep_rcu(struct rcu_head *rcu) +{ + struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu); + struct drm_i915_private *i915 = s->i915; + + if (same_epoch(i915, s->epoch)) { + INIT_WORK(&s->work, __sleep_work); + queue_work(i915->wq, &s->work); + } else { + kfree(s); + } +} + static inline bool new_requests_since_last_retire(const struct drm_i915_private *i915) { @@ -3358,26 +3436,24 @@ i915_gem_idle_work_handler(struct work_struct *work) { struct drm_i915_private *dev_priv = container_of(work, typeof(*dev_priv), gt.idle_work.work); + unsigned int epoch = I915_EPOCH_INVALID; bool rearm_hangcheck; - ktime_t end; if (!READ_ONCE(dev_priv->gt.awake)) return; /* * Wait for last execlists context complete, but bail out in case a - * new request is submitted. + * new request is submitted. As we don't trust the hardware, we + * continue on if the wait times out. This is necessary to allow + * the machine to suspend even if the hardware dies, and we will + * try to recover in resume (after depriving the hardware of power, + * it may be in a better mood). */ - end = ktime_add_ms(ktime_get(), I915_IDLE_ENGINES_TIMEOUT); - do { - if (new_requests_since_last_retire(dev_priv)) - return; - - if (intel_engines_are_idle(dev_priv)) - break; - - usleep_range(100, 500); - } while (ktime_before(ktime_get(), end)); + __wait_for(if (new_requests_since_last_retire(dev_priv)) return, + intel_engines_are_idle(dev_priv), + I915_IDLE_ENGINES_TIMEOUT * 1000, + 10, 500); rearm_hangcheck = cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); @@ -3417,6 +3493,8 @@ i915_gem_idle_work_handler(struct work_struct *work) GEM_BUG_ON(!dev_priv->gt.awake); dev_priv->gt.awake = false; + epoch = dev_priv->gt.epoch; + GEM_BUG_ON(epoch == I915_EPOCH_INVALID); rearm_hangcheck = false; if (INTEL_GEN(dev_priv) >= 6) @@ -3433,6 +3511,23 @@ out_rearm: GEM_BUG_ON(!dev_priv->gt.awake); i915_queue_hangcheck(dev_priv); } + + /* + * When we are idle, it is an opportune time to reap our caches.
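+ * The epoch sampled above while still awake is what lets the deferred + * callbacks (see same_epoch()) detect that the device was reawakened in + * the meantime, in which case the shrink is skipped as no longer useful.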
+ * However, we have many objects that utilise RCU and the ordered + * i915->wq that this work is executing on. To try and flush any + * pending frees now we are idle, we first wait for an RCU grace + * period, and then queue a task (that will run last on the wq) to + * shrink and re-optimize the caches. + */ + if (same_epoch(dev_priv, epoch)) { + struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL); + if (s) { + s->i915 = dev_priv; + s->epoch = epoch; + call_rcu(&s->rcu, __sleep_rcu); + } + } } void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) @@ -3578,7 +3673,7 @@ static int wait_for_engines(struct drm_i915_private *i915) for_each_engine(engine, i915, id) intel_engine_dump(engine, &p, - "%s", engine->name); + "%s\n", engine->name); } i915_gem_set_wedged(i915); @@ -3606,7 +3701,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) if (ret) return ret; } - i915_gem_retire_requests(i915); + i915_retire_requests(i915); ret = wait_for_engines(i915); } else { @@ -3625,7 +3720,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); if (obj->cache_dirty) i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); - obj->base.write_domain = 0; + obj->write_domain = 0; } void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) @@ -3662,7 +3757,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - if (obj->base.write_domain == I915_GEM_DOMAIN_WC) + if (obj->write_domain == I915_GEM_DOMAIN_WC) return 0; /* Flush and acquire obj->pages so that we are coherent through @@ -3683,17 +3778,17 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) * coherent writes from the GPU, by effectively invalidating the * WC domain upon first access. */ - if ((obj->base.read_domains & I915_GEM_DOMAIN_WC) == 0) + if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0) mb(); /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ - GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_WC) != 0); - obj->base.read_domains |= I915_GEM_DOMAIN_WC; + GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0); + obj->read_domains |= I915_GEM_DOMAIN_WC; if (write) { - obj->base.read_domains = I915_GEM_DOMAIN_WC; - obj->base.write_domain = I915_GEM_DOMAIN_WC; + obj->read_domains = I915_GEM_DOMAIN_WC; + obj->write_domain = I915_GEM_DOMAIN_WC; obj->mm.dirty = true; } @@ -3725,7 +3820,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) + if (obj->write_domain == I915_GEM_DOMAIN_GTT) return 0; /* Flush and acquire obj->pages so that we are coherent through @@ -3746,17 +3841,17 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) * coherent writes from the GPU, by effectively invalidating the * GTT domain upon first access. */ - if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) + if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0) mb(); /* It should now be out of any other write domains, and we can update * the domain values for our changes. 
*/ - GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); - obj->base.read_domains |= I915_GEM_DOMAIN_GTT; + GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); + obj->read_domains |= I915_GEM_DOMAIN_GTT; if (write) { - obj->base.read_domains = I915_GEM_DOMAIN_GTT; - obj->base.write_domain = I915_GEM_DOMAIN_GTT; + obj->read_domains = I915_GEM_DOMAIN_GTT; + obj->write_domain = I915_GEM_DOMAIN_GTT; obj->mm.dirty = true; } @@ -4000,7 +4095,8 @@ out: struct i915_vma * i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 alignment, - const struct i915_ggtt_view *view) + const struct i915_ggtt_view *view, + unsigned int flags) { struct i915_vma *vma; int ret; @@ -4037,25 +4133,14 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, * try to preserve the existing ABI). */ vma = ERR_PTR(-ENOSPC); - if (!view || view->type == I915_GGTT_VIEW_NORMAL) + if ((flags & PIN_MAPPABLE) == 0 && + (!view || view->type == I915_GGTT_VIEW_NORMAL)) vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, - PIN_MAPPABLE | PIN_NONBLOCK); - if (IS_ERR(vma)) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - unsigned int flags; - - /* Valleyview is definitely limited to scanning out the first - * 512MiB. Lets presume this behaviour was inherited from the - * g4x display engine and that all earlier gen are similarly - * limited. Testing suggests that it is a little more - * complicated than this. For example, Cherryview appears quite - * happy to scanout from anywhere within its global aperture. - */ - flags = 0; - if (HAS_GMCH_DISPLAY(i915)) - flags = PIN_MAPPABLE; + flags | + PIN_MAPPABLE | + PIN_NONBLOCK); + if (IS_ERR(vma)) vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); - } if (IS_ERR(vma)) goto err_unpin_global; @@ -4068,7 +4153,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ - obj->base.read_domains |= I915_GEM_DOMAIN_GTT; + obj->read_domains |= I915_GEM_DOMAIN_GTT; return vma; @@ -4121,15 +4206,15 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* Flush the CPU cache if it's still invalid. */ - if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { + if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); - obj->base.read_domains |= I915_GEM_DOMAIN_CPU; + obj->read_domains |= I915_GEM_DOMAIN_CPU; } /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ - GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); + GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU); /* If we're writing through the CPU, then the GPU read domains will * need to be invalidated at next use. 
@@ -4156,7 +4241,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_file_private *file_priv = file->driver_priv; unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; - struct drm_i915_gem_request *request, *target = NULL; + struct i915_request *request, *target = NULL; long ret; /* ABI: return -EIO if already wedged */ @@ -4176,16 +4261,16 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) target = request; } if (target) - i915_gem_request_get(target); + i915_request_get(target); spin_unlock(&file_priv->mm.lock); if (target == NULL) return 0; - ret = i915_wait_request(target, + ret = i915_request_wait(target, I915_WAIT_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); - i915_gem_request_put(target); + i915_request_put(target); return ret < 0 ? ret : 0; } @@ -4204,7 +4289,8 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->base.dev->struct_mutex); - if (!view && flags & PIN_MAPPABLE) { + if (flags & PIN_MAPPABLE && + (!view || view->type == I915_GGTT_VIEW_NORMAL)) { /* If the required space is larger than the available * aperture, we will not be able to find a slot for the * object and unbinding the object now will be in @@ -4298,7 +4384,7 @@ static __always_inline unsigned int __busy_set_if_active(const struct dma_fence *fence, unsigned int (*flag)(unsigned int id)) { - struct drm_i915_gem_request *rq; /* We have to check the current hw status of the fence as the uABI * guarantees forward progress. We could rely on the idle worker @@ -4311,8 +4397,8 @@ __busy_set_if_active(const struct dma_fence *fence, return 0; /* opencode to_request() in order to avoid const warnings */ - rq = container_of(fence, struct drm_i915_gem_request, fence); - if (i915_gem_request_completed(rq)) + rq = container_of(fence, struct i915_request, fence); + if (i915_request_completed(rq)) return 0; return flag(rq->engine->uabi_id); @@ -4457,8 +4543,7 @@ out: } static void -frontbuffer_retire(struct i915_gem_active *active, - struct drm_i915_gem_request *request) +frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request) { struct drm_i915_gem_object *obj = container_of(active, typeof(*obj), frontbuffer_write); @@ -4565,8 +4650,8 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) i915_gem_object_init(obj, &i915_gem_object_ops); - obj->base.write_domain = I915_GEM_DOMAIN_CPU; - obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; if (HAS_LLC(dev_priv)) /* On some devices, we can have the GPU use the LLC (the CPU * @@ -4680,6 +4765,9 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, kfree(obj->bit_17); i915_gem_object_free(obj); + GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); + atomic_dec(&i915->mm.free_count); + if (on) cond_resched(); } @@ -4710,7 +4798,8 @@ static void __i915_gem_free_work(struct work_struct *work) container_of(work, struct drm_i915_private, mm.free_work); struct llist_node *freed; - /* All file-owned VMA should have been released by this point through + /* + * All file-owned VMA should have been released by this point through * i915_gem_close_object(), or earlier by i915_gem_context_close(). * However, the object may also be bound into the global GTT (e.g.
* older GPUs without per-process support, or for direct access through @@ -4737,13 +4826,18 @@ static void __i915_gem_free_object_rcu(struct rcu_head *head) container_of(head, typeof(*obj), rcu); struct drm_i915_private *i915 = to_i915(obj->base.dev); - /* We can't simply use call_rcu() from i915_gem_free_object() - * as we need to block whilst unbinding, and the call_rcu - * task may be called from softirq context. So we take a - * detour through a worker. + /* + * Since we require blocking on struct_mutex to unbind the freed + * object from the GPU before releasing resources back to the + * system, we cannot do that directly from the RCU callback (which may + * be a softirq context), but must instead defer that work onto a + * kthread. We use the RCU callback rather than move the freed object + * directly onto the work queue so that we can mix between using the + * worker and performing frees directly from subsequent allocations for + * crude but effective memory throttling. */ if (llist_add(&obj->freed, &i915->mm.free_list)) - schedule_work(&i915->mm.free_work); + queue_work(i915->wq, &i915->mm.free_work); } void i915_gem_free_object(struct drm_gem_object *gem_obj) @@ -4756,11 +4850,13 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) if (discard_backing_storage(obj)) obj->mm.madv = I915_MADV_DONTNEED; - /* Before we free the object, make sure any pure RCU-only + /* + * Before we free the object, make sure any pure RCU-only * read-side critical sections are complete, e.g. * i915_gem_busy_ioctl(). For the corresponding synchronized * lookup see i915_gem_object_lookup_rcu(). */ + atomic_inc(&to_i915(obj->base.dev)->mm.free_count); call_rcu(&obj->rcu, __i915_gem_free_object_rcu); } @@ -4803,10 +4899,8 @@ void i915_gem_sanitize(struct drm_i915_private *i915) * it may impact the display and we are uncertain about the stability * of the reset, so this could be applied to even earlier gen. */ - if (INTEL_GEN(i915) >= 5) { - int reset = intel_gpu_reset(i915, ALL_ENGINES); - WARN_ON(reset && reset != -ENODEV); - } + if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) + WARN_ON(intel_gpu_reset(i915, ALL_ENGINES)); } int i915_gem_suspend(struct drm_i915_private *dev_priv) @@ -4843,7 +4937,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) i915_gem_contexts_lost(dev_priv); mutex_unlock(&dev->struct_mutex); - intel_guc_suspend(dev_priv); + intel_uc_suspend(dev_priv); cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); cancel_delayed_work_sync(&dev_priv->gt.retire_work); @@ -4910,7 +5004,7 @@ void i915_gem_resume(struct drm_i915_private *i915) if (i915_gem_init_hw(i915)) goto err_wedged; - intel_guc_resume(i915); + intel_uc_resume(i915); /* Always reload a context for powersaving.
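* (presumably so that the GPU keeps a valid context loaded and can still * enter its powersaving state once no user contexts remain)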
*/ if (i915_gem_switch_to_kernel_context(i915)) @@ -4986,8 +5080,11 @@ static int __i915_gem_restart_engines(void *data) for_each_engine(engine, i915, id) { err = engine->init_hw(engine); - if (err) + if (err) { + DRM_ERROR("Failed to restart %s (%d)\n", + engine->name, err); return err; + } } return 0; @@ -5039,14 +5136,16 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) ret = i915_ppgtt_init_hw(dev_priv); if (ret) { - DRM_ERROR("PPGTT enable HW failed %d\n", ret); + DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); goto out; } /* We can't enable contexts until all firmware is loaded */ ret = intel_uc_init_hw(dev_priv); - if (ret) + if (ret) { + DRM_ERROR("Enabling uc failed (%d)\n", ret); goto out; + } intel_mocs_init_l3cc_table(dev_priv); @@ -5078,9 +5177,9 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) return PTR_ERR(ctx); for_each_engine(engine, i915, id) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; - rq = i915_gem_request_alloc(engine, ctx); + rq = i915_request_alloc(engine, ctx); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out_ctx; @@ -5090,7 +5189,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) if (engine->init_context) err = engine->init_context(rq); - __i915_add_request(rq, true); + __i915_request_add(rq, true); if (err) goto err_active; } @@ -5198,7 +5297,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) if (ret) return ret; - ret = intel_uc_init_wq(dev_priv); + ret = intel_uc_init_misc(dev_priv); if (ret) return ret; @@ -5294,7 +5393,7 @@ err_unlock: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); mutex_unlock(&dev_priv->drm.struct_mutex); - intel_uc_fini_wq(dev_priv); + intel_uc_fini_misc(dev_priv); if (ret != -EIO) i915_gem_cleanup_userptr(dev_priv); @@ -5336,10 +5435,10 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv) { int i; - if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) && + if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) dev_priv->num_fence_regs = 32; - else if (INTEL_INFO(dev_priv)->gen >= 4 || + else if (INTEL_GEN(dev_priv) >= 4 || IS_I945G(dev_priv) || IS_I945GM(dev_priv) || IS_G33(dev_priv) || IS_PINEVIEW(dev_priv)) dev_priv->num_fence_regs = 16; @@ -5396,7 +5495,7 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) if (!dev_priv->luts) goto err_vmas; - dev_priv->requests = KMEM_CACHE(drm_i915_gem_request, + dev_priv->requests = KMEM_CACHE(i915_request, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_TYPESAFE_BY_RCU); @@ -5458,7 +5557,8 @@ err_out: void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) { i915_gem_drain_freed_objects(dev_priv); - WARN_ON(!llist_empty(&dev_priv->mm.free_list)); + GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); + GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); WARN_ON(dev_priv->mm.object_count); mutex_lock(&dev_priv->drm.struct_mutex); @@ -5528,7 +5628,7 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) void i915_gem_release(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; - struct drm_i915_gem_request *request; + struct i915_request *request; /* Clean up our request list when the client is going away, so that * later retire_requests won't dereference our soon-to-be-gone @@ -5614,7 +5714,7 @@ i915_gem_object_create_from_data(struct drm_i915_private *dev_priv, if (IS_ERR(obj)) return obj; - GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); + GEM_BUG_ON(obj->write_domain 
!= I915_GEM_DOMAIN_CPU); file = obj->base.filp; offset = 0; diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index e920dab7f1b8..f54c4ff74ded 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -29,7 +29,10 @@ #ifdef CONFIG_DRM_I915_DEBUG_GEM #define GEM_BUG_ON(condition) do { if (unlikely((condition))) { \ - printk(KERN_ERR "GEM_BUG_ON(%s)\n", __stringify(condition)); \ + pr_err("%s:%d GEM_BUG_ON(%s)\n", \ + __func__, __LINE__, __stringify(condition)); \ + GEM_TRACE("%s:%d GEM_BUG_ON(%s)\n", \ + __func__, __LINE__, __stringify(condition)); \ BUG(); \ } \ } while(0) @@ -54,6 +57,6 @@ #define GEM_TRACE(...) do { } while (0) #endif -#define I915_NUM_ENGINES 5 +#define I915_NUM_ENGINES 8 #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index c93005c2e0fb..d3cbe8432f48 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -119,7 +119,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, if (!reservation_object_test_signaled_rcu(resv, true)) break; - i915_gem_retire_requests(pool->engine->i915); + i915_retire_requests(pool->engine->i915); GEM_BUG_ON(i915_gem_object_is_active(obj)); /* diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c index b9b53ac14176..f5c570d35b2a 100644 --- a/drivers/gpu/drm/i915/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c @@ -177,7 +177,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, } else if (obj->mm.pages) { __i915_do_clflush(obj); } else { - GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); + GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); } obj->cache_dirty = false; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 0c963fcf31ff..f2cbea7cf940 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -211,17 +211,23 @@ static void context_close(struct i915_gem_context *ctx) static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) { int ret; + unsigned int max; + + if (INTEL_GEN(dev_priv) >= 11) + max = GEN11_MAX_CONTEXT_HW_ID; + else + max = MAX_CONTEXT_HW_ID; ret = ida_simple_get(&dev_priv->contexts.hw_ida, - 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); + 0, max, GFP_KERNEL); if (ret < 0) { /* Contexts are only released when no longer active. * Flush any pending retires to hopefully release some * stale contexts and try again. */ - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); ret = ida_simple_get(&dev_priv->contexts.hw_ida, - 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); + 0, max, GFP_KERNEL); if (ret < 0) return ret; } @@ -338,11 +344,6 @@ static void __destroy_hw_context(struct i915_gem_context *ctx, context_close(ctx); } -/** - * The default context needs to exist per ring that uses contexts. It stores the - * context state of the GPU for applications that don't utilize HW contexts, as - * well as an idle case. 
- */ static struct i915_gem_context * i915_gem_create_context(struct drm_i915_private *dev_priv, struct drm_i915_file_private *file_priv) @@ -449,12 +450,18 @@ destroy_kernel_context(struct i915_gem_context **ctxp) i915_gem_context_free(ctx); } +static bool needs_preempt_context(struct drm_i915_private *i915) +{ + return HAS_LOGICAL_RING_PREEMPTION(i915); +} + int i915_gem_contexts_init(struct drm_i915_private *dev_priv) { struct i915_gem_context *ctx; - int err; + /* Reassure ourselves we are only called once */ GEM_BUG_ON(dev_priv->kernel_context); + GEM_BUG_ON(dev_priv->preempt_context); INIT_LIST_HEAD(&dev_priv->contexts.list); INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker); @@ -462,14 +469,14 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) /* Using the simple ida interface, the max is limited by sizeof(int) */ BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); + BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX); ida_init(&dev_priv->contexts.hw_ida); /* lowest priority; idle task */ ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default global context\n"); - err = PTR_ERR(ctx); - goto err; + return PTR_ERR(ctx); } /* * For easy recognisability, we want the kernel context to be 0 and then @@ -479,23 +486,18 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) dev_priv->kernel_context = ctx; /* highest priority; preempting task */ - ctx = i915_gem_context_create_kernel(dev_priv, INT_MAX); - if (IS_ERR(ctx)) { - DRM_ERROR("Failed to create default preempt context\n"); - err = PTR_ERR(ctx); - goto err_kernel_context; + if (needs_preempt_context(dev_priv)) { + ctx = i915_gem_context_create_kernel(dev_priv, INT_MAX); + if (!IS_ERR(ctx)) + dev_priv->preempt_context = ctx; + else + DRM_ERROR("Failed to create preempt context; disabling preemption\n"); } - dev_priv->preempt_context = ctx; DRM_DEBUG_DRIVER("%s context support initialized\n", dev_priv->engine[RCS]->context_size ?
"logical" : "fake"); return 0; - -err_kernel_context: - destroy_kernel_context(&dev_priv->kernel_context); -err: - return err; } void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) @@ -521,7 +523,8 @@ void i915_gem_contexts_fini(struct drm_i915_private *i915) { lockdep_assert_held(&i915->drm.struct_mutex); - destroy_kernel_context(&i915->preempt_context); + if (i915->preempt_context) + destroy_kernel_context(&i915->preempt_context); destroy_kernel_context(&i915->kernel_context); /* Must free all deferred contexts (via flush_workqueue) first */ @@ -594,28 +597,28 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) lockdep_assert_held(&dev_priv->drm.struct_mutex); - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); for_each_engine(engine, dev_priv, id) { - struct drm_i915_gem_request *req; + struct i915_request *rq; if (engine_has_idle_kernel_context(engine)) continue; - req = i915_gem_request_alloc(engine, dev_priv->kernel_context); - if (IS_ERR(req)) - return PTR_ERR(req); + rq = i915_request_alloc(engine, dev_priv->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); /* Queue this switch after all other activity */ list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { - struct drm_i915_gem_request *prev; + struct i915_request *prev; struct intel_timeline *tl; tl = &timeline->engine[engine->id]; prev = i915_gem_active_raw(&tl->last_request, &dev_priv->drm.struct_mutex); if (prev) - i915_sw_fence_await_sw_fence_gfp(&req->submit, + i915_sw_fence_await_sw_fence_gfp(&rq->submit, &prev->submit, I915_FENCE_GFP); } @@ -627,7 +630,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) * but an extra layer of paranoia before we declare the system * idle (on suspend etc) is advisable! 
*/ - __i915_add_request(req, true); + __i915_request_add(rq, true); } return 0; @@ -807,7 +810,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, if (args->size) ret = -EINVAL; - else if (!to_i915(dev)->engine[RCS]->schedule) + else if (!(to_i915(dev)->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) ret = -ENODEV; else if (priority > I915_CONTEXT_MAX_USER_PRIORITY || priority < I915_CONTEXT_MIN_USER_PRIORITY) diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 4bfb72f8e1cb..7854262ddfd9 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -29,6 +29,8 @@ #include <linux/list.h> #include <linux/radix-tree.h> +#include "i915_gem.h" + struct pid; struct drm_device; @@ -37,6 +39,7 @@ struct drm_file; struct drm_i915_private; struct drm_i915_file_private; struct i915_hw_ppgtt; +struct i915_request; struct i915_vma; struct intel_ring; @@ -273,7 +276,7 @@ int i915_gem_context_open(struct drm_i915_private *i915, struct drm_file *file); void i915_gem_context_close(struct drm_file *file); -int i915_switch_context(struct drm_i915_gem_request *req); +int i915_switch_context(struct i915_request *rq); int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv); void i915_gem_context_release(struct kref *ctx_ref); diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 864439a214c8..69a7aec49e84 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -330,8 +330,8 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, * write-combined buffer or a delay through the chipset for GTT * writes that do require us to treat GTT as a separate cache domain.) */ - obj->base.read_domains = I915_GEM_DOMAIN_GTT; - obj->base.write_domain = 0; + obj->read_domains = I915_GEM_DOMAIN_GTT; + obj->write_domain = 0; return &obj->base; diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 60ca4f05ae94..54814a196ee4 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -168,7 +168,7 @@ i915_gem_evict_something(struct i915_address_space *vm, * retiring. */ if (!(flags & PIN_NONBLOCK)) - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); else phases[1] = NULL; @@ -293,7 +293,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * retiring. 
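The context-setparam hunk above replaces a poke at engine->schedule with a check of an exported scheduler capability bit. A self-contained sketch of that validation order, using illustrative cap and priority constants rather than the real uapi values:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define SCHEDULER_CAP_PRIORITY	(1u << 1)	/* illustrative, not uapi */
#define MAX_USER_PRIORITY	1023
#define MIN_USER_PRIORITY	(-1023)

static int set_priority(uint32_t caps, int prio)
{
	if (!(caps & SCHEDULER_CAP_PRIORITY))
		return -ENODEV;	/* scheduler cannot honour priorities */

	if (prio > MAX_USER_PRIORITY || prio < MIN_USER_PRIORITY)
		return -EINVAL;	/* outside the user-visible range */

	return 0;
}

int main(void)
{
	printf("%d %d %d\n",
	       set_priority(0, 0),				/* -ENODEV */
	       set_priority(SCHEDULER_CAP_PRIORITY, 5000),	/* -EINVAL */
	       set_priority(SCHEDULER_CAP_PRIORITY, 0));	/* 0 */
	return 0;
}

Gating on a capability mask rather than a function pointer keeps the uapi check in one place and lets the same information be reported to userspace.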
*/ if (!(flags & PIN_NONBLOCK)) - i915_gem_retire_requests(vm->i915); + i915_retire_requests(vm->i915); check_color = vm->mm.color_adjust; if (check_color) { diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 3ab1ace2a6bd..8c170db8495d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -200,7 +200,7 @@ struct i915_execbuffer { struct i915_gem_context *ctx; /** context for building the request */ struct i915_address_space *vm; /** GTT and vma for the request */ - struct drm_i915_gem_request *request; /** our request to build */ + struct i915_request *request; /** our request to build */ struct i915_vma *batch; /** identity of the batch obj/vma */ /** actual size of execobj[] as we may extend it for the cmdparser */ @@ -227,7 +227,7 @@ struct i915_execbuffer { bool has_fence : 1; bool needs_unfenced : 1; - struct drm_i915_gem_request *rq; + struct i915_request *rq; u32 *rq_cmd; unsigned int rq_size; } reloc_cache; @@ -886,7 +886,7 @@ static void reloc_gpu_flush(struct reloc_cache *cache) i915_gem_object_unpin_map(cache->rq->batch->obj); i915_gem_chipset_flush(cache->rq->i915); - __i915_add_request(cache->rq, true); + __i915_request_add(cache->rq, true); cache->rq = NULL; } @@ -1070,12 +1070,12 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, { struct reloc_cache *cache = &eb->reloc_cache; struct drm_i915_gem_object *obj; - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_vma *batch; u32 *cmd; int err; - GEM_BUG_ON(vma->obj->base.write_domain & I915_GEM_DOMAIN_CPU); + GEM_BUG_ON(vma->obj->write_domain & I915_GEM_DOMAIN_CPU); obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE); if (IS_ERR(obj)) @@ -1103,13 +1103,13 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_unmap; - rq = i915_gem_request_alloc(eb->engine, eb->ctx); + rq = i915_request_alloc(eb->engine, eb->ctx); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_unpin; } - err = i915_gem_request_await_object(rq, vma->obj, true); + err = i915_request_await_object(rq, vma->obj, true); if (err) goto err_request; @@ -1141,7 +1141,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, return 0; err_request: - i915_add_request(rq); + i915_request_add(rq); err_unpin: i915_vma_unpin(batch); err_unmap: @@ -1727,7 +1727,7 @@ slow: } static void eb_export_fence(struct i915_vma *vma, - struct drm_i915_gem_request *req, + struct i915_request *rq, unsigned int flags) { struct reservation_object *resv = vma->resv; @@ -1739,9 +1739,9 @@ static void eb_export_fence(struct i915_vma *vma, */ reservation_object_lock(resv, NULL); if (flags & EXEC_OBJECT_WRITE) - reservation_object_add_excl_fence(resv, &req->fence); + reservation_object_add_excl_fence(resv, &rq->fence); else if (reservation_object_reserve_shared(resv) == 0) - reservation_object_add_shared_fence(resv, &req->fence); + reservation_object_add_shared_fence(resv, &rq->fence); reservation_object_unlock(resv); } @@ -1757,7 +1757,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) struct drm_i915_gem_object *obj = vma->obj; if (flags & EXEC_OBJECT_CAPTURE) { - struct i915_gem_capture_list *capture; + struct i915_capture_list *capture; capture = kmalloc(sizeof(*capture), GFP_KERNEL); if (unlikely(!capture)) @@ -1788,7 +1788,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) if (flags & EXEC_OBJECT_ASYNC) continue; - err = i915_gem_request_await_object + err = i915_request_await_object (eb->request, 
obj, flags & EXEC_OBJECT_WRITE); if (err) return err; @@ -1840,13 +1840,13 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) } void i915_vma_move_to_active(struct i915_vma *vma, - struct drm_i915_gem_request *req, + struct i915_request *rq, unsigned int flags) { struct drm_i915_gem_object *obj = vma->obj; - const unsigned int idx = req->engine->id; + const unsigned int idx = rq->engine->id; - lockdep_assert_held(&req->i915->drm.struct_mutex); + lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); /* @@ -1860,35 +1860,35 @@ void i915_vma_move_to_active(struct i915_vma *vma, if (!i915_vma_is_active(vma)) obj->active_count++; i915_vma_set_active(vma, idx); - i915_gem_active_set(&vma->last_read[idx], req); + i915_gem_active_set(&vma->last_read[idx], rq); list_move_tail(&vma->vm_link, &vma->vm->active_list); - obj->base.write_domain = 0; + obj->write_domain = 0; if (flags & EXEC_OBJECT_WRITE) { - obj->base.write_domain = I915_GEM_DOMAIN_RENDER; + obj->write_domain = I915_GEM_DOMAIN_RENDER; if (intel_fb_obj_invalidate(obj, ORIGIN_CS)) - i915_gem_active_set(&obj->frontbuffer_write, req); + i915_gem_active_set(&obj->frontbuffer_write, rq); - obj->base.read_domains = 0; + obj->read_domains = 0; } - obj->base.read_domains |= I915_GEM_GPU_DOMAINS; + obj->read_domains |= I915_GEM_GPU_DOMAINS; if (flags & EXEC_OBJECT_NEEDS_FENCE) - i915_gem_active_set(&vma->last_fence, req); + i915_gem_active_set(&vma->last_fence, rq); } -static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) +static int i915_reset_gen7_sol_offsets(struct i915_request *rq) { u32 *cs; int i; - if (!IS_GEN7(req->i915) || req->engine->id != RCS) { + if (!IS_GEN7(rq->i915) || rq->engine->id != RCS) { DRM_DEBUG("sol reset is gen7/rcs only\n"); return -EINVAL; } - cs = intel_ring_begin(req, 4 * 2 + 2); + cs = intel_ring_begin(rq, 4 * 2 + 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1898,7 +1898,7 @@ static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) *cs++ = 0; } *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -1944,10 +1944,10 @@ out: } static void -add_to_client(struct drm_i915_gem_request *req, struct drm_file *file) +add_to_client(struct i915_request *rq, struct drm_file *file) { - req->file_priv = file->driver_priv; - list_add_tail(&req->client_link, &req->file_priv->mm.request_list); + rq->file_priv = file->driver_priv; + list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list); } static int eb_submit(struct i915_execbuffer *eb) @@ -1975,7 +1975,7 @@ static int eb_submit(struct i915_execbuffer *eb) return 0; } -/** +/* * Find one BSD ring to dispatch the corresponding BSD command. * The engine index is returned. */ @@ -2151,7 +2151,7 @@ await_fence_array(struct i915_execbuffer *eb, if (!fence) return -EINVAL; - err = i915_gem_request_await_dma_fence(eb->request, fence); + err = i915_request_await_dma_fence(eb->request, fence); dma_fence_put(fence); if (err < 0) return err; @@ -2365,14 +2365,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, GEM_BUG_ON(eb.reloc_cache.rq); /* Allocate a request for this batch buffer nice and early. 
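The i915_vma_move_to_active() hunk above is also a compact summary of the new object-level domain tracking: a GPU write claims the render write domain and invalidates every stale read domain, while any GPU use adds the GPU read domains. A standalone sketch with invented domain bits:

#include <stdint.h>
#include <stdio.h>

#define DOM_CPU		(1u << 0)	/* invented bits for the sketch */
#define DOM_RENDER	(1u << 1)
#define DOM_GPU		DOM_RENDER	/* stand-in for the GPU domain set */

struct obj {
	uint16_t read_domains;
	uint16_t write_domain;
};

static void move_to_active(struct obj *obj, int is_write)
{
	obj->write_domain = 0;
	if (is_write) {
		obj->write_domain = DOM_RENDER;	/* the GPU now owns the data */
		obj->read_domains = 0;		/* stale reads are invalidated */
	}
	obj->read_domains |= DOM_GPU;		/* GPU reads are always added */
}

int main(void)
{
	struct obj o = { .read_domains = DOM_CPU, .write_domain = DOM_CPU };

	move_to_active(&o, 1);
	printf("read %#x write %#x\n",
	       (unsigned int)o.read_domains, (unsigned int)o.write_domain);
	return 0;
}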
*/ - eb.request = i915_gem_request_alloc(eb.engine, eb.ctx); + eb.request = i915_request_alloc(eb.engine, eb.ctx); if (IS_ERR(eb.request)) { err = PTR_ERR(eb.request); goto err_batch_unpin; } if (in_fence) { - err = i915_gem_request_await_dma_fence(eb.request, in_fence); + err = i915_request_await_dma_fence(eb.request, in_fence); if (err < 0) goto err_request; } @@ -2400,10 +2400,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, */ eb.request->batch = eb.batch; - trace_i915_gem_request_queue(eb.request, eb.batch_flags); + trace_i915_request_queue(eb.request, eb.batch_flags); err = eb_submit(&eb); err_request: - __i915_add_request(eb.request, err == 0); + __i915_request_add(eb.request, err == 0); add_to_client(eb.request, file); if (fences) @@ -2465,8 +2465,8 @@ static bool check_buffer_count(size_t count) * list array and passes it to the real function. */ int -i915_gem_execbuffer(struct drm_device *dev, void *data, - struct drm_file *file) +i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) { struct drm_i915_gem_execbuffer *args = data; struct drm_i915_gem_execbuffer2 exec2; @@ -2556,8 +2556,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, } int -i915_gem_execbuffer2(struct drm_device *dev, void *data, - struct drm_file *file) +i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) { struct drm_i915_gem_execbuffer2 *args = data; struct drm_i915_gem_exec_object2 *exec2_list; diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 012250f25255..d548ac05ccd7 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -64,7 +64,7 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, int fence_pitch_shift; u64 val; - if (INTEL_INFO(fence->i915)->gen >= 6) { + if (INTEL_GEN(fence->i915) >= 6) { fence_reg_lo = FENCE_REG_GEN6_LO(fence->id); fence_reg_hi = FENCE_REG_GEN6_HI(fence->id); fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT; @@ -230,10 +230,14 @@ static int fence_update(struct drm_i915_fence_reg *fence, } if (fence->vma) { - ret = i915_gem_active_retire(&fence->vma->last_fence, - &fence->vma->obj->base.dev->struct_mutex); + struct i915_vma *old = fence->vma; + + ret = i915_gem_active_retire(&old->last_fence, + &old->obj->base.dev->struct_mutex); if (ret) return ret; + + i915_vma_flush_writes(old); } if (fence->vma && fence->vma != vma) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7e403eaa9e0f..21d72f695adb 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -543,9 +543,7 @@ static void fill_page_dma_32(struct i915_address_space *vm, static int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) { - struct page *page = NULL; - dma_addr_t addr; - int order; + unsigned long size; /* * In order to utilize 64K pages for an object with a size < 2M, we will @@ -559,48 +557,47 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) * TODO: we should really consider write-protecting the scratch-page and * sharing between ppgtt */ + size = I915_GTT_PAGE_SIZE_4K; if (i915_vm_is_48bit(vm) && HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) { - order = get_order(I915_GTT_PAGE_SIZE_64K); - page = alloc_pages(gfp | __GFP_ZERO | __GFP_NOWARN, order); - if (page) { - addr = dma_map_page(vm->dma, page, 0, - I915_GTT_PAGE_SIZE_64K, - PCI_DMA_BIDIRECTIONAL); - if (unlikely(dma_mapping_error(vm->dma, addr))) { - 
__free_pages(page, order); - page = NULL; - } - - if (!IS_ALIGNED(addr, I915_GTT_PAGE_SIZE_64K)) { - dma_unmap_page(vm->dma, addr, - I915_GTT_PAGE_SIZE_64K, - PCI_DMA_BIDIRECTIONAL); - __free_pages(page, order); - page = NULL; - } - } + size = I915_GTT_PAGE_SIZE_64K; + gfp |= __GFP_NOWARN; } + gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL; + + do { + int order = get_order(size); + struct page *page; + dma_addr_t addr; - if (!page) { - order = 0; - page = alloc_page(gfp | __GFP_ZERO); + page = alloc_pages(gfp, order); if (unlikely(!page)) - return -ENOMEM; + goto skip; - addr = dma_map_page(vm->dma, page, 0, PAGE_SIZE, + addr = dma_map_page(vm->dma, page, 0, size, PCI_DMA_BIDIRECTIONAL); - if (unlikely(dma_mapping_error(vm->dma, addr))) { - __free_page(page); - return -ENOMEM; - } - } + if (unlikely(dma_mapping_error(vm->dma, addr))) + goto free_page; - vm->scratch_page.page = page; - vm->scratch_page.daddr = addr; - vm->scratch_page.order = order; + if (unlikely(!IS_ALIGNED(addr, size))) + goto unmap_page; - return 0; + vm->scratch_page.page = page; + vm->scratch_page.daddr = addr; + vm->scratch_page.order = order; + return 0; + +unmap_page: + dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL); +free_page: + __free_pages(page, order); +skip: + if (size == I915_GTT_PAGE_SIZE_4K) + return -ENOMEM; + + size = I915_GTT_PAGE_SIZE_4K; + gfp &= ~__GFP_NOWARN; + } while (1); } static void cleanup_scratch_page(struct i915_address_space *vm) @@ -676,27 +673,22 @@ static void free_pd(struct i915_address_space *vm, static void gen8_initialize_pd(struct i915_address_space *vm, struct i915_page_directory *pd) { - unsigned int i; - fill_px(vm, pd, gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC)); - for (i = 0; i < I915_PDES; i++) - pd->page_table[i] = vm->scratch_pt; + memset_p((void **)pd->page_table, vm->scratch_pt, I915_PDES); } static int __pdp_init(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { const unsigned int pdpes = i915_pdpes_per_pdp(vm); - unsigned int i; pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory), GFP_KERNEL | __GFP_NOWARN); if (unlikely(!pdp->page_directory)) return -ENOMEM; - for (i = 0; i < pdpes; i++) - pdp->page_directory[i] = vm->scratch_pd; + memset_p((void **)pdp->page_directory, vm->scratch_pd, pdpes); return 0; } @@ -718,7 +710,7 @@ alloc_pdp(struct i915_address_space *vm) struct i915_page_directory_pointer *pdp; int ret = -ENOMEM; - WARN_ON(!use_4lvl(vm)); + GEM_BUG_ON(!use_4lvl(vm)); pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); if (!pdp) @@ -767,25 +759,22 @@ static void gen8_initialize_pdp(struct i915_address_space *vm, static void gen8_initialize_pml4(struct i915_address_space *vm, struct i915_pml4 *pml4) { - unsigned int i; - fill_px(vm, pml4, gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC)); - for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) - pml4->pdps[i] = vm->scratch_pdp; + memset_p((void **)pml4->pdps, vm->scratch_pdp, GEN8_PML4ES_PER_PML4); } /* Broadwell Page Directory Pointer Descriptors */ -static int gen8_write_pdp(struct drm_i915_gem_request *req, +static int gen8_write_pdp(struct i915_request *rq, unsigned entry, dma_addr_t addr) { - struct intel_engine_cs *engine = req->engine; + struct intel_engine_cs *engine = rq->engine; u32 *cs; BUG_ON(entry >= 4); - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -795,20 +784,20 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = 
i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, entry)); *cs++ = lower_32_bits(addr); - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int gen8_mm_switch_3lvl(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) + struct i915_request *rq) { int i, ret; for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - ret = gen8_write_pdp(req, i, pd_daddr); + ret = gen8_write_pdp(rq, i, pd_daddr); if (ret) return ret; } @@ -817,9 +806,9 @@ static int gen8_mm_switch_3lvl(struct i915_hw_ppgtt *ppgtt, } static int gen8_mm_switch_4lvl(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) + struct i915_request *rq) { - return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4)); + return gen8_write_pdp(rq, 0, px_dma(&ppgtt->pml4)); } /* PDE TLBs are a pain to invalidate on GEN8+. When we modify @@ -1743,13 +1732,13 @@ static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt) } static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) + struct i915_request *rq) { - struct intel_engine_cs *engine = req->engine; + struct intel_engine_cs *engine = rq->engine; u32 *cs; /* NB: TLBs must be flushed and invalidated before a switch */ - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1759,19 +1748,19 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); *cs++ = get_pd_offset(ppgtt); *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) + struct i915_request *rq) { - struct intel_engine_cs *engine = req->engine; + struct intel_engine_cs *engine = rq->engine; u32 *cs; /* NB: TLBs must be flushed and invalidated before a switch */ - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1781,16 +1770,16 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); *cs++ = get_pd_offset(ppgtt); *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) + struct i915_request *rq) { - struct intel_engine_cs *engine = req->engine; - struct drm_i915_private *dev_priv = req->i915; + struct intel_engine_cs *engine = rq->engine; + struct drm_i915_private *dev_priv = rq->i915; I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); @@ -2112,7 +2101,7 @@ static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, ppgtt->base.i915 = dev_priv; ppgtt->base.dma = &dev_priv->drm.pdev->dev; - if (INTEL_INFO(dev_priv)->gen < 8) + if (INTEL_GEN(dev_priv) < 8) return gen6_ppgtt_init(ppgtt); else return gen8_ppgtt_init(ppgtt); @@ -2260,9 +2249,9 @@ void i915_ppgtt_release(struct kref *kref) trace_i915_ppgtt_release(&ppgtt->base); /* vmas should already be unbound and destroyed */ - WARN_ON(!list_empty(&ppgtt->base.active_list)); - WARN_ON(!list_empty(&ppgtt->base.inactive_list)); - WARN_ON(!list_empty(&ppgtt->base.unbound_list)); + GEM_BUG_ON(!list_empty(&ppgtt->base.active_list)); + GEM_BUG_ON(!list_empty(&ppgtt->base.inactive_list)); + GEM_BUG_ON(!list_empty(&ppgtt->base.unbound_list)); ppgtt->base.cleanup(&ppgtt->base); i915_address_space_fini(&ppgtt->base); @@ -2370,9 +2359,10 @@ int 
i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { do { - if (dma_map_sg(&obj->base.dev->pdev->dev, - pages->sgl, pages->nents, - PCI_DMA_BIDIRECTIONAL)) + if (dma_map_sg_attrs(&obj->base.dev->pdev->dev, + pages->sgl, pages->nents, + PCI_DMA_BIDIRECTIONAL, + DMA_ATTR_NO_WARN)) return 0; /* If the DMA remap fails, one cause can be that we have @@ -2824,10 +2814,10 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) i915->mm.aliasing_ppgtt = ppgtt; - WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); + GEM_BUG_ON(ggtt->base.bind_vma != ggtt_bind_vma); ggtt->base.bind_vma = aliasing_gtt_bind_vma; - WARN_ON(ggtt->base.unbind_vma != ggtt_unbind_vma); + GEM_BUG_ON(ggtt->base.unbind_vma != ggtt_unbind_vma); ggtt->base.unbind_vma = aliasing_gtt_unbind_vma; return 0; @@ -2918,7 +2908,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) ggtt->base.closed = true; mutex_lock(&dev_priv->drm.struct_mutex); - WARN_ON(!list_empty(&ggtt->base.active_list)); + GEM_BUG_ON(!list_empty(&ggtt->base.active_list)); list_for_each_entry_safe(vma, vn, &ggtt->base.inactive_list, vm_link) WARN_ON(i915_vma_unbind(vma)); mutex_unlock(&dev_priv->drm.struct_mutex); @@ -3811,6 +3801,9 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); switch (vma->ggtt_view.type) { + default: + GEM_BUG_ON(vma->ggtt_view.type); + /* fall through */ case I915_GGTT_VIEW_NORMAL: vma->pages = vma->obj->mm.pages; return 0; @@ -3823,11 +3816,6 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) case I915_GGTT_VIEW_PARTIAL: vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); break; - - default: - WARN_ONCE(1, "GGTT view %u not implemented!\n", - vma->ggtt_view.type); - return -EINVAL; } ret = 0; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index a42890d9af38..6efc017e8bb3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -39,7 +39,8 @@ #include <linux/pagevec.h> #include "i915_gem_timeline.h" -#include "i915_gem_request.h" + +#include "i915_request.h" #include "i915_selftest.h" #define I915_GTT_PAGE_SIZE_4K BIT(12) @@ -398,7 +399,7 @@ struct i915_hw_ppgtt { gen6_pte_t __iomem *pd_addr; int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req); + struct i915_request *rq); void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); }; diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index a1d6956734f7..0d0144b2104c 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -167,6 +167,10 @@ static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { }; /** + * i915_gem_object_create_internal: create an object with volatile pages + * @i915: the i915 device + * @size: the size in bytes of backing storage to allocate for the object + * * Creates a new object that wraps some internal memory for private use. * This object is not backed by swappable storage, and as such its contents * are volatile and only valid whilst pinned. 
If the object is reaped by the @@ -197,8 +201,8 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &i915_gem_object_internal_ops); - obj->base.read_domains = I915_GEM_DOMAIN_CPU; - obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; + obj->write_domain = I915_GEM_DOMAIN_CPU; cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; i915_gem_object_set_cache_coherency(obj, cache_level); diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 05e89e1c0a08..54f00b350779 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -33,7 +33,7 @@ #include <drm/i915_drm.h> -#include "i915_gem_request.h" +#include "i915_request.h" #include "i915_selftest.h" struct drm_i915_gem_object; @@ -148,6 +148,21 @@ struct drm_i915_gem_object { #define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1) unsigned int cache_dirty:1; + /** + * @read_domains: Read memory domains. + * + * These monitor which caches contain read/write data related to the + * object. When transitioning from one set of domains to another, + * the driver is called to ensure that caches are suitably flushed and + * invalidated. + */ + u16 read_domains; + + /** + * @write_domain: Corresponding unique write memory domain. + */ + u16 write_domain; + atomic_t frontbuffer_bits; unsigned int frontbuffer_ggtt_origin; /* write once */ struct i915_gem_active frontbuffer_write; diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index f7fc0df251ac..1036e8686916 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -177,7 +177,7 @@ err: #undef OUT_BATCH -int i915_gem_render_state_emit(struct drm_i915_gem_request *rq) +int i915_gem_render_state_emit(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; struct intel_render_state so = {}; /* keep the compiler happy */ diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h index 86369520482e..112cda8fa1a8 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.h +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h @@ -24,8 +24,8 @@ #ifndef _I915_GEM_RENDER_STATE_H_ #define _I915_GEM_RENDER_STATE_H_ -struct drm_i915_gem_request; +struct i915_request; -int i915_gem_render_state_emit(struct drm_i915_gem_request *rq); +int i915_gem_render_state_emit(struct i915_request *rq); #endif /* _I915_GEM_RENDER_STATE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 0e158f9287c4..5757fb7c4b5a 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -175,7 +175,7 @@ i915_gem_shrink(struct drm_i915_private *i915, i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); trace_i915_gem_shrink(i915, target, flags); - i915_gem_retire_requests(i915); + i915_retire_requests(i915); /* * Unbinding of objects will require HW access; Let us not wake the @@ -267,7 +267,7 @@ i915_gem_shrink(struct drm_i915_private *i915, if (flags & I915_SHRINK_BOUND) intel_runtime_pm_put(i915); - i915_gem_retire_requests(i915); + i915_retire_requests(i915); shrinker_unlock(i915, unlock); diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index d3f222fa6356..62aa67960bf4 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c 
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -356,7 +356,7 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) reserved_base = 0; reserved_size = 0; - switch (INTEL_INFO(dev_priv)->gen) { + switch (INTEL_GEN(dev_priv)) { case 2: case 3: break; @@ -516,7 +516,7 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, i915_gem_object_init(obj, &i915_gem_object_stolen_ops); obj->stolen = stolen; - obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; + obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE; i915_gem_object_set_cache_coherency(obj, cache_level); diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index b5a22400a01f..33e01bf6aa36 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -27,9 +27,9 @@ #include <linux/list.h> -#include "i915_utils.h" -#include "i915_gem_request.h" +#include "i915_request.h" #include "i915_syncmap.h" +#include "i915_utils.h" struct i915_gem_timeline; diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 382a77a1097e..d596a8302ca3 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -721,7 +721,7 @@ static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { .release = i915_gem_userptr_release, }; -/** +/* * Creates a new mm object that wraps some normal memory from the process * context - user memory. * @@ -757,7 +757,9 @@ static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { * dma-buf instead. */ int -i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +i915_gem_userptr_ioctl(struct drm_device *dev, + void *data, + struct drm_file *file) { struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_userptr *args = data; @@ -796,8 +798,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file drm_gem_private_object_init(dev, &obj->base, args->user_size); i915_gem_object_init(obj, &i915_gem_userptr_ops); - obj->base.read_domains = I915_GEM_DOMAIN_CPU; - obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; + obj->write_domain = I915_GEM_DOMAIN_CPU; i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); obj->userptr.ptr = args->user_ptr; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 944059322daa..f89ac7a8f95f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -34,16 +34,25 @@ #include "i915_drv.h" -static const char *engine_str(int engine) -{ - switch (engine) { - case RCS: return "render"; - case VCS: return "bsd"; - case BCS: return "blt"; - case VECS: return "vebox"; - case VCS2: return "bsd2"; - default: return ""; - } +static inline const struct intel_engine_cs * +engine_lookup(const struct drm_i915_private *i915, unsigned int id) +{ + if (id >= I915_NUM_ENGINES) + return NULL; + + return i915->engine[id]; +} + +static inline const char * +__engine_name(const struct intel_engine_cs *engine) +{ + return engine ? 
engine->name : ""; +} + +static const char * +engine_name(const struct drm_i915_private *i915, unsigned int id) +{ + return __engine_name(engine_lookup(i915, id)); } static const char *tiling_flag(int tiling) @@ -345,7 +354,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, err_puts(m, purgeable_flag(err->purgeable)); err_puts(m, err->userptr ? " userptr" : ""); err_puts(m, err->engine != -1 ? " " : ""); - err_puts(m, engine_str(err->engine)); + err_puts(m, engine_name(m->i915, err->engine)); err_puts(m, i915_cache_level_str(m->i915, err->cache_level)); if (err->name) @@ -387,6 +396,11 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, ee->instdone.row[slice][subslice]); } +static const char *bannable(const struct drm_i915_error_context *ctx) +{ + return ctx->bannable ? "" : " (unbannable)"; +} + static void error_print_request(struct drm_i915_error_state_buf *m, const char *prefix, const struct drm_i915_error_request *erq) @@ -405,9 +419,10 @@ static void error_print_context(struct drm_i915_error_state_buf *m, const char *header, const struct drm_i915_error_context *ctx) { - err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d guilty %d active %d\n", + err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d\n", header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, - ctx->priority, ctx->ban_score, ctx->guilty, ctx->active); + ctx->priority, ctx->ban_score, bannable(ctx), + ctx->guilty, ctx->active); } static void error_print_engine(struct drm_i915_error_state_buf *m, @@ -415,7 +430,8 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, { int n; - err_printf(m, "%s command stream:\n", engine_str(ee->engine_id)); + err_printf(m, "%s command stream:\n", + engine_name(m->i915, ee->engine_id)); err_printf(m, " IDLE?: %s\n", yesno(ee->idle)); err_printf(m, " START: 0x%08x\n", ee->start); err_printf(m, " HEAD: 0x%08x [0x%08x]\n", ee->head, ee->rq_head); @@ -563,11 +579,14 @@ static void print_error_obj(struct drm_i915_error_state_buf *m, } static void err_print_capabilities(struct drm_i915_error_state_buf *m, - const struct intel_device_info *info) + const struct intel_device_info *info, + const struct intel_driver_caps *caps) { struct drm_printer p = i915_error_printer(m); intel_device_info_dump_flags(info, &p); + intel_driver_caps_print(caps, &p); + intel_device_info_dump_topology(&info->sseu, &p); } static void err_print_params(struct drm_i915_error_state_buf *m, @@ -610,6 +629,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, { struct drm_i915_private *dev_priv = m->i915; struct drm_i915_error_object *obj; + struct timespec64 ts; int i, j; if (!error) { @@ -620,21 +640,25 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, if (*error->error_msg) err_printf(m, "%s\n", error->error_msg); err_printf(m, "Kernel: " UTS_RELEASE "\n"); - err_printf(m, "Time: %ld s %ld us\n", - error->time.tv_sec, error->time.tv_usec); - err_printf(m, "Boottime: %ld s %ld us\n", - error->boottime.tv_sec, error->boottime.tv_usec); - err_printf(m, "Uptime: %ld s %ld us\n", - error->uptime.tv_sec, error->uptime.tv_usec); + ts = ktime_to_timespec64(error->time); + err_printf(m, "Time: %lld s %ld us\n", + (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC); + ts = ktime_to_timespec64(error->boottime); + err_printf(m, "Boottime: %lld s %ld us\n", + (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC); + ts = ktime_to_timespec64(error->uptime); + err_printf(m, "Uptime: %lld s %ld us\n", + 
(s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC); for (i = 0; i < ARRAY_SIZE(error->engine); i++) { if (error->engine[i].hangcheck_stalled && error->engine[i].context.pid) { - err_printf(m, "Active process (on ring %s): %s [%d], score %d\n", - engine_str(i), + err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n", + engine_name(m->i915, i), error->engine[i].context.comm, error->engine[i].context.pid, - error->engine[i].context.ban_score); + error->engine[i].context.ban_score, + bannable(&error->engine[i].context)); } } err_printf(m, "Reset count: %u\n", error->reset_count); @@ -722,12 +746,13 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, if (obj) { err_puts(m, dev_priv->engine[i]->name); if (ee->context.pid) - err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d)", + err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d%s)", ee->context.comm, ee->context.pid, ee->context.handle, ee->context.hw_id, - ee->context.ban_score); + ee->context.ban_score, + bannable(&ee->context)); err_printf(m, " --- gtt_offset = 0x%08x %08x\n", upper_32_bits(obj->gtt_offset), lower_32_bits(obj->gtt_offset)); @@ -786,7 +811,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, if (error->display) intel_display_print_error_state(m, error->display); - err_print_capabilities(m, &error->device_info); + err_print_capabilities(m, &error->device_info, &error->driver_caps); err_print_params(m, &error->params); err_print_uc(m, &error->uc); @@ -967,7 +992,7 @@ out: static inline uint32_t __active_get_seqno(struct i915_gem_active *active) { - struct drm_i915_gem_request *request; + struct i915_request *request; request = __i915_gem_active_peek(active); return request ? request->global_seqno : 0; @@ -976,7 +1001,7 @@ __active_get_seqno(struct i915_gem_active *active) static inline int __active_get_engine_id(struct i915_gem_active *active) { - struct drm_i915_gem_request *request; + struct i915_request *request; request = __i915_gem_active_peek(active); return request ? request->engine->id : -1; @@ -997,8 +1022,8 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->engine = __active_get_engine_id(&obj->frontbuffer_write); err->gtt_offset = vma->node.start; - err->read_domains = obj->base.read_domains; - err->write_domain = obj->base.write_domain; + err->read_domains = obj->read_domains; + err->write_domain = obj->write_domain; err->fence_reg = vma->fence ? vma->fence->id : -1; err->tiling = i915_gem_object_get_tiling(obj); err->dirty = obj->mm.dirty; @@ -1060,9 +1085,9 @@ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, return error_code; } -static void i915_gem_record_fences(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) +static void gem_record_fences(struct i915_gpu_state *error) { + struct drm_i915_private *dev_priv = error->i915; int i; if (INTEL_GEN(dev_priv) >= 6) { @@ -1078,27 +1103,6 @@ static void i915_gem_record_fences(struct drm_i915_private *dev_priv, error->nfence = i; } -static inline u32 -gen8_engine_sync_index(struct intel_engine_cs *engine, - struct intel_engine_cs *other) -{ - int idx; - - /* - * rcs -> 0 = vcs, 1 = bcs, 2 = vecs, 3 = vcs2; - * vcs -> 0 = bcs, 1 = vecs, 2 = vcs2, 3 = rcs; - * bcs -> 0 = vecs, 1 = vcs2. 
2 = rcs, 3 = vcs; - * vecs -> 0 = vcs2, 1 = rcs, 2 = vcs, 3 = bcs; - * vcs2 -> 0 = rcs, 1 = vcs, 2 = bcs, 3 = vecs; - */ - - idx = (other - engine) - 1; - if (idx < 0) - idx += I915_NUM_ENGINES; - - return idx; -} - static void gen6_record_semaphore_state(struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { @@ -1269,7 +1273,7 @@ static void error_record_engine_registers(struct i915_gpu_state *error, } } -static void record_request(struct drm_i915_gem_request *request, +static void record_request(struct i915_request *request, struct drm_i915_error_request *erq) { erq->context = request->ctx->hw_id; @@ -1286,10 +1290,10 @@ static void record_request(struct drm_i915_gem_request *request, } static void engine_record_requests(struct intel_engine_cs *engine, - struct drm_i915_gem_request *first, + struct i915_request *first, struct drm_i915_error_engine *ee) { - struct drm_i915_gem_request *request; + struct i915_request *request; int count; count = 0; @@ -1339,7 +1343,7 @@ static void error_record_engine_execlists(struct intel_engine_cs *engine, unsigned int n; for (n = 0; n < execlists_num_ports(execlists); n++) { - struct drm_i915_gem_request *rq = port_request(&execlists->port[n]); + struct i915_request *rq = port_request(&execlists->port[n]); if (!rq) break; @@ -1369,14 +1373,15 @@ static void record_context(struct drm_i915_error_context *e, e->hw_id = ctx->hw_id; e->priority = ctx->priority; e->ban_score = atomic_read(&ctx->ban_score); + e->bannable = i915_gem_context_is_bannable(ctx); e->guilty = atomic_read(&ctx->guilty_count); e->active = atomic_read(&ctx->active_count); } -static void request_record_user_bo(struct drm_i915_gem_request *request, +static void request_record_user_bo(struct i915_request *request, struct drm_i915_error_engine *ee) { - struct i915_gem_capture_list *c; + struct i915_capture_list *c; struct drm_i915_error_object **bo; long count; @@ -1420,16 +1425,16 @@ capture_object(struct drm_i915_private *dev_priv, } } -static void i915_gem_record_rings(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) +static void gem_record_rings(struct i915_gpu_state *error) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct drm_i915_private *i915 = error->i915; + struct i915_ggtt *ggtt = &i915->ggtt; int i; for (i = 0; i < I915_NUM_ENGINES; i++) { - struct intel_engine_cs *engine = dev_priv->engine[i]; + struct intel_engine_cs *engine = i915->engine[i]; struct drm_i915_error_engine *ee = &error->engine[i]; - struct drm_i915_gem_request *request; + struct i915_request *request; ee->engine_id = -1; @@ -1456,17 +1461,16 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, * by userspace. 
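Back in i915_gpu_error.c, the engine_str() to engine_name() conversion earlier in the file swaps a hard-coded switch for a bounds-checked table walk that tolerates NULL slots. A simplified standalone version of the same shape; the table contents and sizes are invented:

#include <stdio.h>

#define NUM_ENGINES 5

struct engine { const char *name; };

static const struct engine render = { "render" };
static const struct engine bsd = { "bsd" };

/* slots may legitimately be NULL, e.g. for fused-off engines */
static const struct engine *engines[NUM_ENGINES] = { &render, &bsd };

static const struct engine *engine_lookup(unsigned int id)
{
	return id < NUM_ENGINES ? engines[id] : NULL;
}

static const char *engine_name(unsigned int id)
{
	const struct engine *e = engine_lookup(id);

	return e ? e->name : "";	/* unknown ids degrade to "" */
}

int main(void)
{
	printf("[%s] [%s] [%s]\n",
	       engine_name(0), engine_name(3), engine_name(99));
	return 0;
}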
*/ ee->batchbuffer = - i915_error_object_create(dev_priv, - request->batch); + i915_error_object_create(i915, request->batch); - if (HAS_BROKEN_CS_TLB(dev_priv)) + if (HAS_BROKEN_CS_TLB(i915)) ee->wa_batchbuffer = - i915_error_object_create(dev_priv, + i915_error_object_create(i915, engine->scratch); request_record_user_bo(request, ee); ee->ctx = - i915_error_object_create(dev_priv, + i915_error_object_create(i915, request->ctx->engine[i].state); error->simulated |= @@ -1480,27 +1484,24 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, ee->cpu_ring_head = ring->head; ee->cpu_ring_tail = ring->tail; ee->ringbuffer = - i915_error_object_create(dev_priv, ring->vma); + i915_error_object_create(i915, ring->vma); engine_record_requests(engine, request, ee); } ee->hws_page = - i915_error_object_create(dev_priv, + i915_error_object_create(i915, engine->status_page.vma); - ee->wa_ctx = - i915_error_object_create(dev_priv, engine->wa_ctx.vma); + ee->wa_ctx = i915_error_object_create(i915, engine->wa_ctx.vma); - ee->default_state = - capture_object(dev_priv, engine->default_state); + ee->default_state = capture_object(i915, engine->default_state); } } -static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error, - struct i915_address_space *vm, - int idx) +static void gem_capture_vm(struct i915_gpu_state *error, + struct i915_address_space *vm, + int idx) { struct drm_i915_error_buffer *active_bo; struct i915_vma *vma; @@ -1523,8 +1524,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, error->active_bo_count[idx] = count; } -static void i915_capture_active_buffers(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) +static void capture_active_buffers(struct i915_gpu_state *error) { int cnt = 0, i, j; @@ -1544,14 +1544,13 @@ static void i915_capture_active_buffers(struct drm_i915_private *dev_priv, for (j = 0; j < i && !found; j++) found = error->engine[j].vm == ee->vm; if (!found) - i915_gem_capture_vm(dev_priv, error, ee->vm, cnt++); + gem_capture_vm(error, ee->vm, cnt++); } } -static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) +static void capture_pinned_buffers(struct i915_gpu_state *error) { - struct i915_address_space *vm = &dev_priv->ggtt.base; + struct i915_address_space *vm = &error->i915->ggtt.base; struct drm_i915_error_buffer *bo; struct i915_vma *vma; int count_inactive, count_active; @@ -1601,9 +1600,9 @@ static void capture_uc_state(struct i915_gpu_state *error) } /* Capture all registers which don't fit into another category. */ -static void i915_capture_reg_state(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) +static void capture_reg_state(struct i915_gpu_state *error) { + struct drm_i915_private *dev_priv = error->i915; int i; /* General organization @@ -1700,23 +1699,25 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv, engine_mask ? 
"reset" : "continue"); } -static void i915_capture_gen_state(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) +static void capture_gen_state(struct i915_gpu_state *error) { - error->awake = dev_priv->gt.awake; - error->wakelock = atomic_read(&dev_priv->runtime_pm.wakeref_count); - error->suspended = dev_priv->runtime_pm.suspended; + struct drm_i915_private *i915 = error->i915; + + error->awake = i915->gt.awake; + error->wakelock = atomic_read(&i915->runtime_pm.wakeref_count); + error->suspended = i915->runtime_pm.suspended; error->iommu = -1; #ifdef CONFIG_INTEL_IOMMU error->iommu = intel_iommu_gfx_mapped; #endif - error->reset_count = i915_reset_count(&dev_priv->gpu_error); - error->suspend_count = dev_priv->suspend_count; + error->reset_count = i915_reset_count(&i915->gpu_error); + error->suspend_count = i915->suspend_count; memcpy(&error->device_info, - INTEL_INFO(dev_priv), + INTEL_INFO(i915), sizeof(error->device_info)); + error->driver_caps = i915->caps; } static __always_inline void dup_param(const char *type, void *x) @@ -1737,21 +1738,19 @@ static int capture(void *data) { struct i915_gpu_state *error = data; - do_gettimeofday(&error->time); - error->boottime = ktime_to_timeval(ktime_get_boottime()); - error->uptime = - ktime_to_timeval(ktime_sub(ktime_get(), - error->i915->gt.last_init_time)); + error->time = ktime_get_real(); + error->boottime = ktime_get_boottime(); + error->uptime = ktime_sub(ktime_get(), + error->i915->gt.last_init_time); capture_params(error); + capture_gen_state(error); capture_uc_state(error); - - i915_capture_gen_state(error->i915, error); - i915_capture_reg_state(error->i915, error); - i915_gem_record_fences(error->i915, error); - i915_gem_record_rings(error->i915, error); - i915_capture_active_buffers(error->i915, error); - i915_capture_pinned_buffers(error->i915, error); + capture_reg_state(error); + gem_record_fences(error); + gem_record_rings(error); + capture_active_buffers(error); + capture_pinned_buffers(error); error->overlay = intel_overlay_capture_error_state(error->i915); error->display = intel_display_capture_error_state(error->i915); @@ -1780,14 +1779,16 @@ i915_capture_gpu_state(struct drm_i915_private *i915) /** * i915_capture_error_state - capture an error record for later analysis - * @dev: drm device + * @i915: i915 device + * @engine_mask: the mask of engines triggering the hang + * @error_msg: a message to insert into the error capture header * * Should be called when an error is detected (either a hang or an error * interrupt) to capture error state from the time of the error. Fills * out a structure which becomes available in debugfs for user level tools * to pick up. 
*/ -void i915_capture_error_state(struct drm_i915_private *dev_priv, +void i915_capture_error_state(struct drm_i915_private *i915, u32 engine_mask, const char *error_msg) { @@ -1798,25 +1799,25 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, if (!i915_modparams.error_capture) return; - if (READ_ONCE(dev_priv->gpu_error.first_error)) + if (READ_ONCE(i915->gpu_error.first_error)) return; - error = i915_capture_gpu_state(dev_priv); + error = i915_capture_gpu_state(i915); if (!error) { DRM_DEBUG_DRIVER("out of memory, not capturing error state\n"); return; } - i915_error_capture_msg(dev_priv, error, engine_mask, error_msg); + i915_error_capture_msg(i915, error, engine_mask, error_msg); DRM_INFO("%s\n", error->error_msg); if (!error->simulated) { - spin_lock_irqsave(&dev_priv->gpu_error.lock, flags); - if (!dev_priv->gpu_error.first_error) { - dev_priv->gpu_error.first_error = error; + spin_lock_irqsave(&i915->gpu_error.lock, flags); + if (!i915->gpu_error.first_error) { + i915->gpu_error.first_error = error; error = NULL; } - spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags); + spin_unlock_irqrestore(&i915->gpu_error.lock, flags); } if (error) { @@ -1831,7 +1832,7 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, DRM_INFO("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n"); DRM_INFO("The gpu crash dump is required to analyze gpu hangs, so please always attach it.\n"); DRM_INFO("GPU crash dump saved to /sys/class/drm/card%d/error\n", - dev_priv->drm.primary->index); + i915->drm.primary->index); warned = true; } } diff --git a/drivers/gpu/drm/i915/i915_ioc32.c b/drivers/gpu/drm/i915/i915_ioc32.c index 97f3a5640289..0e5c580d117c 100644 --- a/drivers/gpu/drm/i915/i915_ioc32.c +++ b/drivers/gpu/drm/i915/i915_ioc32.c @@ -1,11 +1,6 @@ -/** - * \file i915_ioc32.c - * +/* * 32-bit ioctl compatibility routines for the i915 DRM. * - * \author Alan Hourihane <alanh@fairlite.demon.co.uk> - * - * * Copyright (C) Paul Mackerras 2005 * Copyright (C) Alan Hourihane 2005 * All Rights Reserved. @@ -28,6 +23,8 @@ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. + * + * Author: Alan Hourihane <alanh@fairlite.demon.co.uk> */ #include <linux/compat.h> @@ -55,10 +52,10 @@ static int compat_i915_getparam(struct file *file, unsigned int cmd, return -EFAULT; request = compat_alloc_user_space(sizeof(*request)); - if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) - || __put_user(req32.param, &request->param) - || __put_user((void __user *)(unsigned long)req32.value, - &request->value)) + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) || + __put_user(req32.param, &request->param) || + __put_user((void __user *)(unsigned long)req32.value, + &request->value)) return -EFAULT; return drm_ioctl(file, DRM_IOCTL_I915_GETPARAM, @@ -70,13 +67,13 @@ static drm_ioctl_compat_t *i915_compat_ioctls[] = { }; /** + * i915_compat_ioctl - handle the mistakes of the past + * @filp: the file pointer + * @cmd: the ioctl command (and encoded flags) + * @arg: the ioctl argument (from userspace) + * * Called whenever a 32-bit process running under a 64-bit kernel * performs an ioctl on /dev/dri/card<n>. - * - * \param filp file pointer. - * \param cmd command. - * \param arg user argument. - * \return zero on success or negative number on failure. 
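For i915_ioc32.c it is worth spelling out why the shim exists at all: the same ioctl payload has two layouts, because a user pointer occupies 4 bytes in a 32-bit process but 8 in the native ABI. A self-contained illustration with invented struct names; the real code builds the native struct in user space via compat_alloc_user_space() and forwards it with drm_ioctl():

#include <stdint.h>
#include <stdio.h>

struct getparam32 {		/* layout seen from a 32-bit process */
	int32_t param;
	uint32_t value;		/* 32-bit user pointer */
};

struct getparam64 {		/* native layout the driver expects */
	int32_t param;
	uint64_t value;		/* 64-bit user pointer */
};

static void compat_translate(const struct getparam32 *in,
			     struct getparam64 *out)
{
	out->param = in->param;
	out->value = (uint64_t)in->value;	/* zero-extend the pointer */
}

int main(void)
{
	struct getparam32 req32 = { .param = 6, .value = 0x80004000u };
	struct getparam64 req64;

	compat_translate(&req32, &req64);
	printf("sizes %zu -> %zu, value %#llx\n",
	       sizeof(req32), sizeof(req64),
	       (unsigned long long)req64.value);
	return 0;
}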
*/ long i915_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 3517c6548e2c..633c18785c1e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -415,6 +415,9 @@ void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) if (READ_ONCE(rps->interrupts_enabled)) return; + if (WARN_ON_ONCE(IS_GEN11(dev_priv))) + return; + spin_lock_irq(&dev_priv->irq_lock); WARN_ON_ONCE(rps->pm_iir); WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); @@ -431,6 +434,9 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) if (!READ_ONCE(rps->interrupts_enabled)) return; + if (WARN_ON_ONCE(IS_GEN11(dev_priv))) + return; + spin_lock_irq(&dev_priv->irq_lock); rps->interrupts_enabled = false; @@ -452,6 +458,8 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) void gen9_reset_guc_interrupts(struct drm_i915_private *dev_priv) { + assert_rpm_wakelock_held(dev_priv); + spin_lock_irq(&dev_priv->irq_lock); gen6_reset_pm_iir(dev_priv, dev_priv->pm_guc_events); spin_unlock_irq(&dev_priv->irq_lock); @@ -459,6 +467,8 @@ void gen9_reset_guc_interrupts(struct drm_i915_private *dev_priv) void gen9_enable_guc_interrupts(struct drm_i915_private *dev_priv) { + assert_rpm_wakelock_held(dev_priv); + spin_lock_irq(&dev_priv->irq_lock); if (!dev_priv->guc.interrupts_enabled) { WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & @@ -471,6 +481,8 @@ void gen9_enable_guc_interrupts(struct drm_i915_private *dev_priv) void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv) { + assert_rpm_wakelock_held(dev_priv); + spin_lock_irq(&dev_priv->irq_lock); dev_priv->guc.interrupts_enabled = false; @@ -1065,7 +1077,7 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) static void notify_ring(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *rq = NULL; + struct i915_request *rq = NULL; struct intel_wait *wait; if (!engine->breadcrumbs.irq_armed) @@ -1092,13 +1104,13 @@ static void notify_ring(struct intel_engine_cs *engine) */ if (i915_seqno_passed(intel_engine_get_seqno(engine), wait->seqno)) { - struct drm_i915_gem_request *waiter = wait->request; + struct i915_request *waiter = wait->request; wakeup = true; if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &waiter->fence.flags) && intel_wait_check_request(wait, waiter)) - rq = i915_gem_request_get(waiter); + rq = i915_request_get(waiter); } if (wakeup) @@ -1111,7 +1123,8 @@ static void notify_ring(struct intel_engine_cs *engine) if (rq) { dma_fence_signal(&rq->fence); - i915_gem_request_put(rq); + GEM_BUG_ON(!i915_request_completed(rq)); + i915_request_put(rq); } trace_intel_engine_notify(engine, wait); @@ -1407,80 +1420,73 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) tasklet_hi_schedule(&execlists->tasklet); } -static irqreturn_t gen8_gt_irq_ack(struct drm_i915_private *dev_priv, - u32 master_ctl, - u32 gt_iir[4]) +static void gen8_gt_irq_ack(struct drm_i915_private *i915, + u32 master_ctl, u32 gt_iir[4]) { - irqreturn_t ret = IRQ_NONE; + void __iomem * const regs = i915->regs; + +#define GEN8_GT_IRQS (GEN8_GT_RCS_IRQ | \ + GEN8_GT_BCS_IRQ | \ + GEN8_GT_VCS1_IRQ | \ + GEN8_GT_VCS2_IRQ | \ + GEN8_GT_VECS_IRQ | \ + GEN8_GT_PM_IRQ | \ + GEN8_GT_GUC_IRQ) if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { - gt_iir[0] = I915_READ_FW(GEN8_GT_IIR(0)); - if (gt_iir[0]) { - I915_WRITE_FW(GEN8_GT_IIR(0), gt_iir[0]); - 
ret = IRQ_HANDLED; - } else - DRM_ERROR("The master control interrupt lied (GT0)!\n"); + gt_iir[0] = raw_reg_read(regs, GEN8_GT_IIR(0)); + if (likely(gt_iir[0])) + raw_reg_write(regs, GEN8_GT_IIR(0), gt_iir[0]); } if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) { - gt_iir[1] = I915_READ_FW(GEN8_GT_IIR(1)); - if (gt_iir[1]) { - I915_WRITE_FW(GEN8_GT_IIR(1), gt_iir[1]); - ret = IRQ_HANDLED; - } else - DRM_ERROR("The master control interrupt lied (GT1)!\n"); - } - - if (master_ctl & GEN8_GT_VECS_IRQ) { - gt_iir[3] = I915_READ_FW(GEN8_GT_IIR(3)); - if (gt_iir[3]) { - I915_WRITE_FW(GEN8_GT_IIR(3), gt_iir[3]); - ret = IRQ_HANDLED; - } else - DRM_ERROR("The master control interrupt lied (GT3)!\n"); + gt_iir[1] = raw_reg_read(regs, GEN8_GT_IIR(1)); + if (likely(gt_iir[1])) + raw_reg_write(regs, GEN8_GT_IIR(1), gt_iir[1]); } if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { - gt_iir[2] = I915_READ_FW(GEN8_GT_IIR(2)); - if (gt_iir[2] & (dev_priv->pm_rps_events | - dev_priv->pm_guc_events)) { - I915_WRITE_FW(GEN8_GT_IIR(2), - gt_iir[2] & (dev_priv->pm_rps_events | - dev_priv->pm_guc_events)); - ret = IRQ_HANDLED; - } else - DRM_ERROR("The master control interrupt lied (PM)!\n"); + gt_iir[2] = raw_reg_read(regs, GEN8_GT_IIR(2)); + if (likely(gt_iir[2] & (i915->pm_rps_events | + i915->pm_guc_events))) + raw_reg_write(regs, GEN8_GT_IIR(2), + gt_iir[2] & (i915->pm_rps_events | + i915->pm_guc_events)); } - return ret; + if (master_ctl & GEN8_GT_VECS_IRQ) { + gt_iir[3] = raw_reg_read(regs, GEN8_GT_IIR(3)); + if (likely(gt_iir[3])) + raw_reg_write(regs, GEN8_GT_IIR(3), gt_iir[3]); + } } -static void gen8_gt_irq_handler(struct drm_i915_private *dev_priv, - u32 gt_iir[4]) +static void gen8_gt_irq_handler(struct drm_i915_private *i915, + u32 master_ctl, u32 gt_iir[4]) { - if (gt_iir[0]) { - gen8_cs_irq_handler(dev_priv->engine[RCS], + if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { + gen8_cs_irq_handler(i915->engine[RCS], gt_iir[0], GEN8_RCS_IRQ_SHIFT); - gen8_cs_irq_handler(dev_priv->engine[BCS], + gen8_cs_irq_handler(i915->engine[BCS], gt_iir[0], GEN8_BCS_IRQ_SHIFT); } - if (gt_iir[1]) { - gen8_cs_irq_handler(dev_priv->engine[VCS], + if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) { + gen8_cs_irq_handler(i915->engine[VCS], gt_iir[1], GEN8_VCS1_IRQ_SHIFT); - gen8_cs_irq_handler(dev_priv->engine[VCS2], + gen8_cs_irq_handler(i915->engine[VCS2], gt_iir[1], GEN8_VCS2_IRQ_SHIFT); } - if (gt_iir[3]) - gen8_cs_irq_handler(dev_priv->engine[VECS], + if (master_ctl & GEN8_GT_VECS_IRQ) { + gen8_cs_irq_handler(i915->engine[VECS], gt_iir[3], GEN8_VECS_IRQ_SHIFT); + } - if (gt_iir[2] & dev_priv->pm_rps_events) - gen6_rps_irq_handler(dev_priv, gt_iir[2]); - - if (gt_iir[2] & dev_priv->pm_guc_events) - gen9_guc_irq_handler(dev_priv, gt_iir[2]); + if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { + gen6_rps_irq_handler(i915, gt_iir[2]); + gen9_guc_irq_handler(i915, gt_iir[2]); + } } static bool bxt_port_hotplug_long_detect(enum port port, u32 val) @@ -1568,10 +1574,11 @@ static bool i9xx_port_hotplug_long_detect(enum port port, u32 val) * * Note that the caller is expected to zero out the masks initially. 
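The gen8_gt_irq_ack()/gen8_gt_irq_handler() split above is a two-phase design: the ack phase reads and clears each pending IIR while the master interrupt is still disabled, and the processing happens afterwards. A toy version with plain variables standing in for the registers:

#include <stdint.h>
#include <stdio.h>

static uint32_t master_ctl = 0x3;		/* pretend banks 0 and 1 fired */
static uint32_t iir_reg[2] = { 0x11, 0x22 };	/* fake IIR registers */

static void irq_ack(uint32_t master, uint32_t iir[2])
{
	for (int i = 0; i < 2; i++) {
		if (!(master & (1u << i)))
			continue;
		iir[i] = iir_reg[i];	/* latch the status... */
		iir_reg[i] = 0;		/* ...and clear it at the source */
	}
}

static void irq_handle(uint32_t master, const uint32_t iir[2])
{
	for (int i = 0; i < 2; i++)
		if (master & (1u << i))
			printf("bank %d: handle %#x\n", i, iir[i]);
}

int main(void)
{
	uint32_t iir[2] = { 0, 0 };

	irq_ack(master_ctl, iir);	/* cheap, done while masked */
	irq_handle(master_ctl, iir);	/* heavier work once re-enabled */
	return 0;
}

Keeping the ack phase short minimises the window in which the master interrupt is disabled; note that gen8_irq_handler() above likewise narrows the rpm-wakeref-assert window to the display path that actually needs it.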
*/ -static void intel_get_hpd_pins(u32 *pin_mask, u32 *long_mask, - u32 hotplug_trigger, u32 dig_hotplug_reg, - const u32 hpd[HPD_NUM_PINS], - bool long_pulse_detect(enum port port, u32 val)) +static void intel_get_hpd_pins(struct drm_i915_private *dev_priv, + u32 *pin_mask, u32 *long_mask, + u32 hotplug_trigger, u32 dig_hotplug_reg, + const u32 hpd[HPD_NUM_PINS], + bool long_pulse_detect(enum port port, u32 val)) { enum port port; int i; @@ -1582,7 +1589,7 @@ static void intel_get_hpd_pins(u32 *pin_mask, u32 *long_mask, *pin_mask |= BIT(i); - port = intel_hpd_pin_to_port(i); + port = intel_hpd_pin_to_port(dev_priv, i); if (port == PORT_NONE) continue; @@ -1970,8 +1977,9 @@ static void i9xx_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_G4X; if (hotplug_trigger) { - intel_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger, - hotplug_trigger, hpd_status_g4x, + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, + hotplug_trigger, hotplug_trigger, + hpd_status_g4x, i9xx_port_hotplug_long_detect); intel_hpd_irq_handler(dev_priv, pin_mask, long_mask); @@ -1983,8 +1991,9 @@ static void i9xx_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_I915; if (hotplug_trigger) { - intel_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger, - hotplug_trigger, hpd_status_i915, + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, + hotplug_trigger, hotplug_trigger, + hpd_status_i915, i9xx_port_hotplug_long_detect); intel_hpd_irq_handler(dev_priv, pin_mask, long_mask); } @@ -2092,9 +2101,9 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg) do { u32 master_ctl, iir; - u32 gt_iir[4] = {}; u32 pipe_stats[I915_MAX_PIPES] = {}; u32 hotplug_status = 0; + u32 gt_iir[4]; u32 ier = 0; master_ctl = I915_READ(GEN8_MASTER_IRQ) & ~GEN8_MASTER_IRQ_CONTROL; @@ -2147,7 +2156,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg) I915_WRITE(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL); POSTING_READ(GEN8_MASTER_IRQ); - gen8_gt_irq_handler(dev_priv, gt_iir); + gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir); if (hotplug_status) i9xx_hpd_irq_handler(dev_priv, hotplug_status); @@ -2185,7 +2194,7 @@ static void ibx_hpd_irq_handler(struct drm_i915_private *dev_priv, if (!hotplug_trigger) return; - intel_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger, + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, hotplug_trigger, dig_hotplug_reg, hpd, pch_port_hotplug_long_detect); @@ -2327,8 +2336,8 @@ static void spt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) dig_hotplug_reg = I915_READ(PCH_PORT_HOTPLUG); I915_WRITE(PCH_PORT_HOTPLUG, dig_hotplug_reg); - intel_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger, - dig_hotplug_reg, hpd_spt, + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, + hotplug_trigger, dig_hotplug_reg, hpd_spt, spt_port_hotplug_long_detect); } @@ -2338,8 +2347,8 @@ static void spt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) dig_hotplug_reg = I915_READ(PCH_PORT_HOTPLUG2); I915_WRITE(PCH_PORT_HOTPLUG2, dig_hotplug_reg); - intel_get_hpd_pins(&pin_mask, &long_mask, hotplug2_trigger, - dig_hotplug_reg, hpd_spt, + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, + hotplug2_trigger, dig_hotplug_reg, hpd_spt, spt_port_hotplug2_long_detect); } @@ -2359,7 +2368,7 @@ static void ilk_hpd_irq_handler(struct drm_i915_private *dev_priv, dig_hotplug_reg = I915_READ(DIGITAL_PORT_HOTPLUG_CNTRL); I915_WRITE(DIGITAL_PORT_HOTPLUG_CNTRL, 
dig_hotplug_reg); - intel_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger, + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, hotplug_trigger, dig_hotplug_reg, hpd, ilk_port_hotplug_long_detect); @@ -2536,7 +2545,7 @@ static void bxt_hpd_irq_handler(struct drm_i915_private *dev_priv, dig_hotplug_reg = I915_READ(PCH_PORT_HOTPLUG); I915_WRITE(PCH_PORT_HOTPLUG, dig_hotplug_reg); - intel_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger, + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, hotplug_trigger, dig_hotplug_reg, hpd, bxt_port_hotplug_long_detect); @@ -2579,6 +2588,9 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) GEN9_AUX_CHANNEL_C | GEN9_AUX_CHANNEL_D; + if (IS_CNL_WITH_PORT_F(dev_priv)) + tmp_mask |= CNL_AUX_CHANNEL_F; + if (iir & tmp_mask) { dp_aux_irq_handler(dev_priv); found = true; @@ -2679,11 +2691,9 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) static irqreturn_t gen8_irq_handler(int irq, void *arg) { - struct drm_device *dev = arg; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(arg); u32 master_ctl; - u32 gt_iir[4] = {}; - irqreturn_t ret; + u32 gt_iir[4]; if (!intel_irqs_enabled(dev_priv)) return IRQ_NONE; @@ -2695,20 +2705,21 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg) I915_WRITE_FW(GEN8_MASTER_IRQ, 0); - /* IRQs are synced during runtime_suspend, we don't require a wakeref */ - disable_rpm_wakeref_asserts(dev_priv); - /* Find, clear, then process each source of interrupt */ - ret = gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir); - gen8_gt_irq_handler(dev_priv, gt_iir); - ret |= gen8_de_irq_handler(dev_priv, master_ctl); + gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir); + + /* IRQs are synced during runtime_suspend, we don't require a wakeref */ + if (master_ctl & ~GEN8_GT_IRQS) { + disable_rpm_wakeref_asserts(dev_priv); + gen8_de_irq_handler(dev_priv, master_ctl); + enable_rpm_wakeref_asserts(dev_priv); + } I915_WRITE_FW(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL); - POSTING_READ_FW(GEN8_MASTER_IRQ); - enable_rpm_wakeref_asserts(dev_priv); + gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir); - return ret; + return IRQ_HANDLED; } struct wedge_me { @@ -2751,6 +2762,156 @@ static void __fini_wedge(struct wedge_me *w) (W)->i915; \ __fini_wedge((W))) +static __always_inline void +gen11_cs_irq_handler(struct intel_engine_cs * const engine, const u32 iir) +{ + gen8_cs_irq_handler(engine, iir, 0); +} + +static void +gen11_gt_engine_irq_handler(struct drm_i915_private * const i915, + const unsigned int bank, + const unsigned int engine_n, + const u16 iir) +{ + struct intel_engine_cs ** const engine = i915->engine; + + switch (bank) { + case 0: + switch (engine_n) { + + case GEN11_RCS0: + return gen11_cs_irq_handler(engine[RCS], iir); + + case GEN11_BCS: + return gen11_cs_irq_handler(engine[BCS], iir); + } + case 1: + switch (engine_n) { + + case GEN11_VCS(0): + return gen11_cs_irq_handler(engine[_VCS(0)], iir); + case GEN11_VCS(1): + return gen11_cs_irq_handler(engine[_VCS(1)], iir); + case GEN11_VCS(2): + return gen11_cs_irq_handler(engine[_VCS(2)], iir); + case GEN11_VCS(3): + return gen11_cs_irq_handler(engine[_VCS(3)], iir); + + case GEN11_VECS(0): + return gen11_cs_irq_handler(engine[_VECS(0)], iir); + case GEN11_VECS(1): + return gen11_cs_irq_handler(engine[_VECS(1)], iir); + } + } +} + +static u32 +gen11_gt_engine_intr(struct drm_i915_private * const i915, + const unsigned int bank, const unsigned int bit) +{ + void __iomem * const regs = 
i915->regs; + u32 timeout_ts; + u32 ident; + + raw_reg_write(regs, GEN11_IIR_REG_SELECTOR(bank), BIT(bit)); + + /* + * NB: Specs do not specify how long to spin wait, + * so we do ~100us as an educated guess. + */ + timeout_ts = (local_clock() >> 10) + 100; + do { + ident = raw_reg_read(regs, GEN11_INTR_IDENTITY_REG(bank)); + } while (!(ident & GEN11_INTR_DATA_VALID) && + !time_after32(local_clock() >> 10, timeout_ts)); + + if (unlikely(!(ident & GEN11_INTR_DATA_VALID))) { + DRM_ERROR("INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n", + bank, bit, ident); + return 0; + } + + raw_reg_write(regs, GEN11_INTR_IDENTITY_REG(bank), + GEN11_INTR_DATA_VALID); + + return ident & GEN11_INTR_ENGINE_MASK; +} + +static void +gen11_gt_irq_handler(struct drm_i915_private * const i915, + const u32 master_ctl) +{ + void __iomem * const regs = i915->regs; + unsigned int bank; + + for (bank = 0; bank < 2; bank++) { + unsigned long intr_dw; + unsigned int bit; + + if (!(master_ctl & GEN11_GT_DW_IRQ(bank))) + continue; + + intr_dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank)); + + if (unlikely(!intr_dw)) { + DRM_ERROR("GT_INTR_DW%u blank!\n", bank); + continue; + } + + for_each_set_bit(bit, &intr_dw, 32) { + const u16 iir = gen11_gt_engine_intr(i915, bank, bit); + + if (unlikely(!iir)) + continue; + + gen11_gt_engine_irq_handler(i915, bank, bit, iir); + } + + /* Clear must be after shared has been served for engine */ + raw_reg_write(regs, GEN11_GT_INTR_DW(bank), intr_dw); + } +} + +static irqreturn_t gen11_irq_handler(int irq, void *arg) +{ + struct drm_i915_private * const i915 = to_i915(arg); + void __iomem * const regs = i915->regs; + u32 master_ctl; + + if (!intel_irqs_enabled(i915)) + return IRQ_NONE; + + master_ctl = raw_reg_read(regs, GEN11_GFX_MSTR_IRQ); + master_ctl &= ~GEN11_MASTER_IRQ; + if (!master_ctl) + return IRQ_NONE; + + /* Disable interrupts. */ + raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, 0); + + /* Find, clear, then process each source of interrupt. */ + gen11_gt_irq_handler(i915, master_ctl); + + /* IRQs are synced during runtime_suspend, we don't require a wakeref */ + if (master_ctl & GEN11_DISPLAY_IRQ) { + const u32 disp_ctl = raw_reg_read(regs, GEN11_DISPLAY_INT_CTL); + + disable_rpm_wakeref_asserts(i915); + /* + * GEN11_DISPLAY_INT_CTL has same format as GEN8_MASTER_IRQ + * for the display related bits. + */ + gen8_de_irq_handler(i915, disp_ctl); + enable_rpm_wakeref_asserts(i915); + } + + /* Acknowledge and enable interrupts. */ + raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, GEN11_MASTER_IRQ | master_ctl); + + return IRQ_HANDLED; +} + /** * i915_reset_device - do process context error handling work * @dev_priv: i915 device private @@ -2956,6 +3117,12 @@ static int ironlake_enable_vblank(struct drm_device *dev, unsigned int pipe) ilk_enable_display_irq(dev_priv, bit); spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); + /* Even though there is no DMC, frame counter can get stuck when + * PSR is active as no frames are generated. + */ + if (HAS_PSR(dev_priv)) + drm_vblank_restore(dev, pipe); + return 0; } @@ -2968,6 +3135,12 @@ static int gen8_enable_vblank(struct drm_device *dev, unsigned int pipe) bdw_enable_pipe_irq(dev_priv, pipe, GEN8_PIPE_VBLANK); spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); + /* Even if there is no DMC, frame counter can get stuck when + * PSR is active as no frames are generated, so check only for PSR. 
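
The spin in gen11_gt_engine_intr() above bounds its wait by converting local_clock() from nanoseconds to roughly microseconds with a right shift by 10 (divide by 1024), then comparing against a wrap-safe 32-bit deadline. A self-contained rendition of that bounded poll, with local_clock_ns() and read_identity() as stand-ins:

#include <stdbool.h>
#include <stdint.h>

#define DATA_VALID (1u << 31)

extern uint64_t local_clock_ns(void);   /* stand-in for local_clock() */
extern uint32_t read_identity(unsigned int bank);

/* Wrap-safe "a is later than b" for 32-bit timestamps. */
static inline bool time_after32(uint32_t a, uint32_t b)
{
        return (int32_t)(b - a) < 0;
}

/* Poll until the identity register reports valid data or ~100us pass. */
static bool wait_for_valid(unsigned int bank, uint32_t *ident)
{
        uint32_t timeout_ts = (uint32_t)(local_clock_ns() >> 10) + 100;

        do {
                *ident = read_identity(bank);
        } while (!(*ident & DATA_VALID) &&
                 !time_after32((uint32_t)(local_clock_ns() >> 10),
                               timeout_ts));

        return *ident & DATA_VALID;
}
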
+ */ + if (HAS_PSR(dev_priv)) + drm_vblank_restore(dev, pipe); + return 0; } @@ -3164,6 +3337,42 @@ static void gen8_irq_reset(struct drm_device *dev) ibx_irq_reset(dev_priv); } +static void gen11_gt_irq_reset(struct drm_i915_private *dev_priv) +{ + /* Disable RCS, BCS, VCS and VECS class engines. */ + I915_WRITE(GEN11_RENDER_COPY_INTR_ENABLE, 0); + I915_WRITE(GEN11_VCS_VECS_INTR_ENABLE, 0); + + /* Restore masks irqs on RCS, BCS, VCS and VECS engines. */ + I915_WRITE(GEN11_RCS0_RSVD_INTR_MASK, ~0); + I915_WRITE(GEN11_BCS_RSVD_INTR_MASK, ~0); + I915_WRITE(GEN11_VCS0_VCS1_INTR_MASK, ~0); + I915_WRITE(GEN11_VCS2_VCS3_INTR_MASK, ~0); + I915_WRITE(GEN11_VECS0_VECS1_INTR_MASK, ~0); +} + +static void gen11_irq_reset(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int pipe; + + I915_WRITE(GEN11_GFX_MSTR_IRQ, 0); + POSTING_READ(GEN11_GFX_MSTR_IRQ); + + gen11_gt_irq_reset(dev_priv); + + I915_WRITE(GEN11_DISPLAY_INT_CTL, 0); + + for_each_pipe(dev_priv, pipe) + if (intel_display_power_is_enabled(dev_priv, + POWER_DOMAIN_PIPE(pipe))) + GEN8_IRQ_RESET_NDX(DE_PIPE, pipe); + + GEN3_IRQ_RESET(GEN8_DE_PORT_); + GEN3_IRQ_RESET(GEN8_DE_MISC_); + GEN3_IRQ_RESET(GEN8_PCU_); +} + void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv, u8 pipe_mask) { @@ -3611,6 +3820,9 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) de_pipe_masked |= GEN8_DE_PIPE_IRQ_FAULT_ERRORS; } + if (IS_CNL_WITH_PORT_F(dev_priv)) + de_port_masked |= CNL_AUX_CHANNEL_F; + de_pipe_enables = de_pipe_masked | GEN8_PIPE_VBLANK | GEN8_PIPE_FIFO_UNDERRUN; @@ -3658,6 +3870,41 @@ static int gen8_irq_postinstall(struct drm_device *dev) return 0; } +static void gen11_gt_irq_postinstall(struct drm_i915_private *dev_priv) +{ + const u32 irqs = GT_RENDER_USER_INTERRUPT | GT_CONTEXT_SWITCH_INTERRUPT; + + BUILD_BUG_ON(irqs & 0xffff0000); + + /* Enable RCS, BCS, VCS and VECS class interrupts. */ + I915_WRITE(GEN11_RENDER_COPY_INTR_ENABLE, irqs << 16 | irqs); + I915_WRITE(GEN11_VCS_VECS_INTR_ENABLE, irqs << 16 | irqs); + + /* Unmask irqs on RCS, BCS, VCS and VECS engines. 
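
gen11_gt_irq_postinstall() above packs two engines into each 32-bit enable/mask register: one engine's bits in the low 16 bits, its sibling's in the high 16, which is why the BUILD_BUG_ON rejects any irq bit above 0xffff, and why registers whose partner slot is reserved unmask only one half (~(irqs << 16)). A sketch of the packing, assuming the gen8-era bit positions (user interrupt at bit 0, context switch at bit 8):

#include <assert.h>
#include <stdint.h>

#define USER_INTERRUPT  (1u << 0)
#define CONTEXT_SWITCH  (1u << 8)
#define ENGINE_IRQS     (USER_INTERRUPT | CONTEXT_SWITCH)

static_assert((ENGINE_IRQS & 0xffff0000u) == 0,
              "per-engine bits must fit in the low 16 bits");

/* Enable register: the same bits for both engines sharing it. */
static inline uint32_t enable_both(uint16_t irqs)
{
        return ((uint32_t)irqs << 16) | irqs;
}

/* Mask register: ~0 masks everything, so clear only the wanted bits
 * for each of the two engines to let their interrupts through. */
static inline uint32_t unmask_both(uint16_t irqs)
{
        return ~enable_both(irqs);
}
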
*/ + I915_WRITE(GEN11_RCS0_RSVD_INTR_MASK, ~(irqs << 16)); + I915_WRITE(GEN11_BCS_RSVD_INTR_MASK, ~(irqs << 16)); + I915_WRITE(GEN11_VCS0_VCS1_INTR_MASK, ~(irqs | irqs << 16)); + I915_WRITE(GEN11_VCS2_VCS3_INTR_MASK, ~(irqs | irqs << 16)); + I915_WRITE(GEN11_VECS0_VECS1_INTR_MASK, ~(irqs | irqs << 16)); + + dev_priv->pm_imr = 0xffffffff; /* TODO */ +} + +static int gen11_irq_postinstall(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + gen11_gt_irq_postinstall(dev_priv); + gen8_de_irq_postinstall(dev_priv); + + I915_WRITE(GEN11_DISPLAY_INT_CTL, GEN11_DISPLAY_IRQ_ENABLE); + + I915_WRITE(GEN11_GFX_MSTR_IRQ, GEN11_MASTER_IRQ); + POSTING_READ(GEN11_GFX_MSTR_IRQ); + + return 0; +} + static int cherryview_irq_postinstall(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -4106,6 +4353,14 @@ void intel_irq_init(struct drm_i915_private *dev_priv) dev->driver->enable_vblank = i965_enable_vblank; dev->driver->disable_vblank = i965_disable_vblank; dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup; + } else if (INTEL_GEN(dev_priv) >= 11) { + dev->driver->irq_handler = gen11_irq_handler; + dev->driver->irq_preinstall = gen11_irq_reset; + dev->driver->irq_postinstall = gen11_irq_postinstall; + dev->driver->irq_uninstall = gen11_irq_reset; + dev->driver->enable_vblank = gen8_enable_vblank; + dev->driver->disable_vblank = gen8_disable_vblank; + dev_priv->display.hpd_irq_setup = spt_hpd_irq_setup; } else if (INTEL_GEN(dev_priv) >= 8) { dev->driver->irq_handler = gen8_irq_handler; dev->driver->irq_preinstall = gen8_irq_reset; diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index b5f3eb4fa8a3..08108ce5be21 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -155,7 +155,8 @@ i915_param_named_unsafe(enable_guc, int, 0400, "(-1=auto, 0=disable [default], 1=GuC submission, 2=HuC load)"); i915_param_named(guc_log_level, int, 0400, - "GuC firmware logging level (-1:disabled (default), 0-3:enabled)"); + "GuC firmware logging level. Requires GuC to be loaded. " + "(-1=auto [default], 0=disable, 1..4=enable with verbosity min..max)"); i915_param_named_unsafe(guc_firmware_path, charp, 0400, "GuC firmware path to use instead of the default one"); @@ -166,8 +167,10 @@ i915_param_named_unsafe(huc_firmware_path, charp, 0400, i915_param_named_unsafe(enable_dp_mst, bool, 0600, "Enable multi-stream transport (MST) for new DisplayPort sinks. 
(default: true)"); +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) i915_param_named_unsafe(inject_load_failure, uint, 0400, "Force an error after a number of failure check points (0:disabled (default), N:force failure at the Nth failure check point)"); +#endif i915_param_named(enable_dpcd_backlight, bool, 0600, "Enable support for DPCD backlight control (default:false)"); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index c96360398072..430f5f9d0ff4 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -48,7 +48,7 @@ struct drm_printer; param(int, enable_ips, 1) \ param(int, invert_brightness, 0) \ param(int, enable_guc, 0) \ - param(int, guc_log_level, -1) \ + param(int, guc_log_level, 0) \ param(char *, guc_firmware_path, NULL) \ param(char *, huc_firmware_path, NULL) \ param(int, mmio_debug, 0) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 1c30c688f23a..062e91b39085 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -29,6 +29,9 @@ #include "i915_drv.h" #include "i915_selftest.h" +#define PLATFORM(x) .platform = (x), .platform_mask = BIT(x) +#define GEN(x) .gen = (x), .gen_mask = BIT((x) - 1) + #define GEN_DEFAULT_PIPEOFFSETS \ .pipe_offsets = { PIPE_A_OFFSET, PIPE_B_OFFSET, \ PIPE_C_OFFSET, PIPE_EDP_OFFSET }, \ @@ -63,7 +66,8 @@ .page_sizes = I915_GTT_PAGE_SIZE_4K #define GEN2_FEATURES \ - .gen = 2, .num_pipes = 1, \ + GEN(2), \ + .num_pipes = 1, \ .has_overlay = 1, .overlay_needs_physical = 1, \ .has_gmch_display = 1, \ .hws_needs_physical = 1, \ @@ -76,19 +80,20 @@ static const struct intel_device_info intel_i830_info = { GEN2_FEATURES, - .platform = INTEL_I830, + PLATFORM(INTEL_I830), .is_mobile = 1, .cursor_needs_physical = 1, .num_pipes = 2, /* legal, last one wins */ }; static const struct intel_device_info intel_i845g_info = { GEN2_FEATURES, - .platform = INTEL_I845G, + PLATFORM(INTEL_I845G), }; static const struct intel_device_info intel_i85x_info = { GEN2_FEATURES, - .platform = INTEL_I85X, .is_mobile = 1, + PLATFORM(INTEL_I85X), + .is_mobile = 1, .num_pipes = 2, /* legal, last one wins */ .cursor_needs_physical = 1, .has_fbc = 1, @@ -96,11 +101,12 @@ static const struct intel_device_info intel_i85x_info = { static const struct intel_device_info intel_i865g_info = { GEN2_FEATURES, - .platform = INTEL_I865G, + PLATFORM(INTEL_I865G), }; #define GEN3_FEATURES \ - .gen = 3, .num_pipes = 2, \ + GEN(3), \ + .num_pipes = 2, \ .has_gmch_display = 1, \ .ring_mask = RENDER_RING, \ .has_snoop = true, \ @@ -110,7 +116,8 @@ static const struct intel_device_info intel_i865g_info = { static const struct intel_device_info intel_i915g_info = { GEN3_FEATURES, - .platform = INTEL_I915G, .cursor_needs_physical = 1, + PLATFORM(INTEL_I915G), + .cursor_needs_physical = 1, .has_overlay = 1, .overlay_needs_physical = 1, .hws_needs_physical = 1, .unfenced_needs_alignment = 1, @@ -118,7 +125,7 @@ static const struct intel_device_info intel_i915g_info = { static const struct intel_device_info intel_i915gm_info = { GEN3_FEATURES, - .platform = INTEL_I915GM, + PLATFORM(INTEL_I915GM), .is_mobile = 1, .cursor_needs_physical = 1, .has_overlay = 1, .overlay_needs_physical = 1, @@ -130,7 +137,7 @@ static const struct intel_device_info intel_i915gm_info = { static const struct intel_device_info intel_i945g_info = { GEN3_FEATURES, - .platform = INTEL_I945G, + PLATFORM(INTEL_I945G), .has_hotplug = 1, .cursor_needs_physical = 1, .has_overlay = 1, .overlay_needs_physical = 1, 
.hws_needs_physical = 1, @@ -139,7 +146,8 @@ static const struct intel_device_info intel_i945g_info = { static const struct intel_device_info intel_i945gm_info = { GEN3_FEATURES, - .platform = INTEL_I945GM, .is_mobile = 1, + PLATFORM(INTEL_I945GM), + .is_mobile = 1, .has_hotplug = 1, .cursor_needs_physical = 1, .has_overlay = 1, .overlay_needs_physical = 1, .supports_tv = 1, @@ -150,20 +158,22 @@ static const struct intel_device_info intel_i945gm_info = { static const struct intel_device_info intel_g33_info = { GEN3_FEATURES, - .platform = INTEL_G33, + PLATFORM(INTEL_G33), .has_hotplug = 1, .has_overlay = 1, }; static const struct intel_device_info intel_pineview_info = { GEN3_FEATURES, - .platform = INTEL_PINEVIEW, .is_mobile = 1, + PLATFORM(INTEL_PINEVIEW), + .is_mobile = 1, .has_hotplug = 1, .has_overlay = 1, }; #define GEN4_FEATURES \ - .gen = 4, .num_pipes = 2, \ + GEN(4), \ + .num_pipes = 2, \ .has_hotplug = 1, \ .has_gmch_display = 1, \ .ring_mask = RENDER_RING, \ @@ -174,7 +184,7 @@ static const struct intel_device_info intel_pineview_info = { static const struct intel_device_info intel_i965g_info = { GEN4_FEATURES, - .platform = INTEL_I965G, + PLATFORM(INTEL_I965G), .has_overlay = 1, .hws_needs_physical = 1, .has_snoop = false, @@ -182,7 +192,7 @@ static const struct intel_device_info intel_i965g_info = { static const struct intel_device_info intel_i965gm_info = { GEN4_FEATURES, - .platform = INTEL_I965GM, + PLATFORM(INTEL_I965GM), .is_mobile = 1, .has_fbc = 1, .has_overlay = 1, .supports_tv = 1, @@ -192,20 +202,21 @@ static const struct intel_device_info intel_i965gm_info = { static const struct intel_device_info intel_g45_info = { GEN4_FEATURES, - .platform = INTEL_G45, + PLATFORM(INTEL_G45), .ring_mask = RENDER_RING | BSD_RING, }; static const struct intel_device_info intel_gm45_info = { GEN4_FEATURES, - .platform = INTEL_GM45, + PLATFORM(INTEL_GM45), .is_mobile = 1, .has_fbc = 1, .supports_tv = 1, .ring_mask = RENDER_RING | BSD_RING, }; #define GEN5_FEATURES \ - .gen = 5, .num_pipes = 2, \ + GEN(5), \ + .num_pipes = 2, \ .has_hotplug = 1, \ .ring_mask = RENDER_RING | BSD_RING, \ .has_snoop = true, \ @@ -217,17 +228,18 @@ static const struct intel_device_info intel_gm45_info = { static const struct intel_device_info intel_ironlake_d_info = { GEN5_FEATURES, - .platform = INTEL_IRONLAKE, + PLATFORM(INTEL_IRONLAKE), }; static const struct intel_device_info intel_ironlake_m_info = { GEN5_FEATURES, - .platform = INTEL_IRONLAKE, + PLATFORM(INTEL_IRONLAKE), .is_mobile = 1, .has_fbc = 1, }; #define GEN6_FEATURES \ - .gen = 6, .num_pipes = 2, \ + GEN(6), \ + .num_pipes = 2, \ .has_hotplug = 1, \ .has_fbc = 1, \ .ring_mask = RENDER_RING | BSD_RING | BLT_RING, \ @@ -241,7 +253,7 @@ static const struct intel_device_info intel_ironlake_m_info = { #define SNB_D_PLATFORM \ GEN6_FEATURES, \ - .platform = INTEL_SANDYBRIDGE + PLATFORM(INTEL_SANDYBRIDGE) static const struct intel_device_info intel_sandybridge_d_gt1_info = { SNB_D_PLATFORM, @@ -255,7 +267,7 @@ static const struct intel_device_info intel_sandybridge_d_gt2_info = { #define SNB_M_PLATFORM \ GEN6_FEATURES, \ - .platform = INTEL_SANDYBRIDGE, \ + PLATFORM(INTEL_SANDYBRIDGE), \ .is_mobile = 1 @@ -270,7 +282,8 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = { }; #define GEN7_FEATURES \ - .gen = 7, .num_pipes = 3, \ + GEN(7), \ + .num_pipes = 3, \ .has_hotplug = 1, \ .has_fbc = 1, \ .ring_mask = RENDER_RING | BSD_RING | BLT_RING, \ @@ -285,7 +298,7 @@ static const struct intel_device_info 
intel_sandybridge_m_gt2_info = { #define IVB_D_PLATFORM \ GEN7_FEATURES, \ - .platform = INTEL_IVYBRIDGE, \ + PLATFORM(INTEL_IVYBRIDGE), \ .has_l3_dpf = 1 static const struct intel_device_info intel_ivybridge_d_gt1_info = { @@ -300,7 +313,7 @@ static const struct intel_device_info intel_ivybridge_d_gt2_info = { #define IVB_M_PLATFORM \ GEN7_FEATURES, \ - .platform = INTEL_IVYBRIDGE, \ + PLATFORM(INTEL_IVYBRIDGE), \ .is_mobile = 1, \ .has_l3_dpf = 1 @@ -316,15 +329,15 @@ static const struct intel_device_info intel_ivybridge_m_gt2_info = { static const struct intel_device_info intel_ivybridge_q_info = { GEN7_FEATURES, - .platform = INTEL_IVYBRIDGE, + PLATFORM(INTEL_IVYBRIDGE), .gt = 2, .num_pipes = 0, /* legal, last one wins */ .has_l3_dpf = 1, }; static const struct intel_device_info intel_valleyview_info = { - .platform = INTEL_VALLEYVIEW, - .gen = 7, + PLATFORM(INTEL_VALLEYVIEW), + GEN(7), .is_lp = 1, .num_pipes = 2, .has_psr = 1, @@ -355,7 +368,7 @@ static const struct intel_device_info intel_valleyview_info = { #define HSW_PLATFORM \ G75_FEATURES, \ - .platform = INTEL_HASWELL, \ + PLATFORM(INTEL_HASWELL), \ .has_l3_dpf = 1 static const struct intel_device_info intel_haswell_gt1_info = { @@ -375,6 +388,7 @@ static const struct intel_device_info intel_haswell_gt3_info = { #define GEN8_FEATURES \ G75_FEATURES, \ + GEN(8), \ BDW_COLORS, \ .page_sizes = I915_GTT_PAGE_SIZE_4K | \ I915_GTT_PAGE_SIZE_2M, \ @@ -385,8 +399,7 @@ static const struct intel_device_info intel_haswell_gt3_info = { #define BDW_PLATFORM \ GEN8_FEATURES, \ - .gen = 8, \ - .platform = INTEL_BROADWELL + PLATFORM(INTEL_BROADWELL) static const struct intel_device_info intel_broadwell_gt1_info = { BDW_PLATFORM, @@ -413,11 +426,12 @@ static const struct intel_device_info intel_broadwell_gt3_info = { }; static const struct intel_device_info intel_cherryview_info = { - .gen = 8, .num_pipes = 3, + PLATFORM(INTEL_CHERRYVIEW), + GEN(8), + .num_pipes = 3, .has_hotplug = 1, .is_lp = 1, .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, - .platform = INTEL_CHERRYVIEW, .has_64bit_reloc = 1, .has_psr = 1, .has_runtime_pm = 1, @@ -443,6 +457,7 @@ static const struct intel_device_info intel_cherryview_info = { #define GEN9_FEATURES \ GEN8_FEATURES, \ + GEN(9), \ GEN9_DEFAULT_PAGE_SIZES, \ .has_logical_ring_preemption = 1, \ .has_csr = 1, \ @@ -452,8 +467,7 @@ static const struct intel_device_info intel_cherryview_info = { #define SKL_PLATFORM \ GEN9_FEATURES, \ - .gen = 9, \ - .platform = INTEL_SKYLAKE + PLATFORM(INTEL_SKYLAKE) static const struct intel_device_info intel_skylake_gt1_info = { SKL_PLATFORM, @@ -481,7 +495,7 @@ static const struct intel_device_info intel_skylake_gt4_info = { }; #define GEN9_LP_FEATURES \ - .gen = 9, \ + GEN(9), \ .is_lp = 1, \ .has_hotplug = 1, \ .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, \ @@ -513,21 +527,20 @@ static const struct intel_device_info intel_skylake_gt4_info = { static const struct intel_device_info intel_broxton_info = { GEN9_LP_FEATURES, - .platform = INTEL_BROXTON, + PLATFORM(INTEL_BROXTON), .ddb_size = 512, }; static const struct intel_device_info intel_geminilake_info = { GEN9_LP_FEATURES, - .platform = INTEL_GEMINILAKE, + PLATFORM(INTEL_GEMINILAKE), .ddb_size = 1024, GLK_COLORS, }; #define KBL_PLATFORM \ GEN9_FEATURES, \ - .gen = 9, \ - .platform = INTEL_KABYLAKE + PLATFORM(INTEL_KABYLAKE) static const struct intel_device_info intel_kabylake_gt1_info = { KBL_PLATFORM, @@ -547,8 +560,7 @@ static const struct intel_device_info intel_kabylake_gt3_info = { #define 
CFL_PLATFORM \ GEN9_FEATURES, \ - .gen = 9, \ - .platform = INTEL_COFFEELAKE + PLATFORM(INTEL_COFFEELAKE) static const struct intel_device_info intel_coffeelake_gt1_info = { CFL_PLATFORM, @@ -568,17 +580,33 @@ static const struct intel_device_info intel_coffeelake_gt3_info = { #define GEN10_FEATURES \ GEN9_FEATURES, \ + GEN(10), \ .ddb_size = 1024, \ GLK_COLORS -static const struct intel_device_info intel_cannonlake_gt2_info = { +static const struct intel_device_info intel_cannonlake_info = { GEN10_FEATURES, - .is_alpha_support = 1, - .platform = INTEL_CANNONLAKE, - .gen = 10, + PLATFORM(INTEL_CANNONLAKE), .gt = 2, }; +#define GEN11_FEATURES \ + GEN10_FEATURES, \ + GEN(11), \ + .ddb_size = 2048, \ + .has_csr = 0, \ + .has_logical_ring_elsq = 1 + +static const struct intel_device_info intel_icelake_11_info = { + GEN11_FEATURES, + PLATFORM(INTEL_ICELAKE), + .is_alpha_support = 1, + .has_resource_streamer = 0, +}; + +#undef GEN +#undef PLATFORM + /* * Make sure any device matches here are from most specific to most * general. For example, since the Quanta match is based on the subsystem @@ -636,8 +664,8 @@ static const struct pci_device_id pciidlist[] = { INTEL_CFL_U_GT1_IDS(&intel_coffeelake_gt1_info), INTEL_CFL_U_GT2_IDS(&intel_coffeelake_gt2_info), INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info), - INTEL_CNL_U_GT2_IDS(&intel_cannonlake_gt2_info), - INTEL_CNL_Y_GT2_IDS(&intel_cannonlake_gt2_info), + INTEL_CNL_IDS(&intel_cannonlake_info), + INTEL_ICL_11_IDS(&intel_icelake_11_info), {0, 0, 0} }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index f8fe5ffcdcff..abaca6edeb71 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1629,10 +1629,10 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, * Same as gen8_update_reg_state_unlocked only through the batchbuffer. This * is only used by the kernel context. 
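
The gen8_emit_oa_config() hunk that follows uses the standard ring-emission shape: reserve a dword array with intel_ring_begin(), write an MI_LOAD_REGISTER_IMM header followed by (offset, value) pairs, pad with MI_NOOP, then intel_ring_advance(). A simplified stand-alone version with ring_begin()/ring_advance() as stand-ins; the LRI encoding shown mirrors i915's MI_INSTR(0x22, 2*n - 1) but should be treated as illustrative:

#include <stddef.h>
#include <stdint.h>

#define MI_LOAD_REGISTER_IMM(n) ((0x22u << 23) | (2u * (n) - 1))
#define MI_NOOP                 0u

extern uint32_t *ring_begin(void *rq, size_t dwords);
extern void ring_advance(void *rq, uint32_t *cs);

static int emit_reg_writes(void *rq, const uint32_t *offsets,
                           const uint32_t *values, size_t count)
{
        /* 1 header + count (offset, value) pairs + 1 NOOP of padding. */
        uint32_t *cs = ring_begin(rq, 2 * count + 2);
        size_t i;

        if (!cs)
                return -1;

        *cs++ = MI_LOAD_REGISTER_IMM(count);
        for (i = 0; i < count; i++) {
                *cs++ = offsets[i];     /* MMIO register offset */
                *cs++ = values[i];      /* immediate value to write */
        }
        *cs++ = MI_NOOP;

        ring_advance(rq, cs);
        return 0;
}
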
*/ -static int gen8_emit_oa_config(struct drm_i915_gem_request *req, +static int gen8_emit_oa_config(struct i915_request *rq, const struct i915_oa_config *oa_config) { - struct drm_i915_private *dev_priv = req->i915; + struct drm_i915_private *dev_priv = rq->i915; /* The MMIO offsets for Flex EU registers aren't contiguous */ u32 flex_mmio[] = { i915_mmio_reg_offset(EU_PERF_CNTL0), @@ -1646,7 +1646,7 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req, u32 *cs; int i; - cs = intel_ring_begin(req, ARRAY_SIZE(flex_mmio) * 2 + 4); + cs = intel_ring_begin(rq, ARRAY_SIZE(flex_mmio) * 2 + 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1684,7 +1684,7 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req, } *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -1694,38 +1694,38 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr { struct intel_engine_cs *engine = dev_priv->engine[RCS]; struct i915_gem_timeline *timeline; - struct drm_i915_gem_request *req; + struct i915_request *rq; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); - req = i915_gem_request_alloc(engine, dev_priv->kernel_context); - if (IS_ERR(req)) - return PTR_ERR(req); + rq = i915_request_alloc(engine, dev_priv->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); - ret = gen8_emit_oa_config(req, oa_config); + ret = gen8_emit_oa_config(rq, oa_config); if (ret) { - i915_add_request(req); + i915_request_add(rq); return ret; } /* Queue this switch after all other activity */ list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { - struct drm_i915_gem_request *prev; + struct i915_request *prev; struct intel_timeline *tl; tl = &timeline->engine[engine->id]; prev = i915_gem_active_raw(&tl->last_request, &dev_priv->drm.struct_mutex); if (prev) - i915_sw_fence_await_sw_fence_gfp(&req->submit, + i915_sw_fence_await_sw_fence_gfp(&rq->submit, &prev->submit, GFP_KERNEL); } - i915_add_request(req); + i915_request_add(rq); return 0; } diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 0e9b98c32b62..d8feb9053e0c 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -329,6 +329,32 @@ engine_event_status(struct intel_engine_cs *engine, return 0; } +static int +config_status(struct drm_i915_private *i915, u64 config) +{ + switch (config) { + case I915_PMU_ACTUAL_FREQUENCY: + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + /* Requires a mutex for sampling! */ + return -ENODEV; + /* Fall-through. */ + case I915_PMU_REQUESTED_FREQUENCY: + if (INTEL_GEN(i915) < 6) + return -ENODEV; + break; + case I915_PMU_INTERRUPTS: + break; + case I915_PMU_RC6_RESIDENCY: + if (!HAS_RC6(i915)) + return -ENODEV; + break; + default: + return -ENOENT; + } + + return 0; +} + static int engine_event_init(struct perf_event *event) { struct drm_i915_private *i915 = @@ -376,30 +402,10 @@ static int i915_pmu_event_init(struct perf_event *event) if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask)) return -EINVAL; - if (is_engine_event(event)) { + if (is_engine_event(event)) ret = engine_event_init(event); - } else { - ret = 0; - switch (event->attr.config) { - case I915_PMU_ACTUAL_FREQUENCY: - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) - /* Requires a mutex for sampling! 
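
The new config_status() above keeps the deliberate switch fall-through: I915_PMU_ACTUAL_FREQUENCY first rejects platforms where sampling would need a mutex, then falls into the same gen < 6 check as I915_PMU_REQUESTED_FREQUENCY. Restated as a stand-alone function with simplified capability flags standing in for the platform checks:

#include <errno.h>
#include <stdbool.h>

enum { CFG_ACTUAL_FREQ, CFG_REQUESTED_FREQ, CFG_INTERRUPTS, CFG_RC6 };

struct caps {
        bool freq_needs_mutex;  /* e.g. the VLV/CHV sampling constraint */
        int gen;
        bool has_rc6;
};

static int config_status(const struct caps *c, int config)
{
        switch (config) {
        case CFG_ACTUAL_FREQ:
                if (c->freq_needs_mutex)
                        return -ENODEV; /* cannot sample from the timer */
                /* fall through: shares the gen check below */
        case CFG_REQUESTED_FREQ:
                if (c->gen < 6)
                        return -ENODEV;
                break;
        case CFG_INTERRUPTS:
                break;
        case CFG_RC6:
                if (!c->has_rc6)
                        return -ENODEV;
                break;
        default:
                return -ENOENT; /* unknown counter, not merely unavailable */
        }

        return 0;
}
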
*/ - ret = -ENODEV; - case I915_PMU_REQUESTED_FREQUENCY: - if (INTEL_GEN(i915) < 6) - ret = -ENODEV; - break; - case I915_PMU_INTERRUPTS: - break; - case I915_PMU_RC6_RESIDENCY: - if (!HAS_RC6(i915)) - ret = -ENODEV; - break; - default: - ret = -ENOENT; - break; - } - } + else + ret = config_status(i915, event->attr.config); if (ret) return ret; @@ -427,7 +433,7 @@ static u64 __get_rc6(struct drm_i915_private *i915) return val; } -static u64 get_rc6(struct drm_i915_private *i915, bool locked) +static u64 get_rc6(struct drm_i915_private *i915) { #if IS_ENABLED(CONFIG_PM) unsigned long flags; @@ -443,8 +449,7 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked) * previously. */ - if (!locked) - spin_lock_irqsave(&i915->pmu.lock, flags); + spin_lock_irqsave(&i915->pmu.lock, flags); if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0; @@ -453,12 +458,10 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked) val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur; } - if (!locked) - spin_unlock_irqrestore(&i915->pmu.lock, flags); + spin_unlock_irqrestore(&i915->pmu.lock, flags); } else { struct pci_dev *pdev = i915->drm.pdev; struct device *kdev = &pdev->dev; - unsigned long flags2; /* * We are runtime suspended. @@ -467,10 +470,8 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked) * on top of the last known real value, as the approximated RC6 * counter value. */ - if (!locked) - spin_lock_irqsave(&i915->pmu.lock, flags); - - spin_lock_irqsave(&kdev->power.lock, flags2); + spin_lock_irqsave(&i915->pmu.lock, flags); + spin_lock(&kdev->power.lock); if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) i915->pmu.suspended_jiffies_last = @@ -480,14 +481,13 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked) i915->pmu.suspended_jiffies_last; val += jiffies - kdev->power.accounting_timestamp; - spin_unlock_irqrestore(&kdev->power.lock, flags2); + spin_unlock(&kdev->power.lock); val = jiffies_to_nsecs(val); val += i915->pmu.sample[__I915_SAMPLE_RC6].cur; i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; - if (!locked) - spin_unlock_irqrestore(&i915->pmu.lock, flags); + spin_unlock_irqrestore(&i915->pmu.lock, flags); } return val; @@ -496,7 +496,7 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked) #endif } -static u64 __i915_pmu_event_read(struct perf_event *event, bool locked) +static u64 __i915_pmu_event_read(struct perf_event *event) { struct drm_i915_private *i915 = container_of(event->pmu, typeof(*i915), pmu.base); @@ -534,7 +534,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event, bool locked) val = count_interrupts(i915); break; case I915_PMU_RC6_RESIDENCY: - val = get_rc6(i915, locked); + val = get_rc6(i915); break; } } @@ -549,7 +549,7 @@ static void i915_pmu_event_read(struct perf_event *event) again: prev = local64_read(&hwc->prev_count); - new = __i915_pmu_event_read(event, false); + new = __i915_pmu_event_read(event); if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev) goto again; @@ -599,14 +599,14 @@ static void i915_pmu_enable(struct perf_event *event) engine->pmu.enable_count[sample]++; } + spin_unlock_irqrestore(&i915->pmu.lock, flags); + /* * Store the current counter value so we can report the correct delta * for all listeners. Even when the event was already enabled and has * an existing non-zero value. 
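
i915_pmu_event_read() above uses the classic lock-free delta pattern: snapshot prev, sample the counter, publish the new snapshot with a compare-and-swap, and retry if another reader won the race, so each delta is accounted exactly once. The same loop with C11 atomics standing in for the kernel's local64_* helpers:

#include <stdatomic.h>
#include <stdint.h>

extern uint64_t sample_counter(void);  /* stand-in for __i915_pmu_event_read() */

static void event_read(_Atomic uint64_t *prev_count, uint64_t *total)
{
        uint64_t prev, new;

        do {
                prev = atomic_load(prev_count);
                new = sample_counter();
                /* Publish only if nobody raced us since the load. */
        } while (!atomic_compare_exchange_strong(prev_count, &prev, new));

        *total += new - prev;   /* the winner adds exactly its delta */
}
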
*/ - local64_set(&event->hw.prev_count, __i915_pmu_event_read(event, true)); - - spin_unlock_irqrestore(&i915->pmu.lock, flags); + local64_set(&event->hw.prev_count, __i915_pmu_event_read(event)); } static void i915_pmu_disable(struct perf_event *event) @@ -726,52 +726,9 @@ static ssize_t i915_pmu_event_show(struct device *dev, return sprintf(buf, "config=0x%lx\n", eattr->val); } -#define I915_EVENT_ATTR(_name, _config) \ - (&((struct i915_ext_attribute[]) { \ - { .attr = __ATTR(_name, 0444, i915_pmu_event_show, NULL), \ - .val = _config, } \ - })[0].attr.attr) - -#define I915_EVENT_STR(_name, _str) \ - (&((struct perf_pmu_events_attr[]) { \ - { .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \ - .id = 0, \ - .event_str = _str, } \ - })[0].attr.attr) - -#define I915_EVENT(_name, _config, _unit) \ - I915_EVENT_ATTR(_name, _config), \ - I915_EVENT_STR(_name.unit, _unit) - -#define I915_ENGINE_EVENT(_name, _class, _instance, _sample) \ - I915_EVENT_ATTR(_name, __I915_PMU_ENGINE(_class, _instance, _sample)), \ - I915_EVENT_STR(_name.unit, "ns") - -#define I915_ENGINE_EVENTS(_name, _class, _instance) \ - I915_ENGINE_EVENT(_name##_instance-busy, _class, _instance, I915_SAMPLE_BUSY), \ - I915_ENGINE_EVENT(_name##_instance-sema, _class, _instance, I915_SAMPLE_SEMA), \ - I915_ENGINE_EVENT(_name##_instance-wait, _class, _instance, I915_SAMPLE_WAIT) - -static struct attribute *i915_pmu_events_attrs[] = { - I915_ENGINE_EVENTS(rcs, I915_ENGINE_CLASS_RENDER, 0), - I915_ENGINE_EVENTS(bcs, I915_ENGINE_CLASS_COPY, 0), - I915_ENGINE_EVENTS(vcs, I915_ENGINE_CLASS_VIDEO, 0), - I915_ENGINE_EVENTS(vcs, I915_ENGINE_CLASS_VIDEO, 1), - I915_ENGINE_EVENTS(vecs, I915_ENGINE_CLASS_VIDEO_ENHANCE, 0), - - I915_EVENT(actual-frequency, I915_PMU_ACTUAL_FREQUENCY, "MHz"), - I915_EVENT(requested-frequency, I915_PMU_REQUESTED_FREQUENCY, "MHz"), - - I915_EVENT_ATTR(interrupts, I915_PMU_INTERRUPTS), - - I915_EVENT(rc6-residency, I915_PMU_RC6_RESIDENCY, "ns"), - - NULL, -}; - -static const struct attribute_group i915_pmu_events_attr_group = { +static struct attribute_group i915_pmu_events_attr_group = { .name = "events", - .attrs = i915_pmu_events_attrs, + /* Patch in attrs at runtime. 
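
With the static I915_EVENT_* macro tables gone, the events group is built at probe time from what the device actually supports (create_event_attributes(), added further down). The underlying shape is a two-pass count-then-fill into a NULL-terminated table; a sketch with plain calloc standing in for kcalloc and a stand-in attribute type:

#include <stdlib.h>

struct attr { const char *name; };

static struct attr **build_table(const struct attr *pool, int n,
                                 int (*supported)(const struct attr *))
{
        struct attr **tbl;
        int i, count = 0;

        /* Pass 1: count the supported entries. */
        for (i = 0; i < n; i++)
                if (supported(&pool[i]))
                        count++;

        /* Pass 2: allocate count + 1 slots and fill; calloc leaves the
         * final slot NULL, which terminates the sysfs-style table. */
        tbl = calloc(count + 1, sizeof(*tbl));
        if (!tbl)
                return NULL;

        for (i = 0, count = 0; i < n; i++)
                if (supported(&pool[i]))
                        tbl[count++] = (struct attr *)&pool[i];

        return tbl;
}
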
*/ }; static ssize_t @@ -789,7 +746,7 @@ static struct attribute *i915_cpumask_attrs[] = { NULL, }; -static struct attribute_group i915_pmu_cpumask_attr_group = { +static const struct attribute_group i915_pmu_cpumask_attr_group = { .attrs = i915_cpumask_attrs, }; @@ -800,6 +757,193 @@ static const struct attribute_group *i915_pmu_attr_groups[] = { NULL }; +#define __event(__config, __name, __unit) \ +{ \ + .config = (__config), \ + .name = (__name), \ + .unit = (__unit), \ +} + +#define __engine_event(__sample, __name) \ +{ \ + .sample = (__sample), \ + .name = (__name), \ +} + +static struct i915_ext_attribute * +add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config) +{ + sysfs_attr_init(&attr->attr.attr); + attr->attr.attr.name = name; + attr->attr.attr.mode = 0444; + attr->attr.show = i915_pmu_event_show; + attr->val = config; + + return ++attr; +} + +static struct perf_pmu_events_attr * +add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name, + const char *str) +{ + sysfs_attr_init(&attr->attr.attr); + attr->attr.attr.name = name; + attr->attr.attr.mode = 0444; + attr->attr.show = perf_event_sysfs_show; + attr->event_str = str; + + return ++attr; +} + +static struct attribute ** +create_event_attributes(struct drm_i915_private *i915) +{ + static const struct { + u64 config; + const char *name; + const char *unit; + } events[] = { + __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"), + __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"), + __event(I915_PMU_INTERRUPTS, "interrupts", NULL), + __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"), + }; + static const struct { + enum drm_i915_pmu_engine_sample sample; + char *name; + } engine_events[] = { + __engine_event(I915_SAMPLE_BUSY, "busy"), + __engine_event(I915_SAMPLE_SEMA, "sema"), + __engine_event(I915_SAMPLE_WAIT, "wait"), + }; + unsigned int count = 0; + struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter; + struct i915_ext_attribute *i915_attr = NULL, *i915_iter; + struct attribute **attr = NULL, **attr_iter; + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned int i; + + /* Count how many counters we will be exposing. */ + for (i = 0; i < ARRAY_SIZE(events); i++) { + if (!config_status(i915, events[i].config)) + count++; + } + + for_each_engine(engine, i915, id) { + for (i = 0; i < ARRAY_SIZE(engine_events); i++) { + if (!engine_event_status(engine, + engine_events[i].sample)) + count++; + } + } + + /* Allocate attribute objects and table. */ + i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL); + if (!i915_attr) + goto err_alloc; + + pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL); + if (!pmu_attr) + goto err_alloc; + + /* Max one pointer of each attribute type plus a termination entry. */ + attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL); + if (!attr) + goto err_alloc; + + i915_iter = i915_attr; + pmu_iter = pmu_attr; + attr_iter = attr; + + /* Initialize supported non-engine counters. 
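
The add_i915_attr()/add_pmu_attr() helpers above behave as write cursors: they initialize the slot they are handed and return the next one, so the caller can keep chaining iter = add_attr(iter, ...). Reduced to its essence with a stand-in type:

struct ext_attr { const char *name; unsigned long val; };

static struct ext_attr *add_attr(struct ext_attr *cur,
                                 const char *name, unsigned long val)
{
        cur->name = name;
        cur->val = val;
        return cur + 1; /* caller keeps reassigning the cursor */
}
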
*/ + for (i = 0; i < ARRAY_SIZE(events); i++) { + char *str; + + if (config_status(i915, events[i].config)) + continue; + + str = kstrdup(events[i].name, GFP_KERNEL); + if (!str) + goto err; + + *attr_iter++ = &i915_iter->attr.attr; + i915_iter = add_i915_attr(i915_iter, str, events[i].config); + + if (events[i].unit) { + str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name); + if (!str) + goto err; + + *attr_iter++ = &pmu_iter->attr.attr; + pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit); + } + } + + /* Initialize supported engine counters. */ + for_each_engine(engine, i915, id) { + for (i = 0; i < ARRAY_SIZE(engine_events); i++) { + char *str; + + if (engine_event_status(engine, + engine_events[i].sample)) + continue; + + str = kasprintf(GFP_KERNEL, "%s-%s", + engine->name, engine_events[i].name); + if (!str) + goto err; + + *attr_iter++ = &i915_iter->attr.attr; + i915_iter = + add_i915_attr(i915_iter, str, + __I915_PMU_ENGINE(engine->uabi_class, + engine->instance, + engine_events[i].sample)); + + str = kasprintf(GFP_KERNEL, "%s-%s.unit", + engine->name, engine_events[i].name); + if (!str) + goto err; + + *attr_iter++ = &pmu_iter->attr.attr; + pmu_iter = add_pmu_attr(pmu_iter, str, "ns"); + } + } + + i915->pmu.i915_attr = i915_attr; + i915->pmu.pmu_attr = pmu_attr; + + return attr; + +err:; + for (attr_iter = attr; *attr_iter; attr_iter++) + kfree((*attr_iter)->name); + +err_alloc: + kfree(attr); + kfree(i915_attr); + kfree(pmu_attr); + + return NULL; +} + +static void free_event_attributes(struct drm_i915_private *i915) +{ + struct attribute **attr_iter = i915_pmu_events_attr_group.attrs; + + for (; *attr_iter; attr_iter++) + kfree((*attr_iter)->name); + + kfree(i915_pmu_events_attr_group.attrs); + kfree(i915->pmu.i915_attr); + kfree(i915->pmu.pmu_attr); + + i915_pmu_events_attr_group.attrs = NULL; + i915->pmu.i915_attr = NULL; + i915->pmu.pmu_attr = NULL; +} + static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) { struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node); @@ -873,6 +1017,12 @@ void i915_pmu_register(struct drm_i915_private *i915) return; } + i915_pmu_events_attr_group.attrs = create_event_attributes(i915); + if (!i915_pmu_events_attr_group.attrs) { + ret = -ENOMEM; + goto err; + } + i915->pmu.base.attr_groups = i915_pmu_attr_groups; i915->pmu.base.task_ctx_nr = perf_invalid_context; i915->pmu.base.event_init = i915_pmu_event_init; @@ -901,6 +1051,7 @@ err_unreg: perf_pmu_unregister(&i915->pmu.base); err: i915->pmu.base.event_init = NULL; + free_event_attributes(i915); DRM_NOTE("Failed to register PMU! (err=%d)\n", ret); } @@ -917,4 +1068,5 @@ void i915_pmu_unregister(struct drm_i915_private *i915) perf_pmu_unregister(&i915->pmu.base); i915->pmu.base.event_init = NULL; + free_event_attributes(i915); } diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index bb62df15afa4..aa1b1a987ea1 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -100,6 +100,14 @@ struct i915_pmu { * @suspended_jiffies_last: Cached suspend time from PM core. */ unsigned long suspended_jiffies_last; + /** + * @i915_attr: Memory block holding device attributes. + */ + void *i915_attr; + /** + * @pmu_attr: Memory block holding device attributes. 
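
Because every attribute name above comes from kstrdup()/kasprintf(), teardown must walk the NULL-terminated table and free each name before the table itself, as free_event_attributes() above does (the attribute objects themselves live in the separate i915_attr/pmu_attr blocks). A userspace-flavoured equivalent:

#include <stdlib.h>

struct attr { char *name; };

static void free_table(struct attr **tbl)
{
        struct attr **it;

        if (!tbl)
                return;
        for (it = tbl; *it; it++)
                free((*it)->name);      /* each name was allocated separately */
        free(tbl);
}
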
+ */ + void *pmu_attr; }; #ifdef CONFIG_PERF_EVENTS diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c new file mode 100644 index 000000000000..3ace929dd90f --- /dev/null +++ b/drivers/gpu/drm/i915/i915_query.c @@ -0,0 +1,125 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_query.h" +#include <uapi/drm/i915_drm.h> + +static int query_topology_info(struct drm_i915_private *dev_priv, + struct drm_i915_query_item *query_item) +{ + const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu; + struct drm_i915_query_topology_info topo; + u32 slice_length, subslice_length, eu_length, total_length; + + if (query_item->flags != 0) + return -EINVAL; + + if (sseu->max_slices == 0) + return -ENODEV; + + BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask)); + + slice_length = sizeof(sseu->slice_mask); + subslice_length = sseu->max_slices * + DIV_ROUND_UP(sseu->max_subslices, + sizeof(sseu->subslice_mask[0]) * BITS_PER_BYTE); + eu_length = sseu->max_slices * sseu->max_subslices * + DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); + + total_length = sizeof(topo) + slice_length + subslice_length + eu_length; + + if (query_item->length == 0) + return total_length; + + if (query_item->length < total_length) + return -EINVAL; + + if (copy_from_user(&topo, u64_to_user_ptr(query_item->data_ptr), + sizeof(topo))) + return -EFAULT; + + if (topo.flags != 0) + return -EINVAL; + + if (!access_ok(VERIFY_WRITE, u64_to_user_ptr(query_item->data_ptr), + total_length)) + return -EFAULT; + + memset(&topo, 0, sizeof(topo)); + topo.max_slices = sseu->max_slices; + topo.max_subslices = sseu->max_subslices; + topo.max_eus_per_subslice = sseu->max_eus_per_subslice; + + topo.subslice_offset = slice_length; + topo.subslice_stride = DIV_ROUND_UP(sseu->max_subslices, BITS_PER_BYTE); + topo.eu_offset = slice_length + subslice_length; + topo.eu_stride = + DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); + + if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr), + &topo, sizeof(topo))) + return -EFAULT; + + if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr + sizeof(topo)), + &sseu->slice_mask, slice_length)) + return -EFAULT; + + if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr + + sizeof(topo) + slice_length), + sseu->subslice_mask, subslice_length)) + return -EFAULT; + + if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr + + sizeof(topo) + + slice_length + subslice_length), + sseu->eu_mask, eu_length)) + return -EFAULT; + + return total_length; +} + +static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv, + struct drm_i915_query_item *query_item) = { + query_topology_info, +}; + +int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_query *args = data; + struct drm_i915_query_item __user *user_item_ptr = + u64_to_user_ptr(args->items_ptr); + u32 i; + + if (args->flags != 0) + return -EINVAL; + + for (i = 0; i < args->num_items; i++, user_item_ptr++) { + struct drm_i915_query_item item; + u64 func_idx; + int ret; + + if (copy_from_user(&item, user_item_ptr, sizeof(item))) + return -EFAULT; + + if (item.query_id == 0) + return -EINVAL; + + func_idx = item.query_id - 1; + + if (func_idx < ARRAY_SIZE(i915_query_funcs)) + ret = i915_query_funcs[func_idx](dev_priv, &item); + else + ret = -EINVAL; + + /* Only write the length back to userspace if they differ. 
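
From userspace the query above is a two-pass protocol: call once with length 0 and the kernel replies with the required size through the item's length field, then call again with a buffer of that size; a negative length reports a per-item error while the ioctl itself still returns 0. A sketch of a caller, assuming the uapi names this series adds (DRM_I915_QUERY_TOPOLOGY_INFO, DRM_IOCTL_I915_QUERY) and libdrm's include path:

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static void *query_topology(int fd, int32_t *len)
{
        struct drm_i915_query_item item = {
                .query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
        };
        struct drm_i915_query q = {
                .num_items = 1,
                .items_ptr = (uintptr_t)&item,
        };
        void *buf;

        /* Pass 1: item.length == 0 asks for the required size. */
        if (ioctl(fd, DRM_IOCTL_I915_QUERY, &q) || item.length <= 0)
                return NULL;

        buf = malloc(item.length);
        if (!buf)
                return NULL;

        /* Pass 2: same item, now pointing at a destination buffer. */
        item.data_ptr = (uintptr_t)buf;
        if (ioctl(fd, DRM_IOCTL_I915_QUERY, &q) || item.length <= 0) {
                free(buf);
                return NULL;
        }

        *len = item.length;
        return buf;     /* drm_i915_query_topology_info + trailing masks */
}
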
*/ + if (ret != item.length && put_user(ret, &user_item_ptr->length)) + return -EFAULT; + } + + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_query.h b/drivers/gpu/drm/i915/i915_query.h new file mode 100644 index 000000000000..31dcef181f63 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_query.h @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#ifndef _I915_QUERY_H_ +#define _I915_QUERY_H_ + +struct drm_device; +struct drm_file; + +int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file); + +#endif diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 33eb0c5b1d32..e6a8c0ee7df1 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -178,6 +178,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define BCS_HW 2 #define VECS_HW 3 #define VCS2_HW 4 +#define VCS3_HW 6 +#define VCS4_HW 7 +#define VECS2_HW 12 /* Engine class */ @@ -188,7 +191,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OTHER_CLASS 4 #define MAX_ENGINE_CLASS 4 -#define MAX_ENGINE_INSTANCE 1 +#define MAX_ENGINE_INSTANCE 3 /* PCI config space */ @@ -1304,6 +1307,7 @@ enum i915_power_well_id { SKL_DISP_PW_DDI_B, SKL_DISP_PW_DDI_C, SKL_DISP_PW_DDI_D, + CNL_DISP_PW_DDI_F = 6, GLK_DISP_PW_AUX_A = 8, GLK_DISP_PW_AUX_B, @@ -1312,6 +1316,7 @@ enum i915_power_well_id { CNL_DISP_PW_AUX_B = GLK_DISP_PW_AUX_B, CNL_DISP_PW_AUX_C = GLK_DISP_PW_AUX_C, CNL_DISP_PW_AUX_D, + CNL_DISP_PW_AUX_F, SKL_DISP_PW_1 = 14, SKL_DISP_PW_2, @@ -1904,6 +1909,11 @@ enum i915_power_well_id { #define CL_POWER_DOWN_ENABLE (1 << 4) #define SUS_CLOCK_CONFIG (3 << 0) +#define _ICL_PORT_CL_DW5_A 0x162014 +#define _ICL_PORT_CL_DW5_B 0x6C014 +#define ICL_PORT_CL_DW5(port) _MMIO_PORT(port, _ICL_PORT_CL_DW5_A, \ + _ICL_PORT_CL_DW5_B) + #define _PORT_CL1CM_DW9_A 0x162024 #define _PORT_CL1CM_DW9_BC 0x6C024 #define IREF0RC_OFFSET_SHIFT 8 @@ -1963,7 +1973,7 @@ enum i915_power_well_id { #define _CNL_PORT_TX_DW2_LN0_B 0x162648 #define _CNL_PORT_TX_DW2_LN0_C 0x162C48 #define _CNL_PORT_TX_DW2_LN0_D 0x162E48 -#define _CNL_PORT_TX_DW2_LN0_F 0x162A48 +#define _CNL_PORT_TX_DW2_LN0_F 0x162848 #define CNL_PORT_TX_DW2_GRP(port) _MMIO_PORT6(port, \ _CNL_PORT_TX_DW2_GRP_AE, \ _CNL_PORT_TX_DW2_GRP_B, \ @@ -2102,6 +2112,28 @@ enum i915_power_well_id { #define CNL_PORT_COMP_DW9 _MMIO(0x162124) #define CNL_PORT_COMP_DW10 _MMIO(0x162128) +#define _ICL_PORT_COMP_DW0_A 0x162100 +#define _ICL_PORT_COMP_DW0_B 0x6C100 +#define ICL_PORT_COMP_DW0(port) _MMIO_PORT(port, _ICL_PORT_COMP_DW0_A, \ + _ICL_PORT_COMP_DW0_B) +#define _ICL_PORT_COMP_DW1_A 0x162104 +#define _ICL_PORT_COMP_DW1_B 0x6C104 +#define ICL_PORT_COMP_DW1(port) _MMIO_PORT(port, _ICL_PORT_COMP_DW1_A, \ + _ICL_PORT_COMP_DW1_B) +#define _ICL_PORT_COMP_DW3_A 0x16210C +#define _ICL_PORT_COMP_DW3_B 0x6C10C +#define ICL_PORT_COMP_DW3(port) _MMIO_PORT(port, _ICL_PORT_COMP_DW3_A, \ + _ICL_PORT_COMP_DW3_B) +#define _ICL_PORT_COMP_DW9_A 0x162124 +#define _ICL_PORT_COMP_DW9_B 0x6C124 +#define ICL_PORT_COMP_DW9(port) _MMIO_PORT(port, _ICL_PORT_COMP_DW9_A, \ + _ICL_PORT_COMP_DW9_B) +#define _ICL_PORT_COMP_DW10_A 0x162128 +#define _ICL_PORT_COMP_DW10_B 0x6C128 +#define ICL_PORT_COMP_DW10(port) _MMIO_PORT(port, \ + _ICL_PORT_COMP_DW10_A, \ + _ICL_PORT_COMP_DW10_B) + /* BXT PHY Ref registers */ #define _PORT_REF_DW3_A 0x16218C #define _PORT_REF_DW3_BC 0x6C18C @@ -2313,7 +2345,13 @@ enum i915_power_well_id { #define BSD_RING_BASE 0x04000 #define GEN6_BSD_RING_BASE 0x12000 #define 
GEN8_BSD2_RING_BASE 0x1c000 +#define GEN11_BSD_RING_BASE 0x1c0000 +#define GEN11_BSD2_RING_BASE 0x1c4000 +#define GEN11_BSD3_RING_BASE 0x1d0000 +#define GEN11_BSD4_RING_BASE 0x1d4000 #define VEBOX_RING_BASE 0x1a000 +#define GEN11_VEBOX_RING_BASE 0x1c8000 +#define GEN11_VEBOX2_RING_BASE 0x1d8000 #define BLT_RING_BASE 0x22000 #define RING_TAIL(base) _MMIO((base)+0x30) #define RING_HEAD(base) _MMIO((base)+0x34) @@ -2590,6 +2628,8 @@ enum i915_power_well_id { #define GFX_FORWARD_VBLANK_ALWAYS (1<<5) #define GFX_FORWARD_VBLANK_COND (2<<5) +#define GEN11_GFX_DISABLE_LEGACY_MODE (1<<3) + #define VLV_DISPLAY_BASE 0x180000 #define VLV_MIPI_BASE VLV_DISPLAY_BASE #define BXT_MIPI_BASE 0x60000 @@ -2648,6 +2688,31 @@ enum i915_power_well_id { #define LM_FIFO_WATERMARK 0x0000001F #define MI_ARB_STATE _MMIO(0x20e4) /* 915+ only */ +#define MBUS_ABOX_CTL _MMIO(0x45038) +#define MBUS_ABOX_BW_CREDIT_MASK (3 << 20) +#define MBUS_ABOX_BW_CREDIT(x) ((x) << 20) +#define MBUS_ABOX_B_CREDIT_MASK (0xF << 16) +#define MBUS_ABOX_B_CREDIT(x) ((x) << 16) +#define MBUS_ABOX_BT_CREDIT_POOL2_MASK (0x1F << 8) +#define MBUS_ABOX_BT_CREDIT_POOL2(x) ((x) << 8) +#define MBUS_ABOX_BT_CREDIT_POOL1_MASK (0x1F << 0) +#define MBUS_ABOX_BT_CREDIT_POOL1(x) ((x) << 0) + +#define _PIPEA_MBUS_DBOX_CTL 0x7003C +#define _PIPEB_MBUS_DBOX_CTL 0x7103C +#define PIPE_MBUS_DBOX_CTL(pipe) _MMIO_PIPE(pipe, _PIPEA_MBUS_DBOX_CTL, \ + _PIPEB_MBUS_DBOX_CTL) +#define MBUS_DBOX_BW_CREDIT_MASK (3 << 14) +#define MBUS_DBOX_BW_CREDIT(x) ((x) << 14) +#define MBUS_DBOX_B_CREDIT_MASK (0x1F << 8) +#define MBUS_DBOX_B_CREDIT(x) ((x) << 8) +#define MBUS_DBOX_A_CREDIT_MASK (0xF << 0) +#define MBUS_DBOX_A_CREDIT(x) ((x) << 0) + +#define MBUS_UBOX_CTL _MMIO(0x4503C) +#define MBUS_BBOX_CTL_S1 _MMIO(0x45040) +#define MBUS_BBOX_CTL_S2 _MMIO(0x45044) + /* Make render/texture TLB fetches lower priorty than associated data * fetches. 
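
The MBUS credit definitions above follow i915's usual two-macro field idiom: a FIELD_MASK at a fixed shift plus a FIELD(x) setter, consumed with a read-modify-write. A generic example with stand-in mmio helpers:

#include <stdint.h>

#define BW_CREDIT_MASK  (3u << 20)
#define BW_CREDIT(x)    ((uint32_t)(x) << 20)

extern uint32_t mmio_read(uint32_t reg);
extern void mmio_write(uint32_t reg, uint32_t val);

static void set_bw_credit(uint32_t reg, unsigned int credits)
{
        uint32_t val = mmio_read(reg);

        val &= ~BW_CREDIT_MASK;                 /* clear the field */
        val |= BW_CREDIT(credits) & BW_CREDIT_MASK;
        mmio_write(reg, val);
}
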
This is not turned on by default */ @@ -2751,6 +2816,13 @@ enum i915_power_well_id { #define GEN9_RCS_FE_FSM2 _MMIO(0x22a4) /* Fuse readout registers for GT */ +#define HSW_PAVP_FUSE1 _MMIO(0x911C) +#define HSW_F1_EU_DIS_SHIFT 16 +#define HSW_F1_EU_DIS_MASK (0x3 << HSW_F1_EU_DIS_SHIFT) +#define HSW_F1_EU_DIS_10EUS 0 +#define HSW_F1_EU_DIS_8EUS 1 +#define HSW_F1_EU_DIS_6EUS 2 + #define CHV_FUSE_GT _MMIO(VLV_DISPLAY_BASE + 0x2168) #define CHV_FGT_DISABLE_SS0 (1 << 10) #define CHV_FGT_DISABLE_SS1 (1 << 11) @@ -3045,6 +3117,7 @@ enum i915_power_well_id { # define GPIO_DATA_PULLUP_DISABLE (1 << 13) #define GMBUS0 _MMIO(dev_priv->gpio_mmio_base + 0x5100) /* clock/port select */ +#define GMBUS_AKSV_SELECT (1<<11) #define GMBUS_RATE_100KHZ (0<<8) #define GMBUS_RATE_50KHZ (1<<8) #define GMBUS_RATE_400KHZ (2<<8) /* reserved on Pineview */ @@ -3063,7 +3136,12 @@ enum i915_power_well_id { #define GMBUS_PIN_2_BXT 2 #define GMBUS_PIN_3_BXT 3 #define GMBUS_PIN_4_CNP 4 -#define GMBUS_NUM_PINS 7 /* including 0 */ +#define GMBUS_PIN_9_TC1_ICP 9 +#define GMBUS_PIN_10_TC2_ICP 10 +#define GMBUS_PIN_11_TC3_ICP 11 +#define GMBUS_PIN_12_TC4_ICP 12 + +#define GMBUS_NUM_PINS 13 /* including 0 */ #define GMBUS1 _MMIO(dev_priv->gpio_mmio_base + 0x5104) /* command/status */ #define GMBUS_SW_CLR_INT (1<<31) #define GMBUS_SW_RDY (1<<30) @@ -3834,6 +3912,12 @@ enum { #define GEN8_CTX_ID_SHIFT 32 #define GEN8_CTX_ID_WIDTH 21 +#define GEN11_SW_CTX_ID_SHIFT 37 +#define GEN11_SW_CTX_ID_WIDTH 11 +#define GEN11_ENGINE_CLASS_SHIFT 61 +#define GEN11_ENGINE_CLASS_WIDTH 3 +#define GEN11_ENGINE_INSTANCE_SHIFT 48 +#define GEN11_ENGINE_INSTANCE_WIDTH 6 #define CHV_CLK_CTL1 _MMIO(0x101100) #define VLV_CLK_CTL2 _MMIO(0x101104) @@ -3881,6 +3965,9 @@ enum { #define SARBUNIT_CLKGATE_DIS (1 << 5) #define RCCUNIT_CLKGATE_DIS (1 << 7) +#define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524) +#define GWUNIT_CLKGATE_DIS (1 << 16) + #define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434) #define VFUNIT_CLKGATE_DIS (1 << 20) @@ -4066,7 +4153,7 @@ enum { #define EDP_PSR_AUX_CTL _MMIO(dev_priv->psr_mmio_base + 0x10) #define EDP_PSR_AUX_DATA(i) _MMIO(dev_priv->psr_mmio_base + 0x14 + (i) * 4) /* 5 registers */ -#define EDP_PSR_STATUS_CTL _MMIO(dev_priv->psr_mmio_base + 0x40) +#define EDP_PSR_STATUS _MMIO(dev_priv->psr_mmio_base + 0x40) #define EDP_PSR_STATUS_STATE_MASK (7<<29) #define EDP_PSR_STATUS_STATE_IDLE (0<<29) #define EDP_PSR_STATUS_STATE_SRDONACK (1<<29) @@ -4093,7 +4180,7 @@ enum { #define EDP_PSR_PERF_CNT _MMIO(dev_priv->psr_mmio_base + 0x44) #define EDP_PSR_PERF_CNT_MASK 0xffffff -#define EDP_PSR_DEBUG_CTL _MMIO(dev_priv->psr_mmio_base + 0x60) +#define EDP_PSR_DEBUG _MMIO(dev_priv->psr_mmio_base + 0x60) #define EDP_PSR_DEBUG_MASK_MAX_SLEEP (1<<28) #define EDP_PSR_DEBUG_MASK_LPSP (1<<27) #define EDP_PSR_DEBUG_MASK_MEMUP (1<<26) @@ -4116,7 +4203,7 @@ enum { #define EDP_PSR2_IDLE_MASK 0xf #define EDP_PSR2_FRAME_BEFORE_SU(a) ((a)<<4) -#define EDP_PSR2_STATUS_CTL _MMIO(0x6f940) +#define EDP_PSR2_STATUS _MMIO(0x6f940) #define EDP_PSR2_STATUS_STATE_MASK (0xf<<28) #define EDP_PSR2_STATUS_STATE_SHIFT 28 @@ -5278,8 +5365,15 @@ enum { #define _DPD_AUX_CH_DATA4 (dev_priv->info.display_mmio_offset + 0x64320) #define _DPD_AUX_CH_DATA5 (dev_priv->info.display_mmio_offset + 0x64324) -#define DP_AUX_CH_CTL(port) _MMIO_PORT(port, _DPA_AUX_CH_CTL, _DPB_AUX_CH_CTL) -#define DP_AUX_CH_DATA(port, i) _MMIO(_PORT(port, _DPA_AUX_CH_DATA1, _DPB_AUX_CH_DATA1) + (i) * 4) /* 5 registers */ +#define _DPF_AUX_CH_CTL (dev_priv->info.display_mmio_offset + 0x64510) +#define 
_DPF_AUX_CH_DATA1 (dev_priv->info.display_mmio_offset + 0x64514) +#define _DPF_AUX_CH_DATA2 (dev_priv->info.display_mmio_offset + 0x64518) +#define _DPF_AUX_CH_DATA3 (dev_priv->info.display_mmio_offset + 0x6451c) +#define _DPF_AUX_CH_DATA4 (dev_priv->info.display_mmio_offset + 0x64520) +#define _DPF_AUX_CH_DATA5 (dev_priv->info.display_mmio_offset + 0x64524) + +#define DP_AUX_CH_CTL(aux_ch) _MMIO_PORT(aux_ch, _DPA_AUX_CH_CTL, _DPB_AUX_CH_CTL) +#define DP_AUX_CH_DATA(aux_ch, i) _MMIO(_PORT(aux_ch, _DPA_AUX_CH_DATA1, _DPB_AUX_CH_DATA1) + (i) * 4) /* 5 registers */ #define DP_AUX_CH_CTL_SEND_BUSY (1 << 31) #define DP_AUX_CH_CTL_DONE (1 << 30) @@ -6063,6 +6157,7 @@ enum { #define _DVSACNTR 0x72180 #define DVS_ENABLE (1<<31) #define DVS_GAMMA_ENABLE (1<<30) +#define DVS_YUV_RANGE_CORRECTION_DISABLE (1<<27) #define DVS_PIXFORMAT_MASK (3<<25) #define DVS_FORMAT_YUV422 (0<<25) #define DVS_FORMAT_RGBX101010 (1<<25) @@ -6071,6 +6166,7 @@ enum { #define DVS_PIPE_CSC_ENABLE (1<<24) #define DVS_SOURCE_KEY (1<<22) #define DVS_RGB_ORDER_XBGR (1<<20) +#define DVS_YUV_FORMAT_BT709 (1<<18) #define DVS_YUV_BYTE_ORDER_MASK (3<<16) #define DVS_YUV_ORDER_YUYV (0<<16) #define DVS_YUV_ORDER_UYVY (1<<16) @@ -6130,6 +6226,7 @@ enum { #define _SPRA_CTL 0x70280 #define SPRITE_ENABLE (1<<31) #define SPRITE_GAMMA_ENABLE (1<<30) +#define SPRITE_YUV_RANGE_CORRECTION_DISABLE (1<<28) #define SPRITE_PIXFORMAT_MASK (7<<25) #define SPRITE_FORMAT_YUV422 (0<<25) #define SPRITE_FORMAT_RGBX101010 (1<<25) @@ -6141,7 +6238,7 @@ enum { #define SPRITE_SOURCE_KEY (1<<22) #define SPRITE_RGB_ORDER_RGBX (1<<20) /* only for 888 and 161616 */ #define SPRITE_YUV_TO_RGB_CSC_DISABLE (1<<19) -#define SPRITE_YUV_CSC_FORMAT_BT709 (1<<18) /* 0 is BT601 */ +#define SPRITE_YUV_TO_RGB_CSC_FORMAT_BT709 (1<<18) /* 0 is BT601 */ #define SPRITE_YUV_BYTE_ORDER_MASK (3<<16) #define SPRITE_YUV_ORDER_YUYV (0<<16) #define SPRITE_YUV_ORDER_UYVY (1<<16) @@ -6217,6 +6314,7 @@ enum { #define SP_FORMAT_RGBA8888 (0xf<<26) #define SP_ALPHA_PREMULTIPLY (1<<23) /* CHV pipe B */ #define SP_SOURCE_KEY (1<<22) +#define SP_YUV_FORMAT_BT709 (1<<18) #define SP_YUV_BYTE_ORDER_MASK (3<<16) #define SP_YUV_ORDER_YUYV (0<<16) #define SP_YUV_ORDER_UYVY (1<<16) @@ -6236,6 +6334,12 @@ enum { #define _SPATILEOFF (VLV_DISPLAY_BASE + 0x721a4) #define _SPACONSTALPHA (VLV_DISPLAY_BASE + 0x721a8) #define SP_CONST_ALPHA_ENABLE (1<<31) +#define _SPACLRC0 (VLV_DISPLAY_BASE + 0x721d0) +#define SP_CONTRAST(x) ((x) << 18) /* u3.6 */ +#define SP_BRIGHTNESS(x) ((x) & 0xff) /* s8 */ +#define _SPACLRC1 (VLV_DISPLAY_BASE + 0x721d4) +#define SP_SH_SIN(x) (((x) & 0x7ff) << 16) /* s4.7 */ +#define SP_SH_COS(x) (x) /* u3.7 */ #define _SPAGAMC (VLV_DISPLAY_BASE + 0x721f4) #define _SPBCNTR (VLV_DISPLAY_BASE + 0x72280) @@ -6249,6 +6353,8 @@ enum { #define _SPBKEYMAXVAL (VLV_DISPLAY_BASE + 0x722a0) #define _SPBTILEOFF (VLV_DISPLAY_BASE + 0x722a4) #define _SPBCONSTALPHA (VLV_DISPLAY_BASE + 0x722a8) +#define _SPBCLRC0 (VLV_DISPLAY_BASE + 0x722d0) +#define _SPBCLRC1 (VLV_DISPLAY_BASE + 0x722d4) #define _SPBGAMC (VLV_DISPLAY_BASE + 0x722f4) #define _MMIO_VLV_SPR(pipe, plane_id, reg_a, reg_b) \ @@ -6265,6 +6371,8 @@ enum { #define SPKEYMAXVAL(pipe, plane_id) _MMIO_VLV_SPR((pipe), (plane_id), _SPAKEYMAXVAL, _SPBKEYMAXVAL) #define SPTILEOFF(pipe, plane_id) _MMIO_VLV_SPR((pipe), (plane_id), _SPATILEOFF, _SPBTILEOFF) #define SPCONSTALPHA(pipe, plane_id) _MMIO_VLV_SPR((pipe), (plane_id), _SPACONSTALPHA, _SPBCONSTALPHA) +#define SPCLRC0(pipe, plane_id) _MMIO_VLV_SPR((pipe), (plane_id), _SPACLRC0, _SPBCLRC0) +#define 
SPCLRC1(pipe, plane_id) _MMIO_VLV_SPR((pipe), (plane_id), _SPACLRC1, _SPBCLRC1) #define SPGAMC(pipe, plane_id) _MMIO_VLV_SPR((pipe), (plane_id), _SPAGAMC, _SPBGAMC) /* @@ -6310,6 +6418,12 @@ enum { #define _PLANE_CTL_3_A 0x70380 #define PLANE_CTL_ENABLE (1 << 31) #define PLANE_CTL_PIPE_GAMMA_ENABLE (1 << 30) /* Pre-GLK */ +#define PLANE_CTL_YUV_RANGE_CORRECTION_DISABLE (1 << 28) +/* + * ICL+ uses the same PLANE_CTL_FORMAT bits, but the field definition + * expanded to include bit 23 as well. However, the shift-24 based values + * correctly map to the same formats in ICL, as long as bit 23 is set to 0 + */ #define PLANE_CTL_FORMAT_MASK (0xf << 24) #define PLANE_CTL_FORMAT_YUV422 ( 0 << 24) #define PLANE_CTL_FORMAT_NV12 ( 1 << 24) @@ -6319,12 +6433,14 @@ enum { #define PLANE_CTL_FORMAT_AYUV ( 8 << 24) #define PLANE_CTL_FORMAT_INDEXED ( 12 << 24) #define PLANE_CTL_FORMAT_RGB_565 ( 14 << 24) +#define ICL_PLANE_CTL_FORMAT_MASK (0x1f << 23) #define PLANE_CTL_PIPE_CSC_ENABLE (1 << 23) /* Pre-GLK */ #define PLANE_CTL_KEY_ENABLE_MASK (0x3 << 21) #define PLANE_CTL_KEY_ENABLE_SOURCE ( 1 << 21) #define PLANE_CTL_KEY_ENABLE_DESTINATION ( 2 << 21) #define PLANE_CTL_ORDER_BGRX (0 << 20) #define PLANE_CTL_ORDER_RGBX (1 << 20) +#define PLANE_CTL_YUV_TO_RGB_CSC_FORMAT_BT709 (1 << 18) #define PLANE_CTL_YUV422_ORDER_MASK (0x3 << 16) #define PLANE_CTL_YUV422_YUYV ( 0 << 16) #define PLANE_CTL_YUV422_UYVY ( 1 << 16) @@ -6377,7 +6493,13 @@ enum { #define _PLANE_COLOR_CTL_2_A 0x702CC /* GLK+ */ #define _PLANE_COLOR_CTL_3_A 0x703CC /* GLK+ */ #define PLANE_COLOR_PIPE_GAMMA_ENABLE (1 << 30) +#define PLANE_COLOR_YUV_RANGE_CORRECTION_DISABLE (1 << 28) #define PLANE_COLOR_PIPE_CSC_ENABLE (1 << 23) +#define PLANE_COLOR_CSC_MODE_BYPASS (0 << 17) +#define PLANE_COLOR_CSC_MODE_YUV601_TO_RGB709 (1 << 17) +#define PLANE_COLOR_CSC_MODE_YUV709_TO_RGB709 (2 << 17) +#define PLANE_COLOR_CSC_MODE_YUV2020_TO_RGB2020 (3 << 17) +#define PLANE_COLOR_CSC_MODE_RGB709_TO_RGB2020 (4 << 17) #define PLANE_COLOR_PLANE_GAMMA_DISABLE (1 << 13) #define PLANE_COLOR_ALPHA_MASK (0x3 << 4) #define PLANE_COLOR_ALPHA_DISABLE (0 << 4) @@ -6933,6 +7055,7 @@ enum { #define GEN8_DE_PORT_IMR _MMIO(0x44444) #define GEN8_DE_PORT_IIR _MMIO(0x44448) #define GEN8_DE_PORT_IER _MMIO(0x4444c) +#define CNL_AUX_CHANNEL_F (1 << 28) #define GEN9_AUX_CHANNEL_D (1 << 27) #define GEN9_AUX_CHANNEL_C (1 << 26) #define GEN9_AUX_CHANNEL_B (1 << 25) @@ -6957,6 +7080,69 @@ enum { #define GEN8_PCU_IIR _MMIO(0x444e8) #define GEN8_PCU_IER _MMIO(0x444ec) +#define GEN11_GFX_MSTR_IRQ _MMIO(0x190010) +#define GEN11_MASTER_IRQ (1 << 31) +#define GEN11_PCU_IRQ (1 << 30) +#define GEN11_DISPLAY_IRQ (1 << 16) +#define GEN11_GT_DW_IRQ(x) (1 << (x)) +#define GEN11_GT_DW1_IRQ (1 << 1) +#define GEN11_GT_DW0_IRQ (1 << 0) + +#define GEN11_DISPLAY_INT_CTL _MMIO(0x44200) +#define GEN11_DISPLAY_IRQ_ENABLE (1 << 31) +#define GEN11_AUDIO_CODEC_IRQ (1 << 24) +#define GEN11_DE_PCH_IRQ (1 << 23) +#define GEN11_DE_MISC_IRQ (1 << 22) +#define GEN11_DE_PORT_IRQ (1 << 20) +#define GEN11_DE_PIPE_C (1 << 18) +#define GEN11_DE_PIPE_B (1 << 17) +#define GEN11_DE_PIPE_A (1 << 16) + +#define GEN11_GT_INTR_DW0 _MMIO(0x190018) +#define GEN11_CSME (31) +#define GEN11_GUNIT (28) +#define GEN11_GUC (25) +#define GEN11_WDPERF (20) +#define GEN11_KCR (19) +#define GEN11_GTPM (16) +#define GEN11_BCS (15) +#define GEN11_RCS0 (0) + +#define GEN11_GT_INTR_DW1 _MMIO(0x19001c) +#define GEN11_VECS(x) (31 - (x)) +#define GEN11_VCS(x) (x) + +#define GEN11_GT_INTR_DW(x) _MMIO(0x190018 + (x * 4)) + +#define 
GEN11_INTR_IDENTITY_REG0 _MMIO(0x190060) +#define GEN11_INTR_IDENTITY_REG1 _MMIO(0x190064) +#define GEN11_INTR_DATA_VALID (1 << 31) +#define GEN11_INTR_ENGINE_MASK (0xffff) + +#define GEN11_INTR_IDENTITY_REG(x) _MMIO(0x190060 + (x * 4)) + +#define GEN11_IIR_REG0_SELECTOR _MMIO(0x190070) +#define GEN11_IIR_REG1_SELECTOR _MMIO(0x190074) + +#define GEN11_IIR_REG_SELECTOR(x) _MMIO(0x190070 + (x * 4)) + +#define GEN11_RENDER_COPY_INTR_ENABLE _MMIO(0x190030) +#define GEN11_VCS_VECS_INTR_ENABLE _MMIO(0x190034) +#define GEN11_GUC_SG_INTR_ENABLE _MMIO(0x190038) +#define GEN11_GPM_WGBOXPERF_INTR_ENABLE _MMIO(0x19003c) +#define GEN11_CRYPTO_RSVD_INTR_ENABLE _MMIO(0x190040) +#define GEN11_GUNIT_CSME_INTR_ENABLE _MMIO(0x190044) + +#define GEN11_RCS0_RSVD_INTR_MASK _MMIO(0x190090) +#define GEN11_BCS_RSVD_INTR_MASK _MMIO(0x1900a0) +#define GEN11_VCS0_VCS1_INTR_MASK _MMIO(0x1900a8) +#define GEN11_VCS2_VCS3_INTR_MASK _MMIO(0x1900ac) +#define GEN11_VECS0_VECS1_INTR_MASK _MMIO(0x1900d0) +#define GEN11_GUC_SG_INTR_MASK _MMIO(0x1900e8) +#define GEN11_GPM_WGBOXPERF_INTR_MASK _MMIO(0x1900ec) +#define GEN11_CRYPTO_RSVD_INTR_MASK _MMIO(0x1900f0) +#define GEN11_GUNIT_CSME_INTR_MASK _MMIO(0x1900f4) + #define ILK_DISPLAY_CHICKEN2 _MMIO(0x42004) /* Required on all Ironlake and Sandybridge according to the B-Spec. */ #define ILK_ELPIN_409_SELECT (1 << 25) @@ -7011,8 +7197,12 @@ enum { #define CHICKEN_TRANS_A 0x420c0 #define CHICKEN_TRANS_B 0x420c4 #define CHICKEN_TRANS(trans) _MMIO_TRANS(trans, CHICKEN_TRANS_A, CHICKEN_TRANS_B) -#define PSR2_VSC_ENABLE_PROG_HEADER (1<<12) -#define PSR2_ADD_VERTICAL_LINE_COUNT (1<<15) +#define DDI_TRAINING_OVERRIDE_ENABLE (1<<19) +#define DDI_TRAINING_OVERRIDE_VALUE (1<<18) +#define DDIE_TRAINING_OVERRIDE_ENABLE (1<<17) /* CHICKEN_TRANS_A only */ +#define DDIE_TRAINING_OVERRIDE_VALUE (1<<16) /* CHICKEN_TRANS_A only */ +#define PSR2_ADD_VERTICAL_LINE_COUNT (1<<15) +#define PSR2_VSC_ENABLE_PROG_HEADER (1<<12) #define DISP_ARB_CTL _MMIO(0x45000) #define DISP_FBC_MEMORY_WAKE (1<<31) @@ -7022,6 +7212,8 @@ enum { #define DISP_DATA_PARTITION_5_6 (1<<6) #define DISP_IPC_ENABLE (1<<3) #define DBUF_CTL _MMIO(0x45008) +#define DBUF_CTL_S1 _MMIO(0x45008) +#define DBUF_CTL_S2 _MMIO(0x44FE8) #define DBUF_POWER_REQUEST (1<<31) #define DBUF_POWER_STATE (1<<30) #define GEN7_MSG_CTL _MMIO(0x45010) @@ -7031,8 +7223,9 @@ enum { #define RESET_PCH_HANDSHAKE_ENABLE (1<<4) #define GEN8_CHICKEN_DCPR_1 _MMIO(0x46430) -#define SKL_SELECT_ALTERNATE_DC_EXIT (1<<30) -#define MASK_WAKEMEM (1<<13) +#define SKL_SELECT_ALTERNATE_DC_EXIT (1 << 30) +#define MASK_WAKEMEM (1 << 13) +#define CNL_DDI_CLOCK_REG_ACCESS_ON (1 << 7) #define SKL_DFSM _MMIO(0x51000) #define SKL_DFSM_CDCLK_LIMIT_MASK (3 << 23) @@ -7044,8 +7237,12 @@ enum { #define SKL_DFSM_PIPE_B_DISABLE (1 << 21) #define SKL_DFSM_PIPE_C_DISABLE (1 << 28) -#define SKL_DSSM _MMIO(0x51004) -#define CNL_DSSM_CDCLK_PLL_REFCLK_24MHz (1 << 31) +#define SKL_DSSM _MMIO(0x51004) +#define CNL_DSSM_CDCLK_PLL_REFCLK_24MHz (1 << 31) +#define ICL_DSSM_CDCLK_PLL_REFCLK_MASK (7 << 29) +#define ICL_DSSM_CDCLK_PLL_REFCLK_24MHz (0 << 29) +#define ICL_DSSM_CDCLK_PLL_REFCLK_19_2MHz (1 << 29) +#define ICL_DSSM_CDCLK_PLL_REFCLK_38_4MHz (2 << 29) #define GEN7_FF_SLICE_CS_CHICKEN1 _MMIO(0x20e0) #define GEN9_FFSC_PERCTX_PREEMPT_CTRL (1<<14) @@ -7351,6 +7548,8 @@ enum { #define CNP_RAWCLK_DIV(div) ((div) << 16) #define CNP_RAWCLK_FRAC_MASK (0xf << 26) #define CNP_RAWCLK_FRAC(frac) ((frac) << 26) +#define ICP_RAWCLK_DEN(den) ((den) << 26) +#define ICP_RAWCLK_NUM(num) ((num) << 11) #define 
PCH_DPLL_TMR_CFG _MMIO(0xc6208) @@ -7723,8 +7922,8 @@ enum { #define _PCH_DPD_AUX_CH_DATA4 0xe4320 #define _PCH_DPD_AUX_CH_DATA5 0xe4324 -#define PCH_DP_AUX_CH_CTL(port) _MMIO_PORT((port) - PORT_B, _PCH_DPB_AUX_CH_CTL, _PCH_DPC_AUX_CH_CTL) -#define PCH_DP_AUX_CH_DATA(port, i) _MMIO(_PORT((port) - PORT_B, _PCH_DPB_AUX_CH_DATA1, _PCH_DPC_AUX_CH_DATA1) + (i) * 4) /* 5 registers */ +#define PCH_DP_AUX_CH_CTL(aux_ch) _MMIO_PORT((aux_ch) - AUX_CH_B, _PCH_DPB_AUX_CH_CTL, _PCH_DPC_AUX_CH_CTL) +#define PCH_DP_AUX_CH_DATA(aux_ch, i) _MMIO(_PORT((aux_ch) - AUX_CH_B, _PCH_DPB_AUX_CH_DATA1, _PCH_DPC_AUX_CH_DATA1) + (i) * 4) /* 5 registers */ /* CPT */ #define PORT_TRANS_A_SEL_CPT 0 @@ -7824,9 +8023,13 @@ enum { #define VLV_GTLC_PW_RENDER_STATUS_MASK (1 << 7) #define FORCEWAKE_MT _MMIO(0xa188) /* multi-threaded */ #define FORCEWAKE_MEDIA_GEN9 _MMIO(0xa270) +#define FORCEWAKE_MEDIA_VDBOX_GEN11(n) _MMIO(0xa540 + (n) * 4) +#define FORCEWAKE_MEDIA_VEBOX_GEN11(n) _MMIO(0xa560 + (n) * 4) #define FORCEWAKE_RENDER_GEN9 _MMIO(0xa278) #define FORCEWAKE_BLITTER_GEN9 _MMIO(0xa188) #define FORCEWAKE_ACK_MEDIA_GEN9 _MMIO(0x0D88) +#define FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(n) _MMIO(0x0D50 + (n) * 4) +#define FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(n) _MMIO(0x0D70 + (n) * 4) #define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0x0D84) #define FORCEWAKE_ACK_BLITTER_GEN9 _MMIO(0x130044) #define FORCEWAKE_KERNEL BIT(0) @@ -8048,6 +8251,7 @@ enum { #define GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT 8 #define GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT 16 #define GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT 24 +#define SKL_PCODE_LOAD_HDCP_KEYS 0x5 #define SKL_PCODE_CDCLK_CONTROL 0x7 #define SKL_CDCLK_PREPARE_FOR_CHANGE 0x3 #define SKL_CDCLK_READY_FOR_CHANGE 0x1 @@ -8350,6 +8554,101 @@ enum skl_power_gate { #define SKL_PW_TO_PG(pw) ((pw) - SKL_DISP_PW_1 + SKL_PG1) #define SKL_FUSE_PG_DIST_STATUS(pg) (1 << (27 - (pg))) +#define _CNL_AUX_REG_IDX(pw) ((pw) - 9) +#define _CNL_AUX_ANAOVRD1_B 0x162250 +#define _CNL_AUX_ANAOVRD1_C 0x162210 +#define _CNL_AUX_ANAOVRD1_D 0x1622D0 +#define _CNL_AUX_ANAOVRD1_F 0x162A90 +#define CNL_AUX_ANAOVRD1(pw) _MMIO(_PICK(_CNL_AUX_REG_IDX(pw), \ + _CNL_AUX_ANAOVRD1_B, \ + _CNL_AUX_ANAOVRD1_C, \ + _CNL_AUX_ANAOVRD1_D, \ + _CNL_AUX_ANAOVRD1_F)) +#define CNL_AUX_ANAOVRD1_ENABLE (1<<16) +#define CNL_AUX_ANAOVRD1_LDO_BYPASS (1<<23) + +/* HDCP Key Registers */ +#define HDCP_KEY_CONF _MMIO(0x66c00) +#define HDCP_AKSV_SEND_TRIGGER BIT(31) +#define HDCP_CLEAR_KEYS_TRIGGER BIT(30) +#define HDCP_KEY_LOAD_TRIGGER BIT(8) +#define HDCP_KEY_STATUS _MMIO(0x66c04) +#define HDCP_FUSE_IN_PROGRESS BIT(7) +#define HDCP_FUSE_ERROR BIT(6) +#define HDCP_FUSE_DONE BIT(5) +#define HDCP_KEY_LOAD_STATUS BIT(1) +#define HDCP_KEY_LOAD_DONE BIT(0) +#define HDCP_AKSV_LO _MMIO(0x66c10) +#define HDCP_AKSV_HI _MMIO(0x66c14) + +/* HDCP Repeater Registers */ +#define HDCP_REP_CTL _MMIO(0x66d00) +#define HDCP_DDIB_REP_PRESENT BIT(30) +#define HDCP_DDIA_REP_PRESENT BIT(29) +#define HDCP_DDIC_REP_PRESENT BIT(28) +#define HDCP_DDID_REP_PRESENT BIT(27) +#define HDCP_DDIF_REP_PRESENT BIT(26) +#define HDCP_DDIE_REP_PRESENT BIT(25) +#define HDCP_DDIB_SHA1_M0 (1 << 20) +#define HDCP_DDIA_SHA1_M0 (2 << 20) +#define HDCP_DDIC_SHA1_M0 (3 << 20) +#define HDCP_DDID_SHA1_M0 (4 << 20) +#define HDCP_DDIF_SHA1_M0 (5 << 20) +#define HDCP_DDIE_SHA1_M0 (6 << 20) /* Bspec says 5? 
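(the code keeps 6 here; worth re-checking the current Bspec table before relying on the DDI E select value)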
*/ +#define HDCP_SHA1_BUSY BIT(16) +#define HDCP_SHA1_READY BIT(17) +#define HDCP_SHA1_COMPLETE BIT(18) +#define HDCP_SHA1_V_MATCH BIT(19) +#define HDCP_SHA1_TEXT_32 (1 << 1) +#define HDCP_SHA1_COMPLETE_HASH (2 << 1) +#define HDCP_SHA1_TEXT_24 (4 << 1) +#define HDCP_SHA1_TEXT_16 (5 << 1) +#define HDCP_SHA1_TEXT_8 (6 << 1) +#define HDCP_SHA1_TEXT_0 (7 << 1) +#define HDCP_SHA_V_PRIME_H0 _MMIO(0x66d04) +#define HDCP_SHA_V_PRIME_H1 _MMIO(0x66d08) +#define HDCP_SHA_V_PRIME_H2 _MMIO(0x66d0C) +#define HDCP_SHA_V_PRIME_H3 _MMIO(0x66d10) +#define HDCP_SHA_V_PRIME_H4 _MMIO(0x66d14) +#define HDCP_SHA_V_PRIME(h) _MMIO((0x66d04 + h * 4)) +#define HDCP_SHA_TEXT _MMIO(0x66d18) + +/* HDCP Auth Registers */ +#define _PORTA_HDCP_AUTHENC 0x66800 +#define _PORTB_HDCP_AUTHENC 0x66500 +#define _PORTC_HDCP_AUTHENC 0x66600 +#define _PORTD_HDCP_AUTHENC 0x66700 +#define _PORTE_HDCP_AUTHENC 0x66A00 +#define _PORTF_HDCP_AUTHENC 0x66900 +#define _PORT_HDCP_AUTHENC(port, x) _MMIO(_PICK(port, \ + _PORTA_HDCP_AUTHENC, \ + _PORTB_HDCP_AUTHENC, \ + _PORTC_HDCP_AUTHENC, \ + _PORTD_HDCP_AUTHENC, \ + _PORTE_HDCP_AUTHENC, \ + _PORTF_HDCP_AUTHENC) + x) +#define PORT_HDCP_CONF(port) _PORT_HDCP_AUTHENC(port, 0x0) +#define HDCP_CONF_CAPTURE_AN BIT(0) +#define HDCP_CONF_AUTH_AND_ENC (BIT(1) | BIT(0)) +#define PORT_HDCP_ANINIT(port) _PORT_HDCP_AUTHENC(port, 0x4) +#define PORT_HDCP_ANLO(port) _PORT_HDCP_AUTHENC(port, 0x8) +#define PORT_HDCP_ANHI(port) _PORT_HDCP_AUTHENC(port, 0xC) +#define PORT_HDCP_BKSVLO(port) _PORT_HDCP_AUTHENC(port, 0x10) +#define PORT_HDCP_BKSVHI(port) _PORT_HDCP_AUTHENC(port, 0x14) +#define PORT_HDCP_RPRIME(port) _PORT_HDCP_AUTHENC(port, 0x18) +#define PORT_HDCP_STATUS(port) _PORT_HDCP_AUTHENC(port, 0x1C) +#define HDCP_STATUS_STREAM_A_ENC BIT(31) +#define HDCP_STATUS_STREAM_B_ENC BIT(30) +#define HDCP_STATUS_STREAM_C_ENC BIT(29) +#define HDCP_STATUS_STREAM_D_ENC BIT(28) +#define HDCP_STATUS_AUTH BIT(21) +#define HDCP_STATUS_ENC BIT(20) +#define HDCP_STATUS_RI_MATCH BIT(19) +#define HDCP_STATUS_R0_READY BIT(18) +#define HDCP_STATUS_AN_READY BIT(17) +#define HDCP_STATUS_CIPHER BIT(16) +#define HDCP_STATUS_FRAME_CNT(x) ((x >> 8) & 0xff) + /* Per-pipe DDI Function Control */ #define _TRANS_DDI_FUNC_CTL_A 0x60400 #define _TRANS_DDI_FUNC_CTL_B 0x61400 @@ -8381,6 +8680,7 @@ enum skl_power_gate { #define TRANS_DDI_EDP_INPUT_A_ONOFF (4<<12) #define TRANS_DDI_EDP_INPUT_B_ONOFF (5<<12) #define TRANS_DDI_EDP_INPUT_C_ONOFF (6<<12) +#define TRANS_DDI_HDCP_SIGNALLING (1<<9) #define TRANS_DDI_DP_VC_PAYLOAD_ALLOC (1<<8) #define TRANS_DDI_HDMI_SCRAMBLER_CTS_ENABLE (1<<7) #define TRANS_DDI_HDMI_SCRAMBLER_RESET_FREQ (1<<6) @@ -8579,20 +8879,21 @@ enum skl_power_gate { /* CDCLK_CTL */ #define CDCLK_CTL _MMIO(0x46000) -#define CDCLK_FREQ_SEL_MASK (3<<26) -#define CDCLK_FREQ_450_432 (0<<26) -#define CDCLK_FREQ_540 (1<<26) -#define CDCLK_FREQ_337_308 (2<<26) -#define CDCLK_FREQ_675_617 (3<<26) -#define BXT_CDCLK_CD2X_DIV_SEL_MASK (3<<22) -#define BXT_CDCLK_CD2X_DIV_SEL_1 (0<<22) -#define BXT_CDCLK_CD2X_DIV_SEL_1_5 (1<<22) -#define BXT_CDCLK_CD2X_DIV_SEL_2 (2<<22) -#define BXT_CDCLK_CD2X_DIV_SEL_4 (3<<22) -#define BXT_CDCLK_CD2X_PIPE(pipe) ((pipe)<<20) -#define CDCLK_DIVMUX_CD_OVERRIDE (1<<19) +#define CDCLK_FREQ_SEL_MASK (3 << 26) +#define CDCLK_FREQ_450_432 (0 << 26) +#define CDCLK_FREQ_540 (1 << 26) +#define CDCLK_FREQ_337_308 (2 << 26) +#define CDCLK_FREQ_675_617 (3 << 26) +#define BXT_CDCLK_CD2X_DIV_SEL_MASK (3 << 22) +#define BXT_CDCLK_CD2X_DIV_SEL_1 (0 << 22) +#define BXT_CDCLK_CD2X_DIV_SEL_1_5 (1 << 22) +#define 
BXT_CDCLK_CD2X_DIV_SEL_2 (2 << 22) +#define BXT_CDCLK_CD2X_DIV_SEL_4 (3 << 22) +#define BXT_CDCLK_CD2X_PIPE(pipe) ((pipe) << 20) +#define CDCLK_DIVMUX_CD_OVERRIDE (1 << 19) #define BXT_CDCLK_CD2X_PIPE_NONE BXT_CDCLK_CD2X_PIPE(3) -#define BXT_CDCLK_SSA_PRECHARGE_ENABLE (1<<16) +#define ICL_CDCLK_CD2X_PIPE_NONE (7 << 19) +#define BXT_CDCLK_SSA_PRECHARGE_ENABLE (1 << 16) #define CDCLK_FREQ_DECIMAL_MASK (0x7ff) /* LCPLL_CTL */ @@ -8663,10 +8964,12 @@ enum skl_power_gate { * CNL Clocks */ #define DPCLKA_CFGCR0 _MMIO(0x6C200) -#define DPCLKA_CFGCR0_DDI_CLK_OFF(port) (1 << ((port)+10)) -#define DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port) (3 << ((port)*2)) -#define DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port) ((port)*2) -#define DPCLKA_CFGCR0_DDI_CLK_SEL(pll, port) ((pll) << ((port)*2)) +#define DPCLKA_CFGCR0_DDI_CLK_OFF(port) (1 << ((port) == PORT_F ? 23 : \ + (port)+10)) +#define DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port) ((port) == PORT_F ? 21 : \ + (port)*2) +#define DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port) (3 << DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port)) +#define DPCLKA_CFGCR0_DDI_CLK_SEL(pll, port) ((pll) << DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port)) /* CNL PLL */ #define DPLL0_ENABLE 0x46010 @@ -8762,6 +9065,7 @@ enum skl_power_gate { #define SFUSE_STRAP_RAW_FREQUENCY (1<<8) #define SFUSE_STRAP_DISPLAY_DISABLED (1<<7) #define SFUSE_STRAP_CRT_DISABLED (1<<6) +#define SFUSE_STRAP_DDIF_DETECTED (1<<3) #define SFUSE_STRAP_DDIB_DETECTED (1<<2) #define SFUSE_STRAP_DDIC_DETECTED (1<<1) #define SFUSE_STRAP_DDID_DETECTED (1<<0) @@ -9498,4 +9802,10 @@ enum skl_power_gate { #define MMCD_PCLA (1 << 31) #define MMCD_HOTSPOT_EN (1 << 27) +#define _ICL_PHY_MISC_A 0x64C00 +#define _ICL_PHY_MISC_B 0x64C04 +#define ICL_PHY_MISC(port) _MMIO_PORT(port, _ICL_PHY_MISC_A, \ + _ICL_PHY_MISC_B) +#define ICL_PHY_MISC_DE_IO_COMP_PWR_DOWN (1 << 23) + #endif /* _I915_REG_H_ */ diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_request.c index a3e93d46316a..282f57630cc1 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -37,7 +37,8 @@ static const char *i915_fence_get_driver_name(struct dma_fence *fence) static const char *i915_fence_get_timeline_name(struct dma_fence *fence) { - /* The timeline struct (as part of the ppgtt underneath a context) + /* + * The timeline struct (as part of the ppgtt underneath a context) * may be freed when the request is no longer in use by the GPU. * We could extend the life of a context to beyond that of all * fences, possibly keeping the hw resource around indefinitely, @@ -53,7 +54,7 @@ static const char *i915_fence_get_timeline_name(struct dma_fence *fence) static bool i915_fence_signaled(struct dma_fence *fence) { - return i915_gem_request_completed(to_request(fence)); + return i915_request_completed(to_request(fence)); } static bool i915_fence_enable_signaling(struct dma_fence *fence) @@ -69,22 +70,23 @@ static signed long i915_fence_wait(struct dma_fence *fence, bool interruptible, signed long timeout) { - return i915_wait_request(to_request(fence), interruptible, timeout); + return i915_request_wait(to_request(fence), interruptible, timeout); } static void i915_fence_release(struct dma_fence *fence) { - struct drm_i915_gem_request *req = to_request(fence); + struct i915_request *rq = to_request(fence); - /* The request is put onto a RCU freelist (i.e. the address + /* + * The request is put onto a RCU freelist (i.e. the address * is immediately reused), mark the fences as being freed now. 
* Otherwise the debugobjects for the fences are only marked as * freed when the slab cache itself is freed, and so we would get * caught trying to reuse dead objects. */ - i915_sw_fence_fini(&req->submit); + i915_sw_fence_fini(&rq->submit); - kmem_cache_free(req->i915->requests, req); + kmem_cache_free(rq->i915->requests, rq); } const struct dma_fence_ops i915_fence_ops = { @@ -97,7 +99,7 @@ const struct dma_fence_ops i915_fence_ops = { }; static inline void -i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) +i915_request_remove_from_client(struct i915_request *request) { struct drm_i915_file_private *file_priv; @@ -161,12 +163,16 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) GEM_BUG_ON(!list_empty(&pt->link)); - /* Everyone we depended upon (the fences we wait to be signaled) + /* + * Everyone we depended upon (the fences we wait to be signaled) * should retire before us and remove themselves from our list. * However, retirement is run independently on each timeline and * so we may be called out-of-order. */ list_for_each_entry_safe(dep, next, &pt->signalers_list, signal_link) { + GEM_BUG_ON(!i915_priotree_signaled(dep->signaler)); + GEM_BUG_ON(!list_empty(&dep->dfs_link)); + list_del(&dep->wait_link); if (dep->flags & I915_DEPENDENCY_ALLOC) i915_dependency_free(i915, dep); @@ -174,6 +180,9 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) /* Remove ourselves from everyone who depends upon us */ list_for_each_entry_safe(dep, next, &pt->waiters_list, wait_link) { + GEM_BUG_ON(dep->signaler != pt); + GEM_BUG_ON(!list_empty(&dep->dfs_link)); + list_del(&dep->signal_link); if (dep->flags & I915_DEPENDENCY_ALLOC) i915_dependency_free(i915, dep); @@ -208,9 +217,9 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) struct intel_timeline *tl = engine->timeline; if (!i915_seqno_passed(seqno, tl->seqno)) { - /* spin until threads are complete */ - while (intel_breadcrumbs_busy(engine)) - cond_resched(); + /* Flush any waiters before we reuse the seqno */ + intel_engine_disarm_breadcrumbs(engine); + GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals)); } /* Check we are idle before we fiddle with hw state! 
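(in practice this path is normally reached only via the debugfs next_seqno interface, under struct_mutex)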
*/ @@ -231,17 +240,15 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *i915 = to_i915(dev); - lockdep_assert_held(&dev_priv->drm.struct_mutex); + lockdep_assert_held(&i915->drm.struct_mutex); if (seqno == 0) return -EINVAL; - /* HWS page needs to be set less than what we - * will inject to ring - */ - return reset_all_global_seqno(dev_priv, seqno - 1); + /* HWS page needs to be set less than what we will inject to ring */ + return reset_all_global_seqno(i915, seqno - 1); } static void mark_busy(struct drm_i915_private *i915) @@ -267,6 +274,8 @@ static void mark_busy(struct drm_i915_private *i915) intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); i915->gt.awake = true; + if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ + i915->gt.epoch = 1; intel_enable_gt_powersave(i915); i915_update_gfx_val(i915); @@ -322,16 +331,17 @@ static void unreserve_engine(struct intel_engine_cs *engine) } void i915_gem_retire_noop(struct i915_gem_active *active, - struct drm_i915_gem_request *request) + struct i915_request *request) { /* Space left intentionally blank */ } -static void advance_ring(struct drm_i915_gem_request *request) +static void advance_ring(struct i915_request *request) { unsigned int tail; - /* We know the GPU must have read the request to have + /* + * We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position * of tail of the request to update the last known position * of the GPU head. @@ -340,7 +350,8 @@ static void advance_ring(struct drm_i915_gem_request *request) * completion order. */ if (list_is_last(&request->ring_link, &request->ring->request_list)) { - /* We may race here with execlists resubmitting this request + /* + * We may race here with execlists resubmitting this request * as we retire it. The resubmission will move the ring->tail * forwards (to request->wa_tail). We either read the * current value that was written to hw, or the value that @@ -356,30 +367,30 @@ static void advance_ring(struct drm_i915_gem_request *request) request->ring->head = tail; } -static void free_capture_list(struct drm_i915_gem_request *request) +static void free_capture_list(struct i915_request *request) { - struct i915_gem_capture_list *capture; + struct i915_capture_list *capture; capture = request->capture_list; while (capture) { - struct i915_gem_capture_list *next = capture->next; + struct i915_capture_list *next = capture->next; kfree(capture); capture = next; } } -static void i915_gem_request_retire(struct drm_i915_gem_request *request) +static void i915_request_retire(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; struct i915_gem_active *active, *next; lockdep_assert_held(&request->i915->drm.struct_mutex); GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); - GEM_BUG_ON(!i915_gem_request_completed(request)); + GEM_BUG_ON(!i915_request_completed(request)); GEM_BUG_ON(!request->i915->gt.active_requests); - trace_i915_gem_request_retire(request); + trace_i915_request_retire(request); spin_lock_irq(&engine->timeline->lock); list_del_init(&request->link); @@ -390,7 +401,8 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) free_capture_list(request); - /* Walk through the active list, calling retire on each. This allows + /* + * Walk through the active list, calling retire on each. 
This allows * objects to track their GPU activity and mark themselves as idle * when their *last* active request is completed (updating state * tracking lists for eviction, active references for GEM, etc). @@ -400,7 +412,8 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) * the node after the callback). */ list_for_each_entry_safe(active, next, &request->active_list, link) { - /* In microbenchmarks or focusing upon time inside the kernel, + /* + * In microbenchmarks or focusing upon time inside the kernel, * we may spend an inordinate amount of time simply handling * the retirement of requests and processing their callbacks. * Of which, this loop itself is particularly hot due to the @@ -417,15 +430,16 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) active->retire(active, request); } - i915_gem_request_remove_from_client(request); + i915_request_remove_from_client(request); /* Retirement decays the ban score as it is a sign of ctx progress */ atomic_dec_if_positive(&request->ctx->ban_score); - /* The backing object for the context is done after switching to the + /* + * The backing object for the context is done after switching to the * *next* context. Therefore we cannot retire the previous context until * the next context has already started running. However, since we - * cannot take the required locks at i915_gem_request_submit() we + * cannot take the required locks at i915_request_submit() we * defer the unpinning of the active context to now, retirement of * the subsequent request. */ @@ -434,32 +448,37 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) engine->last_retired_context = request->ctx; spin_lock_irq(&request->lock); - if (request->waitboost) + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags)) + dma_fence_signal_locked(&request->fence); + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) + intel_engine_cancel_signaling(request); + if (request->waitboost) { + GEM_BUG_ON(!atomic_read(&request->i915->gt_pm.rps.num_waiters)); atomic_dec(&request->i915->gt_pm.rps.num_waiters); - dma_fence_signal_locked(&request->fence); + } spin_unlock_irq(&request->lock); i915_priotree_fini(request->i915, &request->priotree); - i915_gem_request_put(request); + i915_request_put(request); } -void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) +void i915_request_retire_upto(struct i915_request *rq) { - struct intel_engine_cs *engine = req->engine; - struct drm_i915_gem_request *tmp; + struct intel_engine_cs *engine = rq->engine; + struct i915_request *tmp; - lockdep_assert_held(&req->i915->drm.struct_mutex); - GEM_BUG_ON(!i915_gem_request_completed(req)); + lockdep_assert_held(&rq->i915->drm.struct_mutex); + GEM_BUG_ON(!i915_request_completed(rq)); - if (list_empty(&req->link)) + if (list_empty(&rq->link)) return; do { tmp = list_first_entry(&engine->timeline->requests, typeof(*tmp), link); - i915_gem_request_retire(tmp); - } while (tmp != req); + i915_request_retire(tmp); + } while (tmp != rq); } static u32 timeline_get_seqno(struct intel_timeline *tl) @@ -467,7 +486,7 @@ static u32 timeline_get_seqno(struct intel_timeline *tl) return ++tl->seqno; } -void __i915_gem_request_submit(struct drm_i915_gem_request *request) +void __i915_request_submit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; struct intel_timeline *timeline; @@ -499,12 +518,12 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) 
list_move_tail(&request->link, &timeline->requests);
 spin_unlock(&request->timeline->lock);
- trace_i915_gem_request_execute(request);
+ trace_i915_request_execute(request);
 wake_up_all(&request->execute);
}
-void i915_gem_request_submit(struct drm_i915_gem_request *request)
+void i915_request_submit(struct i915_request *request)
{
 struct intel_engine_cs *engine = request->engine;
 unsigned long flags;
@@ -512,12 +531,12 @@ void i915_gem_request_submit(struct drm_i915_gem_request *request)
 /* Will be called from irq-context when using foreign fences. */
 spin_lock_irqsave(&engine->timeline->lock, flags);
- __i915_gem_request_submit(request);
+ __i915_request_submit(request);
 spin_unlock_irqrestore(&engine->timeline->lock, flags);
}
-void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
+void __i915_request_unsubmit(struct i915_request *request)
{
 struct intel_engine_cs *engine = request->engine;
 struct intel_timeline *timeline;
@@ -525,11 +544,14 @@ void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
 GEM_BUG_ON(!irqs_disabled());
 lockdep_assert_held(&engine->timeline->lock);
- /* Only unwind in reverse order, required so that the per-context list
+ /*
+ * Only unwind in reverse order, required so that the per-context list
 * is kept in seqno/ring order.
 */
 GEM_BUG_ON(!request->global_seqno);
 GEM_BUG_ON(request->global_seqno != engine->timeline->seqno);
+ GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine),
+ request->global_seqno));
 engine->timeline->seqno--;
 /* We may be recursing from the signal callback of another i915 fence */
@@ -547,15 +569,16 @@ void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
 list_move(&request->link, &timeline->requests);
 spin_unlock(&timeline->lock);
- /* We don't need to wake_up any waiters on request->execute, they
+ /*
+ * We don't need to wake_up any waiters on request->execute, they
 * will get woken by any other event or us re-adding this request
- * to the engine timeline (__i915_gem_request_submit()). The waiters
+ * to the engine timeline (__i915_request_submit()). The waiters
 * should be quite adept at finding that the request now has a new
 * global_seqno to the one they went to sleep on.
 */
}
-void i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
+void i915_request_unsubmit(struct i915_request *request)
{
 struct intel_engine_cs *engine = request->engine;
 unsigned long flags;
@@ -563,7 +586,7 @@ void i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
 /* Will be called from irq-context when using foreign fences. */
 spin_lock_irqsave(&engine->timeline->lock, flags);
- __i915_gem_request_unsubmit(request);
+ __i915_request_unsubmit(request);
 spin_unlock_irqrestore(&engine->timeline->lock, flags);
}
@@ -571,18 +594,19 @@ void i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
static int __i915_sw_fence_call
submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
- struct drm_i915_gem_request *request =
+ struct i915_request *request =
 container_of(fence, typeof(*request), submit);
 switch (state) {
 case FENCE_COMPLETE:
- trace_i915_gem_request_submit(request);
+ trace_i915_request_submit(request);
 /*
- * We need to serialize use of the submit_request() callback with its
- * hotplugging performed during an emergency i915_gem_set_wedged().
- * We use the RCU mechanism to mark the critical section in order to
- * force i915_gem_set_wedged() to wait until the submit_request() is
- * completed before proceeding.
+ * We need to serialize use of the submit_request() callback + * with its hotplugging performed during an emergency + * i915_gem_set_wedged(). We use the RCU mechanism to mark the + * critical section in order to force i915_gem_set_wedged() to + * wait until the submit_request() is completed before + * proceeding. */ rcu_read_lock(); request->engine->submit_request(request); @@ -590,7 +614,7 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) break; case FENCE_FREE: - i915_gem_request_put(request); + i915_request_put(request); break; } @@ -598,7 +622,7 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) } /** - * i915_gem_request_alloc - allocate a request structure + * i915_request_alloc - allocate a request structure * * @engine: engine that we wish to issue the request on. * @ctx: context that the request will be associated with. @@ -606,31 +630,32 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) * Returns a pointer to the allocated request if successful, * or an error code if not. */ -struct drm_i915_gem_request * -i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) +struct i915_request * +i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct drm_i915_private *dev_priv = engine->i915; - struct drm_i915_gem_request *req; + struct drm_i915_private *i915 = engine->i915; + struct i915_request *rq; struct intel_ring *ring; int ret; - lockdep_assert_held(&dev_priv->drm.struct_mutex); + lockdep_assert_held(&i915->drm.struct_mutex); /* * Preempt contexts are reserved for exclusive use to inject a * preemption context switch. They are never to be used for any trivial * request! */ - GEM_BUG_ON(ctx == dev_priv->preempt_context); + GEM_BUG_ON(ctx == i915->preempt_context); - /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report + /* + * ABI: Before userspace accesses the GPU (e.g. execbuffer), report * EIO if the GPU is already wedged. */ - if (i915_terminally_wedged(&dev_priv->gpu_error)) + if (i915_terminally_wedged(&i915->gpu_error)) return ERR_PTR(-EIO); - /* Pinning the contexts may generate requests in order to acquire + /* + * Pinning the contexts may generate requests in order to acquire * GGTT space, so do this first before we reserve a seqno for * ourselves. */ @@ -648,12 +673,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, goto err_unreserve; /* Move the oldest request to the slab-cache (if not in use!) */ - req = list_first_entry_or_null(&engine->timeline->requests, - typeof(*req), link); - if (req && i915_gem_request_completed(req)) - i915_gem_request_retire(req); + rq = list_first_entry_or_null(&engine->timeline->requests, + typeof(*rq), link); + if (rq && i915_request_completed(rq)) + i915_request_retire(rq); - /* Beware: Dragons be flying overhead. + /* + * Beware: Dragons be flying overhead. * * We use RCU to look up requests in flight. The lookups may * race with the request being allocated from the slab freelist. @@ -681,61 +707,73 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, * * Do not use kmem_cache_zalloc() here! 
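 *
 * For illustration only (a sketch, not code from this patch), the
 * RCU-protected lookup that this dance guards looks roughly like:
 *
 *	rcu_read_lock();
 *	rq = rcu_dereference(active->request);
 *	if (rq)
 *		rq = i915_request_get_rcu(rq);
 *	rcu_read_unlock();
 *
 * where i915_request_get_rcu() returns NULL if the refcount has
 * already dropped to zero, and even on success the caller must then
 * recheck that it acquired the very request it sampled, as the slab
 * may have recycled the memory for a brand new request in between.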
*/ - req = kmem_cache_alloc(dev_priv->requests, - GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); - if (unlikely(!req)) { + rq = kmem_cache_alloc(i915->requests, + GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); + if (unlikely(!rq)) { /* Ratelimit ourselves to prevent oom from malicious clients */ - ret = i915_gem_wait_for_idle(dev_priv, + ret = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED | I915_WAIT_INTERRUPTIBLE); if (ret) goto err_unreserve; - req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL); - if (!req) { + /* + * We've forced the client to stall and catch up with whatever + * backlog there might have been. As we are assuming that we + * caused the mempressure, now is an opportune time to + * recover as much memory from the request pool as is possible. + * Having already penalized the client to stall, we spend + * a little extra time to re-optimise page allocation. + */ + kmem_cache_shrink(i915->requests); + rcu_barrier(); /* Recover the TYPESAFE_BY_RCU pages */ + + rq = kmem_cache_alloc(i915->requests, GFP_KERNEL); + if (!rq) { ret = -ENOMEM; goto err_unreserve; } } - req->timeline = i915_gem_context_lookup_timeline(ctx, engine); - GEM_BUG_ON(req->timeline == engine->timeline); + rq->timeline = i915_gem_context_lookup_timeline(ctx, engine); + GEM_BUG_ON(rq->timeline == engine->timeline); - spin_lock_init(&req->lock); - dma_fence_init(&req->fence, + spin_lock_init(&rq->lock); + dma_fence_init(&rq->fence, &i915_fence_ops, - &req->lock, - req->timeline->fence_context, - timeline_get_seqno(req->timeline)); + &rq->lock, + rq->timeline->fence_context, + timeline_get_seqno(rq->timeline)); /* We bump the ref for the fence chain */ - i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify); - init_waitqueue_head(&req->execute); + i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); + init_waitqueue_head(&rq->execute); - i915_priotree_init(&req->priotree); + i915_priotree_init(&rq->priotree); - INIT_LIST_HEAD(&req->active_list); - req->i915 = dev_priv; - req->engine = engine; - req->ctx = ctx; - req->ring = ring; + INIT_LIST_HEAD(&rq->active_list); + rq->i915 = i915; + rq->engine = engine; + rq->ctx = ctx; + rq->ring = ring; /* No zalloc, must clear what we need by hand */ - req->global_seqno = 0; - req->file_priv = NULL; - req->batch = NULL; - req->capture_list = NULL; - req->waitboost = false; + rq->global_seqno = 0; + rq->signaling.wait.seqno = 0; + rq->file_priv = NULL; + rq->batch = NULL; + rq->capture_list = NULL; + rq->waitboost = false; /* * Reserve space in the ring buffer for all the commands required to * eventually emit this request. This is to guarantee that the - * i915_add_request() call can't fail. Note that the reserve may need + * i915_request_add() call can't fail. Note that the reserve may need * to be redone if the request is not actually submitted straight * away, e.g. because a GPU scheduler has deferred it. */ - req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; - GEM_BUG_ON(req->reserved_space < engine->emit_breadcrumb_sz); + rq->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; + GEM_BUG_ON(rq->reserved_space < engine->emit_breadcrumb_sz); /* * Record the position of the start of the request so that @@ -743,30 +781,30 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, * GPU processing the request, we never over-estimate the * position of the head. */ - req->head = req->ring->emit; + rq->head = rq->ring->emit; /* Unconditionally invalidate GPU caches and TLBs. 
*/ - ret = engine->emit_flush(req, EMIT_INVALIDATE); + ret = engine->emit_flush(rq, EMIT_INVALIDATE); if (ret) goto err_unwind; - ret = engine->request_alloc(req); + ret = engine->request_alloc(rq); if (ret) goto err_unwind; /* Check that we didn't interrupt ourselves with a new request */ - GEM_BUG_ON(req->timeline->seqno != req->fence.seqno); - return req; + GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); + return rq; err_unwind: - req->ring->emit = req->head; + rq->ring->emit = rq->head; /* Make sure we didn't add ourselves to external state before freeing */ - GEM_BUG_ON(!list_empty(&req->active_list)); - GEM_BUG_ON(!list_empty(&req->priotree.signalers_list)); - GEM_BUG_ON(!list_empty(&req->priotree.waiters_list)); + GEM_BUG_ON(!list_empty(&rq->active_list)); + GEM_BUG_ON(!list_empty(&rq->priotree.signalers_list)); + GEM_BUG_ON(!list_empty(&rq->priotree.waiters_list)); - kmem_cache_free(dev_priv->requests, req); + kmem_cache_free(i915->requests, rq); err_unreserve: unreserve_engine(engine); err_unpin: @@ -775,15 +813,14 @@ err_unpin: } static int -i915_gem_request_await_request(struct drm_i915_gem_request *to, - struct drm_i915_gem_request *from) +i915_request_await_request(struct i915_request *to, struct i915_request *from) { int ret; GEM_BUG_ON(to == from); GEM_BUG_ON(to->timeline == from->timeline); - if (i915_gem_request_completed(from)) + if (i915_request_completed(from)) return 0; if (to->engine->schedule) { @@ -806,7 +843,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, GEM_BUG_ON(!from->engine->semaphore.signal); - seqno = i915_gem_request_global_seqno(from); + seqno = i915_request_global_seqno(from); if (!seqno) goto await_dma_fence; @@ -830,14 +867,14 @@ await_dma_fence: } int -i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, - struct dma_fence *fence) +i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) { struct dma_fence **child = &fence; unsigned int nchild = 1; int ret; - /* Note that if the fence-array was created in signal-on-any mode, + /* + * Note that if the fence-array was created in signal-on-any mode, * we should *not* decompose it into its individual fences. However, * we don't currently store which mode the fence-array is operating * in. Fortunately, the only user of signal-on-any is private to @@ -859,40 +896,39 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, /* * Requests on the same timeline are explicitly ordered, along - * with their dependencies, by i915_add_request() which ensures + * with their dependencies, by i915_request_add() which ensures * that requests are submitted in-order through each ring. 
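 *
 * (So, for example, two back-to-back execbufs on the same context and
 * engine need no explicit await between them; the ring ordering
 * already serialises them.)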
*/ - if (fence->context == req->fence.context) + if (fence->context == rq->fence.context) continue; /* Squash repeated waits to the same timelines */ - if (fence->context != req->i915->mm.unordered_timeline && - intel_timeline_sync_is_later(req->timeline, fence)) + if (fence->context != rq->i915->mm.unordered_timeline && + intel_timeline_sync_is_later(rq->timeline, fence)) continue; if (dma_fence_is_i915(fence)) - ret = i915_gem_request_await_request(req, - to_request(fence)); + ret = i915_request_await_request(rq, to_request(fence)); else - ret = i915_sw_fence_await_dma_fence(&req->submit, fence, + ret = i915_sw_fence_await_dma_fence(&rq->submit, fence, I915_FENCE_TIMEOUT, I915_FENCE_GFP); if (ret < 0) return ret; /* Record the latest fence used against each timeline */ - if (fence->context != req->i915->mm.unordered_timeline) - intel_timeline_sync_set(req->timeline, fence); + if (fence->context != rq->i915->mm.unordered_timeline) + intel_timeline_sync_set(rq->timeline, fence); } while (--nchild); return 0; } /** - * i915_gem_request_await_object - set this request to (async) wait upon a bo - * + * i915_request_await_object - set this request to (async) wait upon a bo * @to: request we are wishing to use * @obj: object which may be in use on another ring. + * @write: whether the wait is on behalf of a writer * * This code is meant to abstract object synchronization with the GPU. * Conceptually we serialise writes between engines inside the GPU. @@ -909,9 +945,9 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, * Returns 0 if successful, else propagates up the lower layer error. */ int -i915_gem_request_await_object(struct drm_i915_gem_request *to, - struct drm_i915_gem_object *obj, - bool write) +i915_request_await_object(struct i915_request *to, + struct drm_i915_gem_object *obj, + bool write) { struct dma_fence *excl; int ret = 0; @@ -926,7 +962,7 @@ i915_gem_request_await_object(struct drm_i915_gem_request *to, return ret; for (i = 0; i < count; i++) { - ret = i915_gem_request_await_dma_fence(to, shared[i]); + ret = i915_request_await_dma_fence(to, shared[i]); if (ret) break; @@ -942,7 +978,7 @@ i915_gem_request_await_object(struct drm_i915_gem_request *to, if (excl) { if (ret == 0) - ret = i915_gem_request_await_dma_fence(to, excl); + ret = i915_request_await_dma_fence(to, excl); dma_fence_put(excl); } @@ -955,20 +991,21 @@ i915_gem_request_await_object(struct drm_i915_gem_request *to, * request is not being tracked for completion but the work itself is * going to happen on the hardware. This would be a Bad Thing(tm). */ -void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) +void __i915_request_add(struct i915_request *request, bool flush_caches) { struct intel_engine_cs *engine = request->engine; struct intel_ring *ring = request->ring; struct intel_timeline *timeline = request->timeline; - struct drm_i915_gem_request *prev; + struct i915_request *prev; u32 *cs; int err; lockdep_assert_held(&request->i915->drm.struct_mutex); - trace_i915_gem_request_add(request); + trace_i915_request_add(request); - /* Make sure that no request gazumped us - if it was allocated after - * our i915_gem_request_alloc() and called __i915_add_request() before + /* + * Make sure that no request gazumped us - if it was allocated after + * our i915_request_alloc() and called __i915_request_add() before * us, the timeline will hold its seqno which is later than ours. 
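 * (The GEM_BUG_ON on timeline->seqno just below asserts exactly this
 * invariant.)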
*/
 GEM_BUG_ON(timeline->seqno != request->fence.seqno);
@@ -994,7 +1031,8 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 WARN(err, "engine->emit_flush() failed: %d!\n", err);
 }
- /* Record the position of the start of the breadcrumb so that
+ /*
+ * Record the position of the start of the breadcrumb so that
 * should we detect the updated seqno part-way through the
 * GPU processing the request, we never over-estimate the
 * position of the ring's HEAD.
@@ -1003,7 +1041,8 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 GEM_BUG_ON(IS_ERR(cs));
 request->postfix = intel_ring_offset(request, cs);
- /* Seal the request and mark it as pending execution. Note that
+ /*
+ * Seal the request and mark it as pending execution. Note that
 * we may inspect this state, without holding any locks, during
 * hangcheck. Hence we apply the barrier to ensure that we do not
 * see a more recent value in the hws than we are tracking.
@@ -1011,7 +1050,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 prev = i915_gem_active_raw(&timeline->last_request,
 &request->i915->drm.struct_mutex);
- if (prev) {
+ if (prev && !i915_request_completed(prev)) {
 i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
 &request->submitq);
 if (engine->schedule)
@@ -1031,7 +1070,8 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 list_add_tail(&request->ring_link, &ring->request_list);
 request->emitted_jiffies = jiffies;
- /* Let the backend know a new request has arrived that may need
+ /*
+ * Let the backend know a new request has arrived that may need
 * to adjust the existing execution schedule due to a high priority
 * request - i.e. we may want to preempt the current request in order
 * to run a high priority dependency chain *before* we can execute this
@@ -1041,19 +1081,42 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 * decide whether to preempt the entire chain so that it is ready to
 * run at the earliest possible convenience.
 */
+ rcu_read_lock();
 if (engine->schedule)
 engine->schedule(request, request->ctx->priority);
+ rcu_read_unlock();
 local_bh_disable();
 i915_sw_fence_commit(&request->submit);
 local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
+
+ /*
+ * In typical scenarios, we do not expect the previous request on
+ * the timeline to be still tracked by timeline->last_request if it
+ * has been completed. If the completed request is still here, that
+ * implies that request retirement is a long way behind submission,
+ * suggesting that we haven't been retiring frequently enough from
+ * the combination of retire-before-alloc, waiters and the background
+ * retirement worker. So if the last request on this timeline was
+ * already completed, do a catch up pass, flushing the retirement queue
+ * up to this client. Since we have now moved the heaviest operations
+ * during retirement onto secondary workers, such as freeing objects
+ * or contexts, retiring a bunch of requests is mostly list management
+ * (and cache misses), and so we should not be overly penalizing this
+ * client by performing excess work, though we may still be performing
+ * work on behalf of others -- but instead we should benefit from
+ * improved resource management. (Well, that's the theory at least.)
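+ *
+ * As a sketch of the effect (not extra code in this patch), a client
+ * that only ever submits, e.g.
+ *
+ *	while (frames--) {
+ *		rq = i915_request_alloc(engine, ctx);
+ *		... emit commands ...
+ *		i915_request_add(rq);
+ *	}
+ *
+ * now retires its own completed backlog here, instead of letting it
+ * grow without bound between runs of the background retirement worker.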
+ */
+ if (prev && i915_request_completed(prev))
+ i915_request_retire_upto(prev);
}
static unsigned long local_clock_us(unsigned int *cpu)
{
 unsigned long t;
- /* Cheaply and approximately convert from nanoseconds to microseconds.
+ /*
+ * Cheaply and approximately convert from nanoseconds to microseconds.
 * The result and subsequent calculations are also defined in the same
 * approximate microseconds units. The principal source of timing
 * error here is from the simple truncation.
@@ -1081,10 +1144,10 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
 return this_cpu != cpu;
}
-static bool __i915_spin_request(const struct drm_i915_gem_request *req,
+static bool __i915_spin_request(const struct i915_request *rq,
 u32 seqno, int state, unsigned long timeout_us)
{
- struct intel_engine_cs *engine = req->engine;
+ struct intel_engine_cs *engine = rq->engine;
 unsigned int irq, cpu;
 GEM_BUG_ON(!seqno);
@@ -1103,7 +1166,8 @@ static bool __i915_spin_request(const struct drm_i915_gem_request *req,
 if (!i915_seqno_passed(intel_engine_get_seqno(engine), seqno - 1))
 return false;
- /* When waiting for high frequency requests, e.g. during synchronous
+ /*
+ * When waiting for high frequency requests, e.g. during synchronous
 * rendering split between the CPU and GPU, the finite amount of time
 * required to set up the irq and wait upon it limits the response
 * rate. By busywaiting on the request completion for a short while we
@@ -1117,9 +1181,10 @@ static bool __i915_spin_request(const struct drm_i915_gem_request *req,
 timeout_us += local_clock_us(&cpu);
 do {
 if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno))
- return seqno == i915_gem_request_global_seqno(req);
+ return seqno == i915_request_global_seqno(rq);
- /* Seqno are meant to be ordered *before* the interrupt. If
+ /*
+ * Seqno are meant to be ordered *before* the interrupt. If
 * we see an interrupt without a corresponding seqno advance,
 * assume we won't see one in the near future but require
 * the engine->seqno_barrier() to fixup coherency.
@@ -1139,7 +1204,7 @@ static bool __i915_spin_request(const struct drm_i915_gem_request *req,
 return false;
}
-static bool __i915_wait_request_check_and_reset(struct drm_i915_gem_request *request)
+static bool __i915_wait_request_check_and_reset(struct i915_request *request)
{
 if (likely(!i915_reset_handoff(&request->i915->gpu_error)))
 return false;
@@ -1150,12 +1215,12 @@ static bool __i915_wait_request_check_and_reset(struct drm_i915_gem_request *req
}
/**
- * i915_wait_request - wait until execution of request has finished
- * @req: the request to wait upon
+ * i915_request_wait - wait until execution of request has finished
+ * @rq: the request to wait upon
 * @flags: how to wait
 * @timeout: how long to wait in jiffies
 *
- * i915_wait_request() waits for the request to be completed, for a
+ * i915_request_wait() waits for the request to be completed, for a
 * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
 * unbounded wait).
 *
@@ -1168,13 +1233,13 @@ static bool __i915_wait_request_check_and_reset(struct drm_i915_gem_request *req
 * May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is
 * pending before the request completes.
 */
-long i915_wait_request(struct drm_i915_gem_request *req,
+long i915_request_wait(struct i915_request *rq,
 unsigned int flags,
 long timeout)
{
 const int state = flags & I915_WAIT_INTERRUPTIBLE ?
TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
- wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue;
+ wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue;
 DEFINE_WAIT_FUNC(reset, default_wake_function);
 DEFINE_WAIT_FUNC(exec, default_wake_function);
 struct intel_wait wait;
@@ -1182,33 +1247,33 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 might_sleep();
#if IS_ENABLED(CONFIG_LOCKDEP)
 GEM_BUG_ON(debug_locks &&
- !!lockdep_is_held(&req->i915->drm.struct_mutex) !=
+ !!lockdep_is_held(&rq->i915->drm.struct_mutex) !=
 !!(flags & I915_WAIT_LOCKED));
#endif
 GEM_BUG_ON(timeout < 0);
- if (i915_gem_request_completed(req))
+ if (i915_request_completed(rq))
 return timeout;
 if (!timeout)
 return -ETIME;
- trace_i915_gem_request_wait_begin(req, flags);
+ trace_i915_request_wait_begin(rq, flags);
- add_wait_queue(&req->execute, &exec);
+ add_wait_queue(&rq->execute, &exec);
 if (flags & I915_WAIT_LOCKED)
 add_wait_queue(errq, &reset);
- intel_wait_init(&wait, req);
+ intel_wait_init(&wait, rq);
restart:
 do {
 set_current_state(state);
- if (intel_wait_update_request(&wait, req))
+ if (intel_wait_update_request(&wait, rq))
 break;
 if (flags & I915_WAIT_LOCKED &&
- __i915_wait_request_check_and_reset(req))
+ __i915_wait_request_check_and_reset(rq))
 continue;
 if (signal_pending_state(state, current)) {
@@ -1225,22 +1290,23 @@ restart:
 } while (1);
 GEM_BUG_ON(!intel_wait_has_seqno(&wait));
- GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit));
+ GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
 /* Optimistic short spin before touching IRQs */
- if (__i915_spin_request(req, wait.seqno, state, 5))
+ if (__i915_spin_request(rq, wait.seqno, state, 5))
 goto complete;
 set_current_state(state);
- if (intel_engine_add_wait(req->engine, &wait))
- /* In order to check that we haven't missed the interrupt
+ if (intel_engine_add_wait(rq->engine, &wait))
+ /*
+ * In order to check that we haven't missed the interrupt
 * as we enabled it, we need to kick ourselves to do a
 * coherent check on the seqno before we sleep.
 */
 goto wakeup;
 if (flags & I915_WAIT_LOCKED)
- __i915_wait_request_check_and_reset(req);
+ __i915_wait_request_check_and_reset(rq);
 for (;;) {
 if (signal_pending_state(state, current)) {
@@ -1256,21 +1322,23 @@ restart:
 timeout = io_schedule_timeout(timeout);
 if (intel_wait_complete(&wait) &&
- intel_wait_check_request(&wait, req))
+ intel_wait_check_request(&wait, rq))
 break;
 set_current_state(state);
wakeup:
- /* Carefully check if the request is complete, giving time
+ /*
+ * Carefully check if the request is complete, giving time
 * for the seqno to be visible following the interrupt.
 * We also have to check in case we are kicked by the GPU
 * reset in order to drop the struct_mutex.
 */
- if (__i915_request_irq_complete(req))
+ if (__i915_request_irq_complete(rq))
 break;
- /* If the GPU is hung, and we hold the lock, reset the GPU
+ /*
+ * If the GPU is hung, and we hold the lock, reset the GPU
 * and then check for completion. On a full reset, the engine's
 * HW seqno will be advanced past us and we are complete.
 * If we do a partial reset, we have to wait for the GPU to
@@ -1281,33 +1349,33 @@ wakeup:
 * resume and update the breadcrumb.
 *
 * If we don't hold the mutex, we can just wait for the worker
 * to come along and update the breadcrumb (either directly
 * itself, or indirectly by recovering the GPU).
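 * (Hence the loop below rechecks the wait and may restart it from
 * scratch via the restart label, rather than simply breaking out on
 * the first wakeup.)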
*/ if (flags & I915_WAIT_LOCKED && - __i915_wait_request_check_and_reset(req)) + __i915_wait_request_check_and_reset(rq)) continue; /* Only spin if we know the GPU is processing this request */ - if (__i915_spin_request(req, wait.seqno, state, 2)) + if (__i915_spin_request(rq, wait.seqno, state, 2)) break; - if (!intel_wait_check_request(&wait, req)) { - intel_engine_remove_wait(req->engine, &wait); + if (!intel_wait_check_request(&wait, rq)) { + intel_engine_remove_wait(rq->engine, &wait); goto restart; } } - intel_engine_remove_wait(req->engine, &wait); + intel_engine_remove_wait(rq->engine, &wait); complete: __set_current_state(TASK_RUNNING); if (flags & I915_WAIT_LOCKED) remove_wait_queue(errq, &reset); - remove_wait_queue(&req->execute, &exec); - trace_i915_gem_request_wait_end(req); + remove_wait_queue(&rq->execute, &exec); + trace_i915_request_wait_end(rq); return timeout; } static void engine_retire_requests(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *request, *next; + struct i915_request *request, *next; u32 seqno = intel_engine_get_seqno(engine); LIST_HEAD(retire); @@ -1322,24 +1390,24 @@ static void engine_retire_requests(struct intel_engine_cs *engine) spin_unlock_irq(&engine->timeline->lock); list_for_each_entry_safe(request, next, &retire, link) - i915_gem_request_retire(request); + i915_request_retire(request); } -void i915_gem_retire_requests(struct drm_i915_private *dev_priv) +void i915_retire_requests(struct drm_i915_private *i915) { struct intel_engine_cs *engine; enum intel_engine_id id; - lockdep_assert_held(&dev_priv->drm.struct_mutex); + lockdep_assert_held(&i915->drm.struct_mutex); - if (!dev_priv->gt.active_requests) + if (!i915->gt.active_requests) return; - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, i915, id) engine_retire_requests(engine); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_request.c" -#include "selftests/i915_gem_request.c" +#include "selftests/i915_request.c" #endif diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_request.h index 0d6d39f19506..7d6eb82eeb91 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -1,5 +1,5 @@ /* - * Copyright © 2008-2015 Intel Corporation + * Copyright © 2008-2018 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -22,8 +22,8 @@ * */ -#ifndef I915_GEM_REQUEST_H -#define I915_GEM_REQUEST_H +#ifndef I915_REQUEST_H +#define I915_REQUEST_H #include <linux/dma-fence.h> @@ -34,18 +34,18 @@ struct drm_file; struct drm_i915_gem_object; -struct drm_i915_gem_request; +struct i915_request; struct intel_wait { struct rb_node node; struct task_struct *tsk; - struct drm_i915_gem_request *request; + struct i915_request *request; u32 seqno; }; struct intel_signal_node { - struct rb_node node; struct intel_wait wait; + struct list_head link; }; struct i915_dependency { @@ -57,7 +57,12 @@ struct i915_dependency { #define I915_DEPENDENCY_ALLOC BIT(0) }; -/* Requests exist in a complex web of interdependencies. Each request +/* + * "People assume that time is a strict progression of cause to effect, but + * actually, from a nonlinear, non-subjective viewpoint, it's more like a big + * ball of wibbly-wobbly, timey-wimey ... stuff." -The Doctor, 2015 + * + * Requests exist in a complex web of interdependencies. 
Each request
* has to wait for some other request to complete before it is ready to be run
* (e.g. we have to wait until the pixels have been rendered into a texture
* before we can copy from it). We track the readiness of a request in terms
@@ -81,8 +86,8 @@ enum {
 I915_PRIORITY_INVALID = INT_MIN
};
-struct i915_gem_capture_list {
- struct i915_gem_capture_list *next;
+struct i915_capture_list {
+ struct i915_capture_list *next;
 struct i915_vma *vma;
};
@@ -106,7 +111,7 @@ struct i915_gem_capture_list {
 *
 * The requests are reference counted.
 */
-struct drm_i915_gem_request {
+struct i915_request {
 struct dma_fence fence;
 spinlock_t lock;
@@ -120,7 +125,7 @@ struct drm_i915_gem_request {
 * it persists while any request is linked to it. Requests themselves
 * are also refcounted, so the request will only be freed when the last
 * reference to it is dismissed, and the code in
- * i915_gem_request_free() will then decrement the refcount on the
+ * i915_request_free() will then decrement the refcount on the
 * context.
 */
 struct i915_gem_context *ctx;
@@ -129,7 +134,8 @@ struct drm_i915_gem_request {
 struct intel_timeline *timeline;
 struct intel_signal_node signaling;
- /* Fences for the various phases in the request's lifetime.
+ /*
+ * Fences for the various phases in the request's lifetime.
 *
 * The submit fence is used to await upon all of the request's
 * dependencies. When it is signaled, the request is ready to run.
@@ -139,7 +145,8 @@ struct drm_i915_gem_request {
 wait_queue_entry_t submitq;
 wait_queue_head_t execute;
- /* A list of everyone we wait upon, and everyone who waits upon us.
+ /*
+ * A list of everyone we wait upon, and everyone who waits upon us.
 * Even though we will not be submitted to the hardware before the
 * submit fence is signaled (it waits for all external events as well
 * as our own requests), the scheduler still needs to know the
@@ -150,7 +157,8 @@ struct drm_i915_gem_request {
 struct i915_priotree priotree;
 struct i915_dependency dep;
- /** GEM sequence number associated with this request on the
+ /**
+ * GEM sequence number associated with this request on the
 * global execution timeline. It is zero when the request is not
 * on the HW queue (i.e. not on the engine timeline list).
 * Its value is guarded by the timeline spinlock.
@@ -180,12 +188,13 @@ struct drm_i915_gem_request {
 * error state dump only).
 */
 struct i915_vma *batch;
- /** Additional buffers requested by userspace to be captured upon
+ /**
+ * Additional buffers requested by userspace to be captured upon
 * a GPU hang. The vma/obj on this list are protected by their
 * active reference - all objects on this list must also be
 * on the active_list (of their final request).
 */
- struct i915_gem_capture_list *capture_list;
+ struct i915_capture_list *capture_list;
 struct list_head active_list;
 /** Time at which this request was emitted, in jiffies.
*/ @@ -213,52 +222,40 @@ static inline bool dma_fence_is_i915(const struct dma_fence *fence) return fence->ops == &i915_fence_ops; } -struct drm_i915_gem_request * __must_check -i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx); -void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); +struct i915_request * __must_check +i915_request_alloc(struct intel_engine_cs *engine, + struct i915_gem_context *ctx); +void i915_request_retire_upto(struct i915_request *rq); -static inline struct drm_i915_gem_request * +static inline struct i915_request * to_request(struct dma_fence *fence) { /* We assume that NULL fence/request are interoperable */ - BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0); + BUILD_BUG_ON(offsetof(struct i915_request, fence) != 0); GEM_BUG_ON(fence && !dma_fence_is_i915(fence)); - return container_of(fence, struct drm_i915_gem_request, fence); + return container_of(fence, struct i915_request, fence); } -static inline struct drm_i915_gem_request * -i915_gem_request_get(struct drm_i915_gem_request *req) +static inline struct i915_request * +i915_request_get(struct i915_request *rq) { - return to_request(dma_fence_get(&req->fence)); + return to_request(dma_fence_get(&rq->fence)); } -static inline struct drm_i915_gem_request * -i915_gem_request_get_rcu(struct drm_i915_gem_request *req) +static inline struct i915_request * +i915_request_get_rcu(struct i915_request *rq) { - return to_request(dma_fence_get_rcu(&req->fence)); + return to_request(dma_fence_get_rcu(&rq->fence)); } static inline void -i915_gem_request_put(struct drm_i915_gem_request *req) -{ - dma_fence_put(&req->fence); -} - -static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, - struct drm_i915_gem_request *src) +i915_request_put(struct i915_request *rq) { - if (src) - i915_gem_request_get(src); - - if (*pdst) - i915_gem_request_put(*pdst); - - *pdst = src; + dma_fence_put(&rq->fence); } /** - * i915_gem_request_global_seqno - report the current global seqno + * i915_request_global_seqno - report the current global seqno * @request - the request * * A request is assigned a global seqno only when it is on the hardware @@ -276,34 +273,28 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, * after the read, it is indeed complete). 
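 *
 * The canonical recheck sequence, exactly as i915_request_completed()
 * below implements it:
 *
 *	seqno = i915_request_global_seqno(rq);
 *	if (!seqno)
 *		return false;
 *	return __i915_request_completed(rq, seqno);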
*/ static u32 -i915_gem_request_global_seqno(const struct drm_i915_gem_request *request) +i915_request_global_seqno(const struct i915_request *request) { return READ_ONCE(request->global_seqno); } -int -i915_gem_request_await_object(struct drm_i915_gem_request *to, +int i915_request_await_object(struct i915_request *to, struct drm_i915_gem_object *obj, bool write); -int i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, - struct dma_fence *fence); +int i915_request_await_dma_fence(struct i915_request *rq, + struct dma_fence *fence); -void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches); -#define i915_add_request(req) \ - __i915_add_request(req, false) +void __i915_request_add(struct i915_request *rq, bool flush_caches); +#define i915_request_add(rq) \ + __i915_request_add(rq, false) -void __i915_gem_request_submit(struct drm_i915_gem_request *request); -void i915_gem_request_submit(struct drm_i915_gem_request *request); +void __i915_request_submit(struct i915_request *request); +void i915_request_submit(struct i915_request *request); -void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request); -void i915_gem_request_unsubmit(struct drm_i915_gem_request *request); +void __i915_request_unsubmit(struct i915_request *request); +void i915_request_unsubmit(struct i915_request *request); -struct intel_rps_client; -#define NO_WAITBOOST ERR_PTR(-1) -#define IS_RPS_CLIENT(p) (!IS_ERR(p)) -#define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p)) - -long i915_wait_request(struct drm_i915_gem_request *req, +long i915_request_wait(struct i915_request *rq, unsigned int flags, long timeout) __attribute__((nonnull(1))); @@ -322,26 +313,48 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2) } static inline bool -__i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno) +__i915_request_completed(const struct i915_request *rq, u32 seqno) { GEM_BUG_ON(!seqno); - return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) && - seqno == i915_gem_request_global_seqno(req); + return i915_seqno_passed(intel_engine_get_seqno(rq->engine), seqno) && + seqno == i915_request_global_seqno(rq); } -static inline bool -i915_gem_request_completed(const struct drm_i915_gem_request *req) +static inline bool i915_request_completed(const struct i915_request *rq) { u32 seqno; - seqno = i915_gem_request_global_seqno(req); + seqno = i915_request_global_seqno(rq); if (!seqno) return false; - return __i915_gem_request_completed(req, seqno); + return __i915_request_completed(rq, seqno); } -/* We treat requests as fences. This is not be to confused with our +static inline bool i915_request_started(const struct i915_request *rq) +{ + u32 seqno; + + seqno = i915_request_global_seqno(rq); + if (!seqno) + return false; + + return i915_seqno_passed(intel_engine_get_seqno(rq->engine), + seqno - 1); +} + +static inline bool i915_priotree_signaled(const struct i915_priotree *pt) +{ + const struct i915_request *rq = + container_of(pt, const struct i915_request, priotree); + + return i915_request_completed(rq); +} + +void i915_retire_requests(struct drm_i915_private *i915); + +/* + * We treat requests as fences. This is not to be confused with our * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
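All of the completed/started tests above funnel through i915_seqno_passed(), whose body the hunk only shows as context. The usual trick (assumed here rather than quoted from this header) is a signed comparison of the unsigned difference, which keeps ordering correct across u32 wraparound; i915_request_started() leans on the same property by testing against seqno - 1. A minimal standalone sketch:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Wraparound-safe "seq1 is at or after seq2": interpreting the unsigned
 * difference as signed keeps the ordering correct as long as the two
 * values are within 2^31 of each other. */
static bool seqno_passed(uint32_t seq1, uint32_t seq2)
{
        return (int32_t)(seq1 - seq2) >= 0;
}

int main(void)
{
        assert(seqno_passed(2, 2));             /* equal counts as passed */
        assert(seqno_passed(3, 2));
        assert(!seqno_passed(2, 3));
        assert(seqno_passed(2, 0xfffffffeu));   /* across the wrap */
        return 0;
}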
* We use the fences to synchronize access from the CPU with activity on the * GPU; for example, we should not rewrite an object's PTE whilst the GPU @@ -371,16 +384,16 @@ i915_gem_request_completed(const struct drm_i915_gem_request *req) struct i915_gem_active; typedef void (*i915_gem_retire_fn)(struct i915_gem_active *, - struct drm_i915_gem_request *); + struct i915_request *); struct i915_gem_active { - struct drm_i915_gem_request __rcu *request; + struct i915_request __rcu *request; struct list_head link; i915_gem_retire_fn retire; }; void i915_gem_retire_noop(struct i915_gem_active *, - struct drm_i915_gem_request *request); + struct i915_request *request); /** * init_request_active - prepares the activity tracker for use @@ -412,7 +425,7 @@ init_request_active(struct i915_gem_active *active, */ static inline void i915_gem_active_set(struct i915_gem_active *active, - struct drm_i915_gem_request *request) + struct i915_request *request) { list_move(&active->link, &request->active_list); rcu_assign_pointer(active->request, request); @@ -437,10 +450,11 @@ i915_gem_active_set_retire_fn(struct i915_gem_active *active, active->retire = fn ?: i915_gem_retire_noop; } -static inline struct drm_i915_gem_request * +static inline struct i915_request * __i915_gem_active_peek(const struct i915_gem_active *active) { - /* Inside the error capture (running with the driver in an unknown + /* + * Inside the error capture (running with the driver in an unknown * state), we want to bend the rules slightly (a lot). * * Work is in progress to make it safer, in the meantime this keeps @@ -457,7 +471,7 @@ __i915_gem_active_peek(const struct i915_gem_active *active) * It does not obtain a reference on the request for the caller, so the caller * must hold struct_mutex. */ -static inline struct drm_i915_gem_request * +static inline struct i915_request * i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex) { return rcu_dereference_protected(active->request, @@ -472,13 +486,13 @@ i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex) * still active, or NULL. It does not obtain a reference on the request * for the caller, so the caller must hold struct_mutex. */ -static inline struct drm_i915_gem_request * +static inline struct i915_request * i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex) { - struct drm_i915_gem_request *request; + struct i915_request *request; request = i915_gem_active_raw(active, mutex); - if (!request || i915_gem_request_completed(request)) + if (!request || i915_request_completed(request)) return NULL; return request; @@ -491,10 +505,10 @@ i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex) * i915_gem_active_get() returns a reference to the active request, or NULL * if the active tracker is idle. The caller must hold struct_mutex. */ -static inline struct drm_i915_gem_request * +static inline struct i915_request * i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex) { - return i915_gem_request_get(i915_gem_active_peek(active, mutex)); + return i915_request_get(i915_gem_active_peek(active, mutex)); } /** @@ -505,10 +519,11 @@ i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex) * if the active tracker is idle. The caller must hold the RCU read lock, but * the returned pointer is safe to use outside of RCU.
*/ -static inline struct drm_i915_gem_request * +static inline struct i915_request * __i915_gem_active_get_rcu(const struct i915_gem_active *active) { - /* Performing a lockless retrieval of the active request is super + /* + * Performing a lockless retrieval of the active request is super * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing * slab of request objects will not be freed whilst we hold the * RCU read lock. It does not guarantee that the request itself @@ -516,13 +531,13 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active) * * Thread A Thread B * - * req = active.request - * retire(req) -> free(req); - * (req is now first on the slab freelist) + * rq = active.request + * retire(rq) -> free(rq); + * (rq is now first on the slab freelist) * active.request = NULL * - * req = new submission on a new object - * ref(req) + * rq = new submission on a new object + * ref(rq) * * To prevent the request from being reused whilst the caller * uses it, we take a reference like normal. Whilst acquiring @@ -551,32 +566,34 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active) * * It is then imperative that we do not zero the request on * reallocation, so that we can chase the dangling pointers! - * See i915_gem_request_alloc(). + * See i915_request_alloc(). */ do { - struct drm_i915_gem_request *request; + struct i915_request *request; request = rcu_dereference(active->request); - if (!request || i915_gem_request_completed(request)) + if (!request || i915_request_completed(request)) return NULL; - /* An especially silly compiler could decide to recompute the - * result of i915_gem_request_completed, more specifically + /* + * An especially silly compiler could decide to recompute the + * result of i915_request_completed, more specifically * re-emit the load for request->fence.seqno. A race would catch * a later seqno value, which could flip the result from true to * false. Which means part of the instructions below might not * be executed, while later on instructions are executed. Due to * barriers within the refcounting the inconsistency can't reach - * past the call to i915_gem_request_get_rcu, but not executing - * that while still executing i915_gem_request_put() creates + * past the call to i915_request_get_rcu, but not executing + * that while still executing i915_request_put() creates * havoc enough. Prevent this with a compiler barrier. */ barrier(); - request = i915_gem_request_get_rcu(request); + request = i915_request_get_rcu(request); - /* What stops the following rcu_access_pointer() from occurring - * before the above i915_gem_request_get_rcu()? If we were + /* + * What stops the following rcu_access_pointer() from occurring + * before the above i915_request_get_rcu()? If we were * to read the value before pausing to get the reference to * the request, we may not notice a change in the active * tracker. @@ -590,9 +607,9 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active) * compiler. * * The atomic operation at the heart of - * i915_gem_request_get_rcu(), see dma_fence_get_rcu(), is + * i915_request_get_rcu(), see dma_fence_get_rcu(), is * atomic_inc_not_zero() which is only a full memory barrier - * when successful. That is, if i915_gem_request_get_rcu() + * when successful. 
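Reduced to its skeleton, the loop described above is: load the pointer, take a reference that fails once the refcount has dropped to zero, then re-read the pointer to confirm the reference landed on the request still being tracked. A single-threaded userspace sketch with made-up types, using a C11 compare-and-swap in place of the kernel's atomic_inc_not_zero() and no real RCU:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct request {
        atomic_uint refcount;           /* 0 => the object is being freed */
        unsigned int seqno;
};

static struct request *_Atomic active; /* the tracker slot */

/* Mirror of atomic_inc_not_zero(): only take a reference while the
 * object is still live. */
static bool get_unless_zero(struct request *rq)
{
        unsigned int old = atomic_load(&rq->refcount);

        while (old != 0)
                if (atomic_compare_exchange_weak(&rq->refcount, &old, old + 1))
                        return true;
        return false;
}

static struct request *get_active(void)
{
        struct request *rq;

        do {
                rq = atomic_load(&active);
                if (!rq)
                        return NULL;

                if (!get_unless_zero(rq))
                        continue;       /* lost a race with free: retry */

                /* Revalidate: only hand the reference back if the slot
                 * still points at the request we pinned. */
                if (rq == atomic_load(&active))
                        return rq;

                atomic_fetch_sub(&rq->refcount, 1);
        } while (1);
}

int main(void)
{
        struct request rq = { 1, 42 };

        atomic_store(&active, &rq);
        printf("seqno %u\n", get_active()->seqno);
        return 0;
}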
That is, if i915_request_get_rcu() * returns the request (and so with the reference counted * incremented) then the following read for rcu_access_pointer() * must occur after the atomic operation and so confirm @@ -604,7 +621,7 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active) if (!request || request == rcu_access_pointer(active->request)) return rcu_pointer_handoff(request); - i915_gem_request_put(request); + i915_request_put(request); } while (1); } @@ -616,12 +633,12 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active) * or NULL if the active tracker is idle. The reference is obtained under RCU, * so no locking is required by the caller. * - * The reference should be freed with i915_gem_request_put(). + * The reference should be freed with i915_request_put(). */ -static inline struct drm_i915_gem_request * +static inline struct i915_request * i915_gem_active_get_unlocked(const struct i915_gem_active *active) { - struct drm_i915_gem_request *request; + struct i915_request *request; rcu_read_lock(); request = __i915_gem_active_get_rcu(active); @@ -661,7 +678,7 @@ i915_gem_active_isset(const struct i915_gem_active *active) * can then wait upon the request, and afterwards release our reference, * free of any locking. * - * This function wraps i915_wait_request(), see it for the full details on + * This function wraps i915_request_wait(), see it for the full details on * the arguments. * * Returns 0 if successful, or a negative error code. @@ -669,13 +686,13 @@ i915_gem_active_isset(const struct i915_gem_active *active) static inline int i915_gem_active_wait(const struct i915_gem_active *active, unsigned int flags) { - struct drm_i915_gem_request *request; + struct i915_request *request; long ret = 0; request = i915_gem_active_get_unlocked(active); if (request) { - ret = i915_wait_request(request, flags, MAX_SCHEDULE_TIMEOUT); - i915_gem_request_put(request); + ret = i915_request_wait(request, flags, MAX_SCHEDULE_TIMEOUT); + i915_request_put(request); } return ret < 0 ? ret : 0; @@ -694,14 +711,14 @@ static inline int __must_check i915_gem_active_retire(struct i915_gem_active *active, struct mutex *mutex) { - struct drm_i915_gem_request *request; + struct i915_request *request; long ret; request = i915_gem_active_raw(active, mutex); if (!request) return 0; - ret = i915_wait_request(request, + ret = i915_request_wait(request, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); if (ret < 0) @@ -718,4 +735,4 @@ i915_gem_active_retire(struct i915_gem_active *active, #define for_each_active(mask, idx) \ for (; mask ? 
idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx)) -#endif /* I915_GEM_REQUEST_H */ +#endif /* I915_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 3669f5eeb91e..1de5173e53a2 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -365,18 +365,31 @@ int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence, struct i915_sw_dma_fence_cb { struct dma_fence_cb base; struct i915_sw_fence *fence; +}; + +struct i915_sw_dma_fence_cb_timer { + struct i915_sw_dma_fence_cb base; struct dma_fence *dma; struct timer_list timer; struct irq_work work; struct rcu_head rcu; }; +static void dma_i915_sw_fence_wake(struct dma_fence *dma, + struct dma_fence_cb *data) +{ + struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base); + + i915_sw_fence_complete(cb->fence); + kfree(cb); +} + static void timer_i915_sw_fence_wake(struct timer_list *t) { - struct i915_sw_dma_fence_cb *cb = from_timer(cb, t, timer); + struct i915_sw_dma_fence_cb_timer *cb = from_timer(cb, t, timer); struct i915_sw_fence *fence; - fence = xchg(&cb->fence, NULL); + fence = xchg(&cb->base.fence, NULL); if (!fence) return; @@ -388,13 +401,14 @@ static void timer_i915_sw_fence_wake(struct timer_list *t) i915_sw_fence_complete(fence); } -static void dma_i915_sw_fence_wake(struct dma_fence *dma, - struct dma_fence_cb *data) +static void dma_i915_sw_fence_wake_timer(struct dma_fence *dma, + struct dma_fence_cb *data) { - struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base); + struct i915_sw_dma_fence_cb_timer *cb = + container_of(data, typeof(*cb), base.base); struct i915_sw_fence *fence; - fence = xchg(&cb->fence, NULL); + fence = xchg(&cb->base.fence, NULL); if (fence) i915_sw_fence_complete(fence); @@ -403,7 +417,8 @@ static void dma_i915_sw_fence_wake(struct dma_fence *dma, static void irq_i915_sw_fence_work(struct irq_work *wrk) { - struct i915_sw_dma_fence_cb *cb = container_of(wrk, typeof(*cb), work); + struct i915_sw_dma_fence_cb_timer *cb = + container_of(wrk, typeof(*cb), work); del_timer_sync(&cb->timer); dma_fence_put(cb->dma); @@ -417,6 +432,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, gfp_t gfp) { struct i915_sw_dma_fence_cb *cb; + dma_fence_func_t func; int ret; debug_fence_assert(fence); @@ -425,7 +441,10 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, if (dma_fence_is_signaled(dma)) return 0; - cb = kmalloc(sizeof(*cb), gfp); + cb = kmalloc(timeout ? 
+ sizeof(struct i915_sw_dma_fence_cb_timer) : + sizeof(struct i915_sw_dma_fence_cb), + gfp); if (!cb) { if (!gfpflags_allow_blocking(gfp)) return -ENOMEM; @@ -436,19 +455,26 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, cb->fence = fence; i915_sw_fence_await(fence); - cb->dma = NULL; - timer_setup(&cb->timer, timer_i915_sw_fence_wake, TIMER_IRQSAFE); - init_irq_work(&cb->work, irq_i915_sw_fence_work); + func = dma_i915_sw_fence_wake; if (timeout) { + struct i915_sw_dma_fence_cb_timer *timer = + container_of(cb, typeof(*timer), base); + + timer->dma = dma_fence_get(dma); + init_irq_work(&timer->work, irq_i915_sw_fence_work); + + timer_setup(&timer->timer, + timer_i915_sw_fence_wake, TIMER_IRQSAFE); + mod_timer(&timer->timer, round_jiffies_up(jiffies + timeout)); + + func = dma_i915_sw_fence_wake_timer; } - ret = dma_fence_add_callback(dma, &cb->base, dma_i915_sw_fence_wake); + ret = dma_fence_add_callback(dma, &cb->base, func); if (ret == 0) { ret = 1; } else { - dma_i915_sw_fence_wake(dma, &cb->base); + func(dma, &cb->base); if (ret == -ENOENT) /* fence already signaled */ ret = 0; } diff --git a/drivers/gpu/drm/i915/i915_syncmap.c b/drivers/gpu/drm/i915/i915_syncmap.c index 0087acf731a8..58f8d0cc125c 100644 --- a/drivers/gpu/drm/i915/i915_syncmap.c +++ b/drivers/gpu/drm/i915/i915_syncmap.c @@ -86,7 +86,7 @@ struct i915_syncmap { /** * i915_syncmap_init -- initialise the #i915_syncmap - * @root - pointer to the #i915_syncmap + * @root: pointer to the #i915_syncmap */ void i915_syncmap_init(struct i915_syncmap **root) { @@ -139,9 +139,9 @@ static inline bool seqno_later(u32 a, u32 b) /** * i915_syncmap_is_later -- compare against the last known sync point - * @root - pointer to the #i915_syncmap - * @id - the context id (other timeline) we are synchronising to - * @seqno - the sequence number along the other timeline + * @root: pointer to the #i915_syncmap + * @id: the context id (other timeline) we are synchronising to + * @seqno: the sequence number along the other timeline * * If we have already synchronised this @root timeline with another (@id) then * we can omit any repeated or earlier synchronisation requests. If the two @@ -339,9 +339,9 @@ found: /** * i915_syncmap_set -- mark the most recent syncpoint between contexts - * @root - pointer to the #i915_syncmap - * @id - the context id (other timeline) we have synchronised to - * @seqno - the sequence number along the other timeline + * @root: pointer to the #i915_syncmap + * @id: the context id (other timeline) we have synchronised to + * @seqno: the sequence number along the other timeline * * When we synchronise this @root timeline with another (@id), we also know * that we have synchronized with all previous seqno along that timeline.
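Behind these kerneldoc fixes, the contract of the syncmap is simply "remember the most recent seqno seen from each other timeline, so redundant waits can be skipped". A toy model of that contract, with a flat array (and an arbitrary MAX_CTX) standing in for the kernel's compressed radix tree:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_CTX 8       /* arbitrary for the sketch */

static uint32_t last_sync[MAX_CTX];     /* 0 == never synchronised */

static bool seqno_later(uint32_t a, uint32_t b)
{
        return (int32_t)(a - b) >= 0;
}

/* Would a wait on (id, seqno) be redundant, i.e. have we already
 * synchronised with that timeline at or beyond seqno? */
static bool syncmap_is_later(unsigned int id, uint32_t seqno)
{
        return last_sync[id] && seqno_later(last_sync[id], seqno);
}

static void syncmap_set(unsigned int id, uint32_t seqno)
{
        last_sync[id] = seqno;
}

int main(void)
{
        syncmap_set(1, 100);
        printf("(1, 90) redundant? %d\n", syncmap_is_later(1, 90));     /* 1 */
        printf("(1, 200) redundant? %d\n", syncmap_is_later(1, 200));   /* 0 */
        return 0;
}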
If @@ -382,7 +382,7 @@ static void __sync_free(struct i915_syncmap *p) /** * i915_syncmap_free -- free all memory associated with the syncmap - * @root - pointer to the #i915_syncmap + * @root: pointer to the #i915_syncmap * * Either when the timeline is to be freed and we no longer need the sync * point tracking, or when the fences are all known to be signaled and the diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index e1169c02eb2b..408827bf5d96 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -586,8 +586,7 @@ TRACE_EVENT(i915_gem_evict_vm, ); TRACE_EVENT(i915_gem_ring_sync_to, - TP_PROTO(struct drm_i915_gem_request *to, - struct drm_i915_gem_request *from), + TP_PROTO(struct i915_request *to, struct i915_request *from), TP_ARGS(to, from), TP_STRUCT__entry( @@ -610,9 +609,9 @@ TRACE_EVENT(i915_gem_ring_sync_to, __entry->seqno) ); -TRACE_EVENT(i915_gem_request_queue, - TP_PROTO(struct drm_i915_gem_request *req, u32 flags), - TP_ARGS(req, flags), +TRACE_EVENT(i915_request_queue, + TP_PROTO(struct i915_request *rq, u32 flags), + TP_ARGS(rq, flags), TP_STRUCT__entry( __field(u32, dev) @@ -624,11 +623,11 @@ TRACE_EVENT(i915_gem_request_queue, ), TP_fast_assign( - __entry->dev = req->i915->drm.primary->index; - __entry->hw_id = req->ctx->hw_id; - __entry->ring = req->engine->id; - __entry->ctx = req->fence.context; - __entry->seqno = req->fence.seqno; + __entry->dev = rq->i915->drm.primary->index; + __entry->hw_id = rq->ctx->hw_id; + __entry->ring = rq->engine->id; + __entry->ctx = rq->fence.context; + __entry->seqno = rq->fence.seqno; __entry->flags = flags; ), @@ -637,9 +636,9 @@ TRACE_EVENT(i915_gem_request_queue, __entry->seqno, __entry->flags) ); -DECLARE_EVENT_CLASS(i915_gem_request, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req), +DECLARE_EVENT_CLASS(i915_request, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq), TP_STRUCT__entry( __field(u32, dev) @@ -651,12 +650,12 @@ DECLARE_EVENT_CLASS(i915_gem_request, ), TP_fast_assign( - __entry->dev = req->i915->drm.primary->index; - __entry->hw_id = req->ctx->hw_id; - __entry->ring = req->engine->id; - __entry->ctx = req->fence.context; - __entry->seqno = req->fence.seqno; - __entry->global = req->global_seqno; + __entry->dev = rq->i915->drm.primary->index; + __entry->hw_id = rq->ctx->hw_id; + __entry->ring = rq->engine->id; + __entry->ctx = rq->fence.context; + __entry->seqno = rq->fence.seqno; + __entry->global = rq->global_seqno; ), TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u", @@ -664,26 +663,25 @@ DECLARE_EVENT_CLASS(i915_gem_request, __entry->seqno, __entry->global) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_add, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) +DEFINE_EVENT(i915_request, i915_request_add, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); #if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) -DEFINE_EVENT(i915_gem_request, i915_gem_request_submit, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) +DEFINE_EVENT(i915_request, i915_request_submit, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_execute, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) +DEFINE_EVENT(i915_request, i915_request_execute, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); -DECLARE_EVENT_CLASS(i915_gem_request_hw, - TP_PROTO(struct drm_i915_gem_request *req, - unsigned int port), - TP_ARGS(req, port), 
+DECLARE_EVENT_CLASS(i915_request_hw, + TP_PROTO(struct i915_request *rq, unsigned int port), + TP_ARGS(rq, port), TP_STRUCT__entry( __field(u32, dev) @@ -696,14 +694,14 @@ DECLARE_EVENT_CLASS(i915_gem_request_hw, ), TP_fast_assign( - __entry->dev = req->i915->drm.primary->index; - __entry->hw_id = req->ctx->hw_id; - __entry->ring = req->engine->id; - __entry->ctx = req->fence.context; - __entry->seqno = req->fence.seqno; - __entry->global_seqno = req->global_seqno; - __entry->port = port; - ), + __entry->dev = rq->i915->drm.primary->index; + __entry->hw_id = rq->ctx->hw_id; + __entry->ring = rq->engine->id; + __entry->ctx = rq->fence.context; + __entry->seqno = rq->fence.seqno; + __entry->global_seqno = rq->global_seqno; + __entry->port = port; + ), TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u", __entry->dev, __entry->hw_id, __entry->ring, @@ -711,34 +709,34 @@ DECLARE_EVENT_CLASS(i915_gem_request_hw, __entry->global_seqno, __entry->port) ); -DEFINE_EVENT(i915_gem_request_hw, i915_gem_request_in, - TP_PROTO(struct drm_i915_gem_request *req, unsigned int port), - TP_ARGS(req, port) +DEFINE_EVENT(i915_request_hw, i915_request_in, + TP_PROTO(struct i915_request *rq, unsigned int port), + TP_ARGS(rq, port) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_out, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) +DEFINE_EVENT(i915_request, i915_request_out, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); #else #if !defined(TRACE_HEADER_MULTI_READ) static inline void -trace_i915_gem_request_submit(struct drm_i915_gem_request *req) +trace_i915_request_submit(struct i915_request *rq) { } static inline void -trace_i915_gem_request_execute(struct drm_i915_gem_request *req) +trace_i915_request_execute(struct i915_request *rq) { } static inline void -trace_i915_gem_request_in(struct drm_i915_gem_request *req, unsigned int port) +trace_i915_request_in(struct i915_request *rq, unsigned int port) { } static inline void -trace_i915_gem_request_out(struct drm_i915_gem_request *req) +trace_i915_request_out(struct i915_request *rq) { } #endif @@ -767,14 +765,14 @@ TRACE_EVENT(intel_engine_notify, __entry->waiters) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_retire, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) +DEFINE_EVENT(i915_request, i915_request_retire, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); -TRACE_EVENT(i915_gem_request_wait_begin, - TP_PROTO(struct drm_i915_gem_request *req, unsigned int flags), - TP_ARGS(req, flags), +TRACE_EVENT(i915_request_wait_begin, + TP_PROTO(struct i915_request *rq, unsigned int flags), + TP_ARGS(rq, flags), TP_STRUCT__entry( __field(u32, dev) @@ -793,12 +791,12 @@ TRACE_EVENT(i915_gem_request_wait_begin, * less desirable. 
*/ TP_fast_assign( - __entry->dev = req->i915->drm.primary->index; - __entry->hw_id = req->ctx->hw_id; - __entry->ring = req->engine->id; - __entry->ctx = req->fence.context; - __entry->seqno = req->fence.seqno; - __entry->global = req->global_seqno; + __entry->dev = rq->i915->drm.primary->index; + __entry->hw_id = rq->ctx->hw_id; + __entry->ring = rq->engine->id; + __entry->ctx = rq->fence.context; + __entry->seqno = rq->fence.seqno; + __entry->global = rq->global_seqno; __entry->flags = flags; ), @@ -808,9 +806,9 @@ TRACE_EVENT(i915_gem_request_wait_begin, !!(__entry->flags & I915_WAIT_LOCKED), __entry->flags) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_wait_end, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) +DEFINE_EVENT(i915_request, i915_request_wait_end, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); TRACE_EVENT(i915_flip_request, diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index e0e7c48f45dc..4bda3bd29bf5 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -31,8 +31,7 @@ #include <drm/drm_gem.h> static void -i915_vma_retire(struct i915_gem_active *active, - struct drm_i915_gem_request *rq) +i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq) { const unsigned int idx = rq->engine->id; struct i915_vma *vma = diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index fd5b84904f7c..8c5022095418 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -32,8 +32,8 @@ #include "i915_gem_gtt.h" #include "i915_gem_fence_reg.h" #include "i915_gem_object.h" -#include "i915_gem_request.h" +#include "i915_request.h" enum i915_cache_level; diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index 36d4e635e4ce..e9fb692076d7 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -110,6 +110,8 @@ int intel_digital_connector_atomic_check(struct drm_connector *conn, to_intel_digital_connector_state(old_state); struct drm_crtc_state *crtc_state; + intel_hdcp_atomic_check(conn, old_state, new_state); + if (!new_state->crtc) return 0; @@ -186,13 +188,14 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc) /** * intel_crtc_destroy_state - destroy crtc state * @crtc: drm crtc + * @state: the state to destroy * * Destroys the crtc state (both common and Intel-specific) for the * specified crtc. 
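The kerneldoc fixes in this area are about form: every parameter must appear as "@name:" and the names must track the signature (the "@crtc" to "@intel_crtc" rename below is exactly that). For reference, the expected shape of such a block, shown on a hypothetical function:

/**
 * frob_widget - frobnicate a widget
 * @widget: the widget to frobnicate
 * @flags: FROB_* behaviour flags
 *
 * The one-line summary comes first, then every parameter as "@name:",
 * matching the signature exactly, then the longer description.
 *
 * Return: 0 on success or a negative error code.
 */
int frob_widget(struct widget *widget, unsigned long flags);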
*/ void intel_crtc_destroy_state(struct drm_crtc *crtc, - struct drm_crtc_state *state) + struct drm_crtc_state *state) { drm_atomic_helper_crtc_destroy_state(crtc, state); } @@ -200,7 +203,7 @@ intel_crtc_destroy_state(struct drm_crtc *crtc, /** * intel_atomic_setup_scalers() - setup scalers for crtc per staged requests * @dev_priv: i915 device - * @crtc: intel crtc + * @intel_crtc: intel crtc * @crtc_state: incoming crtc_state to validate and setup scalers * * This function sets up scalers based on staged scaling requests for diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 8e6dc159f64d..7481ce85746b 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -56,7 +56,6 @@ intel_create_plane_state(struct drm_plane *plane) state->base.plane = plane; state->base.rotation = DRM_MODE_ROTATE_0; - state->ckey.flags = I915_SET_COLORKEY_NONE; return state; } @@ -86,6 +85,7 @@ intel_plane_duplicate_state(struct drm_plane *plane) __drm_atomic_helper_plane_duplicate_state(plane, state); intel_state->vma = NULL; + intel_state->flags = 0; return state; } @@ -129,14 +129,6 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ if (!intel_state->base.crtc && !old_plane_state->base.crtc) return 0; - /* Clip all planes to CRTC size, or 0x0 if CRTC is disabled */ - intel_state->clip.x1 = 0; - intel_state->clip.y1 = 0; - intel_state->clip.x2 = - crtc_state->base.enable ? crtc_state->pipe_src_w : 0; - intel_state->clip.y2 = - crtc_state->base.enable ? crtc_state->pipe_src_h : 0; - if (state->fb && drm_rotation_90_or_270(state->rotation)) { struct drm_format_name_buf format_name; diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 4a01f62a392d..709d6ca68074 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -704,7 +704,7 @@ void intel_init_audio_hooks(struct drm_i915_private *dev_priv) } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { dev_priv->display.audio_codec_enable = ilk_audio_codec_enable; dev_priv->display.audio_codec_disable = ilk_audio_codec_disable; - } else if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8) { + } else if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8) { dev_priv->display.audio_codec_enable = hsw_audio_codec_enable; dev_priv->display.audio_codec_disable = hsw_audio_codec_disable; } else if (HAS_PCH_SPLIT(dev_priv)) { diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index b49a2df44430..c5c7530ba157 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -391,7 +391,7 @@ parse_sdvo_panel_data(struct drm_i915_private *dev_priv, static int intel_bios_ssc_frequency(struct drm_i915_private *dev_priv, bool alternate) { - switch (INTEL_INFO(dev_priv)->gen) { + switch (INTEL_GEN(dev_priv)) { case 2: return alternate ? 
66667 : 48000; case 3: @@ -1228,6 +1228,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, {DVO_PORT_HDMIC, DVO_PORT_DPC, -1}, {DVO_PORT_HDMID, DVO_PORT_DPD, -1}, {DVO_PORT_CRT, DVO_PORT_HDMIE, DVO_PORT_DPE}, + {DVO_PORT_HDMIF, DVO_PORT_DPF, -1}, }; /* @@ -1355,6 +1356,27 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, DRM_DEBUG_KMS("VBT HDMI boost level for port %c: %d\n", port_name(port), info->hdmi_boost_level); } + + /* DP max link rate for CNL+ */ + if (bdb_version >= 216) { + switch (child->dp_max_link_rate) { + default: + case VBT_DP_MAX_LINK_RATE_HBR3: + info->dp_max_link_rate = 810000; + break; + case VBT_DP_MAX_LINK_RATE_HBR2: + info->dp_max_link_rate = 540000; + break; + case VBT_DP_MAX_LINK_RATE_HBR: + info->dp_max_link_rate = 270000; + break; + case VBT_DP_MAX_LINK_RATE_LBR: + info->dp_max_link_rate = 162000; + break; + } + DRM_DEBUG_KMS("VBT DP max link rate for port %c: %d\n", + port_name(port), info->dp_max_link_rate); + } } static void parse_ddi_ports(struct drm_i915_private *dev_priv, u8 bdb_version) @@ -1801,6 +1823,7 @@ bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port por [PORT_C] = { DVO_PORT_DPC, DVO_PORT_HDMIC, }, [PORT_D] = { DVO_PORT_DPD, DVO_PORT_HDMID, }, [PORT_E] = { DVO_PORT_DPE, DVO_PORT_HDMIE, }, + [PORT_F] = { DVO_PORT_DPF, DVO_PORT_HDMIF, }, }; int i; @@ -1839,6 +1862,7 @@ bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port) [PORT_C] = DVO_PORT_DPC, [PORT_D] = DVO_PORT_DPD, [PORT_E] = DVO_PORT_DPE, + [PORT_F] = DVO_PORT_DPF, }; int i; @@ -1874,6 +1898,7 @@ static bool child_dev_is_dp_dual_mode(const struct child_device_config *child, [PORT_C] = { DVO_PORT_DPC, DVO_PORT_HDMIC, }, [PORT_D] = { DVO_PORT_DPD, DVO_PORT_HDMID, }, [PORT_E] = { DVO_PORT_DPE, DVO_PORT_HDMIE, }, + [PORT_F] = { DVO_PORT_DPF, DVO_PORT_HDMIF, }, }; if (port == PORT_A || port >= ARRAY_SIZE(port_mapping)) @@ -2040,6 +2065,11 @@ intel_bios_is_lspcon_present(struct drm_i915_private *dev_priv, if (port == PORT_D) return true; break; + case DVO_PORT_DPF: + case DVO_PORT_HDMIF: + if (port == PORT_F) + return true; + break; default: break; } diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index f54ddda9fdad..1f79e7a47433 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -168,17 +168,21 @@ static void irq_enable(struct intel_engine_cs *engine) set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); /* Caller disables interrupts */ - spin_lock(&engine->i915->irq_lock); - engine->irq_enable(engine); - spin_unlock(&engine->i915->irq_lock); + if (engine->irq_enable) { + spin_lock(&engine->i915->irq_lock); + engine->irq_enable(engine); + spin_unlock(&engine->i915->irq_lock); + } } static void irq_disable(struct intel_engine_cs *engine) { /* Caller disables interrupts */ - spin_lock(&engine->i915->irq_lock); - engine->irq_disable(engine); - spin_unlock(&engine->i915->irq_lock); + if (engine->irq_disable) { + spin_lock(&engine->i915->irq_lock); + engine->irq_disable(engine); + spin_unlock(&engine->i915->irq_lock); + } } void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) @@ -224,7 +228,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) struct intel_wait *wait, *n; if (!b->irq_armed) - goto wakeup_signaler; + return; /* * We only disarm the irq when we are idle (all requests completed), @@ -243,20 +247,14 @@ void 
intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) spin_unlock(&b->irq_lock); rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) { + GEM_BUG_ON(!i915_seqno_passed(intel_engine_get_seqno(engine), + wait->seqno)); RB_CLEAR_NODE(&wait->node); wake_up_process(wait->tsk); } b->waiters = RB_ROOT; spin_unlock_irq(&b->rb_lock); - - /* - * The signaling thread may be asleep holding a reference to a request, - * that had its signaling cancelled prior to being preempted. We need - * to kick the signaler, just in case, to release any such reference. - */ -wakeup_signaler: - wake_up_process(b->signaler); } static bool use_fake_irq(const struct intel_breadcrumbs *b) @@ -344,7 +342,8 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, lockdep_assert_held(&b->rb_lock); GEM_BUG_ON(b->irq_wait == wait); - /* This request is completed, so remove it from the tree, mark it as + /* + * This request is completed, so remove it from the tree, mark it as * complete, and *then* wake up the associated task. N.B. when the * task wakes up, it will find the empty rb_node, discern that it * has already been removed from the tree and skip the serialisation @@ -355,7 +354,8 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, rb_erase(&wait->node, &b->waiters); RB_CLEAR_NODE(&wait->node); - wake_up_process(wait->tsk); /* implicit smp_wmb() */ + if (wait->tsk->state != TASK_RUNNING) + wake_up_process(wait->tsk); /* implicit smp_wmb() */ } static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine, @@ -385,6 +385,8 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, bool first, armed; u32 seqno; + GEM_BUG_ON(!wait->seqno); + /* Insert the request into the retirement ordered list * of waiters by walking the rbtree. If we are the oldest * seqno in the tree (the first to be retired), then @@ -594,23 +596,6 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, spin_unlock_irq(&b->rb_lock); } -static bool signal_complete(const struct drm_i915_gem_request *request) -{ - if (!request) - return false; - - /* - * Carefully check if the request is complete, giving time for the - * seqno to be visible or if the GPU hung. - */ - return __i915_request_irq_complete(request); -} - -static struct drm_i915_gem_request *to_signaler(struct rb_node *rb) -{ - return rb_entry(rb, struct drm_i915_gem_request, signaling.node); -} - static void signaler_set_rtpriority(void) { struct sched_param param = { .sched_priority = 1 }; @@ -622,17 +607,22 @@ static int intel_breadcrumbs_signaler(void *arg) { struct intel_engine_cs *engine = arg; struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct drm_i915_gem_request *request; + struct i915_request *rq, *n; /* Install ourselves with high priority to reduce signalling latency */ signaler_set_rtpriority(); do { bool do_schedule = true; + LIST_HEAD(list); + u32 seqno; set_current_state(TASK_INTERRUPTIBLE); + if (list_empty(&b->signals)) + goto sleep; - /* We are either woken up by the interrupt bottom-half, + /* + * We are either woken up by the interrupt bottom-half, * or by a client adding a new signaller. In both cases, * the GPU seqno may have advanced beyond our oldest signal. * If it has, propagate the signal, remove the waiter and @@ -640,48 +630,45 @@ static int intel_breadcrumbs_signaler(void *arg) * need to wait for a new interrupt from the GPU or for * a new client. 
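The signaler rewrite that follows replaces the one-request-at-a-time rbtree walk with a batched drain: completed requests are unlinked onto a private list while b->rb_lock is held, and the dma_fence_signal() calls happen only after the lock is dropped. The shape of that pattern, reduced to a userspace sketch (types, names and the pthread mutex are all stand-ins):

#include <pthread.h>
#include <stdio.h>

struct signal {
        struct signal *next;
        unsigned int seqno;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct signal *pending;  /* kept sorted, oldest seqno first */

static void drain_and_signal(unsigned int hw_seqno)
{
        struct signal *done = NULL, **tail = &done;

        /* Unlink every completed entry while holding the lock... */
        pthread_mutex_lock(&lock);
        while (pending && (int)(hw_seqno - pending->seqno) >= 0) {
                struct signal *s = pending;

                pending = s->next;
                s->next = NULL;
                *tail = s;
                tail = &s->next;
        }
        pthread_mutex_unlock(&lock);

        /* ...but do the potentially slow signalling outside of it. */
        for (; done; done = done->next)
                printf("signal seqno %u\n", done->seqno);
}

int main(void)
{
        struct signal c = { NULL, 5 }, b = { &c, 2 }, a = { &b, 1 };

        pending = &a;
        drain_and_signal(3);    /* signals 1 and 2; 5 stays pending */
        return 0;
}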
*/ - rcu_read_lock(); - request = rcu_dereference(b->first_signal); - if (request) - request = i915_gem_request_get_rcu(request); - rcu_read_unlock(); - if (signal_complete(request)) { - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &request->fence.flags)) { - local_bh_disable(); - dma_fence_signal(&request->fence); - GEM_BUG_ON(!i915_gem_request_completed(request)); - local_bh_enable(); /* kick start the tasklets */ - } + seqno = intel_engine_get_seqno(engine); - spin_lock_irq(&b->rb_lock); + spin_lock_irq(&b->rb_lock); + list_for_each_entry_safe(rq, n, &b->signals, signaling.link) { + u32 this = rq->signaling.wait.seqno; - /* Wake up all other completed waiters and select the - * next bottom-half for the next user interrupt. - */ - __intel_engine_remove_wait(engine, - &request->signaling.wait); - - /* Find the next oldest signal. Note that as we have - * not been holding the lock, another client may - * have installed an even older signal than the one - * we just completed - so double check we are still - * the oldest before picking the next one. - */ - if (request == rcu_access_pointer(b->first_signal)) { - struct rb_node *rb = - rb_next(&request->signaling.node); - rcu_assign_pointer(b->first_signal, - rb ? to_signaler(rb) : NULL); - } - rb_erase(&request->signaling.node, &b->signals); - RB_CLEAR_NODE(&request->signaling.node); + GEM_BUG_ON(!rq->signaling.wait.seqno); + + if (!i915_seqno_passed(seqno, this)) + break; - spin_unlock_irq(&b->rb_lock); + if (likely(this == i915_request_global_seqno(rq))) { + __intel_engine_remove_wait(engine, + &rq->signaling.wait); - i915_gem_request_put(request); + rq->signaling.wait.seqno = 0; + __list_del_entry(&rq->signaling.link); - /* If the engine is saturated we may be continually + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &rq->fence.flags)) { + list_add_tail(&rq->signaling.link, + &list); + i915_request_get(rq); + } + } + } + spin_unlock_irq(&b->rb_lock); + + if (!list_empty(&list)) { + local_bh_disable(); + list_for_each_entry_safe(rq, n, &list, signaling.link) { + dma_fence_signal(&rq->fence); + GEM_BUG_ON(!i915_request_completed(rq)); + i915_request_put(rq); + } + local_bh_enable(); /* kick start the tasklets */ + + /* + * If the engine is saturated we may be continually * processing completed requests. This angers the * NMI watchdog if we never let anything else * have access to the CPU. Let's pretend to be nice @@ -692,31 +679,65 @@ static int intel_breadcrumbs_signaler(void *arg) } if (unlikely(do_schedule)) { + /* Before we sleep, check for a missed seqno */ + if (current->state & TASK_NORMAL && + !list_empty(&b->signals) && + engine->irq_seqno_barrier && + test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, + &engine->irq_posted)) { + engine->irq_seqno_barrier(engine); + intel_engine_wakeup(engine); + } + +sleep: if (kthread_should_park()) kthread_parkme(); - if (unlikely(kthread_should_stop())) { - i915_gem_request_put(request); + if (unlikely(kthread_should_stop())) break; - } schedule(); } - i915_gem_request_put(request); } while (1); __set_current_state(TASK_RUNNING); return 0; } -void intel_engine_enable_signaling(struct drm_i915_gem_request *request, - bool wakeup) +static void insert_signal(struct intel_breadcrumbs *b, + struct i915_request *request, + const u32 seqno) +{ + struct i915_request *iter; + + lockdep_assert_held(&b->rb_lock); + + /* + * A reasonable assumption is that we are called to add signals + * in sequence, as the requests are submitted for execution and + * assigned a global_seqno. 
This will be the case for the majority + * of internally generated signals (inter-engine signaling). + * + * Out of order waiters triggering random signaling enabling will + * be more problematic, but hopefully rare enough and the list + * small enough that the O(N) insertion sort is not an issue. + */ + + list_for_each_entry_reverse(iter, &b->signals, signaling.link) + if (i915_seqno_passed(seqno, iter->signaling.wait.seqno)) + break; + + list_add(&request->signaling.link, &iter->signaling.link); +} + +void intel_engine_enable_signaling(struct i915_request *request, bool wakeup) { struct intel_engine_cs *engine = request->engine; struct intel_breadcrumbs *b = &engine->breadcrumbs; u32 seqno; - /* Note that we may be called from an interrupt handler on another + /* + * Note that we may be called from an interrupt handler on another * device (e.g. nouveau signaling a fence completion causing us * to submit a request, and so enable signaling). As such, * we need to make sure that all other users of b->rb_lock protect @@ -727,18 +748,17 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request, GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&request->lock); - seqno = i915_gem_request_global_seqno(request); - if (!seqno) + seqno = i915_request_global_seqno(request); + if (!seqno) /* will be enabled later upon execution */ return; + GEM_BUG_ON(request->signaling.wait.seqno); request->signaling.wait.tsk = b->signaler; request->signaling.wait.request = request; request->signaling.wait.seqno = seqno; - i915_gem_request_get(request); - - spin_lock(&b->rb_lock); - /* First add ourselves into the list of waiters, but register our + /* + * Add ourselves into the list of waiters, registering our * bottom-half as the signaller thread. As per usual, only the oldest * waiter (not just signaller) is tasked as the bottom-half waking * up all completed waiters after the user interrupt. @@ -746,73 +766,31 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request, * If we are the oldest waiter, enable the irq (after which we * must double check that the seqno did not complete). */ + spin_lock(&b->rb_lock); + insert_signal(b, request, seqno); wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait); - - if (!__i915_gem_request_completed(request, seqno)) { - struct rb_node *parent, **p; - bool first; - - /* Now insert ourselves into the retirement ordered list of - * signals on this engine. We track the oldest seqno as that - * will be the first signal to complete.
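insert_signal() above bets that new signals almost always carry the highest seqno so far, so walking the list backwards finds the insertion point immediately in the common case and only degrades to O(N) for out-of-order enabling. The same idea over a plain array, as an illustrative sketch:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t signals[16];    /* kept sorted, oldest seqno first */
static unsigned int count;

static void insert_signal(uint32_t seqno)
{
        unsigned int i = count;

        /* Walk backwards: in-order submission exits immediately. */
        while (i > 0 && (int32_t)(seqno - signals[i - 1]) < 0)
                i--;

        memmove(&signals[i + 1], &signals[i],
                (count - i) * sizeof(*signals));
        signals[i] = seqno;
        count++;
}

int main(void)
{
        const uint32_t in[] = { 1, 2, 5, 3, 4 };
        unsigned int i;

        for (i = 0; i < sizeof(in) / sizeof(in[0]); i++)
                insert_signal(in[i]);
        for (i = 0; i < count; i++)
                printf("%u ", signals[i]);      /* 1 2 3 4 5 */
        printf("\n");
        return 0;
}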
- */ - parent = NULL; - first = true; - p = &b->signals.rb_node; - while (*p) { - parent = *p; - if (i915_seqno_passed(seqno, - to_signaler(parent)->signaling.wait.seqno)) { - p = &parent->rb_right; - first = false; - } else { - p = &parent->rb_left; - } - } - rb_link_node(&request->signaling.node, parent, p); - rb_insert_color(&request->signaling.node, &b->signals); - if (first) - rcu_assign_pointer(b->first_signal, request); - } else { - __intel_engine_remove_wait(engine, &request->signaling.wait); - i915_gem_request_put(request); - wakeup = false; - } - spin_unlock(&b->rb_lock); if (wakeup) wake_up_process(b->signaler); } -void intel_engine_cancel_signaling(struct drm_i915_gem_request *request) +void intel_engine_cancel_signaling(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; struct intel_breadcrumbs *b = &engine->breadcrumbs; GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&request->lock); - GEM_BUG_ON(!request->signaling.wait.seqno); - spin_lock(&b->rb_lock); - - if (!RB_EMPTY_NODE(&request->signaling.node)) { - if (request == rcu_access_pointer(b->first_signal)) { - struct rb_node *rb = - rb_next(&request->signaling.node); - rcu_assign_pointer(b->first_signal, - rb ? to_signaler(rb) : NULL); - } - rb_erase(&request->signaling.node, &b->signals); - RB_CLEAR_NODE(&request->signaling.node); - i915_gem_request_put(request); - } + if (!READ_ONCE(request->signaling.wait.seqno)) + return; + spin_lock(&b->rb_lock); __intel_engine_remove_wait(engine, &request->signaling.wait); - + if (fetch_and_zero(&request->signaling.wait.seqno)) + __list_del_entry(&request->signaling.link); spin_unlock(&b->rb_lock); - - request->signaling.wait.seqno = 0; } int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) @@ -826,6 +804,8 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0); timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0); + INIT_LIST_HEAD(&b->signals); + /* Spawn a thread to provide a common bottom-half for all signals. * As this is an asynchronous interface we cannot steal the current * task for handling the bottom-half to the user interrupt, therefore @@ -885,8 +865,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) /* The engines should be idle and all requests accounted for! 
*/ WARN_ON(READ_ONCE(b->irq_wait)); WARN_ON(!RB_EMPTY_ROOT(&b->waiters)); - WARN_ON(rcu_access_pointer(b->first_signal)); - WARN_ON(!RB_EMPTY_ROOT(&b->signals)); + WARN_ON(!list_empty(&b->signals)); if (!IS_ERR_OR_NULL(b->signaler)) kthread_stop(b->signaler); @@ -894,28 +873,6 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) cancel_fake_irq(engine); } -bool intel_breadcrumbs_busy(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - bool busy = false; - - spin_lock_irq(&b->rb_lock); - - if (b->irq_wait) { - wake_up_process(b->irq_wait->tsk); - busy = true; - } - - if (rcu_access_pointer(b->first_signal)) { - wake_up_process(b->signaler); - busy = true; - } - - spin_unlock_irq(&b->rb_lock); - - return busy; -} - #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/intel_breadcrumbs.c" #endif diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 1704c8897afd..dc7db8a2caf8 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -858,7 +858,7 @@ static void skl_get_cdclk(struct drm_i915_private *dev_priv, skl_dpll0_update(dev_priv, cdclk_state); - cdclk_state->cdclk = cdclk_state->ref; + cdclk_state->cdclk = cdclk_state->bypass = cdclk_state->ref; if (cdclk_state->vco == 0) goto out; @@ -1006,7 +1006,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, /* Choose frequency for this cdclk */ switch (cdclk) { default: - WARN_ON(cdclk != dev_priv->cdclk.hw.ref); + WARN_ON(cdclk != dev_priv->cdclk.hw.bypass); WARN_ON(vco != 0); /* fall through */ case 308571: @@ -1085,7 +1085,7 @@ static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) /* Is PLL enabled and locked ? */ if (dev_priv->cdclk.hw.vco == 0 || - dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.ref) + dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.bypass) goto sanitize; /* DPLL okay; verify the cdclock @@ -1159,7 +1159,7 @@ void skl_uninit_cdclk(struct drm_i915_private *dev_priv) { struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; - cdclk_state.cdclk = cdclk_state.ref; + cdclk_state.cdclk = cdclk_state.bypass; cdclk_state.vco = 0; cdclk_state.voltage_level = skl_calc_voltage_level(cdclk_state.cdclk); @@ -1199,7 +1199,7 @@ static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) { int ratio; - if (cdclk == dev_priv->cdclk.hw.ref) + if (cdclk == dev_priv->cdclk.hw.bypass) return 0; switch (cdclk) { @@ -1224,7 +1224,7 @@ static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) { int ratio; - if (cdclk == dev_priv->cdclk.hw.ref) + if (cdclk == dev_priv->cdclk.hw.bypass) return 0; switch (cdclk) { @@ -1268,7 +1268,7 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv, bxt_de_pll_update(dev_priv, cdclk_state); - cdclk_state->cdclk = cdclk_state->ref; + cdclk_state->cdclk = cdclk_state->bypass = cdclk_state->ref; if (cdclk_state->vco == 0) goto out; @@ -1352,7 +1352,7 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, /* cdclk = vco / 2 / div{1,1.5,2,4} */ switch (DIV_ROUND_CLOSEST(vco, cdclk)) { default: - WARN_ON(cdclk != dev_priv->cdclk.hw.ref); + WARN_ON(cdclk != dev_priv->cdclk.hw.bypass); WARN_ON(vco != 0); /* fall through */ case 2: @@ -1378,7 +1378,7 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write_timeout(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, - 0x80000000, 2000); + 0x80000000, 150, 2); mutex_unlock(&dev_priv->pcu_lock); if (ret) { @@ -1417,7 
+1417,7 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, */ ret = sandybridge_pcode_write_timeout(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, - cdclk_state->voltage_level, 2000); + cdclk_state->voltage_level, 150, 2); mutex_unlock(&dev_priv->pcu_lock); if (ret) { @@ -1437,7 +1437,7 @@ static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); if (dev_priv->cdclk.hw.vco == 0 || - dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.ref) + dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.bypass) goto sanitize; /* DPLL okay; verify the cdclock @@ -1526,7 +1526,7 @@ void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) { struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; - cdclk_state.cdclk = cdclk_state.ref; + cdclk_state.cdclk = cdclk_state.bypass; cdclk_state.vco = 0; cdclk_state.voltage_level = bxt_calc_voltage_level(cdclk_state.cdclk); @@ -1586,7 +1586,7 @@ static void cnl_get_cdclk(struct drm_i915_private *dev_priv, cnl_cdclk_pll_update(dev_priv, cdclk_state); - cdclk_state->cdclk = cdclk_state->ref; + cdclk_state->cdclk = cdclk_state->bypass = cdclk_state->ref; if (cdclk_state->vco == 0) goto out; @@ -1672,7 +1672,7 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, /* cdclk = vco / 2 / div{1,2} */ switch (DIV_ROUND_CLOSEST(vco, cdclk)) { default: - WARN_ON(cdclk != dev_priv->cdclk.hw.ref); + WARN_ON(cdclk != dev_priv->cdclk.hw.bypass); WARN_ON(vco != 0); /* fall through */ case 2: @@ -1717,7 +1717,7 @@ static int cnl_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) { int ratio; - if (cdclk == dev_priv->cdclk.hw.ref) + if (cdclk == dev_priv->cdclk.hw.bypass) return 0; switch (cdclk) { @@ -1744,7 +1744,7 @@ static void cnl_sanitize_cdclk(struct drm_i915_private *dev_priv) intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); if (dev_priv->cdclk.hw.vco == 0 || - dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.ref) + dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.bypass) goto sanitize; /* DPLL okay; verify the cdclock @@ -1778,6 +1778,199 @@ sanitize: dev_priv->cdclk.hw.vco = -1; } +static int icl_calc_cdclk(int min_cdclk, unsigned int ref) +{ + int ranges_24[] = { 312000, 552000, 648000 }; + int ranges_19_38[] = { 307200, 556800, 652800 }; + int *ranges; + + switch (ref) { + default: + MISSING_CASE(ref); + case 24000: + ranges = ranges_24; + break; + case 19200: + case 38400: + ranges = ranges_19_38; + break; + } + + if (min_cdclk > ranges[1]) + return ranges[2]; + else if (min_cdclk > ranges[0]) + return ranges[1]; + else + return ranges[0]; +} + +static int icl_calc_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) +{ + int ratio; + + if (cdclk == dev_priv->cdclk.hw.bypass) + return 0; + + switch (cdclk) { + default: + MISSING_CASE(cdclk); + case 307200: + case 556800: + case 652800: + WARN_ON(dev_priv->cdclk.hw.ref != 19200 && + dev_priv->cdclk.hw.ref != 38400); + break; + case 312000: + case 552000: + case 648000: + WARN_ON(dev_priv->cdclk.hw.ref != 24000); + } + + ratio = cdclk / (dev_priv->cdclk.hw.ref / 2); + + return dev_priv->cdclk.hw.ref * ratio; +} + +static void icl_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + unsigned int cdclk = cdclk_state->cdclk; + unsigned int vco = cdclk_state->vco; + int ret; + + mutex_lock(&dev_priv->pcu_lock); + ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, + SKL_CDCLK_PREPARE_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, 3); + 
mutex_unlock(&dev_priv->pcu_lock); + if (ret) { + DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", + ret); + return; + } + + if (dev_priv->cdclk.hw.vco != 0 && + dev_priv->cdclk.hw.vco != vco) + cnl_cdclk_pll_disable(dev_priv); + + if (dev_priv->cdclk.hw.vco != vco) + cnl_cdclk_pll_enable(dev_priv, vco); + + I915_WRITE(CDCLK_CTL, ICL_CDCLK_CD2X_PIPE_NONE | + skl_cdclk_decimal(cdclk)); + + mutex_lock(&dev_priv->pcu_lock); + /* TODO: add proper DVFS support. */ + sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, 2); + mutex_unlock(&dev_priv->pcu_lock); + + intel_update_cdclk(dev_priv); +} + +static void icl_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + u32 val; + + cdclk_state->bypass = 50000; + + val = I915_READ(SKL_DSSM); + switch (val & ICL_DSSM_CDCLK_PLL_REFCLK_MASK) { + default: + MISSING_CASE(val); + case ICL_DSSM_CDCLK_PLL_REFCLK_24MHz: + cdclk_state->ref = 24000; + break; + case ICL_DSSM_CDCLK_PLL_REFCLK_19_2MHz: + cdclk_state->ref = 19200; + break; + case ICL_DSSM_CDCLK_PLL_REFCLK_38_4MHz: + cdclk_state->ref = 38400; + break; + } + + val = I915_READ(BXT_DE_PLL_ENABLE); + if ((val & BXT_DE_PLL_PLL_ENABLE) == 0 || + (val & BXT_DE_PLL_LOCK) == 0) { + /* + * CDCLK PLL is disabled, the VCO/ratio doesn't matter, but + * setting it to zero is a way to signal that. + */ + cdclk_state->vco = 0; + cdclk_state->cdclk = cdclk_state->bypass; + return; + } + + cdclk_state->vco = (val & BXT_DE_PLL_RATIO_MASK) * cdclk_state->ref; + + val = I915_READ(CDCLK_CTL); + WARN_ON((val & BXT_CDCLK_CD2X_DIV_SEL_MASK) != 0); + + cdclk_state->cdclk = cdclk_state->vco / 2; +} + +/** + * icl_init_cdclk - Initialize CDCLK on ICL + * @dev_priv: i915 device + * + * Initialize CDCLK for ICL. This consists mainly of initializing + * dev_priv->cdclk.hw and sanitizing the state of the hardware if needed. This + * is generally done only during the display core initialization sequence, after + * which the DMC will take care of turning CDCLK off/on as needed. + */ +void icl_init_cdclk(struct drm_i915_private *dev_priv) +{ + struct intel_cdclk_state sanitized_state; + u32 val; + + /* This sets dev_priv->cdclk.hw. */ + intel_update_cdclk(dev_priv); + intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); + + /* This means CDCLK disabled. */ + if (dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.bypass) + goto sanitize; + + val = I915_READ(CDCLK_CTL); + + if ((val & BXT_CDCLK_CD2X_DIV_SEL_MASK) != 0) + goto sanitize; + + if ((val & CDCLK_FREQ_DECIMAL_MASK) != + skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk)) + goto sanitize; + + return; + +sanitize: + DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); + + sanitized_state.ref = dev_priv->cdclk.hw.ref; + sanitized_state.cdclk = icl_calc_cdclk(0, sanitized_state.ref); + sanitized_state.vco = icl_calc_cdclk_pll_vco(dev_priv, + sanitized_state.cdclk); + + icl_set_cdclk(dev_priv, &sanitized_state); +} + +/** + * icl_uninit_cdclk - Uninitialize CDCLK on ICL + * @dev_priv: i915 device + * + * Uninitialize CDCLK for ICL. This is done only during the display core + * uninitialization sequence. 
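The ICL numbers above hang together simply: the CDCLK PLL ratio is cdclk / (ref / 2), so the VCO always lands at exactly twice the target cdclk, which is why icl_get_cdclk() reads the frequency back as vco / 2 while the CD2X divider is 1. A quick standalone check:

#include <stdio.h>

/* VCO = ref * ratio, with ratio = cdclk / (ref / 2), all in kHz. */
static int icl_vco(int ref, int cdclk)
{
        return ref * (cdclk / (ref / 2));
}

int main(void)
{
        printf("19200 kHz ref, 307200 kHz cdclk -> vco %d kHz\n",
               icl_vco(19200, 307200));         /* 614400 */
        printf("24000 kHz ref, 648000 kHz cdclk -> vco %d kHz\n",
               icl_vco(24000, 648000));         /* 1296000 */
        return 0;
}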
+ */ +void icl_uninit_cdclk(struct drm_i915_private *dev_priv) +{ + struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; + + cdclk_state.cdclk = cdclk_state.bypass; + cdclk_state.vco = 0; + + icl_set_cdclk(dev_priv, &cdclk_state); +} + /** * cnl_init_cdclk - Initialize CDCLK on CNL * @dev_priv: i915 device @@ -1817,7 +2010,7 @@ void cnl_uninit_cdclk(struct drm_i915_private *dev_priv) { struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; - cdclk_state.cdclk = cdclk_state.ref; + cdclk_state.cdclk = cdclk_state.bypass; cdclk_state.vco = 0; cdclk_state.voltage_level = cnl_calc_voltage_level(cdclk_state.cdclk); @@ -1858,9 +2051,10 @@ bool intel_cdclk_changed(const struct intel_cdclk_state *a, void intel_dump_cdclk_state(const struct intel_cdclk_state *cdclk_state, const char *context) { - DRM_DEBUG_DRIVER("%s %d kHz, VCO %d kHz, ref %d kHz, voltage level %d\n", + DRM_DEBUG_DRIVER("%s %d kHz, VCO %d kHz, ref %d kHz, bypass %d kHz, voltage level %d\n", context, cdclk_state->cdclk, cdclk_state->vco, - cdclk_state->ref, cdclk_state->voltage_level); + cdclk_state->ref, cdclk_state->bypass, + cdclk_state->voltage_level); } /** @@ -2215,6 +2409,36 @@ static int cnl_modeset_calc_cdclk(struct drm_atomic_state *state) return 0; } +static int icl_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->dev); + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + unsigned int ref = intel_state->cdclk.logical.ref; + int min_cdclk, cdclk, vco; + + min_cdclk = intel_compute_min_cdclk(state); + if (min_cdclk < 0) + return min_cdclk; + + cdclk = icl_calc_cdclk(min_cdclk, ref); + vco = icl_calc_cdclk_pll_vco(dev_priv, cdclk); + + intel_state->cdclk.logical.vco = vco; + intel_state->cdclk.logical.cdclk = cdclk; + + if (!intel_state->active_crtcs) { + cdclk = icl_calc_cdclk(0, ref); + vco = icl_calc_cdclk_pll_vco(dev_priv, cdclk); + + intel_state->cdclk.actual.vco = vco; + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = intel_state->cdclk.logical; + } + + return 0; +} + static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) { int max_cdclk_freq = dev_priv->max_cdclk_freq; @@ -2232,7 +2456,7 @@ static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) return max_cdclk_freq; else if (IS_CHERRYVIEW(dev_priv)) return max_cdclk_freq*95/100; - else if (INTEL_INFO(dev_priv)->gen < 4) + else if (INTEL_GEN(dev_priv) < 4) return 2*max_cdclk_freq*90/100; else return max_cdclk_freq*90/100; @@ -2248,7 +2472,12 @@ static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) */ void intel_update_max_cdclk(struct drm_i915_private *dev_priv) { - if (IS_CANNONLAKE(dev_priv)) { + if (IS_ICELAKE(dev_priv)) { + if (dev_priv->cdclk.hw.ref == 24000) + dev_priv->max_cdclk_freq = 648000; + else + dev_priv->max_cdclk_freq = 652800; + } else if (IS_CANNONLAKE(dev_priv)) { dev_priv->max_cdclk_freq = 528000; } else if (IS_GEN9_BC(dev_priv)) { u32 limit = I915_READ(SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK; @@ -2354,6 +2583,30 @@ static int cnp_rawclk(struct drm_i915_private *dev_priv) return divider + fraction; } +static int icp_rawclk(struct drm_i915_private *dev_priv) +{ + u32 rawclk; + int divider, numerator, denominator, frequency; + + if (I915_READ(SFUSE_STRAP) & SFUSE_STRAP_RAW_FREQUENCY) { + frequency = 24000; + divider = 23; + numerator = 0; + denominator = 0; + } else { + frequency = 19200; + divider = 18; + numerator = 1; + denominator = 4; + } + + rawclk = CNP_RAWCLK_DIV(divider) | 
ICP_RAWCLK_NUM(numerator) | + ICP_RAWCLK_DEN(denominator); + + I915_WRITE(PCH_RAWCLK_FREQ, rawclk); + return frequency; +} + static int pch_rawclk(struct drm_i915_private *dev_priv) { return (I915_READ(PCH_RAWCLK_FREQ) & RAWCLK_FREQ_MASK) * 1000; @@ -2401,8 +2654,9 @@ static int g4x_hrawclk(struct drm_i915_private *dev_priv) */ void intel_update_rawclk(struct drm_i915_private *dev_priv) { - - if (HAS_PCH_CNP(dev_priv)) + if (HAS_PCH_ICP(dev_priv)) + dev_priv->rawclk_freq = icp_rawclk(dev_priv); + else if (HAS_PCH_CNP(dev_priv)) dev_priv->rawclk_freq = cnp_rawclk(dev_priv); else if (HAS_PCH_SPLIT(dev_priv)) dev_priv->rawclk_freq = pch_rawclk(dev_priv); @@ -2447,9 +2701,14 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) dev_priv->display.set_cdclk = cnl_set_cdclk; dev_priv->display.modeset_calc_cdclk = cnl_modeset_calc_cdclk; + } else if (IS_ICELAKE(dev_priv)) { + dev_priv->display.set_cdclk = icl_set_cdclk; + dev_priv->display.modeset_calc_cdclk = icl_modeset_calc_cdclk; } - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + dev_priv->display.get_cdclk = icl_get_cdclk; + else if (IS_CANNONLAKE(dev_priv)) dev_priv->display.get_cdclk = cnl_get_cdclk; else if (IS_GEN9_BC(dev_priv)) dev_priv->display.get_cdclk = skl_get_cdclk; diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index aa66e952a95d..c6a7beabd58d 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -39,7 +39,7 @@ #define CTM_COEFF_NEGATIVE(coeff) (((coeff) & CTM_COEFF_SIGN) != 0) #define CTM_COEFF_ABS(coeff) ((coeff) & (CTM_COEFF_SIGN - 1)) -#define LEGACY_LUT_LENGTH (sizeof(struct drm_color_lut) * 256) +#define LEGACY_LUT_LENGTH 256 /* Post offset values for RGB->YCBCR conversion */ #define POSTOFF_RGB_TO_YUV_HI 0x800 @@ -66,48 +66,49 @@ * of the CTM coefficient and we write the value from bit 3. We also round the * value. */ -#define I9XX_CSC_COEFF_FP(coeff, fbits) \ +#define ILK_CSC_COEFF_FP(coeff, fbits) \ (clamp_val(((coeff) >> (32 - (fbits) - 3)) + 4, 0, 0xfff) & 0xff8) -#define I9XX_CSC_COEFF_LIMITED_RANGE \ - I9XX_CSC_COEFF_FP(CTM_COEFF_LIMITED_RANGE, 9) -#define I9XX_CSC_COEFF_1_0 \ - ((7 << 12) | I9XX_CSC_COEFF_FP(CTM_COEFF_1_0, 8)) +#define ILK_CSC_COEFF_LIMITED_RANGE \ + ILK_CSC_COEFF_FP(CTM_COEFF_LIMITED_RANGE, 9) +#define ILK_CSC_COEFF_1_0 \ + ((7 << 12) | ILK_CSC_COEFF_FP(CTM_COEFF_1_0, 8)) static bool crtc_state_is_legacy_gamma(struct drm_crtc_state *state) { return !state->degamma_lut && !state->ctm && state->gamma_lut && - state->gamma_lut->length == LEGACY_LUT_LENGTH; + drm_color_lut_size(state->gamma_lut) == LEGACY_LUT_LENGTH; } /* * When using limited range, multiply the matrix given by userspace by - * the matrix that we would use for the limited range. We do the - * multiplication in U2.30 format. + * the matrix that we would use for the limited range. 
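+ *
+ * For illustration (made-up coefficient): a user value of 1.0 is
+ * CTM_COEFF_1_0 = 1 << 32 in S31.32 format. The >> 2 below puts the
+ * clamped magnitude on a 2^30 scale, and multiplying by the
+ * 2^32-scaled CTM_COEFF_LIMITED_RANGE (219/255) gives a 2^62-scaled
+ * product, so the final >> 30 lands back on S31.32:
+ *
+ *   mul_u32_u32(limited_coeff, 1 << 30) >> 30 == 219/255 in S31.32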
 */
-static void ctm_mult_by_limited(uint64_t *result, int64_t *input)
+static u64 *ctm_mult_by_limited(u64 *result, const u64 *input)
 {
 	int i;
 
-	for (i = 0; i < 9; i++)
-		result[i] = 0;
+	for (i = 0; i < 9; i++) {
+		u64 user_coeff = input[i];
+		u32 limited_coeff = CTM_COEFF_LIMITED_RANGE;
+		u32 abs_coeff = clamp_val(CTM_COEFF_ABS(user_coeff), 0,
+					  CTM_COEFF_4_0 - 1) >> 2;
 
-	for (i = 0; i < 3; i++) {
-		int64_t user_coeff = input[i * 3 + i];
-		uint64_t limited_coeff = CTM_COEFF_LIMITED_RANGE >> 2;
-		uint64_t abs_coeff = clamp_val(CTM_COEFF_ABS(user_coeff),
-					       0,
-					       CTM_COEFF_4_0 - 1) >> 2;
-
-		result[i * 3 + i] = (limited_coeff * abs_coeff) >> 27;
-		if (CTM_COEFF_NEGATIVE(user_coeff))
-			result[i * 3 + i] |= CTM_COEFF_SIGN;
+		/*
+		 * By scaling every coefficient by the limited range
+		 * (16-235) vs. full range (0-255) ratio, the final
+		 * output is scaled down to fit in the limited range
+		 * supported by the panel.
+		 */
+		result[i] = mul_u32_u32(limited_coeff, abs_coeff) >> 30;
+		result[i] |= user_coeff & CTM_COEFF_SIGN;
 	}
+
+	return result;
 }
 
-static void i9xx_load_ycbcr_conversion_matrix(struct intel_crtc *intel_crtc)
+static void ilk_load_ycbcr_conversion_matrix(struct intel_crtc *intel_crtc)
 {
 	int pipe = intel_crtc->pipe;
 	struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
@@ -131,8 +132,7 @@ static void i9xx_load_ycbcr_conversion_matrix(struct intel_crtc *intel_crtc)
 	I915_WRITE(PIPE_CSC_MODE(pipe), 0);
 }
 
-/* Set up the pipe CSC unit. */
-static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state)
+static void ilk_load_csc_matrix(struct drm_crtc_state *crtc_state)
 {
 	struct drm_crtc *crtc = crtc_state->crtc;
 	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
@@ -140,21 +140,27 @@ static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state)
 	int i, pipe = intel_crtc->pipe;
 	uint16_t coeffs[9] = { 0, };
 	struct intel_crtc_state *intel_crtc_state = to_intel_crtc_state(crtc_state);
+	bool limited_color_range = false;
+
+	/*
+	 * FIXME if there's a gamma LUT after the CSC, we should
+	 * do the range compression using the gamma LUT instead.
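+ *
+ * (For reference: limited range maps full range [0..255] onto
+ * [16..235], i.e. a 219/255 scale plus a +16 black level offset,
+ * the latter being the 16 * (1 << 12) / 255 post offset programmed
+ * further below.)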
+ */ + if (INTEL_GEN(dev_priv) >= 8 || IS_HASWELL(dev_priv)) + limited_color_range = intel_crtc_state->limited_color_range; if (intel_crtc_state->ycbcr420) { - i9xx_load_ycbcr_conversion_matrix(intel_crtc); + ilk_load_ycbcr_conversion_matrix(intel_crtc); return; } else if (crtc_state->ctm) { - struct drm_color_ctm *ctm = - (struct drm_color_ctm *)crtc_state->ctm->data; - uint64_t input[9] = { 0, }; - - if (intel_crtc_state->limited_color_range) { - ctm_mult_by_limited(input, ctm->matrix); - } else { - for (i = 0; i < ARRAY_SIZE(input); i++) - input[i] = ctm->matrix[i]; - } + struct drm_color_ctm *ctm = crtc_state->ctm->data; + const u64 *input; + u64 temp[9]; + + if (limited_color_range) + input = ctm_mult_by_limited(temp, ctm->matrix); + else + input = ctm->matrix; /* * Convert fixed point S31.32 input to format supported by the @@ -175,21 +181,21 @@ static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state) if (abs_coeff < CTM_COEFF_0_125) coeffs[i] |= (3 << 12) | - I9XX_CSC_COEFF_FP(abs_coeff, 12); + ILK_CSC_COEFF_FP(abs_coeff, 12); else if (abs_coeff < CTM_COEFF_0_25) coeffs[i] |= (2 << 12) | - I9XX_CSC_COEFF_FP(abs_coeff, 11); + ILK_CSC_COEFF_FP(abs_coeff, 11); else if (abs_coeff < CTM_COEFF_0_5) coeffs[i] |= (1 << 12) | - I9XX_CSC_COEFF_FP(abs_coeff, 10); + ILK_CSC_COEFF_FP(abs_coeff, 10); else if (abs_coeff < CTM_COEFF_1_0) - coeffs[i] |= I9XX_CSC_COEFF_FP(abs_coeff, 9); + coeffs[i] |= ILK_CSC_COEFF_FP(abs_coeff, 9); else if (abs_coeff < CTM_COEFF_2_0) coeffs[i] |= (7 << 12) | - I9XX_CSC_COEFF_FP(abs_coeff, 8); + ILK_CSC_COEFF_FP(abs_coeff, 8); else coeffs[i] |= (6 << 12) | - I9XX_CSC_COEFF_FP(abs_coeff, 7); + ILK_CSC_COEFF_FP(abs_coeff, 7); } } else { /* @@ -201,11 +207,11 @@ static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state) * into consideration. 
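 *
 * As the ILK_CSC_COEFF_* macros suggest, the hardware takes each
 * coefficient as a 3 bit exponent in bits 14:12 plus a 9 bit
 * mantissa in bits 11:3 (hence the 0xff8 mask), so ILK_CSC_COEFF_1_0
 * is exponent 7 with a 1.0 mantissa.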
*/ for (i = 0; i < 3; i++) { - if (intel_crtc_state->limited_color_range) + if (limited_color_range) coeffs[i * 3 + i] = - I9XX_CSC_COEFF_LIMITED_RANGE; + ILK_CSC_COEFF_LIMITED_RANGE; else - coeffs[i * 3 + i] = I9XX_CSC_COEFF_1_0; + coeffs[i * 3 + i] = ILK_CSC_COEFF_1_0; } } @@ -225,7 +231,7 @@ static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state) if (INTEL_GEN(dev_priv) > 6) { uint16_t postoff = 0; - if (intel_crtc_state->limited_color_range) + if (limited_color_range) postoff = (16 * (1 << 12) / 255) & 0x1fff; I915_WRITE(PIPE_CSC_POSTOFF_HI(pipe), postoff); @@ -236,7 +242,7 @@ static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state) } else { uint32_t mode = CSC_MODE_YUV_TO_RGB; - if (intel_crtc_state->limited_color_range) + if (limited_color_range) mode |= CSC_BLACK_SCREEN_OFFSET; I915_WRITE(PIPE_CSC_MODE(pipe), mode); @@ -255,8 +261,7 @@ static void cherryview_load_csc_matrix(struct drm_crtc_state *state) uint32_t mode; if (state->ctm) { - struct drm_color_ctm *ctm = - (struct drm_color_ctm *) state->ctm->data; + struct drm_color_ctm *ctm = state->ctm->data; uint16_t coeffs[9] = { 0, }; int i; @@ -323,7 +328,7 @@ static void i9xx_load_luts_internal(struct drm_crtc *crtc, } if (blob) { - struct drm_color_lut *lut = (struct drm_color_lut *) blob->data; + struct drm_color_lut *lut = blob->data; for (i = 0; i < 256; i++) { uint32_t word = (drm_color_lut_extract(lut[i].red, 8) << 16) | @@ -393,8 +398,7 @@ static void bdw_load_degamma_lut(struct drm_crtc_state *state) PAL_PREC_SPLIT_MODE | PAL_PREC_AUTO_INCREMENT); if (state->degamma_lut) { - struct drm_color_lut *lut = - (struct drm_color_lut *) state->degamma_lut->data; + struct drm_color_lut *lut = state->degamma_lut->data; for (i = 0; i < lut_size; i++) { uint32_t word = @@ -428,8 +432,7 @@ static void bdw_load_gamma_lut(struct drm_crtc_state *state, u32 offset) offset); if (state->gamma_lut) { - struct drm_color_lut *lut = - (struct drm_color_lut *) state->gamma_lut->data; + struct drm_color_lut *lut = state->gamma_lut->data; for (i = 0; i < lut_size; i++) { uint32_t word = @@ -561,7 +564,7 @@ static void cherryview_load_luts(struct drm_crtc_state *state) } if (state->degamma_lut) { - lut = (struct drm_color_lut *) state->degamma_lut->data; + lut = state->degamma_lut->data; lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; for (i = 0; i < lut_size; i++) { /* Write LUT in U0.14 format. */ @@ -576,7 +579,7 @@ static void cherryview_load_luts(struct drm_crtc_state *state) } if (state->gamma_lut) { - lut = (struct drm_color_lut *) state->gamma_lut->data; + lut = state->gamma_lut->data; lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; for (i = 0; i < lut_size; i++) { /* Write LUT in U0.10 format. */ @@ -616,19 +619,17 @@ int intel_color_check(struct drm_crtc *crtc, struct drm_i915_private *dev_priv = to_i915(crtc->dev); size_t gamma_length, degamma_length; - degamma_length = INTEL_INFO(dev_priv)->color.degamma_lut_size * - sizeof(struct drm_color_lut); - gamma_length = INTEL_INFO(dev_priv)->color.gamma_lut_size * - sizeof(struct drm_color_lut); + degamma_length = INTEL_INFO(dev_priv)->color.degamma_lut_size; + gamma_length = INTEL_INFO(dev_priv)->color.gamma_lut_size; /* * We allow both degamma & gamma luts at the right size or * NULL. 
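 *
 * Note the sizes are compared in LUT entries, not bytes:
 * drm_color_lut_size() is blob->length / sizeof(struct
 * drm_color_lut).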
*/ if ((!crtc_state->degamma_lut || - crtc_state->degamma_lut->length == degamma_length) && + drm_color_lut_size(crtc_state->degamma_lut) == degamma_length) && (!crtc_state->gamma_lut || - crtc_state->gamma_lut->length == gamma_length)) + drm_color_lut_size(crtc_state->gamma_lut) == gamma_length)) return 0; /* @@ -651,14 +652,14 @@ void intel_color_init(struct drm_crtc *crtc) dev_priv->display.load_csc_matrix = cherryview_load_csc_matrix; dev_priv->display.load_luts = cherryview_load_luts; } else if (IS_HASWELL(dev_priv)) { - dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; + dev_priv->display.load_csc_matrix = ilk_load_csc_matrix; dev_priv->display.load_luts = haswell_load_luts; } else if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) { - dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; + dev_priv->display.load_csc_matrix = ilk_load_csc_matrix; dev_priv->display.load_luts = broadwell_load_luts; } else if (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) { - dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; + dev_priv->display.load_csc_matrix = ilk_load_csc_matrix; dev_priv->display.load_luts = glk_load_luts; } else { dev_priv->display.load_luts = i9xx_load_luts; diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 9f31aea51dff..c0a8805b277f 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -304,9 +304,6 @@ intel_crt_mode_valid(struct drm_connector *connector, int max_dotclk = dev_priv->max_dotclk_freq; int max_clock; - if (mode->flags & DRM_MODE_FLAG_DBLSCAN) - return MODE_NO_DBLESCAN; - if (mode->clock < 25000) return MODE_CLOCK_LOW; @@ -477,14 +474,6 @@ static bool valleyview_crt_detect_hotplug(struct drm_connector *connector) return ret; } -/** - * Uses CRT_HOTPLUG_EN and CRT_HOTPLUG_STAT to detect CRT presence. - * - * Not for i915G/i915GM - * - * \return true if CRT is connected. - * \return false if CRT is disconnected. 
- */
 static bool intel_crt_detect_hotplug(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
@@ -810,10 +799,11 @@ intel_crt_detect(struct drm_connector *connector,
 		else
 			status = connector_status_unknown;
 		intel_release_load_detect_pipe(connector, &tmp, ctx);
-	} else if (ret == 0)
+	} else if (ret == 0) {
 		status = connector_status_unknown;
-	else if (ret < 0)
+	} else {
 		status = ret;
+	}
 
 out:
 	intel_display_power_put(dev_priv, intel_encoder->power_domain);
@@ -966,8 +956,10 @@ void intel_crt_init(struct drm_i915_private *dev_priv)
 	crt->base.power_domain = POWER_DOMAIN_PORT_CRT;
 
 	if (I915_HAS_HOTPLUG(dev_priv) &&
-	    !dmi_check_system(intel_spurious_crt_detect))
+	    !dmi_check_system(intel_spurious_crt_detect)) {
 		crt->base.hpd_pin = HPD_CRT;
+		crt->base.hotplug = intel_encoder_hotplug;
+	}
 
 	if (HAS_DDI(dev_priv)) {
 		crt->base.port = PORT_E;
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 7fe4aac0facc..41e6c75a7f3c 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -37,8 +37,9 @@
 #define I915_CSR_GLK "i915/glk_dmc_ver1_04.bin"
 #define GLK_CSR_VERSION_REQUIRED	CSR_VERSION(1, 4)
 
-#define I915_CSR_CNL "i915/cnl_dmc_ver1_06.bin"
-#define CNL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 6)
+#define I915_CSR_CNL "i915/cnl_dmc_ver1_07.bin"
+MODULE_FIRMWARE(I915_CSR_CNL);
+#define CNL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 7)
 
 #define I915_CSR_KBL "i915/kbl_dmc_ver1_04.bin"
 MODULE_FIRMWARE(I915_CSR_KBL);
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 6aff9d096e13..8c2d778560f0 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -25,6 +25,7 @@
  *
  */
 
+#include <drm/drm_scdc_helper.h>
 #include "i915_drv.h"
 #include "intel_drv.h"
 
@@ -1615,6 +1616,35 @@ void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv,
 	I915_WRITE(reg, val);
 }
 
+int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder,
+				     bool enable)
+{
+	struct drm_device *dev = intel_encoder->base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	enum pipe pipe = 0;
+	int ret = 0;
+	uint32_t tmp;
+
+	if (WARN_ON(!intel_display_power_get_if_enabled(dev_priv,
+							intel_encoder->power_domain)))
+		return -ENXIO;
+
+	if (WARN_ON(!intel_encoder->get_hw_state(intel_encoder, &pipe))) {
+		ret = -EIO;
+		goto out;
+	}
+
+	tmp = I915_READ(TRANS_DDI_FUNC_CTL(pipe));
+	if (enable)
+		tmp |= TRANS_DDI_HDCP_SIGNALLING;
+	else
+		tmp &= ~TRANS_DDI_HDCP_SIGNALLING;
+	I915_WRITE(TRANS_DDI_FUNC_CTL(pipe), tmp);
+out:
+	intel_display_power_put(dev_priv, intel_encoder->power_domain);
+	return ret;
+}
+
 bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector)
 {
 	struct drm_device *dev = intel_connector->base.dev;
@@ -2123,7 +2153,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder,
 
 		I915_WRITE(DPLL_CTRL2, val);
 
-	} else if (INTEL_INFO(dev_priv)->gen < 9) {
+	} else if (INTEL_GEN(dev_priv) < 9) {
 		I915_WRITE(PORT_CLK_SEL(port),
 			   hsw_pll_to_ddi_pll_sel(pll));
 	}
@@ -2401,6 +2431,48 @@ static void intel_enable_ddi_hdmi(struct intel_encoder *encoder,
 					       crtc_state->hdmi_high_tmds_clock_ratio,
 					       crtc_state->hdmi_scrambling);
 
+	/* Display WA #1143: skl,kbl,cfl */
+	if (IS_GEN9_BC(dev_priv)) {
+		/*
+		 * For some reason these chicken bits have been
+		 * stuffed into a transcoder register, even though
+		 * the bits affect a specific DDI port rather than
+		 * a specific transcoder.
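+ *
+ * E.g. the override bits for port B always live in
+ * CHICKEN_TRANS(TRANSCODER_A), regardless of which transcoder is
+ * actually driving the port; hence the fixed port_to_transcoder[]
+ * table below.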
+ */ + static const enum transcoder port_to_transcoder[] = { + [PORT_A] = TRANSCODER_EDP, + [PORT_B] = TRANSCODER_A, + [PORT_C] = TRANSCODER_B, + [PORT_D] = TRANSCODER_C, + [PORT_E] = TRANSCODER_A, + }; + enum transcoder transcoder = port_to_transcoder[port]; + u32 val; + + val = I915_READ(CHICKEN_TRANS(transcoder)); + + if (port == PORT_E) + val |= DDIE_TRAINING_OVERRIDE_ENABLE | + DDIE_TRAINING_OVERRIDE_VALUE; + else + val |= DDI_TRAINING_OVERRIDE_ENABLE | + DDI_TRAINING_OVERRIDE_VALUE; + + I915_WRITE(CHICKEN_TRANS(transcoder), val); + POSTING_READ(CHICKEN_TRANS(transcoder)); + + udelay(1); + + if (port == PORT_E) + val &= ~(DDIE_TRAINING_OVERRIDE_ENABLE | + DDIE_TRAINING_OVERRIDE_VALUE); + else + val &= ~(DDI_TRAINING_OVERRIDE_ENABLE | + DDI_TRAINING_OVERRIDE_VALUE); + + I915_WRITE(CHICKEN_TRANS(transcoder), val); + } + /* In HDMI/DVI mode, the port width, and swing/emphasis values * are ignored so nothing special needs to be done besides * enabling the port. @@ -2420,6 +2492,11 @@ static void intel_enable_ddi(struct intel_encoder *encoder, intel_enable_ddi_hdmi(encoder, crtc_state, conn_state); else intel_enable_ddi_dp(encoder, crtc_state, conn_state); + + /* Enable hdcp if it's desired */ + if (conn_state->content_protection == + DRM_MODE_CONTENT_PROTECTION_DESIRED) + intel_hdcp_enable(to_intel_connector(conn_state->connector)); } static void intel_disable_ddi_dp(struct intel_encoder *encoder, @@ -2428,6 +2505,8 @@ static void intel_disable_ddi_dp(struct intel_encoder *encoder, { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp->link_trained = false; + if (old_crtc_state->has_audio) intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state); @@ -2454,6 +2533,8 @@ static void intel_disable_ddi(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { + intel_hdcp_disable(to_intel_connector(old_conn_state->connector)); + if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_HDMI)) intel_disable_ddi_hdmi(encoder, old_crtc_state, old_conn_state); else @@ -2717,6 +2798,150 @@ intel_ddi_init_dp_connector(struct intel_digital_port *intel_dig_port) return connector; } +static int modeset_pipe(struct drm_crtc *crtc, + struct drm_modeset_acquire_ctx *ctx) +{ + struct drm_atomic_state *state; + struct drm_crtc_state *crtc_state; + int ret; + + state = drm_atomic_state_alloc(crtc->dev); + if (!state) + return -ENOMEM; + + state->acquire_ctx = ctx; + + crtc_state = drm_atomic_get_crtc_state(state, crtc); + if (IS_ERR(crtc_state)) { + ret = PTR_ERR(crtc_state); + goto out; + } + + crtc_state->mode_changed = true; + + ret = drm_atomic_add_affected_connectors(state, crtc); + if (ret) + goto out; + + ret = drm_atomic_add_affected_planes(state, crtc); + if (ret) + goto out; + + ret = drm_atomic_commit(state); + if (ret) + goto out; + + return 0; + + out: + drm_atomic_state_put(state); + + return ret; +} + +static int intel_hdmi_reset_link(struct intel_encoder *encoder, + struct drm_modeset_acquire_ctx *ctx) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_hdmi *hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_connector *connector = hdmi->attached_connector; + struct i2c_adapter *adapter = + intel_gmbus_get_adapter(dev_priv, hdmi->ddc_bus); + struct drm_connector_state *conn_state; + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + u8 config; + int ret; + + if (!connector || connector->base.status != connector_status_connected) + return 0; + + 
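+	/*
+	 * We poke at both connector and crtc state below, so grab
+	 * connection_mutex and the crtc lock through the caller's
+	 * acquire ctx; any -EDEADLK is unwound by the backoff loop
+	 * in intel_ddi_hotplug() below.
+	 */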
	ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex,
+			       ctx);
+	if (ret)
+		return ret;
+
+	conn_state = connector->base.state;
+
+	crtc = to_intel_crtc(conn_state->crtc);
+	if (!crtc)
+		return 0;
+
+	ret = drm_modeset_lock(&crtc->base.mutex, ctx);
+	if (ret)
+		return ret;
+
+	crtc_state = to_intel_crtc_state(crtc->base.state);
+
+	WARN_ON(!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI));
+
+	if (!crtc_state->base.active)
+		return 0;
+
+	if (!crtc_state->hdmi_high_tmds_clock_ratio &&
+	    !crtc_state->hdmi_scrambling)
+		return 0;
+
+	if (conn_state->commit &&
+	    !try_wait_for_completion(&conn_state->commit->hw_done))
+		return 0;
+
+	ret = drm_scdc_readb(adapter, SCDC_TMDS_CONFIG, &config);
+	if (ret < 0) {
+		DRM_ERROR("Failed to read TMDS config: %d\n", ret);
+		return 0;
+	}
+
+	if (!!(config & SCDC_TMDS_BIT_CLOCK_RATIO_BY_40) ==
+	    crtc_state->hdmi_high_tmds_clock_ratio &&
+	    !!(config & SCDC_SCRAMBLING_ENABLE) ==
+	    crtc_state->hdmi_scrambling)
+		return 0;
+
+	/*
+	 * HDMI 2.0 says that one should not send scrambled data
+	 * prior to configuring the sink scrambling, and that
+	 * TMDS clock/data transmission should be suspended when
+	 * changing the TMDS clock rate in the sink. So let's
+	 * just do a full modeset here, even though some sinks
+	 * would be perfectly happy if we were to just reconfigure
+	 * the SCDC settings on the fly.
+	 */
+	return modeset_pipe(&crtc->base, ctx);
+}
+
+static bool intel_ddi_hotplug(struct intel_encoder *encoder,
+			      struct intel_connector *connector)
+{
+	struct drm_modeset_acquire_ctx ctx;
+	bool changed;
+	int ret;
+
+	changed = intel_encoder_hotplug(encoder, connector);
+
+	drm_modeset_acquire_init(&ctx, 0);
+
+	for (;;) {
+		if (connector->base.connector_type == DRM_MODE_CONNECTOR_HDMIA)
+			ret = intel_hdmi_reset_link(encoder, &ctx);
+		else
+			ret = intel_dp_retrain_link(encoder, &ctx);
+
+		if (ret == -EDEADLK) {
+			drm_modeset_backoff(&ctx);
+			continue;
+		}
+
+		break;
+	}
+
+	drm_modeset_drop_locks(&ctx);
+	drm_modeset_acquire_fini(&ctx);
+	WARN(ret, "Acquiring modeset locks failed with %i\n", ret);
+
+	return changed;
+}
+
 static struct intel_connector *
 intel_ddi_init_hdmi_connector(struct intel_digital_port *intel_dig_port)
 {
@@ -2761,39 +2986,45 @@ static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dport)
 	return false;
 }
 
+static int
+intel_ddi_max_lanes(struct intel_digital_port *intel_dport)
+{
+	struct drm_i915_private *dev_priv = to_i915(intel_dport->base.base.dev);
+	enum port port = intel_dport->base.port;
+	int max_lanes = 4;
+
+	if (INTEL_GEN(dev_priv) >= 11)
+		return max_lanes;
+
+	if (port == PORT_A || port == PORT_E) {
+		if (I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_A_4_LANES)
+			max_lanes = port == PORT_A ? 4 : 0;
+		else
+			/* Both A and E share 2 lanes */
+			max_lanes = 2;
+	}
+
+	/*
+	 * Some BIOS might fail to set this bit on port A if eDP
+	 * wasn't lit up at boot. Force this bit set when needed
+	 * so we use the proper lane count for our calculations.
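+ *
+ * (DDI_BUF_CTL(PORT_A) & DDI_A_4_LANES set means port A owns all
+ * four lanes and port E gets none; clear means ports A and E get
+ * two lanes each, as computed above.)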
+ */ + if (intel_ddi_a_force_4_lanes(intel_dport)) { + DRM_DEBUG_KMS("Forcing DDI_A_4_LANES for port A\n"); + intel_dport->saved_port_bits |= DDI_A_4_LANES; + max_lanes = 4; + } + + return max_lanes; +} + void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) { struct intel_digital_port *intel_dig_port; struct intel_encoder *intel_encoder; struct drm_encoder *encoder; bool init_hdmi, init_dp, init_lspcon = false; - int max_lanes; - if (I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_A_4_LANES) { - switch (port) { - case PORT_A: - max_lanes = 4; - break; - case PORT_E: - max_lanes = 0; - break; - default: - max_lanes = 4; - break; - } - } else { - switch (port) { - case PORT_A: - max_lanes = 2; - break; - case PORT_E: - max_lanes = 2; - break; - default: - max_lanes = 4; - break; - } - } init_hdmi = (dev_priv->vbt.ddi_port_info[port].supports_dvi || dev_priv->vbt.ddi_port_info[port].supports_hdmi); @@ -2827,6 +3058,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) drm_encoder_init(&dev_priv->drm, encoder, &intel_ddi_funcs, DRM_MODE_ENCODER_TMDS, "DDI %c", port_name(port)); + intel_encoder->hotplug = intel_ddi_hotplug; intel_encoder->compute_output_type = intel_ddi_compute_output_type; intel_encoder->compute_config = intel_ddi_compute_config; intel_encoder->enable = intel_enable_ddi; @@ -2839,10 +3071,20 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_encoder->get_config = intel_ddi_get_config; intel_encoder->suspend = intel_dp_encoder_suspend; intel_encoder->get_power_domains = intel_ddi_get_power_domains; + intel_encoder->type = INTEL_OUTPUT_DDI; + intel_encoder->power_domain = intel_port_to_power_domain(port); + intel_encoder->port = port; + intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); + intel_encoder->cloneable = 0; - intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & - (DDI_BUF_PORT_REVERSAL | - DDI_A_4_LANES); + if (INTEL_GEN(dev_priv) >= 11) + intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & + DDI_BUF_PORT_REVERSAL; + else + intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & + (DDI_BUF_PORT_REVERSAL | DDI_A_4_LANES); + intel_dig_port->dp.output_reg = INVALID_MMIO_REG; + intel_dig_port->max_lanes = intel_ddi_max_lanes(intel_dig_port); switch (port) { case PORT_A: @@ -2865,30 +3107,14 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_dig_port->ddi_io_power_domain = POWER_DOMAIN_PORT_DDI_E_IO; break; + case PORT_F: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_F_IO; + break; default: MISSING_CASE(port); } - /* - * Some BIOS might fail to set this bit on port A if eDP - * wasn't lit up at boot. Force this bit set when needed - * so we use the proper lane count for our calculations. 
- */ - if (intel_ddi_a_force_4_lanes(intel_dig_port)) { - DRM_DEBUG_KMS("Forcing DDI_A_4_LANES for port A\n"); - intel_dig_port->saved_port_bits |= DDI_A_4_LANES; - max_lanes = 4; - } - - intel_dig_port->dp.output_reg = INVALID_MMIO_REG; - intel_dig_port->max_lanes = max_lanes; - - intel_encoder->type = INTEL_OUTPUT_DDI; - intel_encoder->power_domain = intel_port_to_power_domain(port); - intel_encoder->port = port; - intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); - intel_encoder->cloneable = 0; - intel_infoframe_init(intel_dig_port); if (init_dp) { diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index d28592e43512..3dd350f7b8e6 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -56,6 +56,7 @@ static const char * const platform_names[] = { PLATFORM_NAME(GEMINILAKE), PLATFORM_NAME(COFFEELAKE), PLATFORM_NAME(CANNONLAKE), + PLATFORM_NAME(ICELAKE), }; #undef PLATFORM_NAME @@ -80,12 +81,16 @@ void intel_device_info_dump_flags(const struct intel_device_info *info, static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) { + int s; + drm_printf(p, "slice mask: %04x\n", sseu->slice_mask); drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask)); drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu)); - drm_printf(p, "subslice mask %04x\n", sseu->subslice_mask); - drm_printf(p, "subslice per slice: %u\n", - hweight8(sseu->subslice_mask)); + for (s = 0; s < ARRAY_SIZE(sseu->subslice_mask); s++) { + drm_printf(p, "slice%d %u subslices mask=%04x\n", + s, hweight8(sseu->subslice_mask[s]), + sseu->subslice_mask[s]); + } drm_printf(p, "EU total: %u\n", sseu->eu_total); drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice); drm_printf(p, "has slice power gating: %s\n", @@ -119,22 +124,100 @@ void intel_device_info_dump(const struct intel_device_info *info, intel_device_info_dump_flags(info, p); } +void intel_device_info_dump_topology(const struct sseu_dev_info *sseu, + struct drm_printer *p) +{ + int s, ss; + + if (sseu->max_slices == 0) { + drm_printf(p, "Unavailable\n"); + return; + } + + for (s = 0; s < sseu->max_slices; s++) { + drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n", + s, hweight8(sseu->subslice_mask[s]), + sseu->subslice_mask[s]); + + for (ss = 0; ss < sseu->max_subslices; ss++) { + u16 enabled_eus = sseu_get_eus(sseu, s, ss); + + drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n", + ss, hweight16(enabled_eus), enabled_eus); + } + } +} + +static u16 compute_eu_total(const struct sseu_dev_info *sseu) +{ + u16 i, total = 0; + + for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++) + total += hweight8(sseu->eu_mask[i]); + + return total; +} + static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; const u32 fuse2 = I915_READ(GEN8_FUSE2); + int s, ss; + const int eu_mask = 0xff; + u32 subslice_mask, eu_en; sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >> GEN10_F2_S_ENA_SHIFT; - sseu->subslice_mask = (1 << 4) - 1; - sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> - GEN10_F2_SS_DIS_SHIFT); + sseu->max_slices = 6; + sseu->max_subslices = 4; + sseu->max_eus_per_subslice = 8; + + subslice_mask = (1 << 4) - 1; + subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> + GEN10_F2_SS_DIS_SHIFT); + + /* + * Slice0 can have up to 3 subslices, but there are only 2 in + * slice1/2. 
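+ *
+ * E.g. a fully enabled fuse (subslice_mask = 0x7) leaves slice0
+ * at 0x7 below while slice1..5 are clamped to 0x3.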
+ */ + sseu->subslice_mask[0] = subslice_mask; + for (s = 1; s < sseu->max_slices; s++) + sseu->subslice_mask[s] = subslice_mask & 0x3; + + /* Slice0 */ + eu_en = ~I915_READ(GEN8_EU_DISABLE0); + for (ss = 0; ss < sseu->max_subslices; ss++) + sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask); + /* Slice1 */ + sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask); + eu_en = ~I915_READ(GEN8_EU_DISABLE1); + sseu_set_eus(sseu, 1, 1, eu_en & eu_mask); + /* Slice2 */ + sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask); + sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask); + /* Slice3 */ + sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask); + eu_en = ~I915_READ(GEN8_EU_DISABLE2); + sseu_set_eus(sseu, 3, 1, eu_en & eu_mask); + /* Slice4 */ + sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask); + sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask); + /* Slice5 */ + sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask); + eu_en = ~I915_READ(GEN10_EU_DISABLE3); + sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); + + /* Do a second pass where we mark the subslices disabled if all their + * eus are off. + */ + for (s = 0; s < sseu->max_slices; s++) { + for (ss = 0; ss < sseu->max_subslices; ss++) { + if (sseu_get_eus(sseu, s, ss) == 0) + sseu->subslice_mask[s] &= ~BIT(ss); + } + } - sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0)); - sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1)); - sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2)); - sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) & - GEN10_EU_DIS_SS_MASK)); + sseu->eu_total = compute_eu_total(sseu); /* * CNL is expected to always have a uniform distribution @@ -155,26 +238,39 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; - u32 fuse, eu_dis; + u32 fuse; fuse = I915_READ(CHV_FUSE_GT); sseu->slice_mask = BIT(0); + sseu->max_slices = 1; + sseu->max_subslices = 2; + sseu->max_eus_per_subslice = 8; if (!(fuse & CHV_FGT_DISABLE_SS0)) { - sseu->subslice_mask |= BIT(0); - eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK | - CHV_FGT_EU_DIS_SS0_R1_MASK); - sseu->eu_total += 8 - hweight32(eu_dis); + u8 disabled_mask = + ((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> + CHV_FGT_EU_DIS_SS0_R0_SHIFT) | + (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> + CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4); + + sseu->subslice_mask[0] |= BIT(0); + sseu_set_eus(sseu, 0, 0, ~disabled_mask); } if (!(fuse & CHV_FGT_DISABLE_SS1)) { - sseu->subslice_mask |= BIT(1); - eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK | - CHV_FGT_EU_DIS_SS1_R1_MASK); - sseu->eu_total += 8 - hweight32(eu_dis); + u8 disabled_mask = + ((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> + CHV_FGT_EU_DIS_SS1_R0_SHIFT) | + (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> + CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4); + + sseu->subslice_mask[0] |= BIT(1); + sseu_set_eus(sseu, 0, 1, ~disabled_mask); } + sseu->eu_total = compute_eu_total(sseu); + /* * CHV expected to always have a uniform distribution of EU * across subslices. 
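 *
 * (i.e. eu_per_subslice works out to
 * DIV_ROUND_UP(eu_total, subslice_total), as on the other
 * platforms.)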
@@ -196,41 +292,52 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) { struct intel_device_info *info = mkwrite_device_info(dev_priv); struct sseu_dev_info *sseu = &info->sseu; - int s_max = 3, ss_max = 4, eu_max = 8; int s, ss; - u32 fuse2, eu_disable; - u8 eu_mask = 0xff; + u32 fuse2, eu_disable, subslice_mask; + const u8 eu_mask = 0xff; fuse2 = I915_READ(GEN8_FUSE2); sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; + /* BXT has a single slice and at most 3 subslices. */ + sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3; + sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4; + sseu->max_eus_per_subslice = 8; + /* * The subslice disable field is global, i.e. it applies * to each of the enabled slices. */ - sseu->subslice_mask = (1 << ss_max) - 1; - sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >> - GEN9_F2_SS_DIS_SHIFT); + subslice_mask = (1 << sseu->max_subslices) - 1; + subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >> + GEN9_F2_SS_DIS_SHIFT); /* * Iterate through enabled slices and subslices to * count the total enabled EU. */ - for (s = 0; s < s_max; s++) { + for (s = 0; s < sseu->max_slices; s++) { if (!(sseu->slice_mask & BIT(s))) /* skip disabled slice */ continue; + sseu->subslice_mask[s] = subslice_mask; + eu_disable = I915_READ(GEN9_EU_DISABLE(s)); - for (ss = 0; ss < ss_max; ss++) { + for (ss = 0; ss < sseu->max_subslices; ss++) { int eu_per_ss; + u8 eu_disabled_mask; - if (!(sseu->subslice_mask & BIT(ss))) + if (!(sseu->subslice_mask[s] & BIT(ss))) /* skip disabled subslice */ continue; - eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) & - eu_mask); + eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask; + + sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); + + eu_per_ss = sseu->max_eus_per_subslice - + hweight8(eu_disabled_mask); /* * Record which subslice(s) has(have) 7 EUs. we @@ -239,11 +346,11 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) */ if (eu_per_ss == 7) sseu->subslice_7eu[s] |= BIT(ss); - - sseu->eu_total += eu_per_ss; } } + sseu->eu_total = compute_eu_total(sseu); + /* * SKL is expected to always have a uniform distribution * of EU across subslices with the exception that any one @@ -269,8 +376,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) sseu->has_eu_pg = sseu->eu_per_subslice > 2; if (IS_GEN9_LP(dev_priv)) { -#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask & BIT(ss))) - info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3; +#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask[0] & BIT(ss))) + info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3; sseu->min_eu_in_pool = 0; if (info->has_pooled_eu) { @@ -288,19 +395,22 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; - const int s_max = 3, ss_max = 3, eu_max = 8; int s, ss; - u32 fuse2, eu_disable[3]; /* s_max */ + u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */ fuse2 = I915_READ(GEN8_FUSE2); sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; + sseu->max_slices = 3; + sseu->max_subslices = 3; + sseu->max_eus_per_subslice = 8; + /* * The subslice disable field is global, i.e. it applies * to each of the enabled slices. 
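 *
 * E.g. a fuse2 subslice disable field of 0x4 yields
 * subslice_mask = 0x3 here, which the loop below then applies to
 * every enabled slice.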
	 */
-	sseu->subslice_mask = GENMASK(ss_max - 1, 0);
-	sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
-				 GEN8_F2_SS_DIS_SHIFT);
+	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
+	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
+			   GEN8_F2_SS_DIS_SHIFT);
 
 	eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK;
 	eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) |
@@ -314,30 +424,38 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
 	 * Iterate through enabled slices and subslices to
 	 * count the total enabled EU.
 	 */
-	for (s = 0; s < s_max; s++) {
+	for (s = 0; s < sseu->max_slices; s++) {
 		if (!(sseu->slice_mask & BIT(s)))
 			/* skip disabled slice */
 			continue;
 
-		for (ss = 0; ss < ss_max; ss++) {
+		sseu->subslice_mask[s] = subslice_mask;
+
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
+			u8 eu_disabled_mask;
 			u32 n_disabled;
 
-			if (!(sseu->subslice_mask & BIT(ss)))
+			if (!(sseu->subslice_mask[s] & BIT(ss)))
 				/* skip disabled subslice */
 				continue;
 
-			n_disabled = hweight8(eu_disable[s] >> (ss * eu_max));
+			eu_disabled_mask =
+				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);
+
+			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
+
+			n_disabled = hweight8(eu_disabled_mask);
 
 			/*
 			 * Record which subslices have 7 EUs.
 			 */
-			if (eu_max - n_disabled == 7)
+			if (sseu->max_eus_per_subslice - n_disabled == 7)
 				sseu->subslice_7eu[s] |= 1 << ss;
-
-			sseu->eu_total += eu_max - n_disabled;
 		}
 	}
 
+	sseu->eu_total = compute_eu_total(sseu);
+
 	/*
 	 * BDW is expected to always have a uniform distribution of EU across
 	 * subslices with the exception that any one EU in any one subslice may
@@ -356,6 +474,72 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
 	sseu->has_eu_pg = 0;
 }
 
+static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
+{
+	struct intel_device_info *info = mkwrite_device_info(dev_priv);
+	struct sseu_dev_info *sseu = &info->sseu;
+	u32 fuse1;
+	int s, ss;
+
+	/*
+	 * There isn't a register to tell us how many slices/subslices. We
+	 * work off the PCI-ids here.
+	 */
+	switch (info->gt) {
+	default:
+		MISSING_CASE(info->gt);
+		/* fall through */
+	case 1:
+		sseu->slice_mask = BIT(0);
+		sseu->subslice_mask[0] = BIT(0);
+		break;
+	case 2:
+		sseu->slice_mask = BIT(0);
+		sseu->subslice_mask[0] = BIT(0) | BIT(1);
+		break;
+	case 3:
+		sseu->slice_mask = BIT(0) | BIT(1);
+		sseu->subslice_mask[0] = BIT(0) | BIT(1);
+		sseu->subslice_mask[1] = BIT(0) | BIT(1);
+		break;
+	}
+
+	sseu->max_slices = hweight8(sseu->slice_mask);
+	sseu->max_subslices = hweight8(sseu->subslice_mask[0]);
+
+	fuse1 = I915_READ(HSW_PAVP_FUSE1);
+	switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) {
+	default:
+		MISSING_CASE((fuse1 & HSW_F1_EU_DIS_MASK) >>
+			     HSW_F1_EU_DIS_SHIFT);
+		/* fall through */
+	case HSW_F1_EU_DIS_10EUS:
+		sseu->eu_per_subslice = 10;
+		break;
+	case HSW_F1_EU_DIS_8EUS:
+		sseu->eu_per_subslice = 8;
+		break;
+	case HSW_F1_EU_DIS_6EUS:
+		sseu->eu_per_subslice = 6;
+		break;
+	}
+	sseu->max_eus_per_subslice = sseu->eu_per_subslice;
+
+	for (s = 0; s < sseu->max_slices; s++) {
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
+			sseu_set_eus(sseu, s, ss,
+				     (1UL << sseu->eu_per_subslice) - 1);
+		}
+	}
+
+	sseu->eu_total = compute_eu_total(sseu);
+
+	/* No powergating for you.
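+ *
+ * (For reference, a GT2 part above ends up as 1 slice x 2
+ * subslices, typically fused to 10 EUs per subslice, i.e.
+ * eu_total = 20.)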
*/ + sseu->has_slice_pg = 0; + sseu->has_subslice_pg = 0; + sseu->has_eu_pg = 0; +} + static u32 read_reference_ts_freq(struct drm_i915_private *dev_priv) { u32 ts_override = I915_READ(GEN9_TIMESTAMP_OVERRIDE); @@ -488,6 +672,9 @@ void intel_device_info_runtime_init(struct intel_device_info *info) info->num_scalers[PIPE_C] = 1; } + BUILD_BUG_ON(I915_NUM_ENGINES > + sizeof(intel_ring_mask_t) * BITS_PER_BYTE); + /* * Skylake and Broxton currently don't expose the topmost plane as its * use is exclusive with the legacy cursor and we only want to expose @@ -573,7 +760,9 @@ void intel_device_info_runtime_init(struct intel_device_info *info) } /* Initialize slice/subslice/EU info */ - if (IS_CHERRYVIEW(dev_priv)) + if (IS_HASWELL(dev_priv)) + haswell_sseu_info_init(dev_priv); + else if (IS_CHERRYVIEW(dev_priv)) cherryview_sseu_info_init(dev_priv); else if (IS_BROADWELL(dev_priv)) broadwell_sseu_info_init(dev_priv); @@ -585,3 +774,9 @@ void intel_device_info_runtime_init(struct intel_device_info *info) /* Initialize command stream timestamp frequency */ info->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv); } + +void intel_driver_caps_print(const struct intel_driver_caps *caps, + struct drm_printer *p) +{ + drm_printf(p, "scheduler: %x\n", caps->scheduler); +} diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 49cb27bd04c1..0835752c8b22 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -69,6 +69,8 @@ enum intel_platform { INTEL_COFFEELAKE, /* gen10 */ INTEL_CANNONLAKE, + /* gen11 */ + INTEL_ICELAKE, INTEL_MAX_PLATFORMS }; @@ -94,6 +96,7 @@ enum intel_platform { func(has_l3_dpf); \ func(has_llc); \ func(has_logical_ring_contexts); \ + func(has_logical_ring_elsq); \ func(has_logical_ring_preemption); \ func(has_overlay); \ func(has_pooled_eu); \ @@ -110,10 +113,13 @@ enum intel_platform { func(supports_tv); \ func(has_ipc); +#define GEN_MAX_SLICES (6) /* CNL upper bound */ +#define GEN_MAX_SUBSLICES (7) + struct sseu_dev_info { u8 slice_mask; - u8 subslice_mask; - u8 eu_total; + u8 subslice_mask[GEN_MAX_SUBSLICES]; + u16 eu_total; u8 eu_per_subslice; u8 min_eu_in_pool; /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ @@ -121,8 +127,21 @@ struct sseu_dev_info { u8 has_slice_pg:1; u8 has_subslice_pg:1; u8 has_eu_pg:1; + + /* Topology fields */ + u8 max_slices; + u8 max_subslices; + u8 max_eus_per_subslice; + + /* We don't have more than 8 eus per subslice at the moment and as we + * store eus enabled using bits, no need to multiply by eus per + * subslice. 
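+ *
+ * I.e. one byte per (slice, subslice) pair, for a total of
+ * GEN_MAX_SLICES * GEN_MAX_SUBSLICES = 42 bytes.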
+ */ + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; }; +typedef u8 intel_ring_mask_t; + struct intel_device_info { u16 device_id; u16 gen_mask; @@ -130,19 +149,19 @@ struct intel_device_info { u8 gen; u8 gt; /* GT number, 0 if undefined */ u8 num_rings; - u8 ring_mask; /* Rings supported by the HW */ + intel_ring_mask_t ring_mask; /* Rings supported by the HW */ enum intel_platform platform; u32 platform_mask; + unsigned int page_sizes; /* page sizes supported by the HW */ + u32 display_mmio_offset; u8 num_pipes; u8 num_sprites[I915_MAX_PIPES]; u8 num_scalers[I915_MAX_PIPES]; - unsigned int page_sizes; /* page sizes supported by the HW */ - #define DEFINE_FLAG(name) u8 name:1 DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); #undef DEFINE_FLAG @@ -165,9 +184,55 @@ struct intel_device_info { } color; }; +struct intel_driver_caps { + unsigned int scheduler; +}; + static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu) { - return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask); + unsigned int i, total = 0; + + for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++) + total += hweight8(sseu->subslice_mask[i]); + + return total; +} + +static inline int sseu_eu_idx(const struct sseu_dev_info *sseu, + int slice, int subslice) +{ + int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice, + BITS_PER_BYTE); + int slice_stride = sseu->max_subslices * subslice_stride; + + return slice * slice_stride + subslice * subslice_stride; +} + +static inline u16 sseu_get_eus(const struct sseu_dev_info *sseu, + int slice, int subslice) +{ + int i, offset = sseu_eu_idx(sseu, slice, subslice); + u16 eu_mask = 0; + + for (i = 0; + i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) { + eu_mask |= ((u16) sseu->eu_mask[offset + i]) << + (i * BITS_PER_BYTE); + } + + return eu_mask; +} + +static inline void sseu_set_eus(struct sseu_dev_info *sseu, + int slice, int subslice, u16 eu_mask) +{ + int i, offset = sseu_eu_idx(sseu, slice, subslice); + + for (i = 0; + i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) { + sseu->eu_mask[offset + i] = + (eu_mask >> (BITS_PER_BYTE * i)) & 0xff; + } } const char *intel_platform_name(enum intel_platform platform); @@ -179,5 +244,10 @@ void intel_device_info_dump_flags(const struct intel_device_info *info, struct drm_printer *p); void intel_device_info_dump_runtime(const struct intel_device_info *info, struct drm_printer *p); +void intel_device_info_dump_topology(const struct sseu_dev_info *sseu, + struct drm_printer *p); + +void intel_driver_caps_print(const struct intel_driver_caps *caps, + struct drm_printer *p); #endif diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f288bcc7be22..3b48fd2561fe 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -558,11 +558,11 @@ int chv_calc_dpll_params(int refclk, struct dpll *clock) } #define INTELPllInvalid(s) do { /* DRM_DEBUG(s); */ return false; } while (0) -/** + +/* * Returns whether the given set of divisors are valid for a given refclk with * the given connectors. 
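 *
 * (Roughly: vco = refclk * m / n and dot = vco / p, with the limit
 * struct bounding each divisor and the resulting vco/dot
 * frequencies.)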
*/ - static bool intel_PLL_is_valid(struct drm_i915_private *dev_priv, const struct intel_limit *limit, const struct dpll *clock) @@ -2029,12 +2029,12 @@ static unsigned int intel_cursor_alignment(const struct drm_i915_private *dev_pr static unsigned int intel_linear_alignment(const struct drm_i915_private *dev_priv) { - if (INTEL_INFO(dev_priv)->gen >= 9) + if (INTEL_GEN(dev_priv) >= 9) return 256 * 1024; else if (IS_I965G(dev_priv) || IS_I965GM(dev_priv) || IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) return 128 * 1024; - else if (INTEL_INFO(dev_priv)->gen >= 4) + else if (INTEL_GEN(dev_priv) >= 4) return 4 * 1024; else return 0; @@ -2067,14 +2067,26 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb, } } +static bool intel_plane_uses_fence(const struct intel_plane_state *plane_state) +{ + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + + return INTEL_GEN(dev_priv) < 4 || plane->has_fbc; +} + struct i915_vma * -intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) +intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, + unsigned int rotation, + bool uses_fence, + unsigned long *out_flags) { struct drm_device *dev = fb->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj = intel_fb_obj(fb); struct i915_ggtt_view view; struct i915_vma *vma; + unsigned int pinctl; u32 alignment; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); @@ -2102,11 +2114,26 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) atomic_inc(&dev_priv->gpu_error.pending_fb_pin); - vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view); + pinctl = 0; + + /* Valleyview is definitely limited to scanning out the first + * 512MiB. Lets presume this behaviour was inherited from the + * g4x display engine and that all earlier gen are similarly + * limited. Testing suggests that it is a little more + * complicated than this. For example, Cherryview appears quite + * happy to scanout from anywhere within its global aperture. + */ + if (HAS_GMCH_DISPLAY(dev_priv)) + pinctl |= PIN_MAPPABLE; + + vma = i915_gem_object_pin_to_display_plane(obj, + alignment, &view, pinctl); if (IS_ERR(vma)) goto err; - if (i915_vma_is_map_and_fenceable(vma)) { + if (uses_fence && i915_vma_is_map_and_fenceable(vma)) { + int ret; + /* Install a fence for tiled scan-out. Pre-i965 always needs a * fence, whereas 965+ only requires a fence if using * framebuffer compression. For simplicity, we always, when @@ -2123,7 +2150,15 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) * something and try to run the system in a "less than optimal" * mode that matches the user configuration. 
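 *
 * A fence is mandatory for tiled scan-out on gen2/3, so a fence
 * pin failure below is fatal there; gen4+ only wants the fence
 * for FBC, so on failure we just proceed without PLANE_HAS_FENCE.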
*/ - i915_vma_pin_fence(vma); + ret = i915_vma_pin_fence(vma); + if (ret != 0 && INTEL_GEN(dev_priv) < 4) { + i915_gem_object_unpin_from_display_plane(vma); + vma = ERR_PTR(ret); + goto err; + } + + if (ret == 0 && vma->fence) + *out_flags |= PLANE_HAS_FENCE; } i915_vma_get(vma); @@ -2134,11 +2169,12 @@ err: return vma; } -void intel_unpin_fb_vma(struct i915_vma *vma) +void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags) { lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - i915_vma_unpin_fence(vma); + if (flags & PLANE_HAS_FENCE) + i915_vma_unpin_fence(vma); i915_gem_object_unpin_from_display_plane(vma); i915_vma_put(vma); } @@ -2387,6 +2423,20 @@ static unsigned int intel_fb_modifier_to_tiling(uint64_t fb_modifier) } } +/* + * From the Sky Lake PRM: + * "The Color Control Surface (CCS) contains the compression status of + * the cache-line pairs. The compression state of the cache-line pair + * is specified by 2 bits in the CCS. Each CCS cache-line represents + * an area on the main surface of 16 x16 sets of 128 byte Y-tiled + * cache-line-pairs. CCS is always Y tiled." + * + * Since cache line pairs refers to horizontally adjacent cache lines, + * each cache line in the CCS corresponds to an area of 32x16 cache + * lines on the main surface. Since each pixel is 4 bytes, this gives + * us a ratio of one byte in the CCS for each 8x16 pixels in the + * main surface. + */ static const struct drm_format_info ccs_formats[] = { { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, }, { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, }, @@ -2794,7 +2844,10 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, valid_fb: mutex_lock(&dev->struct_mutex); intel_state->vma = - intel_pin_and_fence_fb_obj(fb, primary->state->rotation); + intel_pin_and_fence_fb_obj(fb, + primary->state->rotation, + intel_plane_uses_fence(intel_state), + &intel_state->flags); mutex_unlock(&dev->struct_mutex); if (IS_ERR(intel_state->vma)) { DRM_ERROR("failed to pin boot fb on pipe %d: %li\n", @@ -2917,14 +2970,19 @@ static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state return true; } -static int skl_check_main_surface(struct intel_plane_state *plane_state) +static int skl_check_main_surface(const struct intel_crtc_state *crtc_state, + struct intel_plane_state *plane_state) { + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); const struct drm_framebuffer *fb = plane_state->base.fb; unsigned int rotation = plane_state->base.rotation; int x = plane_state->base.src.x1 >> 16; int y = plane_state->base.src.y1 >> 16; int w = drm_rect_width(&plane_state->base.src) >> 16; int h = drm_rect_height(&plane_state->base.src) >> 16; + int dst_x = plane_state->base.dst.x1; + int pipe_src_w = crtc_state->pipe_src_w; int max_width = skl_max_plane_width(fb, 0, rotation); int max_height = 4096; u32 alignment, offset, aux_offset = plane_state->aux.offset; @@ -2935,6 +2993,24 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) return -EINVAL; } + /* + * Display WA #1175: cnl,glk + * Planes other than the cursor may cause FIFO underflow and display + * corruption if starting less than 4 pixels from the right edge of + * the screen. + * Besides the above WA fix the similar problem, where planes other + * than the cursor ending less than 4 pixels from the left edge of the + * screen may cause FIFO underflow and display corruption. 
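+ *
+ * E.g. with pipe_src_w = 1920 a plane is only accepted if
+ * dst_x + w >= 4 and dst_x <= 1916.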
+ */ + if ((IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) && + (dst_x + w < 4 || dst_x > pipe_src_w - 4)) { + DRM_DEBUG_KMS("requested plane X %s position %d invalid (valid range %d-%d)\n", + dst_x + w < 4 ? "end" : "start", + dst_x + w < 4 ? dst_x + w : dst_x, + 4, pipe_src_w - 4); + return -ERANGE; + } + intel_add_fb_offsets(&x, &y, plane_state, 0); offset = intel_compute_tile_offset(&x, &y, plane_state, 0); alignment = intel_surf_alignment(fb, 0); @@ -3026,8 +3102,6 @@ static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state) static int skl_check_ccs_aux_surface(struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); - struct intel_crtc *crtc = to_intel_crtc(plane_state->base.crtc); const struct drm_framebuffer *fb = plane_state->base.fb; int src_x = plane_state->base.src.x1 >> 16; int src_y = plane_state->base.src.y1 >> 16; @@ -3037,20 +3111,6 @@ static int skl_check_ccs_aux_surface(struct intel_plane_state *plane_state) int y = src_y / vsub; u32 offset; - switch (plane->id) { - case PLANE_PRIMARY: - case PLANE_SPRITE0: - break; - default: - DRM_DEBUG_KMS("RC support only on plane 1 and 2\n"); - return -EINVAL; - } - - if (crtc->pipe == PIPE_C) { - DRM_DEBUG_KMS("No RC support on pipe C\n"); - return -EINVAL; - } - if (plane_state->base.rotation & ~(DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_180)) { DRM_DEBUG_KMS("RC support only with 0/180 degree rotation %x\n", plane_state->base.rotation); @@ -3067,7 +3127,8 @@ static int skl_check_ccs_aux_surface(struct intel_plane_state *plane_state) return 0; } -int skl_check_plane_surface(struct intel_plane_state *plane_state) +int skl_check_plane_surface(const struct intel_crtc_state *crtc_state, + struct intel_plane_state *plane_state) { const struct drm_framebuffer *fb = plane_state->base.fb; unsigned int rotation = plane_state->base.rotation; @@ -3107,7 +3168,7 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) plane_state->aux.y = 0; } - ret = skl_check_main_surface(plane_state); + ret = skl_check_main_surface(crtc_state, plane_state); if (ret) return ret; @@ -3133,7 +3194,7 @@ static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) dspcntr |= DISPPLANE_PIPE_CSC_ENABLE; - if (INTEL_GEN(dev_priv) < 4) + if (INTEL_GEN(dev_priv) < 5) dspcntr |= DISPPLANE_SEL_PIPE(crtc->pipe); switch (fb->format->format) { @@ -3514,6 +3575,12 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, PLANE_CTL_PIPE_GAMMA_ENABLE | PLANE_CTL_PIPE_CSC_ENABLE | PLANE_CTL_PLANE_GAMMA_DISABLE; + + if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709) + plane_ctl |= PLANE_CTL_YUV_TO_RGB_CSC_FORMAT_BT709; + + if (plane_state->base.color_range == DRM_COLOR_YCBCR_FULL_RANGE) + plane_ctl |= PLANE_CTL_YUV_RANGE_CORRECTION_DISABLE; } plane_ctl |= skl_plane_ctl_format(fb->format->format); @@ -3543,6 +3610,16 @@ u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, plane_color_ctl |= PLANE_COLOR_PLANE_GAMMA_DISABLE; plane_color_ctl |= glk_plane_color_ctl_alpha(fb->format->format); + if (intel_format_is_yuv(fb->format->format)) { + if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709) + plane_color_ctl |= PLANE_COLOR_CSC_MODE_YUV709_TO_RGB709; + else + plane_color_ctl |= PLANE_COLOR_CSC_MODE_YUV601_TO_RGB709; + + if (plane_state->base.color_range == DRM_COLOR_YCBCR_FULL_RANGE) + plane_color_ctl |= PLANE_COLOR_YUV_RANGE_CORRECTION_DISABLE; + } + return plane_color_ctl; } @@ -4726,8 
+4803,7 @@ int skl_update_scaler_crtc(struct intel_crtc_state *state) /** * skl_update_scaler_plane - Stages update to scaler state for a given plane. - * - * @state: crtc's scaler state + * @crtc_state: crtc's scaler state * @plane_state: atomic plane state to update * * Return @@ -4757,7 +4833,7 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, return ret; /* check colorkey */ - if (plane_state->ckey.flags != I915_SET_COLORKEY_NONE) { + if (plane_state->ckey.flags) { DRM_DEBUG_KMS("[PLANE:%d:%s] scaling with color key not allowed", intel_plane->base.base.id, intel_plane->base.name); @@ -4924,6 +5000,7 @@ static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc) /** * intel_post_enable_primary - Perform operations after enabling primary plane * @crtc: the CRTC whose primary plane was just enabled + * @new_crtc_state: the enabling state * * Performs potentially sleeping operations that must be done after the primary * plane is enabled, such as updating FBC and IPS. Note that this may be @@ -5388,6 +5465,20 @@ static void glk_pipe_scaler_clock_gating_wa(struct drm_i915_private *dev_priv, I915_WRITE(CLKGATE_DIS_PSL(pipe), val); } +static void icl_pipe_mbus_enable(struct intel_crtc *crtc) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + uint32_t val; + + val = MBUS_DBOX_BW_CREDIT(1) | MBUS_DBOX_A_CREDIT(2); + + /* Program B credit equally to all pipes */ + val |= MBUS_DBOX_B_CREDIT(24 / INTEL_INFO(dev_priv)->num_pipes); + + I915_WRITE(PIPE_MBUS_DBOX_CTL(pipe), val); +} + static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, struct drm_atomic_state *old_state) { @@ -5465,6 +5556,9 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, if (dev_priv->display.initial_watermarks != NULL) dev_priv->display.initial_watermarks(old_intel_state, pipe_config); + if (INTEL_GEN(dev_priv) >= 11) + icl_pipe_mbus_enable(intel_crtc); + /* XXX: Do the pipe assertions at the right place for BXT DSI. */ if (!transcoder_is_dsi(cpu_transcoder)) intel_enable_pipe(pipe_config); @@ -5641,6 +5735,8 @@ enum intel_display_power_domain intel_port_to_power_domain(enum port port) return POWER_DOMAIN_PORT_DDI_D_LANES; case PORT_E: return POWER_DOMAIN_PORT_DDI_E_LANES; + case PORT_F: + return POWER_DOMAIN_PORT_DDI_F_LANES; default: MISSING_CASE(port); return POWER_DOMAIN_PORT_OTHER; @@ -6275,7 +6371,7 @@ static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc) const struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); /* GDG double wide on either pipe, otherwise pipe A only */ - return INTEL_INFO(dev_priv)->gen < 4 && + return INTEL_GEN(dev_priv) < 4 && (crtc->pipe == PIPE_A || IS_I915G(dev_priv)); } @@ -6372,9 +6468,18 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, * - LVDS dual channel mode * - Double wide pipe */ - if ((intel_crtc_has_type(pipe_config, INTEL_OUTPUT_LVDS) && - intel_is_dual_link_lvds(dev)) || pipe_config->double_wide) - pipe_config->pipe_src_w &= ~1; + if (pipe_config->pipe_src_w & 1) { + if (pipe_config->double_wide) { + DRM_DEBUG_KMS("Odd pipe source width not supported with double wide pipe\n"); + return -EINVAL; + } + + if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_LVDS) && + intel_is_dual_link_lvds(dev)) { + DRM_DEBUG_KMS("Odd pipe source width not supported with dual link LVDS\n"); + return -EINVAL; + } + } /* Cantiga+ cannot handle modes with a hsync front porch of 0. 
* WaPruneModeWithIncorrectHsyncOffset:ctg,elk,ilk,snb,ivb,vlv,hsw. @@ -8153,7 +8258,7 @@ static void haswell_set_pipemisc(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_crtc_state *config = intel_crtc->config; - if (IS_BROADWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 9) { + if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9) { u32 val = 0; switch (intel_crtc->config->pipe_bpp) { @@ -8495,7 +8600,10 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, val = I915_READ(PLANE_CTL(pipe, plane_id)); - pixel_format = val & PLANE_CTL_FORMAT_MASK; + if (INTEL_GEN(dev_priv) >= 11) + pixel_format = val & ICL_PLANE_CTL_FORMAT_MASK; + else + pixel_format = val & PLANE_CTL_FORMAT_MASK; if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { alpha = I915_READ(PLANE_COLOR_CTL(pipe, plane_id)); @@ -9314,7 +9422,6 @@ static int intel_check_cursor(struct intel_crtc_state *crtc_state, ret = drm_atomic_helper_check_plane_state(&plane_state->base, &crtc_state->base, - &plane_state->clip, DRM_PLANE_HELPER_NO_SCALING, DRM_PLANE_HELPER_NO_SCALING, true, true); @@ -9488,7 +9595,8 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, if (HAS_DDI(dev_priv)) cntl |= CURSOR_PIPE_CSC_ENABLE; - cntl |= MCURSOR_PIPE_SELECT(crtc->pipe); + if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv)) + cntl |= MCURSOR_PIPE_SELECT(crtc->pipe); switch (plane_state->base.crtc_w) { case 64: @@ -10653,6 +10761,7 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) struct drm_connector_list_iter conn_iter; unsigned int used_ports = 0; unsigned int used_mst_ports = 0; + bool ret = true; /* * Walk the connector list instead of the encoder @@ -10687,7 +10796,7 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) /* the same port mustn't appear more than once */ if (used_ports & port_mask) - return false; + ret = false; used_ports |= port_mask; break; @@ -10705,7 +10814,7 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) if (used_ports & used_mst_ports) return false; - return true; + return ret; } static void @@ -10950,24 +11059,17 @@ intel_compare_link_m_n(const struct intel_link_m_n *m_n, static void __printf(3, 4) pipe_config_err(bool adjust, const char *name, const char *format, ...) 
{ - char *level; - unsigned int category; struct va_format vaf; va_list args; - if (adjust) { - level = KERN_DEBUG; - category = DRM_UT_KMS; - } else { - level = KERN_ERR; - category = DRM_UT_NONE; - } - va_start(args, format); vaf.fmt = format; vaf.va = &args; - drm_printk(level, category, "mismatch in %s %pV", name, &vaf); + if (adjust) + drm_dbg(DRM_UT_KMS, "mismatch in %s %pV", name, &vaf); + else + drm_err("mismatch in %s %pV", name, &vaf); va_end(args); } @@ -11944,6 +12046,14 @@ static int intel_atomic_check(struct drm_device *dev, int ret, i; bool any_ms = false; + /* Catch I915_MODE_FLAG_INHERITED */ + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, + crtc_state, i) { + if (crtc_state->mode.private_flags != + old_crtc_state->mode.private_flags) + crtc_state->mode_changed = true; + } + ret = drm_atomic_helper_check_modeset(dev, state); if (ret) return ret; @@ -11952,10 +12062,6 @@ static int intel_atomic_check(struct drm_device *dev, struct intel_crtc_state *pipe_config = to_intel_crtc_state(crtc_state); - /* Catch I915_MODE_FLAG_INHERITED */ - if (crtc_state->mode.private_flags != old_crtc_state->mode.private_flags) - crtc_state->mode_changed = true; - if (!needs_modeset(crtc_state)) continue; @@ -11964,13 +12070,6 @@ static int intel_atomic_check(struct drm_device *dev, continue; } - /* FIXME: For only active_changed we shouldn't need to do any - * state recomputation at all. */ - - ret = drm_atomic_add_affected_connectors(state, crtc); - if (ret) - return ret; - ret = intel_modeset_pipe_config(crtc, pipe_config); if (ret) { intel_dump_pipe_config(to_intel_crtc(crtc), @@ -11989,10 +12088,6 @@ static int intel_atomic_check(struct drm_device *dev, if (needs_modeset(crtc_state)) any_ms = true; - ret = drm_atomic_add_affected_planes(state, crtc); - if (ret) - return ret; - intel_dump_pipe_config(to_intel_crtc(crtc), pipe_config, needs_modeset(crtc_state) ? "[modeset]" : "[fastset]"); @@ -12026,7 +12121,7 @@ u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc) struct drm_device *dev = crtc->base.dev; if (!dev->max_vblank_count) - return drm_crtc_accurate_vblank_count(&crtc->base); + return (u32)drm_crtc_accurate_vblank_count(&crtc->base); return dev->driver->get_vblank_counter(dev, crtc->pipe); } @@ -12510,17 +12605,23 @@ struct wait_rps_boost { struct wait_queue_entry wait; struct drm_crtc *crtc; - struct drm_i915_gem_request *request; + struct i915_request *request; }; static int do_rps_boost(struct wait_queue_entry *_wait, unsigned mode, int sync, void *key) { struct wait_rps_boost *wait = container_of(_wait, typeof(*wait), wait); - struct drm_i915_gem_request *rq = wait->request; + struct i915_request *rq = wait->request; - gen6_rps_boost(rq, NULL); - i915_gem_request_put(rq); + /* + * If we missed the vblank, but the request is already running it + * is reasonable to assume that it will complete before the next + * vblank without our intervention, so leave RPS alone. 
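/*
 * Aside: the waiter below only boosts the GPU clock when the request
 * has not yet started executing. A minimal model of that decision,
 * with stand-in types (the real i915_request_started() inspects the
 * request's breadcrumb state, not a plain flag):
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_request {
	bool started;	/* stand-in for the in-flight check */
};

static void maybe_boost(const struct fake_request *rq)
{
	if (!rq->started)
		printf("boost: request still queued behind others\n");
	else
		printf("no boost: already running, will finish on its own\n");
}

int main(void)
{
	struct fake_request queued = { .started = false };
	struct fake_request running = { .started = true };

	maybe_boost(&queued);
	maybe_boost(&running);
	return 0;
}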
+ */ + if (!i915_request_started(rq)) + gen6_rps_boost(rq, NULL); + i915_request_put(rq); drm_crtc_vblank_put(wait->crtc); @@ -12558,10 +12659,46 @@ static void add_rps_boost_after_vblank(struct drm_crtc *crtc, add_wait_queue(drm_crtc_vblank_waitqueue(crtc), &wait->wait); } +static int intel_plane_pin_fb(struct intel_plane_state *plane_state) +{ + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct drm_framebuffer *fb = plane_state->base.fb; + struct i915_vma *vma; + + if (plane->id == PLANE_CURSOR && + INTEL_INFO(dev_priv)->cursor_needs_physical) { + struct drm_i915_gem_object *obj = intel_fb_obj(fb); + const int align = intel_cursor_alignment(dev_priv); + + return i915_gem_object_attach_phys(obj, align); + } + + vma = intel_pin_and_fence_fb_obj(fb, + plane_state->base.rotation, + intel_plane_uses_fence(plane_state), + &plane_state->flags); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + plane_state->vma = vma; + + return 0; +} + +static void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) +{ + struct i915_vma *vma; + + vma = fetch_and_zero(&old_plane_state->vma); + if (vma) + intel_unpin_fb_vma(vma, old_plane_state->flags); +} + /** * intel_prepare_plane_fb - Prepare fb for usage on plane * @plane: drm plane to prepare for - * @fb: framebuffer to prepare for presentation + * @new_state: the plane state being prepared * * Prepares a framebuffer for usage on a display plane. Generally this * involves pinning the underlying object and updating the frontbuffer tracking @@ -12632,20 +12769,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, return ret; } - if (plane->type == DRM_PLANE_TYPE_CURSOR && - INTEL_INFO(dev_priv)->cursor_needs_physical) { - const int align = intel_cursor_alignment(dev_priv); - - ret = i915_gem_object_attach_phys(obj, align); - } else { - struct i915_vma *vma; - - vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation); - if (!IS_ERR(vma)) - to_intel_plane_state(new_state)->vma = vma; - else - ret = PTR_ERR(vma); - } + ret = intel_plane_pin_fb(to_intel_plane_state(new_state)); i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); @@ -12679,7 +12803,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, /** * intel_cleanup_plane_fb - Cleans up an fb after plane use * @plane: drm plane to clean up for - * @fb: old framebuffer that was on plane + * @old_state: the state from the previous modeset * * Cleans up a framebuffer that has just been removed from a plane. * @@ -12689,15 +12813,12 @@ void intel_cleanup_plane_fb(struct drm_plane *plane, struct drm_plane_state *old_state) { - struct i915_vma *vma; + struct drm_i915_private *dev_priv = to_i915(plane->dev); /* Should only be called after a successful intel_prepare_plane_fb()! 
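/*
 * Aside: intel_plane_unpin_fb() above relies on i915's
 * fetch_and_zero() helper, which reads a location and clears it in
 * one expression so the vma can only be unpinned once. A plain-C
 * equivalent of the idiom (the kernel version is a type-generic
 * macro, not a pointer-only function):
 */
#include <stdio.h>

static void *fetch_and_zero_ptr(void **slot)
{
	void *old = *slot;

	*slot = NULL;
	return old;
}

int main(void)
{
	int pinned = 42;
	void *slot = &pinned;
	void *vma;

	vma = fetch_and_zero_ptr(&slot);
	printf("first fetch: %s\n", vma ? "unpin" : "nothing to do");
	vma = fetch_and_zero_ptr(&slot);
	printf("second fetch: %s\n", vma ? "unpin" : "nothing to do");
	return 0;
}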
*/ - vma = fetch_and_zero(&to_intel_plane_state(old_state)->vma); - if (vma) { - mutex_lock(&plane->dev->struct_mutex); - intel_unpin_fb_vma(vma); - mutex_unlock(&plane->dev->struct_mutex); - } + mutex_lock(&dev_priv->drm.struct_mutex); + intel_plane_unpin_fb(to_intel_plane_state(old_state)); + mutex_unlock(&dev_priv->drm.struct_mutex); } int @@ -12747,7 +12868,7 @@ intel_check_primary_plane(struct intel_plane *plane, if (INTEL_GEN(dev_priv) >= 9) { /* use scaler when colorkey is not required */ - if (state->ckey.flags == I915_SET_COLORKEY_NONE) { + if (!state->ckey.flags) { min_scale = 1; max_scale = skl_max_scale(to_intel_crtc(crtc), crtc_state); } @@ -12756,7 +12877,6 @@ intel_check_primary_plane(struct intel_plane *plane, ret = drm_atomic_helper_check_plane_state(&state->base, &crtc_state->base, - &state->clip, min_scale, max_scale, can_position, true); if (ret) @@ -12766,7 +12886,7 @@ intel_check_primary_plane(struct intel_plane *plane, return 0; if (INTEL_GEN(dev_priv) >= 9) { - ret = skl_check_plane_surface(state); + ret = skl_check_plane_surface(crtc_state, state); if (ret) return ret; @@ -12944,8 +13064,6 @@ static bool intel_primary_plane_format_mod_supported(struct drm_plane *plane, return i965_mod_supported(format, modifier); else return i8xx_mod_supported(format, modifier); - - unreachable(); } static bool intel_cursor_plane_format_mod_supported(struct drm_plane *plane, @@ -12985,7 +13103,6 @@ intel_legacy_cursor_update(struct drm_plane *plane, struct intel_plane *intel_plane = to_intel_plane(plane); struct drm_framebuffer *old_fb; struct drm_crtc_state *crtc_state = crtc->state; - struct i915_vma *old_vma, *vma; /* * When crtc is inactive or there is a modeset pending, @@ -13044,25 +13161,9 @@ intel_legacy_cursor_update(struct drm_plane *plane, if (ret) goto out_free; - if (INTEL_INFO(dev_priv)->cursor_needs_physical) { - int align = intel_cursor_alignment(dev_priv); - - ret = i915_gem_object_attach_phys(intel_fb_obj(fb), align); - if (ret) { - DRM_DEBUG_KMS("failed to attach phys object\n"); - goto out_unlock; - } - } else { - vma = intel_pin_and_fence_fb_obj(fb, new_plane_state->rotation); - if (IS_ERR(vma)) { - DRM_DEBUG_KMS("failed to pin object\n"); - - ret = PTR_ERR(vma); - goto out_unlock; - } - - to_intel_plane_state(new_plane_state)->vma = vma; - } + ret = intel_plane_pin_fb(to_intel_plane_state(new_plane_state)); + if (ret) + goto out_unlock; old_fb = old_plane_state->fb; @@ -13082,9 +13183,7 @@ intel_legacy_cursor_update(struct drm_plane *plane, intel_plane->disable_plane(intel_plane, to_intel_crtc(crtc)); } - old_vma = fetch_and_zero(&to_intel_plane_state(old_plane_state)->vma); - if (old_vma) - intel_unpin_fb_vma(old_vma); + intel_plane_unpin_fb(to_intel_plane_state(old_plane_state)); out_unlock: mutex_unlock(&dev_priv->drm.struct_mutex); @@ -13112,6 +13211,32 @@ static const struct drm_plane_funcs intel_cursor_plane_funcs = { .format_mod_supported = intel_cursor_plane_format_mod_supported, }; +static bool i9xx_plane_has_fbc(struct drm_i915_private *dev_priv, + enum i9xx_plane_id i9xx_plane) +{ + if (!HAS_FBC(dev_priv)) + return false; + + if (IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) + return i9xx_plane == PLANE_A; /* tied to pipe A */ + else if (IS_IVYBRIDGE(dev_priv)) + return i9xx_plane == PLANE_A || i9xx_plane == PLANE_B || + i9xx_plane == PLANE_C; + else if (INTEL_GEN(dev_priv) >= 4) + return i9xx_plane == PLANE_A || i9xx_plane == PLANE_B; + else + return i9xx_plane == PLANE_A; +} + +static bool skl_plane_has_fbc(struct drm_i915_private *dev_priv, + 
enum pipe pipe, enum plane_id plane_id) +{ + if (!HAS_FBC(dev_priv)) + return false; + + return pipe == PIPE_A && plane_id == PLANE_PRIMARY; +} + static struct intel_plane * intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) { @@ -13153,21 +13278,29 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) else primary->i9xx_plane = (enum i9xx_plane_id) pipe; primary->id = PLANE_PRIMARY; - primary->frontbuffer_bit = INTEL_FRONTBUFFER_PRIMARY(pipe); + primary->frontbuffer_bit = INTEL_FRONTBUFFER(pipe, primary->id); + + if (INTEL_GEN(dev_priv) >= 9) + primary->has_fbc = skl_plane_has_fbc(dev_priv, + primary->pipe, + primary->id); + else + primary->has_fbc = i9xx_plane_has_fbc(dev_priv, + primary->i9xx_plane); + + if (primary->has_fbc) { + struct intel_fbc *fbc = &dev_priv->fbc; + + fbc->possible_framebuffer_bits |= primary->frontbuffer_bit; + } + primary->check_plane = intel_check_primary_plane; - if (INTEL_GEN(dev_priv) >= 10) { + if (INTEL_GEN(dev_priv) >= 9) { intel_primary_formats = skl_primary_formats; num_formats = ARRAY_SIZE(skl_primary_formats); - modifiers = skl_format_modifiers_ccs; - primary->update_plane = skl_update_plane; - primary->disable_plane = skl_disable_plane; - primary->get_hw_state = skl_plane_get_hw_state; - } else if (INTEL_GEN(dev_priv) >= 9) { - intel_primary_formats = skl_primary_formats; - num_formats = ARRAY_SIZE(skl_primary_formats); - if (pipe < PIPE_C) + if (skl_plane_has_ccs(dev_priv, pipe, PLANE_PRIMARY)) modifiers = skl_format_modifiers_ccs; else modifiers = skl_format_modifiers_noccs; @@ -13243,6 +13376,15 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) DRM_MODE_ROTATE_0, supported_rotations); + if (INTEL_GEN(dev_priv) >= 9) + drm_plane_create_color_properties(&primary->base, + BIT(DRM_COLOR_YCBCR_BT601) | + BIT(DRM_COLOR_YCBCR_BT709), + BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) | + BIT(DRM_COLOR_YCBCR_FULL_RANGE), + DRM_COLOR_YCBCR_BT709, + DRM_COLOR_YCBCR_LIMITED_RANGE); + drm_plane_helper_add(&primary->base, &intel_plane_helper_funcs); return primary; @@ -13281,7 +13423,7 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, cursor->pipe = pipe; cursor->i9xx_plane = (enum i9xx_plane_id) pipe; cursor->id = PLANE_CURSOR; - cursor->frontbuffer_bit = INTEL_FRONTBUFFER_CURSOR(pipe); + cursor->frontbuffer_bit = INTEL_FRONTBUFFER(pipe, cursor->id); if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) { cursor->update_plane = i845_update_cursor; @@ -13447,8 +13589,8 @@ enum pipe intel_get_pipe_from_connector(struct intel_connector *connector) return to_intel_crtc(connector->base.state->crtc)->pipe; } -int intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data, - struct drm_file *file) +int intel_get_pipe_from_crtc_id_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) { struct drm_i915_get_pipe_from_crtc_id *pipe_from_crtc_id = data; struct drm_crtc *drmmode_crtc; @@ -13597,7 +13739,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) if (found || IS_GEN9_BC(dev_priv)) intel_ddi_init(dev_priv, PORT_A); - /* DDI B, C and D detection is indicated by the SFUSE_STRAP + /* DDI B, C, D, and F detection is indicated by the SFUSE_STRAP * register */ found = I915_READ(SFUSE_STRAP); @@ -13607,6 +13749,8 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_ddi_init(dev_priv, PORT_C); if (found & SFUSE_STRAP_DDID_DETECTED) intel_ddi_init(dev_priv, PORT_D); + if (found & SFUSE_STRAP_DDIF_DETECTED) + intel_ddi_init(dev_priv, 
PORT_F); /* * On SKL we don't have a way to detect DDI-E so we rely on VBT. */ @@ -13894,7 +14038,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, * gen2/3 display engine uses the fence if present, * so the tiling mode must match the fb modifier exactly. */ - if (INTEL_INFO(dev_priv)->gen < 4 && + if (INTEL_GEN(dev_priv) < 4 && tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { DRM_DEBUG_KMS("tiling_mode must match fb modifier exactly on gen2/3\n"); goto err; @@ -14063,10 +14207,37 @@ static void intel_atomic_state_free(struct drm_atomic_state *state) kfree(state); } +static enum drm_mode_status +intel_mode_valid(struct drm_device *dev, + const struct drm_display_mode *mode) +{ + if (mode->vscan > 1) + return MODE_NO_VSCAN; + + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) + return MODE_NO_DBLESCAN; + + if (mode->flags & DRM_MODE_FLAG_HSKEW) + return MODE_H_ILLEGAL; + + if (mode->flags & (DRM_MODE_FLAG_CSYNC | + DRM_MODE_FLAG_NCSYNC | + DRM_MODE_FLAG_PCSYNC)) + return MODE_HSYNC; + + if (mode->flags & (DRM_MODE_FLAG_BCAST | + DRM_MODE_FLAG_PIXMUX | + DRM_MODE_FLAG_CLKDIV2)) + return MODE_BAD; + + return MODE_OK; +} + static const struct drm_mode_config_funcs intel_mode_funcs = { .fb_create = intel_user_framebuffer_create, .get_format_info = intel_get_format_info, .output_poll_changed = intel_fbdev_output_poll_changed, + .mode_valid = intel_mode_valid, .atomic_check = intel_atomic_check, .atomic_commit = intel_atomic_commit, .atomic_state_alloc = intel_atomic_state_alloc, @@ -14082,7 +14253,7 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) { intel_init_cdclk_hooks(dev_priv); - if (INTEL_INFO(dev_priv)->gen >= 9) { + if (INTEL_GEN(dev_priv) >= 9) { dev_priv->display.get_pipe_config = haswell_get_pipe_config; dev_priv->display.get_initial_plane_config = skylake_get_initial_plane_config; @@ -15217,6 +15388,10 @@ static void intel_hpd_poll_fini(struct drm_device *dev) for_each_intel_connector_iter(connector, &conn_iter) { if (connector->modeset_retry_work.func) cancel_work_sync(&connector->modeset_retry_work); + if (connector->hdcp_shim) { + cancel_delayed_work_sync(&connector->hdcp_check_work); + cancel_work_sync(&connector->hdcp_prop_work); + } } drm_connector_list_iter_end(&conn_iter); } diff --git a/drivers/gpu/drm/i915/intel_display.h b/drivers/gpu/drm/i915/intel_display.h index a0d2b6169361..4e7418b345bc 100644 --- a/drivers/gpu/drm/i915/intel_display.h +++ b/drivers/gpu/drm/i915/intel_display.h @@ -119,6 +119,7 @@ enum port { PORT_C, PORT_D, PORT_E, + PORT_F, I915_MAX_PORTS }; @@ -138,6 +139,17 @@ enum dpio_phy { #define I915_NUM_PHYS_VLV 2 +enum aux_ch { + AUX_CH_A, + AUX_CH_B, + AUX_CH_C, + AUX_CH_D, + _AUX_CH_E, /* does not exist */ + AUX_CH_F, +}; + +#define aux_ch_name(a) ((a) + 'A') + enum intel_display_power_domain { POWER_DOMAIN_PIPE_A, POWER_DOMAIN_PIPE_B, @@ -156,11 +168,13 @@ enum intel_display_power_domain { POWER_DOMAIN_PORT_DDI_C_LANES, POWER_DOMAIN_PORT_DDI_D_LANES, POWER_DOMAIN_PORT_DDI_E_LANES, + POWER_DOMAIN_PORT_DDI_F_LANES, POWER_DOMAIN_PORT_DDI_A_IO, POWER_DOMAIN_PORT_DDI_B_IO, POWER_DOMAIN_PORT_DDI_C_IO, POWER_DOMAIN_PORT_DDI_D_IO, POWER_DOMAIN_PORT_DDI_E_IO, + POWER_DOMAIN_PORT_DDI_F_IO, POWER_DOMAIN_PORT_DSI, POWER_DOMAIN_PORT_CRT, POWER_DOMAIN_PORT_OTHER, @@ -171,6 +185,8 @@ enum intel_display_power_domain { POWER_DOMAIN_AUX_B, POWER_DOMAIN_AUX_C, POWER_DOMAIN_AUX_D, + POWER_DOMAIN_AUX_F, + POWER_DOMAIN_AUX_IO_A, POWER_DOMAIN_GMBUS, POWER_DOMAIN_MODESET, POWER_DOMAIN_GT_IRQ, diff --git 
a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index a29868cd30c7..9a4a51e79fa1 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -36,7 +36,9 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> #include <drm/drm_crtc_helper.h> +#include <drm/drm_dp_helper.h> #include <drm/drm_edid.h> +#include <drm/drm_hdcp.h> #include "intel_drv.h" #include <drm/i915_drm.h> #include "i915_drv.h" @@ -94,15 +96,6 @@ static const struct dp_link_dpll chv_dpll[] = { { .p1 = 2, .p2 = 1, .n = 1, .m1 = 2, .m2 = 0x6c00000 } } }; -static const int bxt_rates[] = { 162000, 216000, 243000, 270000, - 324000, 432000, 540000 }; -static const int skl_rates[] = { 162000, 216000, 270000, - 324000, 432000, 540000 }; -static const int cnl_rates[] = { 162000, 216000, 270000, - 324000, 432000, 540000, - 648000, 810000 }; -static const int default_rates[] = { 162000, 270000, 540000 }; - /** * intel_dp_is_edp - is the given port attached to an eDP panel (either CPU or PCH) * @intel_dp: DP struct @@ -142,19 +135,44 @@ static void intel_dp_unset_edid(struct intel_dp *intel_dp); /* update sink rates from dpcd */ static void intel_dp_set_sink_rates(struct intel_dp *intel_dp) { + static const int dp_rates[] = { + 162000, 270000, 540000, 810000 + }; int i, max_rate; max_rate = drm_dp_bw_code_to_link_rate(intel_dp->dpcd[DP_MAX_LINK_RATE]); - for (i = 0; i < ARRAY_SIZE(default_rates); i++) { - if (default_rates[i] > max_rate) + for (i = 0; i < ARRAY_SIZE(dp_rates); i++) { + if (dp_rates[i] > max_rate) break; - intel_dp->sink_rates[i] = default_rates[i]; + intel_dp->sink_rates[i] = dp_rates[i]; } intel_dp->num_sink_rates = i; } +/* Get length of rates array potentially limited by max_rate. */ +static int intel_dp_rate_limit_len(const int *rates, int len, int max_rate) +{ + int i; + + /* Limit results by potentially reduced max rate */ + for (i = 0; i < len; i++) { + if (rates[len - i - 1] <= max_rate) + return len - i; + } + + return 0; +} + +/* Get length of common rates array potentially limited by max_rate. 
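/*
 * Aside: intel_dp_rate_limit_len() above walks the ascending rate
 * array from the top and returns how many leading entries fit under
 * max_rate. A self-contained copy of that logic:
 */
#include <stdio.h>

static int rate_limit_len(const int *rates, int len, int max_rate)
{
	int i;

	for (i = 0; i < len; i++) {
		if (rates[len - i - 1] <= max_rate)
			return len - i;
	}
	return 0;
}

int main(void)
{
	static const int dp_rates[] = { 162000, 270000, 540000, 810000 };

	/* HBR2-limited sink: only the first three rates survive */
	printf("%d\n", rate_limit_len(dp_rates, 4, 540000));	/* 3 */
	printf("%d\n", rate_limit_len(dp_rates, 4, 100000));	/* 0 */
	return 0;
}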
*/ +static int intel_dp_common_len_rate_limit(const struct intel_dp *intel_dp, + int max_rate) +{ + return intel_dp_rate_limit_len(intel_dp->common_rates, + intel_dp->num_common_rates, max_rate); +} + /* Theoretical max between source and sink */ static int intel_dp_max_common_rate(struct intel_dp *intel_dp) { @@ -218,41 +236,85 @@ intel_dp_downstream_max_dotclock(struct intel_dp *intel_dp) return max_dotclk; } +static int cnl_max_source_rate(struct intel_dp *intel_dp) +{ + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); + enum port port = dig_port->base.port; + + u32 voltage = I915_READ(CNL_PORT_COMP_DW3) & VOLTAGE_INFO_MASK; + + /* Low voltage SKUs are limited to max of 5.4G */ + if (voltage == VOLTAGE_INFO_0_85V) + return 540000; + + /* For this SKU 8.1G is supported in all ports */ + if (IS_CNL_WITH_PORT_F(dev_priv)) + return 810000; + + /* For other SKUs, max rate on ports A and D is 5.4G */ + if (port == PORT_A || port == PORT_D) + return 540000; + + return 810000; +} + static void intel_dp_set_source_rates(struct intel_dp *intel_dp) { + /* The values must be in increasing order */ + static const int cnl_rates[] = { + 162000, 216000, 270000, 324000, 432000, 540000, 648000, 810000 + }; + static const int bxt_rates[] = { + 162000, 216000, 243000, 270000, 324000, 432000, 540000 + }; + static const int skl_rates[] = { + 162000, 216000, 270000, 324000, 432000, 540000 + }; + static const int hsw_rates[] = { + 162000, 270000, 540000 + }; + static const int g4x_rates[] = { + 162000, 270000 + }; struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - enum port port = dig_port->base.port; + const struct ddi_vbt_port_info *info = + &dev_priv->vbt.ddi_port_info[dig_port->base.port]; const int *source_rates; - int size; - u32 voltage; + int size, max_rate = 0, vbt_max_rate = info->dp_max_link_rate; /* This should only be done once */ WARN_ON(intel_dp->source_rates || intel_dp->num_source_rates); - if (IS_GEN9_LP(dev_priv)) { - source_rates = bxt_rates; - size = ARRAY_SIZE(bxt_rates); - } else if (IS_CANNONLAKE(dev_priv)) { + if (IS_CANNONLAKE(dev_priv)) { source_rates = cnl_rates; size = ARRAY_SIZE(cnl_rates); - voltage = I915_READ(CNL_PORT_COMP_DW3) & VOLTAGE_INFO_MASK; - if (port == PORT_A || port == PORT_D || - voltage == VOLTAGE_INFO_0_85V) - size -= 2; + max_rate = cnl_max_source_rate(intel_dp); + } else if (IS_GEN9_LP(dev_priv)) { + source_rates = bxt_rates; + size = ARRAY_SIZE(bxt_rates); } else if (IS_GEN9_BC(dev_priv)) { source_rates = skl_rates; size = ARRAY_SIZE(skl_rates); } else if ((IS_HASWELL(dev_priv) && !IS_HSW_ULX(dev_priv)) || IS_BROADWELL(dev_priv)) { - source_rates = default_rates; - size = ARRAY_SIZE(default_rates); + source_rates = hsw_rates; + size = ARRAY_SIZE(hsw_rates); } else { - source_rates = default_rates; - size = ARRAY_SIZE(default_rates) - 1; + source_rates = g4x_rates; + size = ARRAY_SIZE(g4x_rates); } + if (max_rate && vbt_max_rate) + max_rate = min(max_rate, vbt_max_rate); + else if (vbt_max_rate) + max_rate = vbt_max_rate; + + if (max_rate) + size = intel_dp_rate_limit_len(source_rates, size, max_rate); + intel_dp->source_rates = source_rates; intel_dp->num_source_rates = size; } @@ -304,27 +366,11 @@ static void intel_dp_set_common_rates(struct intel_dp *intel_dp) /* Paranoia, there should always be something in common. 
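/*
 * Aside: the source-rate hunk above merges two optional caps -- the
 * platform limit from cnl_max_source_rate() and the VBT's
 * dp_max_link_rate -- taking the smaller when both are set and
 * treating 0 as "no limit". The same logic in isolation:
 */
#include <stdio.h>

static int combine_caps(int hw_max, int vbt_max)
{
	if (hw_max && vbt_max)
		return hw_max < vbt_max ? hw_max : vbt_max;
	return hw_max ? hw_max : vbt_max;	/* whichever is set, or 0 */
}

int main(void)
{
	printf("%d\n", combine_caps(810000, 540000));	/* VBT wins: 540000 */
	printf("%d\n", combine_caps(540000, 0));	/* 540000 */
	printf("%d\n", combine_caps(0, 0));		/* 0: uncapped */
	return 0;
}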
*/ if (WARN_ON(intel_dp->num_common_rates == 0)) { - intel_dp->common_rates[0] = default_rates[0]; + intel_dp->common_rates[0] = 162000; intel_dp->num_common_rates = 1; } } -/* get length of common rates potentially limited by max_rate */ -static int intel_dp_common_len_rate_limit(struct intel_dp *intel_dp, - int max_rate) -{ - const int *common_rates = intel_dp->common_rates; - int i, common_len = intel_dp->num_common_rates; - - /* Limit results by potentially reduced max rate */ - for (i = 0; i < common_len; i++) { - if (common_rates[common_len - i - 1] <= max_rate) - return common_len - i; - } - - return 0; -} - static bool intel_dp_link_params_valid(struct intel_dp *intel_dp, int link_rate, uint8_t lane_count) { @@ -790,7 +836,8 @@ static void intel_pps_get_registers(struct intel_dp *intel_dp, regs->pp_stat = PP_STATUS(pps_idx); regs->pp_on = PP_ON_DELAYS(pps_idx); regs->pp_off = PP_OFF_DELAYS(pps_idx); - if (!IS_GEN9_LP(dev_priv) && !HAS_PCH_CNP(dev_priv)) + if (!IS_GEN9_LP(dev_priv) && !HAS_PCH_CNP(dev_priv) && + !HAS_PCH_ICP(dev_priv)) regs->pp_div = PP_DIVISOR(pps_idx); } @@ -895,7 +942,7 @@ static uint32_t intel_dp_aux_wait_done(struct intel_dp *intel_dp, bool has_aux_irq) { struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); - i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg; + i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg(intel_dp); uint32_t status; bool done; @@ -915,8 +962,7 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp, bool has_aux_irq) static uint32_t g4x_get_aux_clock_divider(struct intel_dp *intel_dp, int index) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); if (index) return 0; @@ -930,8 +976,7 @@ static uint32_t g4x_get_aux_clock_divider(struct intel_dp *intel_dp, int index) static uint32_t ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); if (index) return 0; @@ -941,7 +986,7 @@ static uint32_t ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) * like to run at 2MHz. 
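/*
 * Aside: the divider this comment goes on to derive is a
 * round-to-nearest division of the reference clock (in kHz) by 2000,
 * aiming for a roughly 2MHz AUX bit clock. The kernel's
 * DIV_ROUND_CLOSEST reduces, for unsigned operands, to:
 */
#include <stdio.h>

static unsigned int div_round_closest(unsigned int x, unsigned int d)
{
	return (x + d / 2) / d;
}

int main(void)
{
	/* e.g. 337.5 MHz cdclk and 24 MHz rawclk, as kHz inputs */
	printf("%u\n", div_round_closest(337500, 2000));	/* 169 */
	printf("%u\n", div_round_closest(24000, 2000));		/* 12 */
	return 0;
}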
So, take the cdclk or PCH rawclk value and * divide by 2000 and use that */ - if (intel_dig_port->base.port == PORT_A) + if (intel_dp->aux_ch == AUX_CH_A) return DIV_ROUND_CLOSEST(dev_priv->cdclk.hw.cdclk, 2000); else return DIV_ROUND_CLOSEST(dev_priv->rawclk_freq, 2000); @@ -949,10 +994,9 @@ static uint32_t ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) static uint32_t hsw_get_aux_clock_divider(struct intel_dp *intel_dp, int index) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); - if (intel_dig_port->base.port != PORT_A && HAS_PCH_LPT_H(dev_priv)) { + if (intel_dp->aux_ch != AUX_CH_A && HAS_PCH_LPT_H(dev_priv)) { /* Workaround for non-ULT HSW */ switch (index) { case 0: return 63; @@ -1022,14 +1066,15 @@ static uint32_t skl_get_aux_send_ctl(struct intel_dp *intel_dp, } static int -intel_dp_aux_ch(struct intel_dp *intel_dp, - const uint8_t *send, int send_bytes, - uint8_t *recv, int recv_size) +intel_dp_aux_xfer(struct intel_dp *intel_dp, + const uint8_t *send, int send_bytes, + uint8_t *recv, int recv_size, + u32 aux_send_ctl_flags) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); - i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg; + i915_reg_t ch_ctl, ch_data[5]; uint32_t aux_clock_divider; int i, ret, recv_bytes; uint32_t status; @@ -1037,6 +1082,10 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, bool has_aux_irq = HAS_AUX_IRQ(dev_priv); bool vdd; + ch_ctl = intel_dp->aux_ch_ctl_reg(intel_dp); + for (i = 0; i < ARRAY_SIZE(ch_data); i++) + ch_data[i] = intel_dp->aux_ch_data_reg(intel_dp, i); + pps_lock(intel_dp); /* @@ -1089,11 +1138,13 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, send_bytes, aux_clock_divider); + send_ctl |= aux_send_ctl_flags; + /* Must try at least 3 times according to DP spec */ for (try = 0; try < 5; try++) { /* Load the send data into the aux channel data registers */ for (i = 0; i < send_bytes; i += 4) - I915_WRITE(intel_dp->aux_ch_data_reg[i >> 2], + I915_WRITE(ch_data[i >> 2], intel_dp_pack_aux(send + i, send_bytes - i)); @@ -1109,14 +1160,14 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, DP_AUX_CH_CTL_TIME_OUT_ERROR | DP_AUX_CH_CTL_RECEIVE_ERROR); - if (status & DP_AUX_CH_CTL_TIME_OUT_ERROR) - continue; - /* DP CTS 1.2 Core Rev 1.1, 4.2.1.1 & 4.2.1.2 * 400us delay required for errors and timeouts * Timeout errors from the HW already meet this * requirement so skip to next iteration */ + if (status & DP_AUX_CH_CTL_TIME_OUT_ERROR) + continue; + if (status & DP_AUX_CH_CTL_RECEIVE_ERROR) { usleep_range(400, 500); continue; @@ -1162,14 +1213,6 @@ done: if (recv_bytes == 0 || recv_bytes > 20) { DRM_DEBUG_KMS("Forbidden recv_bytes = %d on aux transaction\n", recv_bytes); - /* - * FIXME: This patch was created on top of a series that - * organize the retries at drm level. There EBUSY should - * also take care for 1ms wait before retrying. - * That aux retries re-org is still needed and after that is - * merged we remove this sleep from here. 
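/*
 * Aside: the ch_data writes above load the AUX message into 32-bit
 * data registers four bytes at a time, first byte in the top bits.
 * A hedged restatement of intel_dp_pack_aux()'s packing order:
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t pack_aux(const uint8_t *src, int src_bytes)
{
	uint32_t v = 0;
	int i;

	if (src_bytes > 4)
		src_bytes = 4;
	for (i = 0; i < src_bytes; i++)
		v |= (uint32_t)src[i] << ((3 - i) * 8);
	return v;
}

int main(void)
{
	const uint8_t msg[] = { 0x10, 0x00, 0x23, 0x00 };

	printf("0x%08x\n", (unsigned)pack_aux(msg, 4));	/* 0x10002300 */
	printf("0x%08x\n", (unsigned)pack_aux(msg, 2));	/* 0x10000000 */
	return 0;
}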
- */ - usleep_range(1000, 1500); ret = -EBUSY; goto out; } @@ -1178,7 +1221,7 @@ done: recv_bytes = recv_size; for (i = 0; i < recv_bytes; i += 4) - intel_dp_unpack_aux(I915_READ(intel_dp->aux_ch_data_reg[i >> 2]), + intel_dp_unpack_aux(I915_READ(ch_data[i >> 2]), recv + i, recv_bytes - i); ret = recv_bytes; @@ -1195,6 +1238,17 @@ out: #define BARE_ADDRESS_SIZE 3 #define HEADER_SIZE (BARE_ADDRESS_SIZE + 1) + +static void +intel_dp_aux_header(u8 txbuf[HEADER_SIZE], + const struct drm_dp_aux_msg *msg) +{ + txbuf[0] = (msg->request << 4) | ((msg->address >> 16) & 0xf); + txbuf[1] = (msg->address >> 8) & 0xff; + txbuf[2] = msg->address & 0xff; + txbuf[3] = msg->size - 1; +} + static ssize_t intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { @@ -1203,11 +1257,7 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) size_t txsize, rxsize; int ret; - txbuf[0] = (msg->request << 4) | - ((msg->address >> 16) & 0xf); - txbuf[1] = (msg->address >> 8) & 0xff; - txbuf[2] = msg->address & 0xff; - txbuf[3] = msg->size - 1; + intel_dp_aux_header(txbuf, msg); switch (msg->request & ~DP_AUX_I2C_MOT) { case DP_AUX_NATIVE_WRITE: @@ -1224,7 +1274,8 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) if (msg->buffer) memcpy(txbuf + HEADER_SIZE, msg->buffer, msg->size); - ret = intel_dp_aux_ch(intel_dp, txbuf, txsize, rxbuf, rxsize); + ret = intel_dp_aux_xfer(intel_dp, txbuf, txsize, + rxbuf, rxsize, 0); if (ret > 0) { msg->reply = rxbuf[0] >> 4; @@ -1246,7 +1297,8 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) if (WARN_ON(rxsize > 20)) return -E2BIG; - ret = intel_dp_aux_ch(intel_dp, txbuf, txsize, rxbuf, rxsize); + ret = intel_dp_aux_xfer(intel_dp, txbuf, txsize, + rxbuf, rxsize, 0); if (ret > 0) { msg->reply = rxbuf[0] >> 4; /* @@ -1268,166 +1320,173 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) return ret; } -static enum port intel_aux_port(struct drm_i915_private *dev_priv, - enum port port) +static enum aux_ch intel_aux_ch(struct intel_dp *intel_dp) { + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = encoder->port; const struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port]; - enum port aux_port; + enum aux_ch aux_ch; if (!info->alternate_aux_channel) { + aux_ch = (enum aux_ch) port; + DRM_DEBUG_KMS("using AUX %c for port %c (platform default)\n", - port_name(port), port_name(port)); - return port; + aux_ch_name(aux_ch), port_name(port)); + return aux_ch; } switch (info->alternate_aux_channel) { case DP_AUX_A: - aux_port = PORT_A; + aux_ch = AUX_CH_A; break; case DP_AUX_B: - aux_port = PORT_B; + aux_ch = AUX_CH_B; break; case DP_AUX_C: - aux_port = PORT_C; + aux_ch = AUX_CH_C; break; case DP_AUX_D: - aux_port = PORT_D; + aux_ch = AUX_CH_D; + break; + case DP_AUX_F: + aux_ch = AUX_CH_F; break; default: MISSING_CASE(info->alternate_aux_channel); - aux_port = PORT_A; + aux_ch = AUX_CH_A; break; } DRM_DEBUG_KMS("using AUX %c for port %c (VBT)\n", - port_name(aux_port), port_name(port)); + aux_ch_name(aux_ch), port_name(port)); - return aux_port; + return aux_ch; } -static i915_reg_t g4x_aux_ctl_reg(struct drm_i915_private *dev_priv, - enum port port) +static enum intel_display_power_domain +intel_aux_power_domain(struct intel_dp *intel_dp) { - switch (port) { - case PORT_B: - case PORT_C: - case PORT_D: - return DP_AUX_CH_CTL(port); + switch (intel_dp->aux_ch) { + 
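/*
 * Aside: intel_dp_aux_header() above lays out the 4-byte native AUX
 * header -- request opcode in the high nibble of byte 0, a 20-bit
 * address across bytes 0-2, and length-minus-one in byte 3. Worked
 * through for a 16-byte native read of DPCD address 0x00200:
 */
#include <stdint.h>
#include <stdio.h>

static void aux_header(uint8_t txbuf[4], uint8_t request,
		       uint32_t address, uint32_t size)
{
	txbuf[0] = (request << 4) | ((address >> 16) & 0xf);
	txbuf[1] = (address >> 8) & 0xff;
	txbuf[2] = address & 0xff;
	txbuf[3] = size - 1;
}

int main(void)
{
	uint8_t buf[4];

	aux_header(buf, 0x9 /* DP_AUX_NATIVE_READ */, 0x00200, 16);
	printf("%02x %02x %02x %02x\n", buf[0], buf[1], buf[2], buf[3]);
	/* prints: 90 02 00 0f */
	return 0;
}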
case AUX_CH_A: + return POWER_DOMAIN_AUX_A; + case AUX_CH_B: + return POWER_DOMAIN_AUX_B; + case AUX_CH_C: + return POWER_DOMAIN_AUX_C; + case AUX_CH_D: + return POWER_DOMAIN_AUX_D; + case AUX_CH_F: + return POWER_DOMAIN_AUX_F; default: - MISSING_CASE(port); - return DP_AUX_CH_CTL(PORT_B); + MISSING_CASE(intel_dp->aux_ch); + return POWER_DOMAIN_AUX_A; } } -static i915_reg_t g4x_aux_data_reg(struct drm_i915_private *dev_priv, - enum port port, int index) +static i915_reg_t g4x_aux_ctl_reg(struct intel_dp *intel_dp) { - switch (port) { - case PORT_B: - case PORT_C: - case PORT_D: - return DP_AUX_CH_DATA(port, index); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); + enum aux_ch aux_ch = intel_dp->aux_ch; + + switch (aux_ch) { + case AUX_CH_B: + case AUX_CH_C: + case AUX_CH_D: + return DP_AUX_CH_CTL(aux_ch); default: - MISSING_CASE(port); - return DP_AUX_CH_DATA(PORT_B, index); + MISSING_CASE(aux_ch); + return DP_AUX_CH_CTL(AUX_CH_B); } } -static i915_reg_t ilk_aux_ctl_reg(struct drm_i915_private *dev_priv, - enum port port) +static i915_reg_t g4x_aux_data_reg(struct intel_dp *intel_dp, int index) { - switch (port) { - case PORT_A: - return DP_AUX_CH_CTL(port); - case PORT_B: - case PORT_C: - case PORT_D: - return PCH_DP_AUX_CH_CTL(port); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); + enum aux_ch aux_ch = intel_dp->aux_ch; + + switch (aux_ch) { + case AUX_CH_B: + case AUX_CH_C: + case AUX_CH_D: + return DP_AUX_CH_DATA(aux_ch, index); default: - MISSING_CASE(port); - return DP_AUX_CH_CTL(PORT_A); + MISSING_CASE(aux_ch); + return DP_AUX_CH_DATA(AUX_CH_B, index); } } -static i915_reg_t ilk_aux_data_reg(struct drm_i915_private *dev_priv, - enum port port, int index) +static i915_reg_t ilk_aux_ctl_reg(struct intel_dp *intel_dp) { - switch (port) { - case PORT_A: - return DP_AUX_CH_DATA(port, index); - case PORT_B: - case PORT_C: - case PORT_D: - return PCH_DP_AUX_CH_DATA(port, index); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); + enum aux_ch aux_ch = intel_dp->aux_ch; + + switch (aux_ch) { + case AUX_CH_A: + return DP_AUX_CH_CTL(aux_ch); + case AUX_CH_B: + case AUX_CH_C: + case AUX_CH_D: + return PCH_DP_AUX_CH_CTL(aux_ch); default: - MISSING_CASE(port); - return DP_AUX_CH_DATA(PORT_A, index); + MISSING_CASE(aux_ch); + return DP_AUX_CH_CTL(AUX_CH_A); } } -static i915_reg_t skl_aux_ctl_reg(struct drm_i915_private *dev_priv, - enum port port) +static i915_reg_t ilk_aux_data_reg(struct intel_dp *intel_dp, int index) { - switch (port) { - case PORT_A: - case PORT_B: - case PORT_C: - case PORT_D: - return DP_AUX_CH_CTL(port); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); + enum aux_ch aux_ch = intel_dp->aux_ch; + + switch (aux_ch) { + case AUX_CH_A: + return DP_AUX_CH_DATA(aux_ch, index); + case AUX_CH_B: + case AUX_CH_C: + case AUX_CH_D: + return PCH_DP_AUX_CH_DATA(aux_ch, index); default: - MISSING_CASE(port); - return DP_AUX_CH_CTL(PORT_A); + MISSING_CASE(aux_ch); + return DP_AUX_CH_DATA(AUX_CH_A, index); } } -static i915_reg_t skl_aux_data_reg(struct drm_i915_private *dev_priv, - enum port port, int index) +static i915_reg_t skl_aux_ctl_reg(struct intel_dp *intel_dp) { - switch (port) { - case PORT_A: - case PORT_B: - case PORT_C: - case PORT_D: - return DP_AUX_CH_DATA(port, index); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); + enum aux_ch aux_ch = intel_dp->aux_ch; + + switch (aux_ch) { + case AUX_CH_A: + case AUX_CH_B: + case AUX_CH_C: + case AUX_CH_D: + 
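/*
 * Aside: these case ladders index registers by aux_ch, and the enum
 * (added in intel_display.h above) keeps a _AUX_CH_E placeholder so
 * the plain channel-plus-'A' naming still lines up once AUX F
 * exists. A quick check of that aux_ch_name() arithmetic:
 */
#include <stdio.h>

enum aux_ch { AUX_CH_A, AUX_CH_B, AUX_CH_C, AUX_CH_D,
	      _AUX_CH_E /* does not exist */, AUX_CH_F };

#define aux_ch_name(a) ((a) + 'A')

int main(void)
{
	printf("%c %c %c\n", aux_ch_name(AUX_CH_A),
	       aux_ch_name(AUX_CH_D), aux_ch_name(AUX_CH_F));
	/* prints: A D F -- E is skipped via the placeholder */
	return 0;
}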
case AUX_CH_F: + return DP_AUX_CH_CTL(aux_ch); default: - MISSING_CASE(port); - return DP_AUX_CH_DATA(PORT_A, index); + MISSING_CASE(aux_ch); + return DP_AUX_CH_CTL(AUX_CH_A); } } -static i915_reg_t intel_aux_ctl_reg(struct drm_i915_private *dev_priv, - enum port port) -{ - if (INTEL_INFO(dev_priv)->gen >= 9) - return skl_aux_ctl_reg(dev_priv, port); - else if (HAS_PCH_SPLIT(dev_priv)) - return ilk_aux_ctl_reg(dev_priv, port); - else - return g4x_aux_ctl_reg(dev_priv, port); -} - -static i915_reg_t intel_aux_data_reg(struct drm_i915_private *dev_priv, - enum port port, int index) -{ - if (INTEL_INFO(dev_priv)->gen >= 9) - return skl_aux_data_reg(dev_priv, port, index); - else if (HAS_PCH_SPLIT(dev_priv)) - return ilk_aux_data_reg(dev_priv, port, index); - else - return g4x_aux_data_reg(dev_priv, port, index); -} - -static void intel_aux_reg_init(struct intel_dp *intel_dp) +static i915_reg_t skl_aux_data_reg(struct intel_dp *intel_dp, int index) { struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); - enum port port = intel_aux_port(dev_priv, - dp_to_dig_port(intel_dp)->base.port); - int i; - - intel_dp->aux_ch_ctl_reg = intel_aux_ctl_reg(dev_priv, port); - for (i = 0; i < ARRAY_SIZE(intel_dp->aux_ch_data_reg); i++) - intel_dp->aux_ch_data_reg[i] = intel_aux_data_reg(dev_priv, port, i); + enum aux_ch aux_ch = intel_dp->aux_ch; + + switch (aux_ch) { + case AUX_CH_A: + case AUX_CH_B: + case AUX_CH_C: + case AUX_CH_D: + case AUX_CH_F: + return DP_AUX_CH_DATA(aux_ch, index); + default: + MISSING_CASE(aux_ch); + return DP_AUX_CH_DATA(AUX_CH_A, index); + } } static void @@ -1439,14 +1498,42 @@ intel_dp_aux_fini(struct intel_dp *intel_dp) static void intel_dp_aux_init(struct intel_dp *intel_dp) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - enum port port = intel_dig_port->base.port; + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + + intel_dp->aux_ch = intel_aux_ch(intel_dp); + intel_dp->aux_power_domain = intel_aux_power_domain(intel_dp); + + if (INTEL_GEN(dev_priv) >= 9) { + intel_dp->aux_ch_ctl_reg = skl_aux_ctl_reg; + intel_dp->aux_ch_data_reg = skl_aux_data_reg; + } else if (HAS_PCH_SPLIT(dev_priv)) { + intel_dp->aux_ch_ctl_reg = ilk_aux_ctl_reg; + intel_dp->aux_ch_data_reg = ilk_aux_data_reg; + } else { + intel_dp->aux_ch_ctl_reg = g4x_aux_ctl_reg; + intel_dp->aux_ch_data_reg = g4x_aux_data_reg; + } + + if (INTEL_GEN(dev_priv) >= 9) + intel_dp->get_aux_clock_divider = skl_get_aux_clock_divider; + else if (IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) + intel_dp->get_aux_clock_divider = hsw_get_aux_clock_divider; + else if (HAS_PCH_SPLIT(dev_priv)) + intel_dp->get_aux_clock_divider = ilk_get_aux_clock_divider; + else + intel_dp->get_aux_clock_divider = g4x_get_aux_clock_divider; + + if (INTEL_GEN(dev_priv) >= 9) + intel_dp->get_aux_send_ctl = skl_get_aux_send_ctl; + else + intel_dp->get_aux_send_ctl = g4x_get_aux_send_ctl; - intel_aux_reg_init(intel_dp); drm_dp_aux_init(&intel_dp->aux); /* Failure to allocate our preferred name is not critical */ - intel_dp->aux.name = kasprintf(GFP_KERNEL, "DPDDC-%c", port_name(port)); + intel_dp->aux.name = kasprintf(GFP_KERNEL, "DPDDC-%c", + port_name(encoder->port)); intel_dp->aux.transfer = intel_dp_aux_transfer; } @@ -1826,6 +1913,7 @@ void intel_dp_set_link_params(struct intel_dp *intel_dp, int link_rate, uint8_t lane_count, bool link_mst) { + intel_dp->link_trained = false; intel_dp->link_rate = link_rate; 
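/*
 * Aside: intel_dp_aux_init() above replaces the cached register
 * values with per-platform callbacks picked once at init time. The
 * shape of that pattern, with invented stand-ins for the i915 types
 * and register names:
 */
#include <stdio.h>

struct dp_aux_hooks {
	const char *(*ctl_reg)(void);
};

static const char *skl_ctl(void) { return "CPU AUX block (gen9+)"; }
static const char *ilk_ctl(void) { return "PCH AUX block (PCH split)"; }
static const char *g4x_ctl(void) { return "g4x AUX block"; }

static void aux_init(struct dp_aux_hooks *hooks, int gen, int has_pch_split)
{
	if (gen >= 9)
		hooks->ctl_reg = skl_ctl;
	else if (has_pch_split)
		hooks->ctl_reg = ilk_ctl;
	else
		hooks->ctl_reg = g4x_ctl;
}

int main(void)
{
	struct dp_aux_hooks hooks;

	aux_init(&hooks, 9, 0);
	printf("ctl register lives in: %s\n", hooks.ctl_reg());
	return 0;
}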
intel_dp->lane_count = lane_count; intel_dp->link_mst = link_mst; @@ -2674,6 +2762,8 @@ static void intel_disable_dp(struct intel_encoder *encoder, { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp->link_trained = false; + if (old_crtc_state->has_audio) intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state); @@ -3104,35 +3194,6 @@ intel_dp_get_link_status(struct intel_dp *intel_dp, uint8_t link_status[DP_LINK_ DP_LINK_STATUS_SIZE) == DP_LINK_STATUS_SIZE; } -static bool intel_dp_get_y_cord_status(struct intel_dp *intel_dp) -{ - uint8_t psr_caps = 0; - - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_CAPS, &psr_caps) != 1) - return false; - return psr_caps & DP_PSR2_SU_Y_COORDINATE_REQUIRED; -} - -static bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp) -{ - uint8_t dprx = 0; - - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_DPRX_FEATURE_ENUMERATION_LIST, - &dprx) != 1) - return false; - return dprx & DP_VSC_SDP_EXT_FOR_COLORIMETRY_SUPPORTED; -} - -static bool intel_dp_get_alpm_status(struct intel_dp *intel_dp) -{ - uint8_t alpm_caps = 0; - - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_RECEIVER_ALPM_CAP, - &alpm_caps) != 1) - return false; - return alpm_caps & DP_ALPM_CAP; -} - /* These are source-specific values. */ uint8_t intel_dp_voltage_max(struct intel_dp *intel_dp) @@ -3683,40 +3744,7 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) dev_priv->no_aux_handshake = intel_dp->dpcd[DP_MAX_DOWNSPREAD] & DP_NO_AUX_HANDSHAKE_LINK_TRAINING; - /* Check if the panel supports PSR */ - drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, - intel_dp->psr_dpcd, - sizeof(intel_dp->psr_dpcd)); - if (intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED) { - dev_priv->psr.sink_support = true; - DRM_DEBUG_KMS("Detected EDP PSR Panel.\n"); - } - - if (INTEL_GEN(dev_priv) >= 9 && - (intel_dp->psr_dpcd[0] & DP_PSR2_IS_SUPPORTED)) { - uint8_t frame_sync_cap; - - dev_priv->psr.sink_support = true; - if (drm_dp_dpcd_readb(&intel_dp->aux, - DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP, - &frame_sync_cap) != 1) - frame_sync_cap = 0; - dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false; - /* PSR2 needs frame sync as well */ - dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync; - DRM_DEBUG_KMS("PSR2 %s on sink", - dev_priv->psr.psr2_support ? "supported" : "not supported"); - - if (dev_priv->psr.psr2_support) { - dev_priv->psr.y_cord_support = - intel_dp_get_y_cord_status(intel_dp); - dev_priv->psr.colorimetry_support = - intel_dp_get_colorimetry_status(intel_dp); - dev_priv->psr.alpm = - intel_dp_get_alpm_status(intel_dp); - } - - } + intel_psr_init_dpcd(intel_dp); /* * Read the eDP display control registers. @@ -4247,12 +4275,85 @@ go_again: return -EINVAL; } -static void -intel_dp_retrain_link(struct intel_dp *intel_dp) +static bool +intel_dp_needs_link_retrain(struct intel_dp *intel_dp) +{ + u8 link_status[DP_LINK_STATUS_SIZE]; + + if (!intel_dp->link_trained) + return false; + + if (!intel_dp_get_link_status(intel_dp, link_status)) + return false; + + /* + * Validate the cached values of intel_dp->link_rate and + * intel_dp->lane_count before attempting to retrain. + */ + if (!intel_dp_link_params_valid(intel_dp, intel_dp->link_rate, + intel_dp->lane_count)) + return false; + + /* Retrain if Channel EQ or CR not ok */ + return !drm_dp_channel_eq_ok(link_status, intel_dp->lane_count); +} + +/* + * If display is now connected check links status, + * there has been known issues of link loss triggering + * long pulse. + * + * Some sinks (eg. 
ASUS PB287Q) seem to perform some + * weird HPD ping pong during modesets. So we can apparently + * end up with HPD going low during a modeset, and then + * going back up soon after. And once that happens we must + * retrain the link to get a picture. That's in case no + * userspace component reacted to intermittent HPD dip. + */ +int intel_dp_retrain_link(struct intel_encoder *encoder, + struct drm_modeset_acquire_ctx *ctx) { - struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_connector *connector = intel_dp->attached_connector; + struct drm_connector_state *conn_state; + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + int ret; + + /* FIXME handle the MST connectors as well */ + + if (!connector || connector->base.status != connector_status_connected) + return 0; + + ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, + ctx); + if (ret) + return ret; + + conn_state = connector->base.state; + + crtc = to_intel_crtc(conn_state->crtc); + if (!crtc) + return 0; + + ret = drm_modeset_lock(&crtc->base.mutex, ctx); + if (ret) + return ret; + + crtc_state = to_intel_crtc_state(crtc->base.state); + + WARN_ON(!intel_crtc_has_dp_encoder(crtc_state)); + + if (!crtc_state->base.active) + return 0; + + if (conn_state->commit && + !try_wait_for_completion(&conn_state->commit->hw_done)) + return 0; + + if (!intel_dp_needs_link_retrain(intel_dp)) + return 0; /* Suppress underruns caused by re-training */ intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false); @@ -4270,51 +4371,49 @@ intel_dp_retrain_link(struct intel_dp *intel_dp) if (crtc->config->has_pch_encoder) intel_set_pch_fifo_underrun_reporting(dev_priv, intel_crtc_pch_transcoder(crtc), true); + + return 0; } -static void -intel_dp_check_link_status(struct intel_dp *intel_dp) +/* + * If display is now connected check links status, + * there has been known issues of link loss triggering + * long pulse. + * + * Some sinks (eg. ASUS PB287Q) seem to perform some + * weird HPD ping pong during modesets. So we can apparently + * end up with HPD going low during a modeset, and then + * going back up soon after. And once that happens we must + * retrain the link to get a picture. That's in case no + * userspace component reacted to intermittent HPD dip. 
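/*
 * Aside: intel_dp_retrain_link() above returns -EDEADLK straight
 * from drm_modeset_lock() so its caller can back off and retry with
 * the same acquire context, as the hotplug handler below does. A toy
 * model of that retry loop (stand-in functions; the real ones are
 * the drm_modeset_acquire_init/backoff/drop_locks helpers):
 */
#include <errno.h>
#include <stdio.h>

static int attempts;

static int try_retrain(void)
{
	/* pretend the first grab loses the lock-ordering race */
	return ++attempts < 2 ? -EDEADLK : 0;
}

int main(void)
{
	int ret;

	for (;;) {
		ret = try_retrain();
		if (ret == -EDEADLK) {
			printf("backoff, retry\n"); /* drm_modeset_backoff() */
			continue;
		}
		break;
	}
	printf("done after %d attempt(s), ret=%d\n", attempts, ret);
	return 0;
}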
+ */ +static bool intel_dp_hotplug(struct intel_encoder *encoder, + struct intel_connector *connector) { - struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); - struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base; - struct drm_connector_state *conn_state = - intel_dp->attached_connector->base.state; - u8 link_status[DP_LINK_STATUS_SIZE]; - - WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); - - if (!intel_dp_get_link_status(intel_dp, link_status)) { - DRM_ERROR("Failed to get link status\n"); - return; - } + struct drm_modeset_acquire_ctx ctx; + bool changed; + int ret; - if (!conn_state->crtc) - return; + changed = intel_encoder_hotplug(encoder, connector); - WARN_ON(!drm_modeset_is_locked(&conn_state->crtc->mutex)); + drm_modeset_acquire_init(&ctx, 0); - if (!conn_state->crtc->state->active) - return; + for (;;) { + ret = intel_dp_retrain_link(encoder, &ctx); - if (conn_state->commit && - !try_wait_for_completion(&conn_state->commit->hw_done)) - return; + if (ret == -EDEADLK) { + drm_modeset_backoff(&ctx); + continue; + } - /* - * Validate the cached values of intel_dp->link_rate and - * intel_dp->lane_count before attempting to retrain. - */ - if (!intel_dp_link_params_valid(intel_dp, intel_dp->link_rate, - intel_dp->lane_count)) - return; + break; + } - /* Retrain if Channel EQ or CR not ok */ - if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) { - DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n", - intel_encoder->base.name); + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + WARN(ret, "Acquiring modeset locks failed with %i\n", ret); - intel_dp_retrain_link(intel_dp); - } + return changed; } /* @@ -4372,7 +4471,9 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) DRM_DEBUG_DRIVER("CP or sink specific irq unhandled\n"); } - intel_dp_check_link_status(intel_dp); + /* defer to the hotplug work for link retraining if needed */ + if (intel_dp_needs_link_retrain(intel_dp)) + return false; if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { DRM_DEBUG_KMS("Link Training Compliance Test requested\n"); @@ -4451,173 +4552,174 @@ edp_detect(struct intel_dp *intel_dp) return status; } -static bool ibx_digital_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *port) +static bool ibx_digital_port_connected(struct intel_encoder *encoder) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 bit; - switch (port->base.port) { - case PORT_B: + switch (encoder->hpd_pin) { + case HPD_PORT_B: bit = SDE_PORTB_HOTPLUG; break; - case PORT_C: + case HPD_PORT_C: bit = SDE_PORTC_HOTPLUG; break; - case PORT_D: + case HPD_PORT_D: bit = SDE_PORTD_HOTPLUG; break; default: - MISSING_CASE(port->base.port); + MISSING_CASE(encoder->hpd_pin); return false; } return I915_READ(SDEISR) & bit; } -static bool cpt_digital_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *port) +static bool cpt_digital_port_connected(struct intel_encoder *encoder) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 bit; - switch (port->base.port) { - case PORT_B: + switch (encoder->hpd_pin) { + case HPD_PORT_B: bit = SDE_PORTB_HOTPLUG_CPT; break; - case PORT_C: + case HPD_PORT_C: bit = SDE_PORTC_HOTPLUG_CPT; break; - case PORT_D: + case HPD_PORT_D: bit = SDE_PORTD_HOTPLUG_CPT; break; default: - MISSING_CASE(port->base.port); + MISSING_CASE(encoder->hpd_pin); return false; } return I915_READ(SDEISR) & bit; } -static bool 
spt_digital_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *port) +static bool spt_digital_port_connected(struct intel_encoder *encoder) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 bit; - switch (port->base.port) { - case PORT_A: + switch (encoder->hpd_pin) { + case HPD_PORT_A: bit = SDE_PORTA_HOTPLUG_SPT; break; - case PORT_E: + case HPD_PORT_E: bit = SDE_PORTE_HOTPLUG_SPT; break; default: - return cpt_digital_port_connected(dev_priv, port); + return cpt_digital_port_connected(encoder); } return I915_READ(SDEISR) & bit; } -static bool g4x_digital_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *port) +static bool g4x_digital_port_connected(struct intel_encoder *encoder) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 bit; - switch (port->base.port) { - case PORT_B: + switch (encoder->hpd_pin) { + case HPD_PORT_B: bit = PORTB_HOTPLUG_LIVE_STATUS_G4X; break; - case PORT_C: + case HPD_PORT_C: bit = PORTC_HOTPLUG_LIVE_STATUS_G4X; break; - case PORT_D: + case HPD_PORT_D: bit = PORTD_HOTPLUG_LIVE_STATUS_G4X; break; default: - MISSING_CASE(port->base.port); + MISSING_CASE(encoder->hpd_pin); return false; } return I915_READ(PORT_HOTPLUG_STAT) & bit; } -static bool gm45_digital_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *port) +static bool gm45_digital_port_connected(struct intel_encoder *encoder) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 bit; - switch (port->base.port) { - case PORT_B: + switch (encoder->hpd_pin) { + case HPD_PORT_B: bit = PORTB_HOTPLUG_LIVE_STATUS_GM45; break; - case PORT_C: + case HPD_PORT_C: bit = PORTC_HOTPLUG_LIVE_STATUS_GM45; break; - case PORT_D: + case HPD_PORT_D: bit = PORTD_HOTPLUG_LIVE_STATUS_GM45; break; default: - MISSING_CASE(port->base.port); + MISSING_CASE(encoder->hpd_pin); return false; } return I915_READ(PORT_HOTPLUG_STAT) & bit; } -static bool ilk_digital_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *port) +static bool ilk_digital_port_connected(struct intel_encoder *encoder) { - if (port->base.port == PORT_A) + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + if (encoder->hpd_pin == HPD_PORT_A) return I915_READ(DEISR) & DE_DP_A_HOTPLUG; else - return ibx_digital_port_connected(dev_priv, port); + return ibx_digital_port_connected(encoder); } -static bool snb_digital_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *port) +static bool snb_digital_port_connected(struct intel_encoder *encoder) { - if (port->base.port == PORT_A) + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + if (encoder->hpd_pin == HPD_PORT_A) return I915_READ(DEISR) & DE_DP_A_HOTPLUG; else - return cpt_digital_port_connected(dev_priv, port); + return cpt_digital_port_connected(encoder); } -static bool ivb_digital_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *port) +static bool ivb_digital_port_connected(struct intel_encoder *encoder) { - if (port->base.port == PORT_A) + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + if (encoder->hpd_pin == HPD_PORT_A) return I915_READ(DEISR) & DE_DP_A_HOTPLUG_IVB; else - return cpt_digital_port_connected(dev_priv, port); + return cpt_digital_port_connected(encoder); } -static bool bdw_digital_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *port) +static bool bdw_digital_port_connected(struct 
intel_encoder *encoder) { - if (port->base.port == PORT_A) + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + if (encoder->hpd_pin == HPD_PORT_A) return I915_READ(GEN8_DE_PORT_ISR) & GEN8_PORT_DP_A_HOTPLUG; else - return cpt_digital_port_connected(dev_priv, port); + return cpt_digital_port_connected(encoder); } -static bool bxt_digital_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *intel_dig_port) +static bool bxt_digital_port_connected(struct intel_encoder *encoder) { - struct intel_encoder *intel_encoder = &intel_dig_port->base; - enum port port; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 bit; - port = intel_hpd_pin_to_port(intel_encoder->hpd_pin); - switch (port) { - case PORT_A: + switch (encoder->hpd_pin) { + case HPD_PORT_A: bit = BXT_DE_PORT_HP_DDIA; break; - case PORT_B: + case HPD_PORT_B: bit = BXT_DE_PORT_HP_DDIB; break; - case PORT_C: + case HPD_PORT_C: bit = BXT_DE_PORT_HP_DDIC; break; default: - MISSING_CASE(port); + MISSING_CASE(encoder->hpd_pin); return false; } @@ -4626,33 +4728,33 @@ static bool bxt_digital_port_connected(struct drm_i915_private *dev_priv, /* * intel_digital_port_connected - is the specified port connected? - * @dev_priv: i915 private structure - * @port: the port to test + * @encoder: intel_encoder * - * Return %true if @port is connected, %false otherwise. + * Return %true if port is connected, %false otherwise. */ -bool intel_digital_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *port) +bool intel_digital_port_connected(struct intel_encoder *encoder) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + if (HAS_GMCH_DISPLAY(dev_priv)) { if (IS_GM45(dev_priv)) - return gm45_digital_port_connected(dev_priv, port); + return gm45_digital_port_connected(encoder); else - return g4x_digital_port_connected(dev_priv, port); + return g4x_digital_port_connected(encoder); } if (IS_GEN5(dev_priv)) - return ilk_digital_port_connected(dev_priv, port); + return ilk_digital_port_connected(encoder); else if (IS_GEN6(dev_priv)) - return snb_digital_port_connected(dev_priv, port); + return snb_digital_port_connected(encoder); else if (IS_GEN7(dev_priv)) - return ivb_digital_port_connected(dev_priv, port); + return ivb_digital_port_connected(encoder); else if (IS_GEN8(dev_priv)) - return bdw_digital_port_connected(dev_priv, port); + return bdw_digital_port_connected(encoder); else if (IS_GEN9_LP(dev_priv)) - return bxt_digital_port_connected(dev_priv, port); + return bxt_digital_port_connected(encoder); else - return spt_digital_port_connected(dev_priv, port); + return spt_digital_port_connected(encoder); } static struct edid * @@ -4711,8 +4813,7 @@ intel_dp_long_pulse(struct intel_connector *connector) /* Can't disconnect eDP, but you can close the lid... */ if (intel_dp_is_edp(intel_dp)) status = edp_detect(intel_dp); - else if (intel_digital_port_connected(dev_priv, - dp_to_dig_port(intel_dp))) + else if (intel_digital_port_connected(&dp_to_dig_port(intel_dp)->base)) status = intel_dp_detect_dpcd(intel_dp); else status = connector_status_disconnected; @@ -4757,20 +4858,6 @@ intel_dp_long_pulse(struct intel_connector *connector) */ status = connector_status_disconnected; goto out; - } else { - /* - * If display is now connected check links status, - * there has been known issues of link loss triggerring - * long pulse. - * - * Some sinks (eg. ASUS PB287Q) seem to perform some - * weird HPD ping pong during modesets. 
So we can apparently - * end up with HPD going low during a modeset, and then - * going back up soon after. And once that happens we must - * retrain the link to get a picture. That's in case no - * userspace component reacted to intermittent HPD dip. - */ - intel_dp_check_link_status(intel_dp); } /* @@ -4981,6 +5068,238 @@ void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder) pps_unlock(intel_dp); } +static +int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port, + u8 *an) +{ + struct intel_dp *intel_dp = enc_to_intel_dp(&intel_dig_port->base.base); + static const struct drm_dp_aux_msg msg = { + .request = DP_AUX_NATIVE_WRITE, + .address = DP_AUX_HDCP_AKSV, + .size = DRM_HDCP_KSV_LEN, + }; + uint8_t txbuf[HEADER_SIZE + DRM_HDCP_KSV_LEN] = {}, rxbuf[2], reply = 0; + ssize_t dpcd_ret; + int ret; + + /* Output An first, that's easy */ + dpcd_ret = drm_dp_dpcd_write(&intel_dig_port->dp.aux, DP_AUX_HDCP_AN, + an, DRM_HDCP_AN_LEN); + if (dpcd_ret != DRM_HDCP_AN_LEN) { + DRM_ERROR("Failed to write An over DP/AUX (%zd)\n", dpcd_ret); + return dpcd_ret >= 0 ? -EIO : dpcd_ret; + } + + /* + * Since Aksv is Oh-So-Secret, we can't access it in software. So in + * order to get it on the wire, we need to create the AUX header as if + * we were writing the data, and then tickle the hardware to output the + * data once the header is sent out. + */ + intel_dp_aux_header(txbuf, &msg); + + ret = intel_dp_aux_xfer(intel_dp, txbuf, HEADER_SIZE + msg.size, + rxbuf, sizeof(rxbuf), + DP_AUX_CH_CTL_AUX_AKSV_SELECT); + if (ret < 0) { + DRM_ERROR("Write Aksv over DP/AUX failed (%d)\n", ret); + return ret; + } else if (ret == 0) { + DRM_ERROR("Aksv write over DP/AUX was empty\n"); + return -EIO; + } + + reply = (rxbuf[0] >> 4) & DP_AUX_NATIVE_REPLY_MASK; + return reply == DP_AUX_NATIVE_REPLY_ACK ? 0 : -EIO; +} + +static int intel_dp_hdcp_read_bksv(struct intel_digital_port *intel_dig_port, + u8 *bksv) +{ + ssize_t ret; + ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, DP_AUX_HDCP_BKSV, bksv, + DRM_HDCP_KSV_LEN); + if (ret != DRM_HDCP_KSV_LEN) { + DRM_ERROR("Read Bksv from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? -EIO : ret; + } + return 0; +} + +static int intel_dp_hdcp_read_bstatus(struct intel_digital_port *intel_dig_port, + u8 *bstatus) +{ + ssize_t ret; + /* + * For some reason the HDMI and DP HDCP specs call this register + * definition by different names. In the HDMI spec, it's called BSTATUS, + * but in DP it's called BINFO. + */ + ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, DP_AUX_HDCP_BINFO, + bstatus, DRM_HDCP_BSTATUS_LEN); + if (ret != DRM_HDCP_BSTATUS_LEN) { + DRM_ERROR("Read bstatus from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? -EIO : ret; + } + return 0; +} + +static +int intel_dp_hdcp_read_bcaps(struct intel_digital_port *intel_dig_port, + u8 *bcaps) +{ + ssize_t ret; + + ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, DP_AUX_HDCP_BCAPS, + bcaps, 1); + if (ret != 1) { + DRM_ERROR("Read bcaps from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? 
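/*
 * Side note (illustrative, not from the patch): the DPCD helpers in this
 * block share one error convention. drm_dp_dpcd_read()/drm_dp_dpcd_write()
 * return the number of bytes transferred, so a short but non-negative
 * result is a protocol failure and is remapped to -EIO, while a negative
 * errno is passed through unchanged:
 *
 *	if (ret != len)
 *		return ret >= 0 ? -EIO : ret;
 *
 * where len is a placeholder for the expected transfer size.
 */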
-EIO : ret; + } + + return 0; +} + +static +int intel_dp_hdcp_repeater_present(struct intel_digital_port *intel_dig_port, + bool *repeater_present) +{ + ssize_t ret; + u8 bcaps; + + ret = intel_dp_hdcp_read_bcaps(intel_dig_port, &bcaps); + if (ret) + return ret; + + *repeater_present = bcaps & DP_BCAPS_REPEATER_PRESENT; + return 0; +} + +static +int intel_dp_hdcp_read_ri_prime(struct intel_digital_port *intel_dig_port, + u8 *ri_prime) +{ + ssize_t ret; + ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, DP_AUX_HDCP_RI_PRIME, + ri_prime, DRM_HDCP_RI_LEN); + if (ret != DRM_HDCP_RI_LEN) { + DRM_ERROR("Read Ri' from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? -EIO : ret; + } + return 0; +} + +static +int intel_dp_hdcp_read_ksv_ready(struct intel_digital_port *intel_dig_port, + bool *ksv_ready) +{ + ssize_t ret; + u8 bstatus; + ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, DP_AUX_HDCP_BSTATUS, + &bstatus, 1); + if (ret != 1) { + DRM_ERROR("Read bstatus from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? -EIO : ret; + } + *ksv_ready = bstatus & DP_BSTATUS_READY; + return 0; +} + +static +int intel_dp_hdcp_read_ksv_fifo(struct intel_digital_port *intel_dig_port, + int num_downstream, u8 *ksv_fifo) +{ + ssize_t ret; + int i; + + /* KSV list is read via 15 byte window (3 entries @ 5 bytes each) */ + for (i = 0; i < num_downstream; i += 3) { + size_t len = min(num_downstream - i, 3) * DRM_HDCP_KSV_LEN; + ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, + DP_AUX_HDCP_KSV_FIFO, + ksv_fifo + i * DRM_HDCP_KSV_LEN, + len); + if (ret != len) { + DRM_ERROR("Read ksv[%d] from DP/AUX failed (%zd)\n", i, + ret); + return ret >= 0 ? -EIO : ret; + } + } + return 0; +} + +static +int intel_dp_hdcp_read_v_prime_part(struct intel_digital_port *intel_dig_port, + int i, u32 *part) +{ + ssize_t ret; + + if (i >= DRM_HDCP_V_PRIME_NUM_PARTS) + return -EINVAL; + + ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, + DP_AUX_HDCP_V_PRIME(i), part, + DRM_HDCP_V_PRIME_PART_LEN); + if (ret != DRM_HDCP_V_PRIME_PART_LEN) { + DRM_ERROR("Read v'[%d] from DP/AUX failed (%zd)\n", i, ret); + return ret >= 0 ? 
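/*
 * Worked example (illustrative, not from the patch) for the windowed read
 * in intel_dp_hdcp_read_ksv_fifo() above: with num_downstream == 7, the
 * loop issues three AUX reads of min(7, 3) * 5 = 15, min(4, 3) * 5 = 15
 * and min(1, 3) * 5 = 5 bytes, landing at offsets 0, 15 and 30 of
 * ksv_fifo respectively.
 */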
-EIO : ret; + } + return 0; +} + +static +int intel_dp_hdcp_toggle_signalling(struct intel_digital_port *intel_dig_port, + bool enable) +{ + /* Not used for single stream DisplayPort setups */ + return 0; +} + +static +bool intel_dp_hdcp_check_link(struct intel_digital_port *intel_dig_port) +{ + ssize_t ret; + u8 bstatus; + + ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, DP_AUX_HDCP_BSTATUS, + &bstatus, 1); + if (ret != 1) { + DRM_ERROR("Read bstatus from DP/AUX failed (%zd)\n", ret); + return false; + } + + return !(bstatus & (DP_BSTATUS_LINK_FAILURE | DP_BSTATUS_REAUTH_REQ)); +} + +static +int intel_dp_hdcp_capable(struct intel_digital_port *intel_dig_port, + bool *hdcp_capable) +{ + ssize_t ret; + u8 bcaps; + + ret = intel_dp_hdcp_read_bcaps(intel_dig_port, &bcaps); + if (ret) + return ret; + + *hdcp_capable = bcaps & DP_BCAPS_HDCP_CAPABLE; + return 0; +} + +static const struct intel_hdcp_shim intel_dp_hdcp_shim = { + .write_an_aksv = intel_dp_hdcp_write_an_aksv, + .read_bksv = intel_dp_hdcp_read_bksv, + .read_bstatus = intel_dp_hdcp_read_bstatus, + .repeater_present = intel_dp_hdcp_repeater_present, + .read_ri_prime = intel_dp_hdcp_read_ri_prime, + .read_ksv_ready = intel_dp_hdcp_read_ksv_ready, + .read_ksv_fifo = intel_dp_hdcp_read_ksv_fifo, + .read_v_prime_part = intel_dp_hdcp_read_v_prime_part, + .toggle_signalling = intel_dp_hdcp_toggle_signalling, + .check_link = intel_dp_hdcp_check_link, + .hdcp_capable = intel_dp_hdcp_capable, +}; + static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); @@ -5115,36 +5434,12 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) } if (!intel_dp->is_mst) { - struct drm_modeset_acquire_ctx ctx; - struct drm_connector *connector = &intel_dp->attached_connector->base; - struct drm_crtc *crtc; - int iret; - bool handled = false; - - drm_modeset_acquire_init(&ctx, 0); -retry: - iret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, &ctx); - if (iret) - goto err; - - crtc = connector->state->crtc; - if (crtc) { - iret = drm_modeset_lock(&crtc->mutex, &ctx); - if (iret) - goto err; - } + bool handled; handled = intel_dp_short_pulse(intel_dp); -err: - if (iret == -EDEADLK) { - drm_modeset_backoff(&ctx); - goto retry; - } - - drm_modeset_drop_locks(&ctx); - drm_modeset_acquire_fini(&ctx); - WARN(iret, "Acquiring modeset locks failed with %i\n", iret); + /* Short pulse can signify loss of hdcp authentication */ + intel_hdcp_check_link(intel_dp->attached_connector); if (!handled) { intel_dp->detect_done = false; @@ -5223,7 +5518,8 @@ intel_pps_readout_hw_state(struct intel_dp *intel_dp, struct edp_power_seq *seq) pp_on = I915_READ(regs.pp_on); pp_off = I915_READ(regs.pp_off); - if (!IS_GEN9_LP(dev_priv) && !HAS_PCH_CNP(dev_priv)) { + if (!IS_GEN9_LP(dev_priv) && !HAS_PCH_CNP(dev_priv) && + !HAS_PCH_ICP(dev_priv)) { I915_WRITE(regs.pp_ctrl, pp_ctl); pp_div = I915_READ(regs.pp_div); } @@ -5241,7 +5537,8 @@ intel_pps_readout_hw_state(struct intel_dp *intel_dp, struct edp_power_seq *seq) seq->t10 = (pp_off & PANEL_POWER_DOWN_DELAY_MASK) >> PANEL_POWER_DOWN_DELAY_SHIFT; - if (IS_GEN9_LP(dev_priv) || HAS_PCH_CNP(dev_priv)) { + if (IS_GEN9_LP(dev_priv) || HAS_PCH_CNP(dev_priv) || + HAS_PCH_ICP(dev_priv)) { seq->t11_t12 = ((pp_ctl & BXT_POWER_CYCLE_DELAY_MASK) >> BXT_POWER_CYCLE_DELAY_SHIFT) * 1000; } else { @@ -5412,7 +5709,8 @@ intel_dp_init_panel_power_sequencer_registers(struct intel_dp *intel_dp, (seq->t10 << 
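/*
 * Annotation (not from the patch): context for the HAS_PCH_ICP()
 * additions nearby. BXT, CNP and now ICP have no separate PP_DIV
 * register, so the panel power-cycle delay (t11_t12) lives in PP_CTRL
 * behind BXT_POWER_CYCLE_DELAY_MASK. The readout above scales the field
 * by 1000 and the write-back below uses DIV_ROUND_UP(seq->t11_t12, 1000),
 * i.e. one register step corresponds to 1000 units of t11_t12.
 */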
PANEL_POWER_DOWN_DELAY_SHIFT); /* Compute the divisor for the pp clock, simply match the Bspec * formula. */ - if (IS_GEN9_LP(dev_priv) || HAS_PCH_CNP(dev_priv)) { + if (IS_GEN9_LP(dev_priv) || HAS_PCH_CNP(dev_priv) || + HAS_PCH_ICP(dev_priv)) { pp_div = I915_READ(regs.pp_ctrl); pp_div &= ~BXT_POWER_CYCLE_DELAY_MASK; pp_div |= (DIV_ROUND_UP(seq->t11_t12, 1000) @@ -5438,7 +5736,8 @@ intel_dp_init_panel_power_sequencer_registers(struct intel_dp *intel_dp, I915_WRITE(regs.pp_on, pp_on); I915_WRITE(regs.pp_off, pp_off); - if (IS_GEN9_LP(dev_priv) || HAS_PCH_CNP(dev_priv)) + if (IS_GEN9_LP(dev_priv) || HAS_PCH_CNP(dev_priv) || + HAS_PCH_ICP(dev_priv)) I915_WRITE(regs.pp_ctrl, pp_div); else I915_WRITE(regs.pp_div, pp_div); @@ -5446,7 +5745,8 @@ intel_dp_init_panel_power_sequencer_registers(struct intel_dp *intel_dp, DRM_DEBUG_KMS("panel power sequencer register settings: PP_ON %#x, PP_OFF %#x, PP_DIV %#x\n", I915_READ(regs.pp_on), I915_READ(regs.pp_off), - (IS_GEN9_LP(dev_priv) || HAS_PCH_CNP(dev_priv)) ? + (IS_GEN9_LP(dev_priv) || HAS_PCH_CNP(dev_priv) || + HAS_PCH_ICP(dev_priv)) ? (I915_READ(regs.pp_ctrl) & BXT_POWER_CYCLE_DELAY_MASK) : I915_READ(regs.pp_div)); } @@ -5960,37 +6260,6 @@ out_vdd_off: return false; } -/* Set up the hotplug pin and aux power domain. */ -static void -intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port) -{ - struct intel_encoder *encoder = &intel_dig_port->base; - struct intel_dp *intel_dp = &intel_dig_port->dp; - - encoder->hpd_pin = intel_hpd_pin(encoder->port); - - switch (encoder->port) { - case PORT_A: - intel_dp->aux_power_domain = POWER_DOMAIN_AUX_A; - break; - case PORT_B: - intel_dp->aux_power_domain = POWER_DOMAIN_AUX_B; - break; - case PORT_C: - intel_dp->aux_power_domain = POWER_DOMAIN_AUX_C; - break; - case PORT_D: - intel_dp->aux_power_domain = POWER_DOMAIN_AUX_D; - break; - case PORT_E: - /* FIXME: Check VBT for actual wiring of PORT E */ - intel_dp->aux_power_domain = POWER_DOMAIN_AUX_D; - break; - default: - MISSING_CASE(encoder->port); - } -} - static void intel_dp_modeset_retry_work_fn(struct work_struct *work) { struct intel_connector *intel_connector; @@ -6042,20 +6311,6 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_dp->active_pipe = INVALID_PIPE; /* intel_dp vfuncs */ - if (INTEL_GEN(dev_priv) >= 9) - intel_dp->get_aux_clock_divider = skl_get_aux_clock_divider; - else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - intel_dp->get_aux_clock_divider = hsw_get_aux_clock_divider; - else if (HAS_PCH_SPLIT(dev_priv)) - intel_dp->get_aux_clock_divider = ilk_get_aux_clock_divider; - else - intel_dp->get_aux_clock_divider = g4x_get_aux_clock_divider; - - if (INTEL_GEN(dev_priv) >= 9) - intel_dp->get_aux_send_ctl = skl_get_aux_send_ctl; - else - intel_dp->get_aux_send_ctl = g4x_get_aux_send_ctl; - if (HAS_DDI(dev_priv)) intel_dp->prepare_link_retrain = intel_ddi_prepare_link_retrain; @@ -6096,7 +6351,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, connector->interlace_allowed = true; connector->doublescan_allowed = 0; - intel_dp_init_connector_port_info(intel_dig_port); + intel_encoder->hpd_pin = intel_hpd_pin_default(dev_priv, port); intel_dp_aux_init(intel_dp); @@ -6112,7 +6367,8 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, /* init MST on ports that can support it */ if (HAS_DP_MST(dev_priv) && !intel_dp_is_edp(intel_dp) && - (port == PORT_B || port == PORT_C || port == PORT_D)) + (port == PORT_B || port == PORT_C || + port == PORT_D || port == 
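/*
 * Annotation (not from the patch): PORT_F below joins the MST-capable set
 * alongside ports B/C/D; as before, MST is only initialized when the port
 * is not wired as eDP, per the !intel_dp_is_edp() term in the condition.
 */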
PORT_F)) intel_dp_mst_encoder_init(intel_dig_port, intel_connector->base.base.id); @@ -6124,6 +6380,12 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_dp_add_properties(intel_dp, connector); + if (is_hdcp_supported(dev_priv, port) && !intel_dp_is_edp(intel_dp)) { + int ret = intel_hdcp_init(intel_connector, &intel_dp_hdcp_shim); + if (ret) + DRM_DEBUG_KMS("HDCP init failed, skipping.\n"); + } + /* For G4X desktop chip, PEG_BAND_GAP_DATA 3:0 must first be written * 0xd. Failure to do so will result in spurious interrupts being * generated on the port when a cable is not attached. @@ -6166,6 +6428,7 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, "DP %c", port_name(port))) goto err_encoder_init; + intel_encoder->hotplug = intel_dp_hotplug; intel_encoder->compute_config = intel_dp_compute_config; intel_encoder->get_hw_state = intel_dp_get_hw_state; intel_encoder->get_config = intel_dp_get_config; diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c index cf8fef8b6f58..f59b59bb0a21 100644 --- a/drivers/gpu/drm/i915/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/intel_dp_link_training.c @@ -248,6 +248,7 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) int tries; u32 training_pattern; uint8_t link_status[DP_LINK_STATUS_SIZE]; + bool channel_eq = false; training_pattern = intel_dp_training_pattern(intel_dp); @@ -259,7 +260,6 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) return false; } - intel_dp->channel_eq_status = false; for (tries = 0; tries < 5; tries++) { drm_dp_link_train_channel_eq_delay(intel_dp->dpcd); @@ -279,7 +279,7 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) if (drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) { - intel_dp->channel_eq_status = true; + channel_eq = true; DRM_DEBUG_KMS("Channel EQ done. DP Training " "successful\n"); break; @@ -301,12 +301,14 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) intel_dp_set_idle_link_train(intel_dp); - return intel_dp->channel_eq_status; + return channel_eq; } void intel_dp_stop_link_train(struct intel_dp *intel_dp) { + intel_dp->link_trained = true; + intel_dp_set_link_train(intel_dp, DP_TRAINING_PATTERN_DISABLE); } diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index 76473e9836c6..c8e9e44e5981 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -147,7 +147,7 @@ struct bxt_ddi_phy_info { */ struct { /** - * @port: which port maps to this channel. + * @channel.port: which port maps to this channel. */ enum port port; } channel[2]; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 30f791f89d64..d4368589b355 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -41,20 +41,21 @@ #include <drm/drm_atomic.h> /** - * _wait_for - magic (register) wait macro + * __wait_for - magic wait macro * - * Does the right thing for modeset paths when run under kdgb or similar atomic - * contexts. Note that it's important that we check the condition again after - * having timed out, since the timeout could be due to preemption or similar and - * we've never had a chance to check the condition before the timeout. + * Macro to help avoid open coding check/wait/timeout patterns. 
Note that it's + important that we check the condition again after having timed out, since the + timeout could be due to preemption or similar and we've never had a chance to + check the condition before the timeout. */ -#define _wait_for(COND, US, Wmin, Wmax) ({ \ unsigned long timeout__ = jiffies + usecs_to_jiffies(US) + 1; \ long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \ int ret__; \ might_sleep(); \ for (;;) { \ bool expired__ = time_after(jiffies, timeout__); \ + OP; \ if (COND) { \ ret__ = 0; \ break; \ @@ -70,7 +71,9 @@ ret__; \ }) -#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) +#define _wait_for(COND, US, Wmin, Wmax) __wait_for(, (COND), (US), (Wmin), \ + (Wmax)) +#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) /* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */ #if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) @@ -201,6 +204,7 @@ struct intel_fbdev { struct drm_fb_helper helper; struct intel_framebuffer *fb; struct i915_vma *vma; + unsigned long vma_flags; async_cookie_t cookie; int preferred_bpp; }; @@ -211,7 +215,8 @@ struct intel_encoder { enum intel_output_type type; enum port port; unsigned int cloneable; - void (*hot_plug)(struct intel_encoder *); + bool (*hotplug)(struct intel_encoder *encoder, + struct intel_connector *connector); enum intel_output_type (*compute_output_type)(struct intel_encoder *, struct intel_crtc_state *, struct drm_connector_state *); @@ -298,6 +303,80 @@ struct intel_panel { } backlight; }; +/* + * This structure serves as a translation layer between the generic HDCP code + * and the bus-specific code. What that means is that HDCP over HDMI differs + * from HDCP over DP, so to account for these differences, we need to + * communicate with the receiver through this shim. + * + * For completeness, the 2 buses differ in the following ways: + * - DP AUX vs. DDC + * HDCP registers on the receiver are set via DP AUX for DP, and + * they are set via DDC for HDMI. + * - Receiver register offsets + * The offsets of the registers are different for DP vs. HDMI + * - Receiver register masks/offsets + * For instance, the ready bit for the KSV fifo is in a different + * place on DP vs HDMI + * - Receiver register names + * Seriously. In the DP spec, the 16-bit register containing + * downstream information is called BINFO, on HDMI it's called + * BSTATUS. To confuse matters further, DP has a BSTATUS register + * with a completely different definition. + * - KSV FIFO + * On HDMI, the ksv fifo is read all at once, whereas on DP it must + * be read 3 keys at a time + * - Aksv output + * Since Aksv is hidden in hardware, there are different procedures + * to send it over DP AUX vs DDC + */ +struct intel_hdcp_shim { + /* Outputs the transmitter's An and Aksv values to the receiver. */ + int (*write_an_aksv)(struct intel_digital_port *intel_dig_port, u8 *an); + + /* Reads the receiver's key selection vector */ + int (*read_bksv)(struct intel_digital_port *intel_dig_port, u8 *bksv); + + /* + * Reads BINFO from DP receivers and BSTATUS from HDMI receivers. The + * definitions are the same in the respective specs, but the names are + * different. Call it BSTATUS since that's the name the HDMI spec + * uses and it was there first. 
+ */ + int (*read_bstatus)(struct intel_digital_port *intel_dig_port, + u8 *bstatus); + + /* Determines whether a repeater is present downstream */ + int (*repeater_present)(struct intel_digital_port *intel_dig_port, + bool *repeater_present); + + /* Reads the receiver's Ri' value */ + int (*read_ri_prime)(struct intel_digital_port *intel_dig_port, u8 *ri); + + /* Determines if the receiver's KSV FIFO is ready for consumption */ + int (*read_ksv_ready)(struct intel_digital_port *intel_dig_port, + bool *ksv_ready); + + /* Reads the ksv fifo for num_downstream devices */ + int (*read_ksv_fifo)(struct intel_digital_port *intel_dig_port, + int num_downstream, u8 *ksv_fifo); + + /* Reads a 32-bit part of V' from the receiver */ + int (*read_v_prime_part)(struct intel_digital_port *intel_dig_port, + int i, u32 *part); + + /* Enables HDCP signalling on the port */ + int (*toggle_signalling)(struct intel_digital_port *intel_dig_port, + bool enable); + + /* Ensures the link is still protected */ + bool (*check_link)(struct intel_digital_port *intel_dig_port); + + /* Detects panel's hdcp capability. This is optional for HDMI. */ + int (*hdcp_capable)(struct intel_digital_port *intel_dig_port, + bool *hdcp_capable); +}; + struct intel_connector { struct drm_connector base; /* @@ -329,6 +408,12 @@ struct intel_connector { /* Work struct to schedule a uevent on link train failure */ struct work_struct modeset_retry_work; + + const struct intel_hdcp_shim *hdcp_shim; + struct mutex hdcp_mutex; + uint64_t hdcp_value; /* protected by hdcp_mutex */ + struct delayed_work hdcp_check_work; + struct work_struct hdcp_prop_work; }; struct intel_digital_connector_state { @@ -406,8 +491,9 @@ struct intel_atomic_state { struct intel_plane_state { struct drm_plane_state base; - struct drm_rect clip; struct i915_vma *vma; + unsigned long flags; +#define PLANE_HAS_FENCE BIT(0) struct { u32 offset; @@ -850,6 +936,7 @@ struct intel_plane { enum plane_id id; enum pipe pipe; bool can_scale; + bool has_fbc; int max_downscale; uint32_t frontbuffer_bit; @@ -956,17 +1043,16 @@ struct intel_dp_compliance { struct intel_dp { i915_reg_t output_reg; - i915_reg_t aux_ch_ctl_reg; - i915_reg_t aux_ch_data_reg[5]; uint32_t DP; int link_rate; uint8_t lane_count; uint8_t sink_count; bool link_mst; + bool link_trained; bool has_audio; bool detect_done; - bool channel_eq_status; bool reset_link_params; + enum aux_ch aux_ch; uint8_t dpcd[DP_RECEIVER_CAP_SIZE]; uint8_t psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE]; uint8_t downstream_ports[DP_MAX_DOWNSTREAM_PORTS]; @@ -1041,6 +1127,9 @@ struct intel_dp { int send_bytes, uint32_t aux_clock_divider); + i915_reg_t (*aux_ch_ctl_reg)(struct intel_dp *dp); + i915_reg_t (*aux_ch_data_reg)(struct intel_dp *dp, int index); + /* This is called before a link training is started */ void (*prepare_link_retrain)(struct intel_dp *intel_dp); @@ -1298,6 +1387,8 @@ void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, u32 bxt_signal_levels(struct intel_dp *intel_dp); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); +int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder, + bool enable); unsigned int intel_fb_align_height(const struct drm_framebuffer *fb, int plane, unsigned int height); @@ -1323,6 +1414,8 @@ void cnl_init_cdclk(struct drm_i915_private *dev_priv); void cnl_uninit_cdclk(struct drm_i915_private *dev_priv); void bxt_init_cdclk(struct drm_i915_private *dev_priv); void bxt_uninit_cdclk(struct 
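/*
 * Usage sketch (illustrative, not from the patch) for the __wait_for()
 * rework earlier in this header: the new OP argument is evaluated once
 * per poll iteration, before COND is tested, so a caller can re-sample or
 * re-kick hardware while waiting. Assuming a caller-declared u32 val and
 * placeholder names reg, done_bit and timeout_us:
 *
 *	ret = __wait_for(val = I915_READ(reg), val & done_bit,
 *			 timeout_us, 10, 1000);
 *
 * Plain wait_for() keeps the old behaviour by passing an empty OP.
 */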
drm_i915_private *dev_priv); +void icl_init_cdclk(struct drm_i915_private *dev_priv); +void icl_uninit_cdclk(struct drm_i915_private *dev_priv); void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv); void intel_update_max_cdclk(struct drm_i915_private *dev_priv); void intel_update_cdclk(struct drm_i915_private *dev_priv); @@ -1371,8 +1464,8 @@ struct drm_display_mode * intel_encoder_current_mode(struct intel_encoder *encoder); enum pipe intel_get_pipe_from_connector(struct intel_connector *connector); -int intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data, - struct drm_file *file_priv); +int intel_get_pipe_from_crtc_id_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); enum transcoder intel_pipe_to_cpu_transcoder(struct drm_i915_private *dev_priv, enum pipe pipe); static inline bool @@ -1417,8 +1510,11 @@ void intel_release_load_detect_pipe(struct drm_connector *connector, struct intel_load_detect_pipe *old, struct drm_modeset_acquire_ctx *ctx); struct i915_vma * -intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation); -void intel_unpin_fb_vma(struct i915_vma *vma); +intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, + unsigned int rotation, + bool uses_fence, + unsigned long *out_flags); +void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags); struct drm_framebuffer * intel_framebuffer_create(struct drm_i915_gem_object *obj, struct drm_mode_fb_cmd2 *mode_cmd); @@ -1505,9 +1601,11 @@ u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); +u32 glk_color_ctl(const struct intel_plane_state *plane_state); u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane, unsigned int rotation); -int skl_check_plane_surface(struct intel_plane_state *plane_state); +int skl_check_plane_surface(const struct intel_crtc_state *crtc_state, + struct intel_plane_state *plane_state); int i9xx_check_plane_surface(struct intel_plane_state *plane_state); /* intel_csr.c */ @@ -1529,6 +1627,8 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, int link_rate, uint8_t lane_count); void intel_dp_start_link_train(struct intel_dp *intel_dp); void intel_dp_stop_link_train(struct intel_dp *intel_dp); +int intel_dp_retrain_link(struct intel_encoder *encoder, + struct drm_modeset_acquire_ctx *ctx); void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode); void intel_dp_encoder_reset(struct drm_encoder *encoder); void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder); @@ -1590,8 +1690,7 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count) bool intel_dp_read_dpcd(struct intel_dp *intel_dp); int intel_dp_link_required(int pixel_clock, int bpp); int intel_dp_max_data_rate(int max_link_clock, int max_lanes); -bool intel_digital_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *port); +bool intel_digital_port_connected(struct intel_encoder *encoder); /* intel_dp_aux_backlight.c */ int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector); @@ -1609,7 +1708,8 @@ int intel_dsi_dcs_init_backlight_funcs(struct intel_connector *intel_connector); void intel_dvo_init(struct drm_i915_private *dev_priv); /* intel_hotplug.c */ void intel_hpd_poll_init(struct drm_i915_private *dev_priv); - +bool intel_encoder_hotplug(struct intel_encoder *encoder, + struct intel_connector *connector); /* 
legacy fbdev emulation in intel_fbdev.c */ #ifdef CONFIG_DRM_FBDEV_EMULATION @@ -1758,8 +1858,20 @@ static inline void intel_backlight_device_unregister(struct intel_connector *con } #endif /* CONFIG_BACKLIGHT_CLASS_DEVICE */ +/* intel_hdcp.c */ +void intel_hdcp_atomic_check(struct drm_connector *connector, + struct drm_connector_state *old_state, + struct drm_connector_state *new_state); +int intel_hdcp_init(struct intel_connector *connector, + const struct intel_hdcp_shim *hdcp_shim); +int intel_hdcp_enable(struct intel_connector *connector); +int intel_hdcp_disable(struct intel_connector *connector); +int intel_hdcp_check_link(struct intel_connector *connector); +bool is_hdcp_supported(struct drm_i915_private *dev_priv, enum port port); /* intel_psr.c */ +#define CAN_PSR(dev_priv) (HAS_PSR(dev_priv) && dev_priv->psr.sink_support) +void intel_psr_init_dpcd(struct intel_dp *intel_dp); void intel_psr_enable(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); void intel_psr_disable(struct intel_dp *intel_dp, @@ -1886,8 +1998,7 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv); void gen6_rps_busy(struct drm_i915_private *dev_priv); void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); void gen6_rps_idle(struct drm_i915_private *dev_priv); -void gen6_rps_boost(struct drm_i915_gem_request *rq, - struct intel_rps_client *rps); +void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps); void g4x_wm_get_hw_state(struct drm_device *dev); void vlv_wm_get_hw_state(struct drm_device *dev); void ilk_wm_get_hw_state(struct drm_device *dev); @@ -1919,12 +2030,13 @@ bool intel_sdvo_init(struct drm_i915_private *dev_priv, /* intel_sprite.c */ +bool intel_format_is_yuv(u32 format); int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, int usecs); struct intel_plane *intel_sprite_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe, int plane); -int intel_sprite_set_colorkey(struct drm_device *dev, void *data, - struct drm_file *file_priv); +int intel_sprite_set_colorkey_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state); void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state); void skl_update_plane(struct intel_plane *plane, @@ -1932,6 +2044,8 @@ void skl_update_plane(struct intel_plane *plane, const struct intel_plane_state *plane_state); void skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc); bool skl_plane_get_hw_state(struct intel_plane *plane); +bool skl_plane_has_ccs(struct drm_i915_private *dev_priv, + enum pipe pipe, enum plane_id plane_id); /* intel_tv.c */ void intel_tv_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index f67d321376e4..51a1d6868b1e 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -1266,11 +1266,6 @@ intel_dsi_mode_valid(struct drm_connector *connector, DRM_DEBUG_KMS("\n"); - if (mode->flags & DRM_MODE_FLAG_DBLSCAN) { - DRM_DEBUG_KMS("MODE_NO_DBLESCAN\n"); - return MODE_NO_DBLESCAN; - } - if (fixed_mode) { if (mode->hdisplay > fixed_mode->hdisplay) return MODE_PANEL; diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index 754baa00bea9..eb0c559b2715 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -219,9 +219,6 @@ intel_dvo_mode_valid(struct drm_connector *connector, int max_dotclk = 
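/*
 * Annotation (not from the patch, and somewhat speculative): the
 * MODE_NO_DBLESCAN checks removed from intel_dsi_mode_valid() above and
 * intel_dvo_mode_valid() below look redundant, since these connectors
 * leave doublescan_allowed at 0 (see intel_dp_init_connector() earlier in
 * this diff) and the core probe helpers already prune doublescan modes
 * before mode_valid() runs.
 */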
to_i915(connector->dev)->max_dotclk_freq; int target_clock = mode->clock; - if (mode->flags & DRM_MODE_FLAG_DBLSCAN) - return MODE_NO_DBLESCAN; - /* XXX: Validate clock range */ if (fixed_mode) { @@ -248,7 +245,8 @@ static bool intel_dvo_compute_config(struct intel_encoder *encoder, intel_dvo->attached_connector->panel.fixed_mode; struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; - /* If we have timings from the BIOS for the panel, put them in + /* + * If we have timings from the BIOS for the panel, put them in * to the adjusted mode. The CRTC will be set up for this mode, * with the panel scaling set up to source from the H/VDisplay * of the original mode. @@ -296,11 +294,6 @@ static void intel_dvo_pre_enable(struct intel_encoder *encoder, I915_WRITE(dvo_reg, dvo_val); } -/** - * Detect the output connection on our DVO device. - * - * Unimplemented. - */ static enum drm_connector_status intel_dvo_detect(struct drm_connector *connector, bool force) { @@ -316,7 +309,8 @@ static int intel_dvo_get_modes(struct drm_connector *connector) const struct drm_display_mode *fixed_mode = to_intel_connector(connector)->panel.fixed_mode; - /* We should probably have an i2c driver get_modes function for those + /* + * We should probably have an i2c driver get_modes function for those * devices which will have a fixed set of modes determined by the chip * (TV-out, for example), but for now with just TMDS and LVDS, * that's not the case. @@ -374,7 +368,7 @@ static const struct drm_encoder_funcs intel_dvo_enc_funcs = { .destroy = intel_dvo_enc_destroy, }; -/** +/* * Attempts to get a fixed panel timing for LVDS (currently only the i830). * * Other chips with DVO LVDS will need to extend this to deal with the LVDS @@ -446,7 +440,8 @@ void intel_dvo_init(struct drm_i915_private *dev_priv) uint32_t dpll[I915_MAX_PIPES]; enum port port; - /* Allow the I2C driver info to specify the GPIO to be used in + /* + * Allow the I2C driver info to specify the GPIO to be used in * special cases, but otherwise default to what's defined * in the spec. */ @@ -457,7 +452,8 @@ void intel_dvo_init(struct drm_i915_private *dev_priv) else gpio = GMBUS_PIN_DPB; - /* Set up the I2C bus necessary for the chip we're probing. + /* + * Set up the I2C bus necessary for the chip we're probing. * It appears that everything is on GPIOE except for panels * on i830 laptops, which are on GPIOB (DVOA). */ @@ -465,12 +461,14 @@ void intel_dvo_init(struct drm_i915_private *dev_priv) intel_dvo->dev = *dvo; - /* GMBUS NAK handling seems to be unstable, hence let the + /* + * GMBUS NAK handling seems to be unstable, hence let the * transmitter detection run in bit banging mode for now. */ intel_gmbus_force_bit(i2c, true); - /* ns2501 requires the DVO 2x clock before it will + /* + * ns2501 requires the DVO 2x clock before it will * respond to i2c accesses, so make sure we have * the clock enabled before we attempt to * initialize the device. @@ -528,7 +526,8 @@ void intel_dvo_init(struct drm_i915_private *dev_priv) intel_connector_attach_encoder(intel_connector, intel_encoder); if (dvo->type == INTEL_DVO_CHIP_LVDS) { - /* For our LVDS chipsets, we should hopefully be able
* However, it's in a different format from the BIOS * data on chipsets with integrated LVDS (stored in AIM diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index fa960cfd2764..4ba139c27fba 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -38,9 +38,11 @@ */ #define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) +#define DEFAULT_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) #define GEN10_LR_CONTEXT_RENDER_SIZE (18 * PAGE_SIZE) +#define GEN11_LR_CONTEXT_RENDER_SIZE (14 * PAGE_SIZE) #define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) @@ -121,6 +123,22 @@ static const struct engine_info intel_engines[] = { .mmio_base = GEN8_BSD2_RING_BASE, .irq_shift = GEN8_VCS2_IRQ_SHIFT, }, + [VCS3] = { + .hw_id = VCS3_HW, + .uabi_id = I915_EXEC_BSD, + .class = VIDEO_DECODE_CLASS, + .instance = 2, + .mmio_base = GEN11_BSD3_RING_BASE, + .irq_shift = 0, /* not used */ + }, + [VCS4] = { + .hw_id = VCS4_HW, + .uabi_id = I915_EXEC_BSD, + .class = VIDEO_DECODE_CLASS, + .instance = 3, + .mmio_base = GEN11_BSD4_RING_BASE, + .irq_shift = 0, /* not used */ + }, [VECS] = { .hw_id = VECS_HW, .uabi_id = I915_EXEC_VEBOX, @@ -129,6 +147,14 @@ static const struct engine_info intel_engines[] = { .mmio_base = VEBOX_RING_BASE, .irq_shift = GEN8_VECS_IRQ_SHIFT, }, + [VECS2] = { + .hw_id = VECS2_HW, + .uabi_id = I915_EXEC_VEBOX, + .class = VIDEO_ENHANCEMENT_CLASS, + .instance = 1, + .mmio_base = GEN11_VEBOX2_RING_BASE, + .irq_shift = 0, /* not used */ + }, }; /** @@ -157,6 +183,9 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) switch (INTEL_GEN(dev_priv)) { default: MISSING_CASE(INTEL_GEN(dev_priv)); + return DEFAULT_LR_CONTEXT_RENDER_SIZE; + case 11: + return GEN11_LR_CONTEXT_RENDER_SIZE; case 10: return GEN10_LR_CONTEXT_RENDER_SIZE; case 9: @@ -205,6 +234,9 @@ intel_engine_setup(struct drm_i915_private *dev_priv, GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes)); class_info = &intel_engine_classes[info->class]; + BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); + BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH)); + if (GEM_WARN_ON(info->class > MAX_ENGINE_CLASS)) return -EINVAL; @@ -225,7 +257,25 @@ intel_engine_setup(struct drm_i915_private *dev_priv, class_info->name, info->instance) >= sizeof(engine->name)); engine->hw_id = engine->guc_id = info->hw_id; - engine->mmio_base = info->mmio_base; + if (INTEL_GEN(dev_priv) >= 11) { + switch (engine->id) { + case VCS: + engine->mmio_base = GEN11_BSD_RING_BASE; + break; + case VCS2: + engine->mmio_base = GEN11_BSD2_RING_BASE; + break; + case VECS: + engine->mmio_base = GEN11_VEBOX_RING_BASE; + break; + default: + /* take the original value for all other engines */ + engine->mmio_base = info->mmio_base; + break; + } + } else { + engine->mmio_base = info->mmio_base; + } engine->irq_shift = info->irq_shift; engine->class = info->class; engine->instance = info->instance; @@ -418,6 +468,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists)); GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); + execlists->queue_priority = INT_MIN; execlists->queue = RB_ROOT; execlists->first = NULL; } @@ -626,7 +677,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) * Similarly the preempt context must always be available so that * we can 
interrupt the engine at any time. */ - if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) { + if (engine->i915->preempt_context) { ring = engine->context_pin(engine, engine->i915->preempt_context); if (IS_ERR(ring)) { @@ -651,7 +702,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) err_breadcrumbs: intel_engine_fini_breadcrumbs(engine); err_unpin_preempt: - if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) + if (engine->i915->preempt_context) engine->context_unpin(engine, engine->i915->preempt_context); err_unpin_kernel: engine->context_unpin(engine, engine->i915->kernel_context); @@ -681,12 +732,12 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) if (engine->default_state) i915_gem_object_put(engine->default_state); - if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) + if (engine->i915->preempt_context) engine->context_unpin(engine, engine->i915->preempt_context); engine->context_unpin(engine, engine->i915->kernel_context); } -u64 intel_engine_get_active_head(struct intel_engine_cs *engine) +u64 intel_engine_get_active_head(const struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; u64 acthd; @@ -702,7 +753,7 @@ u64 intel_engine_get_active_head(struct intel_engine_cs *engine) return acthd; } -u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine) +u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; u64 bbaddr; @@ -1389,7 +1440,8 @@ int init_workarounds_ring(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; int err; - WARN_ON(engine->id != RCS); + if (GEM_WARN_ON(engine->id != RCS)) + return -EINVAL; dev_priv->workarounds.count = 0; dev_priv->workarounds.hw_whitelist_count[engine->id] = 0; @@ -1420,20 +1472,20 @@ int init_workarounds_ring(struct intel_engine_cs *engine) return 0; } -int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) +int intel_ring_workarounds_emit(struct i915_request *rq) { - struct i915_workarounds *w = &req->i915->workarounds; + struct i915_workarounds *w = &rq->i915->workarounds; u32 *cs; int ret, i; if (w->count == 0) return 0; - ret = req->engine->emit_flush(req, EMIT_BARRIER); + ret = rq->engine->emit_flush(rq, EMIT_BARRIER); if (ret) return ret; - cs = intel_ring_begin(req, (w->count * 2 + 2)); + cs = intel_ring_begin(rq, w->count * 2 + 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1444,9 +1496,9 @@ int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) } *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); - ret = req->engine->emit_flush(req, EMIT_BARRIER); + ret = rq->engine->emit_flush(rq, EMIT_BARRIER); if (ret) return ret; @@ -1499,10 +1551,6 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) if (I915_SELFTEST_ONLY(engine->breadcrumbs.mock)) return true; - /* Interrupt/tasklet pending? */ - if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) - return false; - /* Waiting to drain ELSP? 
*/ if (READ_ONCE(engine->execlists.active)) return false; @@ -1550,7 +1598,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) { const struct i915_gem_context * const kernel_context = engine->i915->kernel_context; - struct drm_i915_gem_request *rq; + struct i915_request *rq; lockdep_assert_held(&engine->i915->drm.struct_mutex); @@ -1662,13 +1710,13 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) } static void print_request(struct drm_printer *m, - struct drm_i915_gem_request *rq, + struct i915_request *rq, const char *prefix) { - drm_printf(m, "%s%x%s [%x:%x] prio=%d @ %dms: %s\n", prefix, + drm_printf(m, "%s%x%s [%llx:%x] prio=%d @ %dms: %s\n", prefix, rq->global_seqno, - i915_gem_request_completed(rq) ? "!" : "", - rq->ctx->hw_id, rq->fence.seqno, + i915_request_completed(rq) ? "!" : "", + rq->fence.context, rq->fence.seqno, rq->priotree.priority, jiffies_to_msecs(jiffies - rq->emitted_jiffies), rq->timeline->common->name); @@ -1703,73 +1751,20 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len) } } -void intel_engine_dump(struct intel_engine_cs *engine, - struct drm_printer *m, - const char *header, ...) +static void intel_engine_print_registers(const struct intel_engine_cs *engine, + struct drm_printer *m) { - struct intel_breadcrumbs * const b = &engine->breadcrumbs; - const struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_gpu_error * const error = &engine->i915->gpu_error; struct drm_i915_private *dev_priv = engine->i915; - struct drm_i915_gem_request *rq; - struct rb_node *rb; - char hdr[80]; + const struct intel_engine_execlists * const execlists = + &engine->execlists; u64 addr; - if (header) { - va_list ap; - - va_start(ap, header); - drm_vprintf(m, header, &ap); - va_end(ap); - } - - if (i915_terminally_wedged(&engine->i915->gpu_error)) - drm_printf(m, "*** WEDGED ***\n"); - - drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", - intel_engine_get_seqno(engine), - intel_engine_last_submit(engine), - engine->hangcheck.seqno, - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), - engine->timeline->inflight_seqnos); - drm_printf(m, "\tReset count: %d (global %d)\n", - i915_reset_engine_count(error, engine), - i915_reset_count(error)); - - rcu_read_lock(); - - drm_printf(m, "\tRequests:\n"); - - rq = list_first_entry(&engine->timeline->requests, - struct drm_i915_gem_request, link); - if (&rq->link != &engine->timeline->requests) - print_request(m, rq, "\t\tfirst "); - - rq = list_last_entry(&engine->timeline->requests, - struct drm_i915_gem_request, link); - if (&rq->link != &engine->timeline->requests) - print_request(m, rq, "\t\tlast "); - - rq = i915_gem_find_active_request(engine); - if (rq) { - print_request(m, rq, "\t\tactive "); - drm_printf(m, - "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n", - rq->head, rq->postfix, rq->tail, - rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, - rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); - } - - drm_printf(m, "\tRING_START: 0x%08x [0x%08x]\n", - I915_READ(RING_START(engine->mmio_base)), - rq ? i915_ggtt_offset(rq->ring->vma) : 0); - drm_printf(m, "\tRING_HEAD: 0x%08x [0x%08x]\n", - I915_READ(RING_HEAD(engine->mmio_base)) & HEAD_ADDR, - rq ? rq->ring->head : 0); - drm_printf(m, "\tRING_TAIL: 0x%08x [0x%08x]\n", - I915_READ(RING_TAIL(engine->mmio_base)) & TAIL_ADDR, - rq ? 
rq->ring->tail : 0); + drm_printf(m, "\tRING_START: 0x%08x\n", + I915_READ(RING_START(engine->mmio_base))); + drm_printf(m, "\tRING_HEAD: 0x%08x\n", + I915_READ(RING_HEAD(engine->mmio_base)) & HEAD_ADDR); + drm_printf(m, "\tRING_TAIL: 0x%08x\n", + I915_READ(RING_TAIL(engine->mmio_base)) & TAIL_ADDR); drm_printf(m, "\tRING_CTL: 0x%08x%s\n", I915_READ(RING_CTL(engine->mmio_base)), I915_READ(RING_CTL(engine->mmio_base)) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : ""); @@ -1778,6 +1773,11 @@ void intel_engine_dump(struct intel_engine_cs *engine, I915_READ(RING_MI_MODE(engine->mmio_base)), I915_READ(RING_MI_MODE(engine->mmio_base)) & (MODE_IDLE) ? " [idle]" : ""); } + + if (INTEL_GEN(dev_priv) >= 6) { + drm_printf(m, "\tRING_IMR: %08x\n", I915_READ_IMR(engine)); + } + if (HAS_LEGACY_SEMAPHORES(dev_priv)) { drm_printf(m, "\tSYNC_0: 0x%08x\n", I915_READ(RING_SYNC_0(engine->mmio_base))); @@ -1788,8 +1788,6 @@ void intel_engine_dump(struct intel_engine_cs *engine, I915_READ(RING_SYNC_2(engine->mmio_base))); } - rcu_read_unlock(); - addr = intel_engine_get_active_head(engine); drm_printf(m, "\tACTHD: 0x%08x_%08x\n", upper_32_bits(addr), lower_32_bits(addr)); @@ -1851,10 +1849,13 @@ void intel_engine_dump(struct intel_engine_cs *engine, rcu_read_lock(); for (idx = 0; idx < execlists_num_ports(execlists); idx++) { + struct i915_request *rq; unsigned int count; rq = port_unpack(&execlists->port[idx], &count); if (rq) { + char hdr[80]; + snprintf(hdr, sizeof(hdr), "\t\tELSP[%d] count=%d, rq: ", idx, count); @@ -1873,10 +1874,82 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n", I915_READ(RING_PP_DIR_DCLV(engine))); } +} + +void intel_engine_dump(struct intel_engine_cs *engine, + struct drm_printer *m, + const char *header, ...) +{ + struct intel_breadcrumbs * const b = &engine->breadcrumbs; + const struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_gpu_error * const error = &engine->i915->gpu_error; + struct i915_request *rq; + struct rb_node *rb; + + if (header) { + va_list ap; + + va_start(ap, header); + drm_vprintf(m, header, &ap); + va_end(ap); + } + + if (i915_terminally_wedged(&engine->i915->gpu_error)) + drm_printf(m, "*** WEDGED ***\n"); + + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", + intel_engine_get_seqno(engine), + intel_engine_last_submit(engine), + engine->hangcheck.seqno, + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), + engine->timeline->inflight_seqnos); + drm_printf(m, "\tReset count: %d (global %d)\n", + i915_reset_engine_count(error, engine), + i915_reset_count(error)); + + rcu_read_lock(); + + drm_printf(m, "\tRequests:\n"); + + rq = list_first_entry(&engine->timeline->requests, + struct i915_request, link); + if (&rq->link != &engine->timeline->requests) + print_request(m, rq, "\t\tfirst "); + + rq = list_last_entry(&engine->timeline->requests, + struct i915_request, link); + if (&rq->link != &engine->timeline->requests) + print_request(m, rq, "\t\tlast "); + + rq = i915_gem_find_active_request(engine); + if (rq) { + print_request(m, rq, "\t\tactive "); + drm_printf(m, + "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n", + rq->head, rq->postfix, rq->tail, + rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, + rq->batch ? 
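/*
 * Annotation (not from the patch): the MMIO half of the dump moved into
 * intel_engine_print_registers(), and the caller below only runs it under
 * a conditional wakeref:
 *
 *	if (intel_runtime_pm_get_if_in_use(engine->i915)) {
 *		intel_engine_print_registers(engine, m);
 *		intel_runtime_pm_put(engine->i915);
 *	}
 *
 * so dumping engine state no longer wakes a runtime-suspended device just
 * to read registers.
 */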
lower_32_bits(rq->batch->node.start) : ~0u); + drm_printf(m, "\t\tring->start: 0x%08x\n", + i915_ggtt_offset(rq->ring->vma)); + drm_printf(m, "\t\tring->head: 0x%08x\n", + rq->ring->head); + drm_printf(m, "\t\tring->tail: 0x%08x\n", + rq->ring->tail); + } + + rcu_read_unlock(); + + if (intel_runtime_pm_get_if_in_use(engine->i915)) { + intel_engine_print_registers(engine, m); + intel_runtime_pm_put(engine->i915); + } else { + drm_printf(m, "\tDevice is asleep; skipping register dump\n"); + } spin_lock_irq(&engine->timeline->lock); list_for_each_entry(rq, &engine->timeline->requests, link) print_request(m, rq, "\t\tE "); + drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); for (rb = execlists->first; rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); @@ -1895,10 +1968,6 @@ void intel_engine_dump(struct intel_engine_cs *engine, } spin_unlock_irq(&b->rb_lock); - if (INTEL_GEN(dev_priv) >= 6) { - drm_printf(m, "\tRING_IMR: %08x\n", I915_READ_IMR(engine)); - } - drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n", engine->irq_posted, yesno(test_bit(ENGINE_IRQ_BREADCRUMB, diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index f88c1b5dae4c..707d49c12638 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -46,16 +46,6 @@ static inline bool fbc_supported(struct drm_i915_private *dev_priv) return HAS_FBC(dev_priv); } -static inline bool fbc_on_pipe_a_only(struct drm_i915_private *dev_priv) -{ - return IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8; -} - -static inline bool fbc_on_plane_a_only(struct drm_i915_private *dev_priv) -{ - return INTEL_GEN(dev_priv) < 4; -} - static inline bool no_fbc_on_multiple_pipes(struct drm_i915_private *dev_priv) { return INTEL_GEN(dev_priv) <= 3; @@ -183,7 +173,7 @@ static void g4x_fbc_activate(struct drm_i915_private *dev_priv) else dpfc_ctl |= DPFC_CTL_LIMIT_1X; - if (params->vma->fence) { + if (params->flags & PLANE_HAS_FENCE) { dpfc_ctl |= DPFC_CTL_FENCE_EN | params->vma->fence->id; I915_WRITE(DPFC_FENCE_YOFF, params->crtc.fence_y_offset); } else { @@ -241,7 +231,7 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv) break; } - if (params->vma->fence) { + if (params->flags & PLANE_HAS_FENCE) { dpfc_ctl |= DPFC_CTL_FENCE_EN; if (IS_GEN5(dev_priv)) dpfc_ctl |= params->vma->fence->id; @@ -324,7 +314,7 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) break; } - if (params->vma->fence) { + if (params->flags & PLANE_HAS_FENCE) { dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN; I915_WRITE(SNB_DPFC_CTL_SA, SNB_CPU_FENCE_ENABLE | @@ -492,7 +482,8 @@ static void intel_fbc_schedule_activation(struct intel_crtc *crtc) schedule_work(&work->work); } -static void intel_fbc_deactivate(struct drm_i915_private *dev_priv) +static void intel_fbc_deactivate(struct drm_i915_private *dev_priv, + const char *reason) { struct intel_fbc *fbc = &dev_priv->fbc; @@ -505,6 +496,8 @@ static void intel_fbc_deactivate(struct drm_i915_private *dev_priv) if (fbc->active) intel_fbc_hw_deactivate(dev_priv); + + fbc->no_fbc_reason = reason; } static bool multiple_pipes_ok(struct intel_crtc *crtc, @@ -668,11 +661,13 @@ void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv) static bool stride_is_valid(struct drm_i915_private *dev_priv, unsigned int stride) { - /* These should have been caught earlier. */ - WARN_ON(stride < 512); - WARN_ON((stride & (64 - 1)) != 0); + /* This should have been caught earlier. 
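 *
 * (Annotation, not from the patch: a misaligned stride is no longer
 * merely a WARN, the WARN_ON_ONCE below also fails stride_is_valid() so
 * FBC backs off gracefully, and the 512-byte minimum is demoted from a
 * WARN to an ordinary FBC restriction.)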
*/ + if (WARN_ON_ONCE((stride & (64 - 1)) != 0)) + return false; /* Below are the additional FBC restrictions. */ + if (stride < 512) + return false; if (IS_GEN2(dev_priv) || IS_GEN3(dev_priv)) return stride == 4096 || stride == 8192; @@ -748,6 +743,7 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, struct drm_framebuffer *fb = plane_state->base.fb; cache->vma = NULL; + cache->flags = 0; cache->crtc.mode_flags = crtc_state->base.adjusted_mode.flags; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) @@ -773,6 +769,9 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, cache->fb.stride = fb->pitches[0]; cache->vma = plane_state->vma; + cache->flags = plane_state->flags; + if (WARN_ON(cache->flags & PLANE_HAS_FENCE && !cache->vma->fence)) + cache->flags &= ~PLANE_HAS_FENCE; } static bool intel_fbc_can_activate(struct intel_crtc *crtc) @@ -794,8 +793,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) return false; } - if ((cache->crtc.mode_flags & DRM_MODE_FLAG_INTERLACE) || - (cache->crtc.mode_flags & DRM_MODE_FLAG_DBLSCAN)) { + if (cache->crtc.mode_flags & DRM_MODE_FLAG_INTERLACE) { fbc->no_fbc_reason = "incompatible mode"; return false; } @@ -811,8 +809,14 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) * Note that it is possible for a tiled surface to be unmappable (and * so have no fence associated with it) due to aperture constraints * at the time of pinning. + * + * FIXME with 90/270 degree rotation we should use the fence on + * the normal GTT view (the rotated view doesn't even have a + * fence). Would need changes to the FBC fence Y offset as well. + * For now this will effectively disable FBC with 90/270 degree + * rotation. */ - if (!cache->vma->fence) { + if (!(cache->flags & PLANE_HAS_FENCE)) { fbc->no_fbc_reason = "framebuffer not tiled or fenced"; return false; } @@ -855,6 +859,17 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) return false; } + /* + * Work around a problem on GEN9+ HW, where enabling FBC on a plane + * having a Y offset that isn't divisible by 4 causes FIFO underrun + * and screen flicker. + */ + if (IS_GEN(dev_priv, 9, 10) && + (fbc->state_cache.plane.adjusted_y & 3)) { + fbc->no_fbc_reason = "plane Y offset is misaligned"; + return false; + } + return true; } @@ -893,6 +908,7 @@ static void intel_fbc_get_reg_params(struct intel_crtc *crtc, memset(params, 0, sizeof(*params)); params->vma = cache->vma; + params->flags = cache->flags; params->crtc.pipe = crtc->pipe; params->crtc.i9xx_plane = to_intel_plane(crtc->base.primary)->i9xx_plane; @@ -921,6 +937,7 @@ void intel_fbc_pre_update(struct intel_crtc *crtc, { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_fbc *fbc = &dev_priv->fbc; + const char *reason = "update pending"; if (!fbc_supported(dev_priv)) return; @@ -928,7 +945,7 @@ void intel_fbc_pre_update(struct intel_crtc *crtc, mutex_lock(&fbc->lock); if (!multiple_pipes_ok(crtc, plane_state)) { - fbc->no_fbc_reason = "more than one pipe active"; + reason = "more than one pipe active"; goto deactivate; } @@ -938,11 +955,35 @@ void intel_fbc_pre_update(struct intel_crtc *crtc, intel_fbc_update_state_cache(crtc, crtc_state, plane_state); deactivate: - intel_fbc_deactivate(dev_priv); + intel_fbc_deactivate(dev_priv, reason); unlock: mutex_unlock(&fbc->lock); } +/** + * __intel_fbc_disable - disable FBC + * @dev_priv: i915 device instance + * + * This is the low level function that actually disables FBC. Callers should + * grab the FBC lock. 
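 *
 * Typical caller pattern (illustrative sketch, mirroring
 * intel_fbc_disable() further down, not part of the patch):
 *
 *	mutex_lock(&fbc->lock);
 *	if (fbc->crtc == crtc)
 *		__intel_fbc_disable(dev_priv);
 *	mutex_unlock(&fbc->lock);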
+ */ +static void __intel_fbc_disable(struct drm_i915_private *dev_priv) +{ + struct intel_fbc *fbc = &dev_priv->fbc; + struct intel_crtc *crtc = fbc->crtc; + + WARN_ON(!mutex_is_locked(&fbc->lock)); + WARN_ON(!fbc->enabled); + WARN_ON(fbc->active); + + DRM_DEBUG_KMS("Disabling FBC on pipe %c\n", pipe_name(crtc->pipe)); + + __intel_fbc_cleanup_cfb(dev_priv); + + fbc->enabled = false; + fbc->crtc = NULL; +} + static void __intel_fbc_post_update(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -954,6 +995,13 @@ static void __intel_fbc_post_update(struct intel_crtc *crtc) if (!fbc->enabled || fbc->crtc != crtc) return; + if (!i915_modparams.enable_fbc) { + intel_fbc_deactivate(dev_priv, "disabled at runtime per module param"); + __intel_fbc_disable(dev_priv); + + return; + } + if (!intel_fbc_can_activate(crtc)) { WARN_ON(fbc->active); return; @@ -971,9 +1019,8 @@ static void __intel_fbc_post_update(struct intel_crtc *crtc) intel_fbc_reg_params_equal(&old_params, &fbc->params)) return; - intel_fbc_deactivate(dev_priv); + intel_fbc_deactivate(dev_priv, "FBC enabled (active or scheduled)"); intel_fbc_schedule_activation(crtc); - fbc->no_fbc_reason = "FBC enabled (active or scheduled)"; } void intel_fbc_post_update(struct intel_crtc *crtc) @@ -1014,7 +1061,7 @@ void intel_fbc_invalidate(struct drm_i915_private *dev_priv, fbc->busy_bits |= intel_fbc_get_frontbuffer_bit(fbc) & frontbuffer_bits; if (fbc->enabled && fbc->busy_bits) - intel_fbc_deactivate(dev_priv); + intel_fbc_deactivate(dev_priv, "frontbuffer write"); mutex_unlock(&fbc->lock); } @@ -1085,13 +1132,10 @@ void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv, struct intel_crtc_state *crtc_state; struct intel_crtc *crtc = to_intel_crtc(plane_state->base.crtc); - if (!plane_state->base.visible) + if (!plane->has_fbc) continue; - if (fbc_on_pipe_a_only(dev_priv) && crtc->pipe != PIPE_A) - continue; - - if (fbc_on_plane_a_only(dev_priv) && plane->i9xx_plane != PLANE_A) + if (!plane_state->base.visible) continue; crtc_state = intel_atomic_get_new_crtc_state(state, crtc); @@ -1162,31 +1206,6 @@ out: } /** - * __intel_fbc_disable - disable FBC - * @dev_priv: i915 device instance - * - * This is the low level function that actually disables FBC. Callers should - * grab the FBC lock. 
- */ -static void __intel_fbc_disable(struct drm_i915_private *dev_priv) -{ - struct intel_fbc *fbc = &dev_priv->fbc; - struct intel_crtc *crtc = fbc->crtc; - - WARN_ON(!mutex_is_locked(&fbc->lock)); - WARN_ON(!fbc->enabled); - WARN_ON(fbc->active); - WARN_ON(crtc->active); - - DRM_DEBUG_KMS("Disabling FBC on pipe %c\n", pipe_name(crtc->pipe)); - - __intel_fbc_cleanup_cfb(dev_priv); - - fbc->enabled = false; - fbc->crtc = NULL; -} - -/** * intel_fbc_disable - disable FBC if it's associated with crtc * @crtc: the CRTC * @@ -1200,6 +1219,8 @@ void intel_fbc_disable(struct intel_crtc *crtc) if (!fbc_supported(dev_priv)) return; + WARN_ON(crtc->active); + mutex_lock(&fbc->lock); if (fbc->crtc == crtc) __intel_fbc_disable(dev_priv); @@ -1222,8 +1243,10 @@ void intel_fbc_global_disable(struct drm_i915_private *dev_priv) return; mutex_lock(&fbc->lock); - if (fbc->enabled) + if (fbc->enabled) { + WARN_ON(fbc->crtc->active); __intel_fbc_disable(dev_priv); + } mutex_unlock(&fbc->lock); cancel_work_sync(&fbc->work.work); @@ -1244,7 +1267,7 @@ static void intel_fbc_underrun_work_fn(struct work_struct *work) DRM_DEBUG_KMS("Disabling FBC due to FIFO underrun.\n"); fbc->underrun_detected = true; - intel_fbc_deactivate(dev_priv); + intel_fbc_deactivate(dev_priv, "FIFO underrun"); out: mutex_unlock(&fbc->lock); } @@ -1348,7 +1371,6 @@ static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv) void intel_fbc_init(struct drm_i915_private *dev_priv) { struct intel_fbc *fbc = &dev_priv->fbc; - enum pipe pipe; INIT_WORK(&fbc->work.work, intel_fbc_work_fn); INIT_WORK(&fbc->underrun_work, intel_fbc_underrun_work_fn); @@ -1369,14 +1391,6 @@ void intel_fbc_init(struct drm_i915_private *dev_priv) return; } - for_each_pipe(dev_priv, pipe) { - fbc->possible_framebuffer_bits |= - INTEL_FRONTBUFFER_PRIMARY(pipe); - - if (fbc_on_pipe_a_only(dev_priv)) - break; - } - /* This value was pulled out of someone's hat */ if (INTEL_GEN(dev_priv) <= 4 && !IS_GM45(dev_priv)) I915_WRITE(FBC_CONTROL, 500 << FBC_CTL_INTERVAL_SHIFT); diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index da48af11eb6b..6f12adc06365 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -48,7 +48,8 @@ static void intel_fbdev_invalidate(struct intel_fbdev *ifbdev) { struct drm_i915_gem_object *obj = ifbdev->fb->obj; - unsigned int origin = ifbdev->vma->fence ? ORIGIN_GTT : ORIGIN_CPU; + unsigned int origin = + ifbdev->vma_flags & PLANE_HAS_FENCE ? ORIGIN_GTT : ORIGIN_CPU; intel_fb_obj_invalidate(obj, origin); } @@ -177,6 +178,7 @@ static int intelfb_create(struct drm_fb_helper *helper, struct fb_info *info; struct drm_framebuffer *fb; struct i915_vma *vma; + unsigned long flags = 0; bool prealloc = false; void __iomem *vaddr; int ret; @@ -211,7 +213,9 @@ static int intelfb_create(struct drm_fb_helper *helper, * This also validates that any existing fb inherited from the * BIOS is suitable for own access. 
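 *
 * (Annotation, not from the patch: intel_pin_and_fence_fb_obj() now takes
 * a uses_fence argument, false here for fbdev, and reports what it
 * actually did through *out_flags. Whatever lands in flags, e.g.
 * PLANE_HAS_FENCE, must be handed back to intel_unpin_fb_vma() when the
 * vma is released, as done in the error path and in intel_fbdev_destroy()
 * below.)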
*/ - vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, DRM_MODE_ROTATE_0); + vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, + DRM_MODE_ROTATE_0, + false, &flags); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto out_unlock; @@ -268,6 +272,7 @@ static int intelfb_create(struct drm_fb_helper *helper, DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08x\n", fb->width, fb->height, i915_ggtt_offset(vma)); ifbdev->vma = vma; + ifbdev->vma_flags = flags; intel_runtime_pm_put(dev_priv); mutex_unlock(&dev->struct_mutex); @@ -275,7 +280,7 @@ static int intelfb_create(struct drm_fb_helper *helper, return 0; out_unpin: - intel_unpin_fb_vma(vma); + intel_unpin_fb_vma(vma, flags); out_unlock: intel_runtime_pm_put(dev_priv); mutex_unlock(&dev->struct_mutex); @@ -513,7 +518,7 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev) if (ifbdev->vma) { mutex_lock(&ifbdev->helper.dev->struct_mutex); - intel_unpin_fb_vma(ifbdev->vma); + intel_unpin_fb_vma(ifbdev->vma, ifbdev->vma_flags); mutex_unlock(&ifbdev->helper.dev->struct_mutex); } diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index fcfc217e754e..3a8d3d06c26a 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -79,6 +79,7 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, spin_unlock(&dev_priv->fb_tracking.lock); } + might_sleep(); intel_psr_invalidate(dev_priv, frontbuffer_bits); intel_edp_drrs_invalidate(dev_priv, frontbuffer_bits); intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin); @@ -108,6 +109,7 @@ static void intel_frontbuffer_flush(struct drm_i915_private *dev_priv, if (!frontbuffer_bits) return; + might_sleep(); intel_edp_drrs_flush(dev_priv, frontbuffer_bits); intel_psr_flush(dev_priv, frontbuffer_bits, origin); intel_fbc_flush(dev_priv, frontbuffer_bits, origin); diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index 3c6bf5a34c3c..ff08ea0ebf49 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -23,6 +23,7 @@ */ #include "intel_guc.h" +#include "intel_guc_ads.h" #include "intel_guc_submission.h" #include "i915_drv.h" @@ -63,6 +64,7 @@ void intel_guc_init_early(struct intel_guc *guc) { intel_guc_fw_init_early(guc); intel_guc_ct_init_early(&guc->ct); + intel_guc_log_init_early(guc); mutex_init(&guc->send_mutex); guc->send = intel_guc_send_nop; @@ -86,8 +88,10 @@ int intel_guc_init_wq(struct intel_guc *guc) */ guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log", WQ_HIGHPRI | WQ_FREEZABLE); - if (!guc->log.runtime.flush_wq) + if (!guc->log.runtime.flush_wq) { + DRM_ERROR("Couldn't allocate workqueue for GuC log\n"); return -ENOMEM; + } /* * Even though both sending GuC action, and adding a new workitem to @@ -108,6 +112,8 @@ int intel_guc_init_wq(struct intel_guc *guc) WQ_HIGHPRI); if (!guc->preempt_wq) { destroy_workqueue(guc->log.runtime.flush_wq); + DRM_ERROR("Couldn't allocate workqueue for GuC " + "preemption\n"); return -ENOMEM; } } @@ -163,10 +169,25 @@ int intel_guc_init(struct intel_guc *guc) return ret; GEM_BUG_ON(!guc->shared_data); + ret = intel_guc_log_create(guc); + if (ret) + goto err_shared; + + ret = intel_guc_ads_create(guc); + if (ret) + goto err_log; + GEM_BUG_ON(!guc->ads_vma); + /* We need to notify the guc whenever we change the GGTT */ i915_ggtt_enable_guc(dev_priv); return 0; + +err_log: + intel_guc_log_destroy(guc); +err_shared: + guc_shared_data_destroy(guc); + return ret; } void intel_guc_fini(struct intel_guc *guc) 
@@ -174,6 +195,8 @@ void intel_guc_fini(struct intel_guc *guc) struct drm_i915_private *dev_priv = guc_to_i915(guc); i915_ggtt_disable_guc(dev_priv); + intel_guc_ads_destroy(guc); + intel_guc_log_destroy(guc); guc_shared_data_destroy(guc); } @@ -197,6 +220,19 @@ static u32 get_core_family(struct drm_i915_private *dev_priv) } } +static u32 get_log_verbosity_flags(void) +{ + if (i915_modparams.guc_log_level > 0) { + u32 verbosity = i915_modparams.guc_log_level - 1; + + GEM_BUG_ON(verbosity > GUC_LOG_VERBOSITY_MAX); + return verbosity << GUC_LOG_VERBOSITY_SHIFT; + } + + GEM_BUG_ON(i915_modparams.enable_guc < 0); + return GUC_LOG_DISABLED; +} + /* * Initialise the GuC parameter block before starting the firmware * transfer. These parameters are read by the firmware on startup @@ -229,12 +265,7 @@ void intel_guc_init_params(struct intel_guc *guc) params[GUC_CTL_LOG_PARAMS] = guc->log.flags; - if (i915_modparams.guc_log_level >= 0) { - params[GUC_CTL_DEBUG] = - i915_modparams.guc_log_level << GUC_LOG_VERBOSITY_SHIFT; - } else { - params[GUC_CTL_DEBUG] = GUC_LOG_DISABLED; - } + params[GUC_CTL_DEBUG] = get_log_verbosity_flags(); /* If GuC submission is enabled, set up additional parameters here */ if (USES_GUC_SUBMISSION(dev_priv)) { @@ -339,7 +370,7 @@ int intel_guc_sample_forcewake(struct intel_guc *guc) u32 action[2]; action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE; - /* WaRsDisableCoarsePowerGating:skl,bxt */ + /* WaRsDisableCoarsePowerGating:skl,cnl */ if (!HAS_RC6(dev_priv) || NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) action[1] = 0; else @@ -372,22 +403,15 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset) /** * intel_guc_suspend() - notify GuC entering suspend state - * @dev_priv: i915 device private + * @guc: the guc */ -int intel_guc_suspend(struct drm_i915_private *dev_priv) +int intel_guc_suspend(struct intel_guc *guc) { - struct intel_guc *guc = &dev_priv->guc; - u32 data[3]; - - if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) - return 0; - - gen9_disable_guc_interrupts(dev_priv); - - data[0] = INTEL_GUC_ACTION_ENTER_S_STATE; - /* any value greater than GUC_POWER_D0 */ - data[1] = GUC_POWER_D1; - data[2] = guc_ggtt_offset(guc->shared_data); + u32 data[] = { + INTEL_GUC_ACTION_ENTER_S_STATE, + GUC_POWER_D1, /* any value greater than GUC_POWER_D0 */ + guc_ggtt_offset(guc->shared_data) + }; return intel_guc_send(guc, data, ARRAY_SIZE(data)); } @@ -417,22 +441,15 @@ int intel_guc_reset_engine(struct intel_guc *guc, /** * intel_guc_resume() - notify GuC resuming from suspend state - * @dev_priv: i915 device private + * @guc: the guc */ -int intel_guc_resume(struct drm_i915_private *dev_priv) +int intel_guc_resume(struct intel_guc *guc) { - struct intel_guc *guc = &dev_priv->guc; - u32 data[3]; - - if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) - return 0; - - if (i915_modparams.guc_log_level >= 0) - gen9_enable_guc_interrupts(dev_priv); - - data[0] = INTEL_GUC_ACTION_EXIT_S_STATE; - data[1] = GUC_POWER_D0; - data[2] = guc_ggtt_offset(guc->shared_data); + u32 data[] = { + INTEL_GUC_ACTION_EXIT_S_STATE, + GUC_POWER_D0, + guc_ggtt_offset(guc->shared_data) + }; return intel_guc_send(guc, data, ARRAY_SIZE(data)); } diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 52856a97477d..b9424ac644ac 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -127,8 +127,8 @@ int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); int intel_guc_send_mmio(struct intel_guc *guc, const u32 
*action, u32 len); int intel_guc_sample_forcewake(struct intel_guc *guc); int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset); -int intel_guc_suspend(struct drm_i915_private *dev_priv); -int intel_guc_resume(struct drm_i915_private *dev_priv); +int intel_guc_suspend(struct intel_guc *guc); +int intel_guc_resume(struct intel_guc *guc); struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_guc_ads.c b/drivers/gpu/drm/i915/intel_guc_ads.c new file mode 100644 index 000000000000..ac627534667d --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc_ads.c @@ -0,0 +1,151 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "intel_guc_ads.h" +#include "intel_uc.h" +#include "i915_drv.h" + +/* + * The Additional Data Struct (ADS) has pointers for different buffers used by + * the GuC. One single gem object contains the ADS struct itself (guc_ads), the + * scheduling policies (guc_policies), a structure describing a collection of + * register sets (guc_mmio_reg_state) and some extra pages for the GuC to save + * its internal state for sleep. + */ + +static void guc_policy_init(struct guc_policy *policy) +{ + policy->execution_quantum = POLICY_DEFAULT_EXECUTION_QUANTUM_US; + policy->preemption_time = POLICY_DEFAULT_PREEMPTION_TIME_US; + policy->fault_time = POLICY_DEFAULT_FAULT_TIME_US; + policy->policy_flags = 0; +} + +static void guc_policies_init(struct guc_policies *policies) +{ + struct guc_policy *policy; + u32 p, i; + + policies->dpc_promote_time = POLICY_DEFAULT_DPC_PROMOTE_TIME_US; + policies->max_num_work_items = POLICY_MAX_NUM_WI; + + for (p = 0; p < GUC_CLIENT_PRIORITY_NUM; p++) { + for (i = GUC_RENDER_ENGINE; i < GUC_MAX_ENGINES_NUM; i++) { + policy = &policies->policy[p][i]; + + guc_policy_init(policy); + } + } + + policies->is_valid = 1; +} + +/* + * The first 80 dwords of the register state context, containing the + * execlists and ppgtt registers. 
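The address fix-up pattern used by the new intel_guc_ads.c below can be checked in isolation: every pointer handed to the GuC is the blob's GGTT base plus the member's offset inside one packed allocation, with ptr_offset() assumed to be an offsetof()-style helper. A self-contained sketch with stand-in types:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-ins for guc_ads, guc_policies and guc_mmio_reg_state. */
struct ads_blob {
	uint32_t ads[8];
	uint32_t policies[16];
	uint32_t reg_state[16];
	uint8_t  reg_state_buffer[4096];
} __attribute__((packed));

int main(void)
{
	uint32_t base = 0x00100000;	/* pretend guc_ggtt_offset(vma) */

	/* Each GuC-visible pointer is base + offset-in-blob. */
	printf("policies  = 0x%08x\n",
	       base + (uint32_t)offsetof(struct ads_blob, policies));
	printf("reg_state = 0x%08x\n",
	       base + (uint32_t)offsetof(struct ads_blob, reg_state));
	printf("buffer    = 0x%08x\n",
	       base + (uint32_t)offsetof(struct ads_blob, reg_state_buffer));
	return 0;
}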
+ */ +#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32)) + +/** + * intel_guc_ads_create() - creates GuC ADS + * @guc: intel_guc struct + * + */ +int intel_guc_ads_create(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct i915_vma *vma; + struct page *page; + /* The ads obj includes the struct itself and buffers passed to GuC */ + struct { + struct guc_ads ads; + struct guc_policies policies; + struct guc_mmio_reg_state reg_state; + u8 reg_state_buffer[GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE]; + } __packed *blob; + struct intel_engine_cs *engine; + enum intel_engine_id id; + const u32 skipped_offset = LRC_HEADER_PAGES * PAGE_SIZE; + const u32 skipped_size = LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE; + u32 base; + + GEM_BUG_ON(guc->ads_vma); + + vma = intel_guc_allocate_vma(guc, PAGE_ALIGN(sizeof(*blob))); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + guc->ads_vma = vma; + + page = i915_vma_first_page(vma); + blob = kmap(page); + + /* GuC scheduling policies */ + guc_policies_init(&blob->policies); + + /* MMIO reg state */ + for_each_engine(engine, dev_priv, id) { + blob->reg_state.white_list[engine->guc_id].mmio_start = + engine->mmio_base + GUC_MMIO_WHITE_LIST_START; + + /* Nothing to be saved or restored for now. */ + blob->reg_state.white_list[engine->guc_id].count = 0; + } + + /* + * The GuC requires a "Golden Context" when it reinitialises + * engines after a reset. Here we use the Render ring default + * context, which must already exist and be pinned in the GGTT, + * so its address won't change after we've told the GuC where + * to find it. Note that we have to skip our header (1 page), + * because our GuC shared data is there. + */ + blob->ads.golden_context_lrca = + guc_ggtt_offset(dev_priv->kernel_context->engine[RCS].state) + + skipped_offset; + + /* + * The GuC expects us to exclude the portion of the context image that + * it skips from the size it is to read. It starts reading from after + * the execlist context (so skipping the first page [PPHWSP] and 80 + * dwords). Weird guc is weird. + */ + for_each_engine(engine, dev_priv, id) + blob->ads.eng_state_size[engine->guc_id] = + engine->context_size - skipped_size; + + base = guc_ggtt_offset(vma); + blob->ads.scheduler_policies = base + ptr_offset(blob, policies); + blob->ads.reg_state_buffer = base + ptr_offset(blob, reg_state_buffer); + blob->ads.reg_state_addr = base + ptr_offset(blob, reg_state); + + kunmap(page); + + return 0; +} + +void intel_guc_ads_destroy(struct intel_guc *guc) +{ + i915_vma_unpin_and_release(&guc->ads_vma); +} diff --git a/drivers/gpu/drm/i915/intel_guc_ads.h b/drivers/gpu/drm/i915/intel_guc_ads.h new file mode 100644 index 000000000000..c4735742c564 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc_ads.h @@ -0,0 +1,33 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_GUC_ADS_H_ +#define _INTEL_GUC_ADS_H_ + +struct intel_guc; + +int intel_guc_ads_create(struct intel_guc *guc); +void intel_guc_ads_destroy(struct intel_guc *guc); + +#endif diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c index 3b0932942857..d07f2b985f1c 100644 --- a/drivers/gpu/drm/i915/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -269,15 +269,15 @@ static int guc_fw_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma) } /** - * intel_guc_fw_upload() - finish preparing the GuC for activity + * intel_guc_fw_upload() - load GuC uCode to device * @guc: intel_guc structure * - * Called during driver loading and also after a GPU reset. + * Called from intel_uc_init_hw() during driver load, resume from sleep and + * after a GPU reset. * - * The main action required here it to load the GuC uCode into the device. * The firmware image should have already been fetched into memory by the - * earlier call to intel_guc_init(), so here we need only check that - * worked, and then transfer the image to the h/w. + * earlier call to intel_uc_init_fw(), so here we need to only check that + * fetch succeeded, and then transfer the image to the h/w. * * Return: non-zero code on error */ diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index eaedd63e3819..c0c2e7d1c7d7 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -33,11 +33,10 @@ static void guc_log_capture_logs(struct intel_guc *guc); /** * DOC: GuC firmware log * - * Firmware log is enabled by setting i915.guc_log_level to non-negative level. + * Firmware log is enabled by setting i915.guc_log_level to the positive level. * Log data is printed out via reading debugfs i915_guc_log_dump. Reading from * i915_guc_load_status will print out firmware loading status and scratch * registers value. - * */ static int guc_log_flush_complete(struct intel_guc *guc) @@ -59,11 +58,17 @@ static int guc_log_flush(struct intel_guc *guc) return intel_guc_send(guc, action, ARRAY_SIZE(action)); } -static int guc_log_control(struct intel_guc *guc, u32 control_val) +static int guc_log_control(struct intel_guc *guc, bool enable, u32 verbosity) { + union guc_log_control control_val = { + { + .logging_enabled = enable, + .verbosity = verbosity, + }, + }; u32 action[] = { INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING, - control_val + control_val.value }; return intel_guc_send(guc, action, ARRAY_SIZE(action)); @@ -78,7 +83,8 @@ static int subbuf_start_callback(struct rchan_buf *buf, void *prev_subbuf, size_t prev_padding) { - /* Use no-overwrite mode by default, where relay will stop accepting + /* + * Use no-overwrite mode by default, where relay will stop accepting * new data if there are no empty sub buffers left. * There is no strict synchronization enforced by relay between Consumer * and Producer. 
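For reference, guc_log_control() above packs its two arguments into a single u32 through union guc_log_control. A standalone model of that encoding; the bitfield widths here are assumed for illustration, the authoritative layout living in the GuC interface definitions:

#include <stdint.h>
#include <stdio.h>

union log_control {
	struct {
		uint32_t logging_enabled : 1;
		uint32_t reserved        : 3;
		uint32_t verbosity       : 4;	/* widths assumed */
	};
	uint32_t value;
};

int main(void)
{
	/* Second dword of the UK_LOG_ENABLE_LOGGING action payload. */
	union log_control c = { .logging_enabled = 1, .verbosity = 2 };

	printf("0x%08x\n", c.value);	/* 0x00000021 with this layout */
	return 0;
}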
In overwrite mode, there is a possibility of getting @@ -104,7 +110,8 @@ static struct dentry *create_buf_file_callback(const char *filename, { struct dentry *buf_file; - /* This to enable the use of a single buffer for the relay channel and + /* + * This to enable the use of a single buffer for the relay channel and * correspondingly have a single file exposed to User, through which * it can collect the logs in order without any post-processing. * Need to set 'is_global' even if parent is NULL for early logging. @@ -114,7 +121,8 @@ static struct dentry *create_buf_file_callback(const char *filename, if (!parent) return NULL; - /* Not using the channel filename passed as an argument, since for each + /* + * Not using the channel filename passed as an argument, since for each * channel relay appends the corresponding CPU number to the filename * passed in relay_open(). This should be fine as relay just needs a * dentry of the file associated with the channel buffer and that file's @@ -147,13 +155,16 @@ static int guc_log_relay_file_create(struct intel_guc *guc) struct dentry *log_dir; int ret; - if (i915_modparams.guc_log_level < 0) + if (!i915_modparams.guc_log_level) return 0; + mutex_lock(&guc->log.runtime.relay_lock); + /* For now create the log file in /sys/kernel/debug/dri/0 dir */ log_dir = dev_priv->drm.primary->debugfs_root; - /* If /sys/kernel/debug/dri/0 location do not exist, then debugfs is + /* + * If /sys/kernel/debug/dri/0 location do not exist, then debugfs is * not mounted and so can't create the relay file. * The relay API seems to fit well with debugfs only, for availing relay * there are 3 requirements which can be met for debugfs file only in a @@ -166,25 +177,41 @@ static int guc_log_relay_file_create(struct intel_guc *guc) */ if (!log_dir) { DRM_ERROR("Debugfs dir not available yet for GuC log file\n"); - return -ENODEV; + ret = -ENODEV; + goto out_unlock; } ret = relay_late_setup_files(guc->log.runtime.relay_chan, "guc_log", log_dir); if (ret < 0 && ret != -EEXIST) { DRM_ERROR("Couldn't associate relay chan with file %d\n", ret); - return ret; + goto out_unlock; } - return 0; + ret = 0; + +out_unlock: + mutex_unlock(&guc->log.runtime.relay_lock); + return ret; +} + +static bool guc_log_has_relay(struct intel_guc *guc) +{ + lockdep_assert_held(&guc->log.runtime.relay_lock); + + return guc->log.runtime.relay_chan != NULL; } static void guc_move_to_next_buf(struct intel_guc *guc) { - /* Make sure the updates made in the sub buffer are visible when + /* + * Make sure the updates made in the sub buffer are visible when * Consumer sees the following update to offset inside the sub buffer. */ smp_wmb(); + if (!guc_log_has_relay(guc)) + return; + /* All data has been written, so now move the offset of sub buffer. */ relay_reserve(guc->log.runtime.relay_chan, guc->log.vma->obj->base.size); @@ -194,10 +221,11 @@ static void guc_move_to_next_buf(struct intel_guc *guc) static void *guc_get_write_buffer(struct intel_guc *guc) { - if (!guc->log.runtime.relay_chan) + if (!guc_log_has_relay(guc)) return NULL; - /* Just get the base address of a new sub buffer and copy data into it + /* + * Just get the base address of a new sub buffer and copy data into it * ourselves. NULL will be returned in no-overwrite mode, if all sub * buffers are full. 
Could have used the relay_write() to indirectly * copy the data, but that would have been bit convoluted, as we need to @@ -262,15 +290,30 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) /* Get the pointer to shared GuC log buffer */ log_buf_state = src_data = guc->log.runtime.buf_addr; + mutex_lock(&guc->log.runtime.relay_lock); + /* Get the pointer to local buffer to store the logs */ log_buf_snapshot_state = dst_data = guc_get_write_buffer(guc); + if (unlikely(!log_buf_snapshot_state)) { + /* + * Used rate limited to avoid deluge of messages, logs might be + * getting consumed by User at a slow rate. + */ + DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n"); + guc->log.capture_miss_count++; + mutex_unlock(&guc->log.runtime.relay_lock); + + return; + } + /* Actual logs are present from the 2nd page */ src_data += PAGE_SIZE; dst_data += PAGE_SIZE; for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { - /* Make a copy of the state structure, inside GuC log buffer + /* + * Make a copy of the state structure, inside GuC log buffer * (which is uncached mapped), on the stack to avoid reading * from it multiple times. */ @@ -290,14 +333,12 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) log_buf_state->flush_to_file = 0; log_buf_state++; - if (unlikely(!log_buf_snapshot_state)) - continue; - /* First copy the state structure in snapshot buffer */ memcpy(log_buf_snapshot_state, &log_buf_state_local, sizeof(struct guc_log_buffer_state)); - /* The write pointer could have been updated by GuC firmware, + /* + * The write pointer could have been updated by GuC firmware, * after sending the flush interrupt to Host, for consistency * set write pointer value to same value of sampled_write_ptr * in the snapshot buffer. @@ -332,15 +373,9 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) dst_data += buffer_size; } - if (log_buf_snapshot_state) - guc_move_to_next_buf(guc); - else { - /* Used rate limited to avoid deluge of messages, logs might be - * getting consumed by User at a slow rate. - */ - DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n"); - guc->log.capture_miss_count++; - } + guc_move_to_next_buf(guc); + + mutex_unlock(&guc->log.runtime.relay_lock); } static void capture_logs_work(struct work_struct *work) @@ -360,19 +395,21 @@ static int guc_log_runtime_create(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); void *vaddr; - struct rchan *guc_log_relay_chan; - size_t n_subbufs, subbuf_size; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); + if (!guc->log.vma) + return -ENODEV; + GEM_BUG_ON(guc_log_has_runtime(guc)); ret = i915_gem_object_set_to_wc_domain(guc->log.vma->obj, true); if (ret) return ret; - /* Create a WC (Uncached for read) vmalloc mapping of log + /* + * Create a WC (Uncached for read) vmalloc mapping of log * buffer pages, so that we can directly get the data * (up-to-date) from memory. */ @@ -384,17 +421,55 @@ static int guc_log_runtime_create(struct intel_guc *guc) guc->log.runtime.buf_addr = vaddr; + return 0; +} + +static void guc_log_runtime_destroy(struct intel_guc *guc) +{ + /* + * It's possible that the runtime stuff was never allocated because + * GuC log was disabled at the boot time. 
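The capture loop above copies each guc_log_buffer_state out of the uncached GuC mapping exactly once and then works only on the stack copy, so concurrent firmware updates cannot tear the host's view mid-computation. A reduced model of the pattern:

#include <stdint.h>
#include <string.h>

struct log_buf_state {
	uint32_t read_ptr;
	uint32_t sampled_write_ptr;
	uint32_t flush_to_file;
};

/* 'shared' stands in for the write-combining GuC mapping. */
static uint32_t pending_bytes(const volatile struct log_buf_state *shared,
			      uint32_t buf_size)
{
	struct log_buf_state local;

	/* One snapshot; every later read hits coherent stack memory. */
	memcpy(&local, (const void *)shared, sizeof(local));

	return (local.sampled_write_ptr - local.read_ptr) % buf_size;
}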
+ */ + if (!guc_log_has_runtime(guc)) + return; + + i915_gem_object_unpin_map(guc->log.vma->obj); + guc->log.runtime.buf_addr = NULL; +} + +void intel_guc_log_init_early(struct intel_guc *guc) +{ + mutex_init(&guc->log.runtime.relay_lock); + INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work); +} + +int intel_guc_log_relay_create(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct rchan *guc_log_relay_chan; + size_t n_subbufs, subbuf_size; + int ret; + + if (!i915_modparams.guc_log_level) + return 0; + + mutex_lock(&guc->log.runtime.relay_lock); + + GEM_BUG_ON(guc_log_has_relay(guc)); + /* Keep the size of sub buffers same as shared log buffer */ - subbuf_size = guc->log.vma->obj->base.size; + subbuf_size = GUC_LOG_SIZE; - /* Store up to 8 snapshots, which is large enough to buffer sufficient + /* + * Store up to 8 snapshots, which is large enough to buffer sufficient * boot time logs and provides enough leeway to User, in terms of * latency, for consuming the logs from relay. Also doesn't take * up too much memory. */ n_subbufs = 8; - /* Create a relay channel, so that we have buffers for storing + /* + * Create a relay channel, so that we have buffers for storing * the GuC firmware logs, the channel will be linked with a file * later on when debugfs is registered. */ @@ -404,33 +479,39 @@ static int guc_log_runtime_create(struct intel_guc *guc) DRM_ERROR("Couldn't create relay chan for GuC logging\n"); ret = -ENOMEM; - goto err_vaddr; + goto err; } GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size); guc->log.runtime.relay_chan = guc_log_relay_chan; - INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work); + mutex_unlock(&guc->log.runtime.relay_lock); + return 0; -err_vaddr: - i915_gem_object_unpin_map(guc->log.vma->obj); - guc->log.runtime.buf_addr = NULL; +err: + mutex_unlock(&guc->log.runtime.relay_lock); + /* logging will be off */ + i915_modparams.guc_log_level = 0; return ret; } -static void guc_log_runtime_destroy(struct intel_guc *guc) +void intel_guc_log_relay_destroy(struct intel_guc *guc) { + mutex_lock(&guc->log.runtime.relay_lock); + /* - * It's possible that the runtime stuff was never allocated because - * guc_log_level was < 0 at the time - **/ - if (!guc_log_has_runtime(guc)) - return; + * It's possible that the relay was never allocated because + * GuC log was disabled at the boot time. + */ + if (!guc_log_has_relay(guc)) + goto out_unlock; relay_close(guc->log.runtime.relay_chan); - i915_gem_object_unpin_map(guc->log.vma->obj); - guc->log.runtime.buf_addr = NULL; + guc->log.runtime.relay_chan = NULL; + +out_unlock: + mutex_unlock(&guc->log.runtime.relay_lock); } static int guc_log_late_setup(struct intel_guc *guc) @@ -438,16 +519,24 @@ static int guc_log_late_setup(struct intel_guc *guc) struct drm_i915_private *dev_priv = guc_to_i915(guc); int ret; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - if (!guc_log_has_runtime(guc)) { - /* If log_level was set as -1 at boot time, then setup needed to - * handle log buffer flush interrupts would not have been done yet, - * so do that now. + /* + * If log was disabled at boot time, then setup needed to handle + * log buffer flush interrupts would not have been done yet, so + * do that now. 
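The relay call elided from the context above is, in sketch form, an early open with no file behind it; relay_late_setup_files() attaches the debugfs node once it exists. This fragment assumes the surrounding driver state and is not standalone-compilable:

/* 8 sub-buffers, each the size of the shared GuC log buffer. */
chan = relay_open(NULL,			/* no filename: file comes later */
		  NULL,			/* no parent dentry yet */
		  GUC_LOG_SIZE,		/* subbuf_size */
		  8,			/* n_subbufs */
		  &relay_callbacks,	/* subbuf_start/create_buf_file ops */
		  guc);			/* private_data */

/* Later, once debugfs is registered: */
ret = relay_late_setup_files(chan, "guc_log",
			     dev_priv->drm.primary->debugfs_root);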
*/ - ret = guc_log_runtime_create(guc); + ret = intel_guc_log_relay_create(guc); if (ret) goto err; + + mutex_lock(&dev_priv->drm.struct_mutex); + intel_runtime_pm_get(dev_priv); + ret = guc_log_runtime_create(guc); + intel_runtime_pm_put(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + + if (ret) + goto err_relay; } ret = guc_log_relay_file_create(guc); @@ -457,10 +546,14 @@ static int guc_log_late_setup(struct intel_guc *guc) return 0; err_runtime: + mutex_lock(&dev_priv->drm.struct_mutex); guc_log_runtime_destroy(guc); + mutex_unlock(&dev_priv->drm.struct_mutex); +err_relay: + intel_guc_log_relay_destroy(guc); err: /* logging will remain off */ - i915_modparams.guc_log_level = -1; + i915_modparams.guc_log_level = 0; return ret; } @@ -470,7 +563,8 @@ static void guc_log_capture_logs(struct intel_guc *guc) guc_read_update_log_buffer(guc); - /* Generally device is expected to be active only at this + /* + * Generally device is expected to be active only at this * time, so get/put should be really quick. */ intel_runtime_pm_get(dev_priv); @@ -482,20 +576,26 @@ static void guc_flush_logs(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); - if (!USES_GUC_SUBMISSION(dev_priv) || - (i915_modparams.guc_log_level < 0)) + if (!USES_GUC_SUBMISSION(dev_priv) || !i915_modparams.guc_log_level) return; /* First disable the interrupts, will be re-enabled afterwards */ + mutex_lock(&dev_priv->drm.struct_mutex); + intel_runtime_pm_get(dev_priv); gen9_disable_guc_interrupts(dev_priv); + intel_runtime_pm_put(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); - /* Before initiating the forceful flush, wait for any pending/ongoing + /* + * Before initiating the forceful flush, wait for any pending/ongoing * flush to complete otherwise forceful flush may not actually happen. */ flush_work(&guc->log.runtime.flush_work); /* Ask GuC to update the log buffer state */ + intel_runtime_pm_get(dev_priv); guc_log_flush(guc); + intel_runtime_pm_put(dev_priv); /* GuC would have updated log buffer by now, so capture it */ guc_log_capture_logs(guc); @@ -506,21 +606,12 @@ int intel_guc_log_create(struct intel_guc *guc) { struct i915_vma *vma; unsigned long offset; u32 flags; - u32 size; int ret; GEM_BUG_ON(guc->log.vma); - if (i915_modparams.guc_log_level > GUC_LOG_VERBOSITY_MAX) - i915_modparams.guc_log_level = GUC_LOG_VERBOSITY_MAX; - - /* The first page is to save log buffer state. Allocate one - * extra page for others in case for overlap */ - size = (1 + GUC_LOG_DPC_PAGES + 1 + - GUC_LOG_ISR_PAGES + 1 + - GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT; - - /* We require SSE 4.1 for fast reads from the GuC log buffer and + /* + * We require SSE 4.1 for fast reads from the GuC log buffer and * it should be present on the chipsets supporting GuC based * submissions.
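Note how the error paths now park guc_log_level at 0 rather than -1: the series moves the parameter from "-1 disabled, 0..3 verbosity" to "0 disabled, N means verbosity N-1". A self-contained check of the new mapping, mirroring get_log_verbosity_flags() earlier in the diff:

#include <assert.h>
#include <stdbool.h>

static unsigned int param_to_verbosity(unsigned int guc_log_level,
				       bool *enabled)
{
	*enabled = guc_log_level > 0;
	return *enabled ? guc_log_level - 1 : 0;
}

int main(void)
{
	bool en;

	assert(param_to_verbosity(0, &en) == 0 && !en);	/* logging off */
	assert(param_to_verbosity(1, &en) == 0 && en);	/* min verbosity */
	assert(param_to_verbosity(4, &en) == 3 && en);	/* max verbosity */
	return 0;
}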
*/ @@ -529,7 +620,7 @@ int intel_guc_log_create(struct intel_guc *guc) goto err; } - vma = intel_guc_allocate_vma(guc, size); + vma = intel_guc_allocate_vma(guc, GUC_LOG_SIZE); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err; @@ -537,7 +628,7 @@ int intel_guc_log_create(struct intel_guc *guc) guc->log.vma = vma; - if (i915_modparams.guc_log_level >= 0) { + if (i915_modparams.guc_log_level) { ret = guc_log_runtime_create(guc); if (ret < 0) goto err_vma; @@ -558,7 +649,7 @@ err_vma: i915_vma_unpin_and_release(&guc->log.vma); err: /* logging will be off */ - i915_modparams.guc_log_level = -1; + i915_modparams.guc_log_level = 0; return ret; } @@ -568,35 +659,46 @@ void intel_guc_log_destroy(struct intel_guc *guc) i915_vma_unpin_and_release(&guc->log.vma); } -int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) +int intel_guc_log_control(struct intel_guc *guc, u64 control_val) { - struct intel_guc *guc = &dev_priv->guc; - - union guc_log_control log_param; + struct drm_i915_private *dev_priv = guc_to_i915(guc); + bool enable_logging = control_val > 0; + u32 verbosity; int ret; - log_param.value = control_val; + if (!guc->log.vma) + return -ENODEV; - if (log_param.verbosity < GUC_LOG_VERBOSITY_MIN || - log_param.verbosity > GUC_LOG_VERBOSITY_MAX) + BUILD_BUG_ON(GUC_LOG_VERBOSITY_MIN); + if (control_val > 1 + GUC_LOG_VERBOSITY_MAX) return -EINVAL; /* This combination doesn't make sense & won't have any effect */ - if (!log_param.logging_enabled && (i915_modparams.guc_log_level < 0)) + if (!enable_logging && !i915_modparams.guc_log_level) return 0; - ret = guc_log_control(guc, log_param.value); + verbosity = enable_logging ? control_val - 1 : 0; + + ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); + if (ret) + return ret; + intel_runtime_pm_get(dev_priv); + ret = guc_log_control(guc, enable_logging, verbosity); + intel_runtime_pm_put(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + if (ret < 0) { DRM_DEBUG_DRIVER("guc_logging_control action failed %d\n", ret); return ret; } - if (log_param.logging_enabled) { - i915_modparams.guc_log_level = log_param.verbosity; + if (enable_logging) { + i915_modparams.guc_log_level = 1 + verbosity; - /* If log_level was set as -1 at boot time, then the relay channel file - * wouldn't have been created by now and interrupts also would not have - * been enabled. Try again now, just in case. + /* + * If log was disabled at boot time, then the relay channel file + * wouldn't have been created by now and interrupts also would + * not have been enabled. Try again now, just in case. */ ret = guc_log_late_setup(guc); if (ret < 0) { @@ -605,9 +707,14 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) } /* GuC logging is currently the only user of Guc2Host interrupts */ + mutex_lock(&dev_priv->drm.struct_mutex); + intel_runtime_pm_get(dev_priv); gen9_enable_guc_interrupts(dev_priv); + intel_runtime_pm_put(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); } else { - /* Once logging is disabled, GuC won't generate logs & send an + /* + * Once logging is disabled, GuC won't generate logs & send an * interrupt. But there could be some data in the log buffer * which is yet to be captured. So request GuC to update the log * buffer state and then collect the left over logs. 
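The GUC_LOG_SIZE macro hoisted into intel_guc_log.h in the hunk just below keeps the old sizing: one state page, then each log region padded with a spare page for overlap. Worked out here with illustrative page counts (the real constants live in the GuC firmware interface header):

#include <stdio.h>

#define PAGE_SHIFT		12
#define GUC_LOG_DPC_PAGES	7	/* illustrative */
#define GUC_LOG_ISR_PAGES	7	/* illustrative */
#define GUC_LOG_CRASH_PAGES	1	/* illustrative */

int main(void)
{
	/* 1 state page + (DPC+1) + (ISR+1) + (CRASH+1) pages */
	unsigned long size = (1 + GUC_LOG_DPC_PAGES + 1 +
			      GUC_LOG_ISR_PAGES + 1 +
			      GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;

	/* 19 pages, 77824 bytes with these counts */
	printf("%lu pages, %lu bytes\n", size >> PAGE_SHIFT, size);
	return 0;
}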
@@ -615,7 +722,7 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) guc_flush_logs(guc); /* As logging is disabled, update log level to reflect that */ - i915_modparams.guc_log_level = -1; + i915_modparams.guc_log_level = 0; } return ret; @@ -623,23 +730,27 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) void i915_guc_log_register(struct drm_i915_private *dev_priv) { - if (!USES_GUC_SUBMISSION(dev_priv) || - (i915_modparams.guc_log_level < 0)) + if (!USES_GUC_SUBMISSION(dev_priv) || !i915_modparams.guc_log_level) return; - mutex_lock(&dev_priv->drm.struct_mutex); guc_log_late_setup(&dev_priv->guc); - mutex_unlock(&dev_priv->drm.struct_mutex); } void i915_guc_log_unregister(struct drm_i915_private *dev_priv) { + struct intel_guc *guc = &dev_priv->guc; + if (!USES_GUC_SUBMISSION(dev_priv)) return; mutex_lock(&dev_priv->drm.struct_mutex); /* GuC logging is currently the only user of Guc2Host interrupts */ + intel_runtime_pm_get(dev_priv); gen9_disable_guc_interrupts(dev_priv); - guc_log_runtime_destroy(&dev_priv->guc); + intel_runtime_pm_put(dev_priv); + + guc_log_runtime_destroy(guc); mutex_unlock(&dev_priv->drm.struct_mutex); + + intel_guc_log_relay_destroy(guc); } diff --git a/drivers/gpu/drm/i915/intel_guc_log.h b/drivers/gpu/drm/i915/intel_guc_log.h index f512cf79339b..dab0e949567a 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.h +++ b/drivers/gpu/drm/i915/intel_guc_log.h @@ -32,6 +32,13 @@ struct drm_i915_private; struct intel_guc; +/* + * The first page is to save log buffer state. Allocate one + * extra page for others in case for overlap + */ +#define GUC_LOG_SIZE ((1 + GUC_LOG_DPC_PAGES + 1 + GUC_LOG_ISR_PAGES + \ + 1 + GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT) + struct intel_guc_log { u32 flags; struct i915_vma *vma; @@ -41,6 +48,8 @@ struct intel_guc_log { struct workqueue_struct *flush_wq; struct work_struct flush_work; struct rchan *relay_chan; + /* To serialize the access to relay_chan */ + struct mutex relay_lock; } runtime; /* logging related stats */ u32 capture_miss_count; @@ -52,7 +61,10 @@ struct intel_guc_log { int intel_guc_log_create(struct intel_guc *guc); void intel_guc_log_destroy(struct intel_guc *guc); -int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); +void intel_guc_log_init_early(struct intel_guc *guc); +int intel_guc_log_relay_create(struct intel_guc *guc); +void intel_guc_log_relay_destroy(struct intel_guc *guc); +int intel_guc_log_control(struct intel_guc *guc, u64 control_val); void i915_guc_log_register(struct drm_i915_private *dev_priv); void i915_guc_log_unregister(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 4d2409466a3a..8a8ad2fe158d 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -26,8 +26,14 @@ #include <trace/events/dma_fence.h> #include "intel_guc_submission.h" +#include "intel_lrc_reg.h" #include "i915_drv.h" +#define GUC_PREEMPT_FINISHED 0x1 +#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8 +#define GUC_PREEMPT_BREADCRUMB_BYTES \ + (sizeof(u32) * GUC_PREEMPT_BREADCRUMB_DWORDS) + /** * DOC: GuC-based command submission * @@ -73,15 +79,13 @@ * ELSP context descriptor dword into Work Item. * See guc_add_request() * - * ADS: - * The Additional Data Struct (ADS) has pointers for different buffers used by - * the GuC. 
One single gem object contains the ADS struct itself (guc_ads), the - * scheduling policies (guc_policies), a structure describing a collection of - * register sets (guc_mmio_reg_state) and some extra pages for the GuC to save - * its internal state for sleep. - * */ +static inline struct i915_priolist *to_priolist(struct rb_node *rb) +{ + return rb_entry(rb, struct i915_priolist, node); +} + static inline bool is_high_priority(struct intel_guc_client *client) { return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH || @@ -503,8 +507,7 @@ static void guc_ring_doorbell(struct intel_guc_client *client) GEM_BUG_ON(db->db_status != GUC_DOORBELL_ENABLED); } -static void guc_add_request(struct intel_guc *guc, - struct drm_i915_gem_request *rq) +static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) { struct intel_guc_client *client = guc->execbuf_client; struct intel_engine_cs *engine = rq->engine; @@ -538,8 +541,6 @@ static void flush_ggtt_writes(struct i915_vma *vma) POSTING_READ_FW(GUC_STATUS); } -#define GUC_PREEMPT_FINISHED 0x1 -#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8 static void inject_preempt_context(struct work_struct *work) { struct guc_preempt_work *preempt_work = @@ -549,37 +550,17 @@ static void inject_preempt_context(struct work_struct *work) preempt_work[engine->id]); struct intel_guc_client *client = guc->preempt_client; struct guc_stage_desc *stage_desc = __get_stage_desc(client); - struct intel_ring *ring = client->owner->engine[engine->id].ring; u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(client->owner, engine)); - u32 *cs = ring->vaddr + ring->tail; u32 data[7]; - if (engine->id == RCS) { - cs = gen8_emit_ggtt_write_rcs(cs, GUC_PREEMPT_FINISHED, - intel_hws_preempt_done_address(engine)); - } else { - cs = gen8_emit_ggtt_write(cs, GUC_PREEMPT_FINISHED, - intel_hws_preempt_done_address(engine)); - *cs++ = MI_NOOP; - *cs++ = MI_NOOP; - } - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; - - GEM_BUG_ON(!IS_ALIGNED(ring->size, - GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32))); - GEM_BUG_ON((void *)cs - (ring->vaddr + ring->tail) != - GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32)); - - ring->tail += GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32); - ring->tail &= (ring->size - 1); - - flush_ggtt_writes(ring->vma); - + /* + * The ring contains commands to write GUC_PREEMPT_FINISHED into HWSP. + * See guc_fill_preempt_context(). 
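A quick sanity check of the preempt breadcrumb constants defined earlier in this file: the GuC work item appended just below expresses the ring tail in qwords, so the fixed 8-dword breadcrumb is reported as a tail of 4.

#include <assert.h>
#include <stdint.h>

#define GUC_PREEMPT_BREADCRUMB_DWORDS	0x8
#define GUC_PREEMPT_BREADCRUMB_BYTES	\
	(sizeof(uint32_t) * GUC_PREEMPT_BREADCRUMB_DWORDS)

int main(void)
{
	assert(GUC_PREEMPT_BREADCRUMB_BYTES == 32);
	assert(GUC_PREEMPT_BREADCRUMB_BYTES / sizeof(uint64_t) == 4);
	return 0;
}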
+ */ spin_lock_irq(&client->wq_lock); guc_wq_item_append(client, engine->guc_id, ctx_desc, - ring->tail / sizeof(u64), 0); + GUC_PREEMPT_BREADCRUMB_BYTES / sizeof(u64), 0); spin_unlock_irq(&client->wq_lock); /* @@ -655,7 +636,7 @@ static void guc_submit(struct intel_engine_cs *engine) unsigned int n; for (n = 0; n < execlists_num_ports(execlists); n++) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; unsigned int count; rq = port_unpack(&port[n], &count); @@ -669,19 +650,18 @@ static void guc_submit(struct intel_engine_cs *engine) } } -static void port_assign(struct execlist_port *port, - struct drm_i915_gem_request *rq) +static void port_assign(struct execlist_port *port, struct i915_request *rq) { GEM_BUG_ON(port_isset(port)); - port_set(port, i915_gem_request_get(rq)); + port_set(port, i915_request_get(rq)); } static void guc_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; - struct drm_i915_gem_request *last = NULL; + struct i915_request *last = NULL; const struct execlist_port * const last_port = &execlists->port[execlists->port_mask]; bool submit = false; @@ -691,15 +671,12 @@ static void guc_dequeue(struct intel_engine_cs *engine) rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); - if (!rb) - goto unlock; - if (port_isset(port)) { - if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) { + if (engine->i915->preempt_context) { struct guc_preempt_work *preempt_work = &engine->i915->guc.preempt_work[engine->id]; - if (rb_entry(rb, struct i915_priolist, node)->priority > + if (execlists->queue_priority > max(port_request(port)->priotree.priority, 0)) { execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); @@ -715,9 +692,9 @@ static void guc_dequeue(struct intel_engine_cs *engine) } GEM_BUG_ON(port_isset(port)); - do { - struct i915_priolist *p = rb_entry(rb, typeof(*p), node); - struct drm_i915_gem_request *rq, *rn; + while (rb) { + struct i915_priolist *p = to_priolist(rb); + struct i915_request *rq, *rn; list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { if (last && rq->ctx != last->ctx) { @@ -734,9 +711,8 @@ static void guc_dequeue(struct intel_engine_cs *engine) INIT_LIST_HEAD(&rq->priotree.link); - __i915_gem_request_submit(rq); - trace_i915_gem_request_in(rq, - port_index(port, execlists)); + __i915_request_submit(rq); + trace_i915_request_in(rq, port_index(port, execlists)); last = rq; submit = true; } @@ -746,14 +722,21 @@ static void guc_dequeue(struct intel_engine_cs *engine) INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); - } while (rb); + } done: + execlists->queue_priority = rb ? 
to_priolist(rb)->priority : INT_MIN; execlists->first = rb; if (submit) { port_assign(port, last); execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); guc_submit(engine); } + + /* We must always keep the beast fed if we have work piled up */ + GEM_BUG_ON(port_isset(execlists->port) && + !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); + GEM_BUG_ON(execlists->first && !port_isset(execlists->port)); + unlock: spin_unlock_irq(&engine->timeline->lock); } @@ -763,12 +746,12 @@ static void guc_submission_tasklet(unsigned long data) struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; - struct drm_i915_gem_request *rq; + struct i915_request *rq; rq = port_request(&port[0]); - while (rq && i915_gem_request_completed(rq)) { - trace_i915_gem_request_out(rq); - i915_gem_request_put(rq); + while (rq && i915_request_completed(rq)) { + trace_i915_request_out(rq); + i915_request_put(rq); execlists_port_complete(execlists, port); @@ -839,10 +822,12 @@ static int guc_clients_doorbell_init(struct intel_guc *guc) if (ret) return ret; - ret = create_doorbell(guc->preempt_client); - if (ret) { - destroy_doorbell(guc->execbuf_client); - return ret; + if (guc->preempt_client) { + ret = create_doorbell(guc->preempt_client); + if (ret) { + destroy_doorbell(guc->execbuf_client); + return ret; + } } return 0; @@ -855,8 +840,11 @@ static void guc_clients_doorbell_fini(struct intel_guc *guc) * Instead of trying (in vain) to communicate with it, let's just * cleanup the doorbell HW and our internal state. */ - __destroy_doorbell(guc->preempt_client); - __update_doorbell_desc(guc->preempt_client, GUC_DOORBELL_INVALID); + if (guc->preempt_client) { + __destroy_doorbell(guc->preempt_client); + __update_doorbell_desc(guc->preempt_client, + GUC_DOORBELL_INVALID); + } __destroy_doorbell(guc->execbuf_client); __update_doorbell_desc(guc->execbuf_client, GUC_DOORBELL_INVALID); } @@ -968,6 +956,62 @@ static void guc_client_free(struct intel_guc_client *client) kfree(client); } +static inline bool ctx_save_restore_disabled(struct intel_context *ce) +{ + u32 sr = ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1]; + +#define SR_DISABLED \ + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | \ + CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT) + + return (sr & SR_DISABLED) == SR_DISABLED; + +#undef SR_DISABLED +} + +static void guc_fill_preempt_context(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_guc_client *client = guc->preempt_client; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, dev_priv, id) { + struct intel_context *ce = &client->owner->engine[id]; + u32 addr = intel_hws_preempt_done_address(engine); + u32 *cs; + + GEM_BUG_ON(!ce->pin_count); + + /* + * We rely on this context image *not* being saved after + * preemption. This ensures that the RING_HEAD / RING_TAIL + * remain pointing at initial values forever. 
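The invariant the comment above relies on is asserted through ctx_save_restore_disabled() just below: both inhibit bits must be set in the context image's CONTEXT_CONTROL dword, which is stored in masked-write form. A self-contained model of that test, with illustrative bit positions:

#include <assert.h>
#include <stdint.h>

/* Masked-write registers carry a write-enable mask in the high word. */
#define MASKED_BIT_ENABLE(a)	(((a) << 16) | (a))

#define CTX_RESTORE_INHIBIT	(1u << 0)	/* bit positions illustrative */
#define CTX_SAVE_INHIBIT	(1u << 1)

static int save_restore_disabled(uint32_t ctx_control)
{
	uint32_t sr = MASKED_BIT_ENABLE(CTX_RESTORE_INHIBIT |
					CTX_SAVE_INHIBIT);

	return (ctx_control & sr) == sr;
}

int main(void)
{
	uint32_t both = MASKED_BIT_ENABLE(CTX_RESTORE_INHIBIT |
					  CTX_SAVE_INHIBIT);

	assert(save_restore_disabled(both));
	assert(!save_restore_disabled(MASKED_BIT_ENABLE(CTX_SAVE_INHIBIT)));
	return 0;
}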
+ */ + GEM_BUG_ON(!ctx_save_restore_disabled(ce)); + + cs = ce->ring->vaddr; + if (id == RCS) { + cs = gen8_emit_ggtt_write_rcs(cs, + GUC_PREEMPT_FINISHED, + addr); + } else { + cs = gen8_emit_ggtt_write(cs, + GUC_PREEMPT_FINISHED, + addr); + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + } + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + GEM_BUG_ON((void *)cs - ce->ring->vaddr != + GUC_PREEMPT_BREADCRUMB_BYTES); + + flush_ggtt_writes(ce->ring->vma); + } +} + static int guc_clients_create(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); @@ -986,17 +1030,21 @@ static int guc_clients_create(struct intel_guc *guc) } guc->execbuf_client = client; - client = guc_client_alloc(dev_priv, - INTEL_INFO(dev_priv)->ring_mask, - GUC_CLIENT_PRIORITY_KMD_HIGH, - dev_priv->preempt_context); - if (IS_ERR(client)) { - DRM_ERROR("Failed to create GuC client for preemption!\n"); - guc_client_free(guc->execbuf_client); - guc->execbuf_client = NULL; - return PTR_ERR(client); + if (dev_priv->preempt_context) { + client = guc_client_alloc(dev_priv, + INTEL_INFO(dev_priv)->ring_mask, + GUC_CLIENT_PRIORITY_KMD_HIGH, + dev_priv->preempt_context); + if (IS_ERR(client)) { + DRM_ERROR("Failed to create GuC client for preemption!\n"); + guc_client_free(guc->execbuf_client); + guc->execbuf_client = NULL; + return PTR_ERR(client); + } + guc->preempt_client = client; + + guc_fill_preempt_context(guc); } - guc->preempt_client = client; return 0; } @@ -1005,122 +1053,12 @@ static void guc_clients_destroy(struct intel_guc *guc) { struct intel_guc_client *client; - client = fetch_and_zero(&guc->execbuf_client); - guc_client_free(client); - client = fetch_and_zero(&guc->preempt_client); - guc_client_free(client); -} - -static void guc_policy_init(struct guc_policy *policy) -{ - policy->execution_quantum = POLICY_DEFAULT_EXECUTION_QUANTUM_US; - policy->preemption_time = POLICY_DEFAULT_PREEMPTION_TIME_US; - policy->fault_time = POLICY_DEFAULT_FAULT_TIME_US; - policy->policy_flags = 0; -} - -static void guc_policies_init(struct guc_policies *policies) -{ - struct guc_policy *policy; - u32 p, i; - - policies->dpc_promote_time = POLICY_DEFAULT_DPC_PROMOTE_TIME_US; - policies->max_num_work_items = POLICY_MAX_NUM_WI; - - for (p = 0; p < GUC_CLIENT_PRIORITY_NUM; p++) { - for (i = GUC_RENDER_ENGINE; i < GUC_MAX_ENGINES_NUM; i++) { - policy = &policies->policy[p][i]; - - guc_policy_init(policy); - } - } - - policies->is_valid = 1; -} - -/* - * The first 80 dwords of the register state context, containing the - * execlists and ppgtt registers. 
- */ -#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32)) - -static int guc_ads_create(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct i915_vma *vma; - struct page *page; - /* The ads obj includes the struct itself and buffers passed to GuC */ - struct { - struct guc_ads ads; - struct guc_policies policies; - struct guc_mmio_reg_state reg_state; - u8 reg_state_buffer[GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE]; - } __packed *blob; - struct intel_engine_cs *engine; - enum intel_engine_id id; - const u32 skipped_offset = LRC_HEADER_PAGES * PAGE_SIZE; - const u32 skipped_size = LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE; - u32 base; - - GEM_BUG_ON(guc->ads_vma); - - vma = intel_guc_allocate_vma(guc, PAGE_ALIGN(sizeof(*blob))); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - guc->ads_vma = vma; + if (client) + guc_client_free(client); - page = i915_vma_first_page(vma); - blob = kmap(page); - - /* GuC scheduling policies */ - guc_policies_init(&blob->policies); - - /* MMIO reg state */ - for_each_engine(engine, dev_priv, id) { - blob->reg_state.white_list[engine->guc_id].mmio_start = - engine->mmio_base + GUC_MMIO_WHITE_LIST_START; - - /* Nothing to be saved or restored for now. */ - blob->reg_state.white_list[engine->guc_id].count = 0; - } - - /* - * The GuC requires a "Golden Context" when it reinitialises - * engines after a reset. Here we use the Render ring default - * context, which must already exist and be pinned in the GGTT, - * so its address won't change after we've told the GuC where - * to find it. Note that we have to skip our header (1 page), - * because our GuC shared data is there. - */ - blob->ads.golden_context_lrca = - guc_ggtt_offset(dev_priv->kernel_context->engine[RCS].state) + - skipped_offset; - - /* - * The GuC expects us to exclude the portion of the context image that - * it skips from the size it is to read. It starts reading from after - * the execlist context (so skipping the first page [PPHWSP] and 80 - * dwords). Weird guc is weird. 
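The skipped-size arithmetic, shared by this removed copy and the new intel_guc_ads.c earlier in the diff, is easy to sanity-check: the GuC starts reading after the PPHWSP page plus the first 80 dwords of the execlist context. With a one-page PPHWSP and an illustrative 20-page context image:

#include <stdio.h>

#define PAGE_SIZE		4096u
#define LRC_PPHWSP_SZ		1u	/* pages, as used in the blob setup */
#define LR_HW_CONTEXT_SIZE	(80 * sizeof(unsigned int))

int main(void)
{
	unsigned int skipped = LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE;
	unsigned int context_size = 20 * PAGE_SIZE;	/* illustrative */

	/* 4096 + 320 = 4416 bytes excluded from what the GuC reads. */
	printf("eng_state_size = %u\n", context_size - skipped);
	return 0;
}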
- */ - for_each_engine(engine, dev_priv, id) - blob->ads.eng_state_size[engine->guc_id] = - engine->context_size - skipped_size; - - base = guc_ggtt_offset(vma); - blob->ads.scheduler_policies = base + ptr_offset(blob, policies); - blob->ads.reg_state_buffer = base + ptr_offset(blob, reg_state_buffer); - blob->ads.reg_state_addr = base + ptr_offset(blob, reg_state); - - kunmap(page); - - return 0; -} - -static void guc_ads_destroy(struct intel_guc *guc) -{ - i915_vma_unpin_and_release(&guc->ads_vma); + client = fetch_and_zero(&guc->execbuf_client); + guc_client_free(client); } /* @@ -1146,15 +1084,6 @@ int intel_guc_submission_init(struct intel_guc *guc) */ GEM_BUG_ON(!guc->stage_desc_pool); - ret = intel_guc_log_create(guc); - if (ret < 0) - goto err_stage_desc_pool; - - ret = guc_ads_create(guc); - if (ret < 0) - goto err_log; - GEM_BUG_ON(!guc->ads_vma); - WARN_ON(!guc_verify_doorbells(guc)); ret = guc_clients_create(guc); if (ret) @@ -1167,11 +1096,6 @@ int intel_guc_submission_init(struct intel_guc *guc) return 0; -err_log: - intel_guc_log_destroy(guc); -err_stage_desc_pool: - guc_stage_desc_pool_destroy(guc); - return ret; } void intel_guc_submission_fini(struct intel_guc *guc) @@ -1186,8 +1110,6 @@ void intel_guc_submission_fini(struct intel_guc *guc) guc_clients_destroy(guc); WARN_ON(!guc_verify_doorbells(guc)); - guc_ads_destroy(guc); - intel_guc_log_destroy(guc); guc_stage_desc_pool_destroy(guc); } @@ -1294,7 +1216,8 @@ int intel_guc_submission_enable(struct intel_guc *guc) GEM_BUG_ON(!guc->execbuf_client); guc_reset_wq(guc->execbuf_client); - guc_reset_wq(guc->preempt_client); + if (guc->preempt_client) + guc_reset_wq(guc->preempt_client); err = intel_guc_sample_forcewake(guc); if (err) diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 53747318f4a7..c8ea510629fa 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -359,7 +359,7 @@ static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, case ENGINE_DEAD: if (drm_debug & DRM_UT_DRIVER) { struct drm_printer p = drm_debug_printer("hangcheck"); - intel_engine_dump(engine, &p, "%s", engine->name); + intel_engine_dump(engine, &p, "%s\n", engine->name); } break; diff --git a/drivers/gpu/drm/i915/intel_hdcp.c b/drivers/gpu/drm/i915/intel_hdcp.c new file mode 100644 index 000000000000..14ca5d3057a7 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_hdcp.c @@ -0,0 +1,807 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2017 Google, Inc. 
+ * + * Authors: + * Sean Paul <seanpaul@chromium.org> + */ + +#include <drm/drmP.h> +#include <drm/drm_hdcp.h> +#include <linux/i2c.h> +#include <linux/random.h> + +#include "intel_drv.h" +#include "i915_reg.h" + +#define KEY_LOAD_TRIES 5 + +static int intel_hdcp_poll_ksv_fifo(struct intel_digital_port *intel_dig_port, + const struct intel_hdcp_shim *shim) +{ + int ret, read_ret; + bool ksv_ready; + + /* Poll for ksv list ready (spec says max time allowed is 5s) */ + ret = __wait_for(read_ret = shim->read_ksv_ready(intel_dig_port, + &ksv_ready), + read_ret || ksv_ready, 5 * 1000 * 1000, 1000, + 100 * 1000); + if (ret) + return ret; + if (read_ret) + return read_ret; + if (!ksv_ready) + return -ETIMEDOUT; + + return 0; +} + +static void intel_hdcp_clear_keys(struct drm_i915_private *dev_priv) +{ + I915_WRITE(HDCP_KEY_CONF, HDCP_CLEAR_KEYS_TRIGGER); + I915_WRITE(HDCP_KEY_STATUS, HDCP_KEY_LOAD_DONE | HDCP_KEY_LOAD_STATUS | + HDCP_FUSE_IN_PROGRESS | HDCP_FUSE_ERROR | HDCP_FUSE_DONE); +} + +static int intel_hdcp_load_keys(struct drm_i915_private *dev_priv) +{ + int ret; + u32 val; + + val = I915_READ(HDCP_KEY_STATUS); + if ((val & HDCP_KEY_LOAD_DONE) && (val & HDCP_KEY_LOAD_STATUS)) + return 0; + + /* + * On HSW and BDW HW loads the HDCP1.4 Key when Display comes + * out of reset. So if Key is not already loaded, its an error state. + */ + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + if (!(I915_READ(HDCP_KEY_STATUS) & HDCP_KEY_LOAD_DONE)) + return -ENXIO; + + /* + * Initiate loading the HDCP key from fuses. + * + * BXT+ platforms, HDCP key needs to be loaded by SW. Only SKL and KBL + * differ in the key load trigger process from other platforms. + */ + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + mutex_lock(&dev_priv->pcu_lock); + ret = sandybridge_pcode_write(dev_priv, + SKL_PCODE_LOAD_HDCP_KEYS, 1); + mutex_unlock(&dev_priv->pcu_lock); + if (ret) { + DRM_ERROR("Failed to initiate HDCP key load (%d)\n", + ret); + return ret; + } + } else { + I915_WRITE(HDCP_KEY_CONF, HDCP_KEY_LOAD_TRIGGER); + } + + /* Wait for the keys to load (500us) */ + ret = __intel_wait_for_register(dev_priv, HDCP_KEY_STATUS, + HDCP_KEY_LOAD_DONE, HDCP_KEY_LOAD_DONE, + 10, 1, &val); + if (ret) + return ret; + else if (!(val & HDCP_KEY_LOAD_STATUS)) + return -ENXIO; + + /* Send Aksv over to PCH display for use in authentication */ + I915_WRITE(HDCP_KEY_CONF, HDCP_AKSV_SEND_TRIGGER); + + return 0; +} + +/* Returns updated SHA-1 index */ +static int intel_write_sha_text(struct drm_i915_private *dev_priv, u32 sha_text) +{ + I915_WRITE(HDCP_SHA_TEXT, sha_text); + if (intel_wait_for_register(dev_priv, HDCP_REP_CTL, + HDCP_SHA1_READY, HDCP_SHA1_READY, 1)) { + DRM_ERROR("Timed out waiting for SHA1 ready\n"); + return -ETIMEDOUT; + } + return 0; +} + +static +u32 intel_hdcp_get_repeater_ctl(struct intel_digital_port *intel_dig_port) +{ + enum port port = intel_dig_port->base.port; + switch (port) { + case PORT_A: + return HDCP_DDIA_REP_PRESENT | HDCP_DDIA_SHA1_M0; + case PORT_B: + return HDCP_DDIB_REP_PRESENT | HDCP_DDIB_SHA1_M0; + case PORT_C: + return HDCP_DDIC_REP_PRESENT | HDCP_DDIC_SHA1_M0; + case PORT_D: + return HDCP_DDID_REP_PRESENT | HDCP_DDID_SHA1_M0; + case PORT_E: + return HDCP_DDIE_REP_PRESENT | HDCP_DDIE_SHA1_M0; + default: + break; + } + DRM_ERROR("Unknown port %d\n", port); + return -EINVAL; +} + +static +bool intel_hdcp_is_ksv_valid(u8 *ksv) +{ + int i, ones = 0; + /* KSV has 20 1's and 20 0's */ + for (i = 0; i < DRM_HDCP_KSV_LEN; i++) + ones += hweight8(ksv[i]); + if (ones != 20) + return 
false; + return true; +} + +/* Implements Part 2 of the HDCP authorization procedure */ +static +int intel_hdcp_auth_downstream(struct intel_digital_port *intel_dig_port, + const struct intel_hdcp_shim *shim) +{ + struct drm_i915_private *dev_priv; + u32 vprime, sha_text, sha_leftovers, rep_ctl; + u8 bstatus[2], num_downstream, *ksv_fifo; + int ret, i, j, sha_idx; + + dev_priv = intel_dig_port->base.base.dev->dev_private; + + ret = intel_hdcp_poll_ksv_fifo(intel_dig_port, shim); + if (ret) { + DRM_ERROR("KSV list failed to become ready (%d)\n", ret); + return ret; + } + + ret = shim->read_bstatus(intel_dig_port, bstatus); + if (ret) + return ret; + + if (DRM_HDCP_MAX_DEVICE_EXCEEDED(bstatus[0]) || + DRM_HDCP_MAX_CASCADE_EXCEEDED(bstatus[1])) { + DRM_ERROR("Max Topology Limit Exceeded\n"); + return -EPERM; + } + + /* + * When repeater reports 0 device count, HDCP1.4 spec allows disabling + * the HDCP encryption. That implies that repeater can't have its own + * display. As there is no consumption of encrypted content in the + * repeater with 0 downstream devices, we are failing the + * authentication. + */ + num_downstream = DRM_HDCP_NUM_DOWNSTREAM(bstatus[0]); + if (num_downstream == 0) + return -EINVAL; + + ksv_fifo = kzalloc(num_downstream * DRM_HDCP_KSV_LEN, GFP_KERNEL); + if (!ksv_fifo) + return -ENOMEM; + + ret = shim->read_ksv_fifo(intel_dig_port, num_downstream, ksv_fifo); + if (ret) + return ret; + + /* Process V' values from the receiver */ + for (i = 0; i < DRM_HDCP_V_PRIME_NUM_PARTS; i++) { + ret = shim->read_v_prime_part(intel_dig_port, i, &vprime); + if (ret) + return ret; + I915_WRITE(HDCP_SHA_V_PRIME(i), vprime); + } + + /* + * We need to write the concatenation of all device KSVs, BINFO (DP) || + * BSTATUS (HDMI), and M0 (which is added via HDCP_REP_CTL). This byte + * stream is written via the HDCP_SHA_TEXT register in 32-bit + * increments. Every 64 bytes, we need to write HDCP_REP_CTL again. This + * index will keep track of our progress through the 64 bytes as well as + * helping us work the 40-bit KSVs through our 32-bit register. + * + * NOTE: data passed via HDCP_SHA_TEXT should be big-endian + */ + sha_idx = 0; + sha_text = 0; + sha_leftovers = 0; + rep_ctl = intel_hdcp_get_repeater_ctl(intel_dig_port); + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_32); + for (i = 0; i < num_downstream; i++) { + unsigned int sha_empty; + u8 *ksv = &ksv_fifo[i * DRM_HDCP_KSV_LEN]; + + /* Fill up the empty slots in sha_text and write it out */ + sha_empty = sizeof(sha_text) - sha_leftovers; + for (j = 0; j < sha_empty; j++) + sha_text |= ksv[j] << ((sizeof(sha_text) - j - 1) * 8); + + ret = intel_write_sha_text(dev_priv, sha_text); + if (ret < 0) + return ret; + + /* Programming guide writes this every 64 bytes */ + sha_idx += sizeof(sha_text); + if (!(sha_idx % 64)) + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_32); + + /* Store the leftover bytes from the ksv in sha_text */ + sha_leftovers = DRM_HDCP_KSV_LEN - sha_empty; + sha_text = 0; + for (j = 0; j < sha_leftovers; j++) + sha_text |= ksv[sha_empty + j] << + ((sizeof(sha_text) - j - 1) * 8); + + /* + * If we still have room in sha_text for more data, continue. + * Otherwise, write it out immediately. + */ + if (sizeof(sha_text) > sha_leftovers) + continue; + + ret = intel_write_sha_text(dev_priv, sha_text); + if (ret < 0) + return ret; + sha_leftovers = 0; + sha_text = 0; + sha_idx += sizeof(sha_text); + } + + /* + * We need to write BINFO/BSTATUS, and M0 now. 
Depending on how many + * bytes are leftover from the last ksv, we might be able to fit them + * all in sha_text (first 2 cases), or we might need to split them up + * into 2 writes (last 2 cases). + */ + if (sha_leftovers == 0) { + /* Write 16 bits of text, 16 bits of M0 */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_16); + ret = intel_write_sha_text(dev_priv, + bstatus[0] << 8 | bstatus[1]); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + + /* Write 32 bits of M0 */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_0); + ret = intel_write_sha_text(dev_priv, 0); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + + /* Write 16 bits of M0 */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_16); + ret = intel_write_sha_text(dev_priv, 0); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + + } else if (sha_leftovers == 1) { + /* Write 24 bits of text, 8 bits of M0 */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_24); + sha_text |= bstatus[0] << 16 | bstatus[1] << 8; + /* Only 24-bits of data, must be in the LSB */ + sha_text = (sha_text & 0xffffff00) >> 8; + ret = intel_write_sha_text(dev_priv, sha_text); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + + /* Write 32 bits of M0 */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_0); + ret = intel_write_sha_text(dev_priv, 0); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + + /* Write 24 bits of M0 */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_8); + ret = intel_write_sha_text(dev_priv, 0); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + + } else if (sha_leftovers == 2) { + /* Write 32 bits of text */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_32); + sha_text |= bstatus[0] << 24 | bstatus[1] << 16; + ret = intel_write_sha_text(dev_priv, sha_text); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + + /* Write 64 bits of M0 */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_0); + for (i = 0; i < 2; i++) { + ret = intel_write_sha_text(dev_priv, 0); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + } + } else if (sha_leftovers == 3) { + /* Write 32 bits of text */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_32); + sha_text |= bstatus[0] << 24; + ret = intel_write_sha_text(dev_priv, sha_text); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + + /* Write 8 bits of text, 24 bits of M0 */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_8); + ret = intel_write_sha_text(dev_priv, bstatus[1]); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + + /* Write 32 bits of M0 */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_0); + ret = intel_write_sha_text(dev_priv, 0); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + + /* Write 8 bits of M0 */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_24); + ret = intel_write_sha_text(dev_priv, 0); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + } else { + DRM_ERROR("Invalid number of leftovers %d\n", sha_leftovers); + return -EINVAL; + } + + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_32); + /* Fill up to 64-4 bytes with zeros (leave the last write for length) */ + while ((sha_idx % 64) < (64 - sizeof(sha_text))) { + ret = intel_write_sha_text(dev_priv, 0); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + } + + /* + * Last write gets the length of the concatenation in bits. 
That is: + * - 5 bytes per device + * - 10 bytes for BINFO/BSTATUS(2), M0(8) + */ + sha_text = (num_downstream * 5 + 10) * 8; + ret = intel_write_sha_text(dev_priv, sha_text); + if (ret < 0) + return ret; + + /* Tell the HW we're done with the hash and wait for it to ACK */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_COMPLETE_HASH); + if (intel_wait_for_register(dev_priv, HDCP_REP_CTL, + HDCP_SHA1_COMPLETE, + HDCP_SHA1_COMPLETE, 1)) { + DRM_ERROR("Timed out waiting for SHA1 complete\n"); + return -ETIMEDOUT; + } + if (!(I915_READ(HDCP_REP_CTL) & HDCP_SHA1_V_MATCH)) { + DRM_ERROR("SHA-1 mismatch, HDCP failed\n"); + return -ENXIO; + } + + DRM_DEBUG_KMS("HDCP is enabled (%d downstream devices)\n", + num_downstream); + return 0; +} + +/* Implements Part 1 of the HDCP authorization procedure */ +static int intel_hdcp_auth(struct intel_digital_port *intel_dig_port, + const struct intel_hdcp_shim *shim) +{ + struct drm_i915_private *dev_priv; + enum port port; + unsigned long r0_prime_gen_start; + int ret, i, tries = 2; + union { + u32 reg[2]; + u8 shim[DRM_HDCP_AN_LEN]; + } an; + union { + u32 reg[2]; + u8 shim[DRM_HDCP_KSV_LEN]; + } bksv; + union { + u32 reg; + u8 shim[DRM_HDCP_RI_LEN]; + } ri; + bool repeater_present, hdcp_capable; + + dev_priv = intel_dig_port->base.base.dev->dev_private; + + port = intel_dig_port->base.port; + + /* + * Detects whether the display is HDCP capable. Although we check for + * valid Bksv below, the HDCP over DP spec requires that we check + * whether the display supports HDCP before we write An. For HDMI + * displays, this is not necessary. + */ + if (shim->hdcp_capable) { + ret = shim->hdcp_capable(intel_dig_port, &hdcp_capable); + if (ret) + return ret; + if (!hdcp_capable) { + DRM_ERROR("Panel is not HDCP capable\n"); + return -EINVAL; + } + } + + /* Initialize An with 2 random values and acquire it */ + for (i = 0; i < 2; i++) + I915_WRITE(PORT_HDCP_ANINIT(port), get_random_u32()); + I915_WRITE(PORT_HDCP_CONF(port), HDCP_CONF_CAPTURE_AN); + + /* Wait for An to be acquired */ + if (intel_wait_for_register(dev_priv, PORT_HDCP_STATUS(port), + HDCP_STATUS_AN_READY, + HDCP_STATUS_AN_READY, 1)) { + DRM_ERROR("Timed out waiting for An\n"); + return -ETIMEDOUT; + } + + an.reg[0] = I915_READ(PORT_HDCP_ANLO(port)); + an.reg[1] = I915_READ(PORT_HDCP_ANHI(port)); + ret = shim->write_an_aksv(intel_dig_port, an.shim); + if (ret) + return ret; + + r0_prime_gen_start = jiffies; + + memset(&bksv, 0, sizeof(bksv)); + + /* HDCP spec states that we must retry the bksv if it is invalid */ + for (i = 0; i < tries; i++) { + ret = shim->read_bksv(intel_dig_port, bksv.shim); + if (ret) + return ret; + if (intel_hdcp_is_ksv_valid(bksv.shim)) + break; + } + if (i == tries) { + DRM_ERROR("HDCP failed, Bksv is invalid\n"); + return -ENODEV; + } + + I915_WRITE(PORT_HDCP_BKSVLO(port), bksv.reg[0]); + I915_WRITE(PORT_HDCP_BKSVHI(port), bksv.reg[1]); + + ret = shim->repeater_present(intel_dig_port, &repeater_present); + if (ret) + return ret; + if (repeater_present) + I915_WRITE(HDCP_REP_CTL, + intel_hdcp_get_repeater_ctl(intel_dig_port)); + + ret = shim->toggle_signalling(intel_dig_port, true); + if (ret) + return ret; + + I915_WRITE(PORT_HDCP_CONF(port), HDCP_CONF_AUTH_AND_ENC); + + /* Wait for R0 ready */ + if (wait_for(I915_READ(PORT_HDCP_STATUS(port)) & + (HDCP_STATUS_R0_READY | HDCP_STATUS_ENC), 1)) { + DRM_ERROR("Timed out waiting for R0 ready\n"); + return -ETIMEDOUT; + } + + /* + * Wait for R0' to become available. 
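
For reference, the 20-ones rule that the Bksv retry loop above enforces is easy to check in isolation. A minimal user-space sketch, with a GCC/Clang builtin standing in for the kernel's hweight8():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define KSV_LEN 5 /* DRM_HDCP_KSV_LEN */

/* A 40-bit KSV is valid iff it has exactly twenty 1s and twenty 0s. */
static bool ksv_valid(const uint8_t ksv[KSV_LEN])
{
	int i, ones = 0;

	for (i = 0; i < KSV_LEN; i++)
		ones += __builtin_popcount(ksv[i]);
	return ones == 20;
}

int main(void)
{
	const uint8_t good[KSV_LEN] = { 0x0f, 0x0f, 0x0f, 0x0f, 0xf0 };
	const uint8_t bad[KSV_LEN]  = { 0 };

	printf("%d %d\n", ksv_valid(good), ksv_valid(bad)); /* prints: 1 0 */
	return 0;
}
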
The spec says 100ms from Aksv, but
+	 * some monitors can take longer than this. We'll set the timeout at
+	 * 300ms just to be sure.
+	 *
+	 * On DP, there's an R0_READY bit available but no such bit
+	 * exists on HDMI. Since the upper-bound is the same, we'll just do
+	 * the stupid thing instead of polling on one and not the other.
+	 */
+	wait_remaining_ms_from_jiffies(r0_prime_gen_start, 300);
+
+	ri.reg = 0;
+	ret = shim->read_ri_prime(intel_dig_port, ri.shim);
+	if (ret)
+		return ret;
+	I915_WRITE(PORT_HDCP_RPRIME(port), ri.reg);
+
+	/* Wait for Ri prime match */
+	if (wait_for(I915_READ(PORT_HDCP_STATUS(port)) &
+		     (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) {
+		DRM_ERROR("Timed out waiting for Ri prime match (%x)\n",
+			  I915_READ(PORT_HDCP_STATUS(port)));
+		return -ETIMEDOUT;
+	}
+
+	/* Wait for encryption confirmation */
+	if (intel_wait_for_register(dev_priv, PORT_HDCP_STATUS(port),
+				    HDCP_STATUS_ENC, HDCP_STATUS_ENC, 20)) {
+		DRM_ERROR("Timed out waiting for encryption\n");
+		return -ETIMEDOUT;
+	}
+
+	/*
+	 * XXX: If we have MST-connected devices, we need to enable encryption
+	 * on those as well.
+	 */
+
+	if (repeater_present)
+		return intel_hdcp_auth_downstream(intel_dig_port, shim);
+
+	DRM_DEBUG_KMS("HDCP is enabled (no repeater present)\n");
+	return 0;
+}
+
+static
+struct intel_digital_port *conn_to_dig_port(struct intel_connector *connector)
+{
+	return enc_to_dig_port(&intel_attached_encoder(&connector->base)->base);
+}
+
+static int _intel_hdcp_disable(struct intel_connector *connector)
+{
+	struct drm_i915_private *dev_priv = connector->base.dev->dev_private;
+	struct intel_digital_port *intel_dig_port = conn_to_dig_port(connector);
+	enum port port = intel_dig_port->base.port;
+	int ret;
+
+	DRM_DEBUG_KMS("[%s:%d] HDCP is being disabled...\n",
+		      connector->base.name, connector->base.base.id);
+
+	I915_WRITE(PORT_HDCP_CONF(port), 0);
+	if (intel_wait_for_register(dev_priv, PORT_HDCP_STATUS(port), ~0, 0,
+				    20)) {
+		DRM_ERROR("Failed to disable HDCP, timeout clearing status\n");
+		return -ETIMEDOUT;
+	}
+
+	ret = connector->hdcp_shim->toggle_signalling(intel_dig_port, false);
+	if (ret) {
+		DRM_ERROR("Failed to disable HDCP signalling\n");
+		return ret;
+	}
+
+	DRM_DEBUG_KMS("HDCP is disabled\n");
+	return 0;
+}
+
+static int _intel_hdcp_enable(struct intel_connector *connector)
+{
+	struct drm_i915_private *dev_priv = connector->base.dev->dev_private;
+	int i, ret, tries = 3;
+
+	DRM_DEBUG_KMS("[%s:%d] HDCP is being enabled...\n",
+		      connector->base.name, connector->base.base.id);
+
+	if (!(I915_READ(SKL_FUSE_STATUS) & SKL_FUSE_PG_DIST_STATUS(1))) {
+		DRM_ERROR("PG1 is disabled, cannot load keys\n");
+		return -ENXIO;
+	}
+
+	for (i = 0; i < KEY_LOAD_TRIES; i++) {
+		ret = intel_hdcp_load_keys(dev_priv);
+		if (!ret)
+			break;
+		intel_hdcp_clear_keys(dev_priv);
+	}
+	if (ret) {
+		DRM_ERROR("Could not load HDCP keys (%d)\n", ret);
+		return ret;
+	}
+
+	/* In case of authentication failures, the HDCP spec expects reauth. */
+	for (i = 0; i < tries; i++) {
+		ret = intel_hdcp_auth(conn_to_dig_port(connector),
+				      connector->hdcp_shim);
+		if (!ret)
+			return 0;
+
+		DRM_DEBUG_KMS("HDCP Auth failure (%d)\n", ret);
+
+		/* Ensure HDCP encryption and signalling are stopped. 
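
Note that the R0' delay above is measured from the moment Aksv was written (r0_prime_gen_start), not from the poll site, so DDC time already spent reading Bksv counts against the 300ms budget. A user-space sketch of the same remaining-budget pattern, assuming POSIX clock_gettime()/nanosleep():

#include <stdio.h>
#include <time.h>

/* Sleep for whatever is left of budget_ms measured from *start,
 * mirroring wait_remaining_ms_from_jiffies(). */
static void wait_remaining_ms(const struct timespec *start, long budget_ms)
{
	struct timespec now, rem;
	long elapsed_ms;

	clock_gettime(CLOCK_MONOTONIC, &now);
	elapsed_ms = (now.tv_sec - start->tv_sec) * 1000L +
		     (now.tv_nsec - start->tv_nsec) / 1000000L;
	if (elapsed_ms >= budget_ms)
		return; /* budget already burned by earlier work */
	rem.tv_sec = (budget_ms - elapsed_ms) / 1000;
	rem.tv_nsec = ((budget_ms - elapsed_ms) % 1000) * 1000000L;
	nanosleep(&rem, NULL);
}

int main(void)
{
	struct timespec start;

	clock_gettime(CLOCK_MONOTONIC, &start);
	/* ... the Aksv-write-equivalent work would happen here ... */
	wait_remaining_ms(&start, 300);
	puts("300ms budget elapsed");
	return 0;
}
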
*/ + _intel_hdcp_disable(connector); + } + + DRM_ERROR("HDCP authentication failed (%d tries/%d)\n", tries, ret); + return ret; +} + +static void intel_hdcp_check_work(struct work_struct *work) +{ + struct intel_connector *connector = container_of(to_delayed_work(work), + struct intel_connector, + hdcp_check_work); + if (!intel_hdcp_check_link(connector)) + schedule_delayed_work(&connector->hdcp_check_work, + DRM_HDCP_CHECK_PERIOD_MS); +} + +static void intel_hdcp_prop_work(struct work_struct *work) +{ + struct intel_connector *connector = container_of(work, + struct intel_connector, + hdcp_prop_work); + struct drm_device *dev = connector->base.dev; + struct drm_connector_state *state; + + drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); + mutex_lock(&connector->hdcp_mutex); + + /* + * This worker is only used to flip between ENABLED/DESIRED. Either of + * those to UNDESIRED is handled by core. If hdcp_value == UNDESIRED, + * we're running just after hdcp has been disabled, so just exit + */ + if (connector->hdcp_value != DRM_MODE_CONTENT_PROTECTION_UNDESIRED) { + state = connector->base.state; + state->content_protection = connector->hdcp_value; + } + + mutex_unlock(&connector->hdcp_mutex); + drm_modeset_unlock(&dev->mode_config.connection_mutex); +} + +bool is_hdcp_supported(struct drm_i915_private *dev_priv, enum port port) +{ + /* PORT E doesn't have HDCP, and PORT F is disabled */ + return ((INTEL_GEN(dev_priv) >= 8 || IS_HASWELL(dev_priv)) && + !IS_CHERRYVIEW(dev_priv) && port < PORT_E); +} + +int intel_hdcp_init(struct intel_connector *connector, + const struct intel_hdcp_shim *hdcp_shim) +{ + int ret; + + ret = drm_connector_attach_content_protection_property( + &connector->base); + if (ret) + return ret; + + connector->hdcp_shim = hdcp_shim; + mutex_init(&connector->hdcp_mutex); + INIT_DELAYED_WORK(&connector->hdcp_check_work, intel_hdcp_check_work); + INIT_WORK(&connector->hdcp_prop_work, intel_hdcp_prop_work); + return 0; +} + +int intel_hdcp_enable(struct intel_connector *connector) +{ + int ret; + + if (!connector->hdcp_shim) + return -ENOENT; + + mutex_lock(&connector->hdcp_mutex); + + ret = _intel_hdcp_enable(connector); + if (ret) + goto out; + + connector->hdcp_value = DRM_MODE_CONTENT_PROTECTION_ENABLED; + schedule_work(&connector->hdcp_prop_work); + schedule_delayed_work(&connector->hdcp_check_work, + DRM_HDCP_CHECK_PERIOD_MS); +out: + mutex_unlock(&connector->hdcp_mutex); + return ret; +} + +int intel_hdcp_disable(struct intel_connector *connector) +{ + int ret = 0; + + if (!connector->hdcp_shim) + return -ENOENT; + + mutex_lock(&connector->hdcp_mutex); + + if (connector->hdcp_value != DRM_MODE_CONTENT_PROTECTION_UNDESIRED) { + connector->hdcp_value = DRM_MODE_CONTENT_PROTECTION_UNDESIRED; + ret = _intel_hdcp_disable(connector); + } + + mutex_unlock(&connector->hdcp_mutex); + cancel_delayed_work_sync(&connector->hdcp_check_work); + return ret; +} + +void intel_hdcp_atomic_check(struct drm_connector *connector, + struct drm_connector_state *old_state, + struct drm_connector_state *new_state) +{ + uint64_t old_cp = old_state->content_protection; + uint64_t new_cp = new_state->content_protection; + struct drm_crtc_state *crtc_state; + + if (!new_state->crtc) { + /* + * If the connector is being disabled with CP enabled, mark it + * desired so it's re-enabled when the connector is brought back + */ + if (old_cp == DRM_MODE_CONTENT_PROTECTION_ENABLED) + new_state->content_protection = + DRM_MODE_CONTENT_PROTECTION_DESIRED; + return; + } + + /* + * Nothing to do if 
the state didn't change, or HDCP was activated since + * the last commit + */ + if (old_cp == new_cp || + (old_cp == DRM_MODE_CONTENT_PROTECTION_DESIRED && + new_cp == DRM_MODE_CONTENT_PROTECTION_ENABLED)) + return; + + crtc_state = drm_atomic_get_new_crtc_state(new_state->state, + new_state->crtc); + crtc_state->mode_changed = true; +} + +/* Implements Part 3 of the HDCP authorization procedure */ +int intel_hdcp_check_link(struct intel_connector *connector) +{ + struct drm_i915_private *dev_priv = connector->base.dev->dev_private; + struct intel_digital_port *intel_dig_port = conn_to_dig_port(connector); + enum port port = intel_dig_port->base.port; + int ret = 0; + + if (!connector->hdcp_shim) + return -ENOENT; + + mutex_lock(&connector->hdcp_mutex); + + if (connector->hdcp_value == DRM_MODE_CONTENT_PROTECTION_UNDESIRED) + goto out; + + if (!(I915_READ(PORT_HDCP_STATUS(port)) & HDCP_STATUS_ENC)) { + DRM_ERROR("%s:%d HDCP check failed: link is not encrypted,%x\n", + connector->base.name, connector->base.base.id, + I915_READ(PORT_HDCP_STATUS(port))); + ret = -ENXIO; + connector->hdcp_value = DRM_MODE_CONTENT_PROTECTION_DESIRED; + schedule_work(&connector->hdcp_prop_work); + goto out; + } + + if (connector->hdcp_shim->check_link(intel_dig_port)) { + if (connector->hdcp_value != + DRM_MODE_CONTENT_PROTECTION_UNDESIRED) { + connector->hdcp_value = + DRM_MODE_CONTENT_PROTECTION_ENABLED; + schedule_work(&connector->hdcp_prop_work); + } + goto out; + } + + DRM_DEBUG_KMS("[%s:%d] HDCP link failed, retrying authentication\n", + connector->base.name, connector->base.base.id); + + ret = _intel_hdcp_disable(connector); + if (ret) { + DRM_ERROR("Failed to disable hdcp (%d)\n", ret); + connector->hdcp_value = DRM_MODE_CONTENT_PROTECTION_DESIRED; + schedule_work(&connector->hdcp_prop_work); + goto out; + } + + ret = _intel_hdcp_enable(connector); + if (ret) { + DRM_ERROR("Failed to enable hdcp (%d)\n", ret); + connector->hdcp_value = DRM_MODE_CONTENT_PROTECTION_DESIRED; + schedule_work(&connector->hdcp_prop_work); + goto out; + } + +out: + mutex_unlock(&connector->hdcp_mutex); + return ret; +} diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 179d0ad3889d..1baef4ac7ecb 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -34,6 +34,7 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> #include <drm/drm_edid.h> +#include <drm/drm_hdcp.h> #include <drm/drm_scdc_helper.h> #include "intel_drv.h" #include <drm/i915_drm.h> @@ -876,6 +877,248 @@ void intel_dp_dual_mode_set_tmds_output(struct intel_hdmi *hdmi, bool enable) adapter, enable); } +static int intel_hdmi_hdcp_read(struct intel_digital_port *intel_dig_port, + unsigned int offset, void *buffer, size_t size) +{ + struct intel_hdmi *hdmi = &intel_dig_port->hdmi; + struct drm_i915_private *dev_priv = + intel_dig_port->base.base.dev->dev_private; + struct i2c_adapter *adapter = intel_gmbus_get_adapter(dev_priv, + hdmi->ddc_bus); + int ret; + u8 start = offset & 0xff; + struct i2c_msg msgs[] = { + { + .addr = DRM_HDCP_DDC_ADDR, + .flags = 0, + .len = 1, + .buf = &start, + }, + { + .addr = DRM_HDCP_DDC_ADDR, + .flags = I2C_M_RD, + .len = size, + .buf = buffer + } + }; + ret = i2c_transfer(adapter, msgs, ARRAY_SIZE(msgs)); + if (ret == ARRAY_SIZE(msgs)) + return 0; + return ret >= 0 ? 
-EIO : ret; +} + +static int intel_hdmi_hdcp_write(struct intel_digital_port *intel_dig_port, + unsigned int offset, void *buffer, size_t size) +{ + struct intel_hdmi *hdmi = &intel_dig_port->hdmi; + struct drm_i915_private *dev_priv = + intel_dig_port->base.base.dev->dev_private; + struct i2c_adapter *adapter = intel_gmbus_get_adapter(dev_priv, + hdmi->ddc_bus); + int ret; + u8 *write_buf; + struct i2c_msg msg; + + write_buf = kzalloc(size + 1, GFP_KERNEL); + if (!write_buf) + return -ENOMEM; + + write_buf[0] = offset & 0xff; + memcpy(&write_buf[1], buffer, size); + + msg.addr = DRM_HDCP_DDC_ADDR; + msg.flags = 0, + msg.len = size + 1, + msg.buf = write_buf; + + ret = i2c_transfer(adapter, &msg, 1); + if (ret == 1) + return 0; + return ret >= 0 ? -EIO : ret; +} + +static +int intel_hdmi_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port, + u8 *an) +{ + struct intel_hdmi *hdmi = &intel_dig_port->hdmi; + struct drm_i915_private *dev_priv = + intel_dig_port->base.base.dev->dev_private; + struct i2c_adapter *adapter = intel_gmbus_get_adapter(dev_priv, + hdmi->ddc_bus); + int ret; + + ret = intel_hdmi_hdcp_write(intel_dig_port, DRM_HDCP_DDC_AN, an, + DRM_HDCP_AN_LEN); + if (ret) { + DRM_ERROR("Write An over DDC failed (%d)\n", ret); + return ret; + } + + ret = intel_gmbus_output_aksv(adapter); + if (ret < 0) { + DRM_ERROR("Failed to output aksv (%d)\n", ret); + return ret; + } + return 0; +} + +static int intel_hdmi_hdcp_read_bksv(struct intel_digital_port *intel_dig_port, + u8 *bksv) +{ + int ret; + ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_BKSV, bksv, + DRM_HDCP_KSV_LEN); + if (ret) + DRM_ERROR("Read Bksv over DDC failed (%d)\n", ret); + return ret; +} + +static +int intel_hdmi_hdcp_read_bstatus(struct intel_digital_port *intel_dig_port, + u8 *bstatus) +{ + int ret; + ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_BSTATUS, + bstatus, DRM_HDCP_BSTATUS_LEN); + if (ret) + DRM_ERROR("Read bstatus over DDC failed (%d)\n", ret); + return ret; +} + +static +int intel_hdmi_hdcp_repeater_present(struct intel_digital_port *intel_dig_port, + bool *repeater_present) +{ + int ret; + u8 val; + + ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_BCAPS, &val, 1); + if (ret) { + DRM_ERROR("Read bcaps over DDC failed (%d)\n", ret); + return ret; + } + *repeater_present = val & DRM_HDCP_DDC_BCAPS_REPEATER_PRESENT; + return 0; +} + +static +int intel_hdmi_hdcp_read_ri_prime(struct intel_digital_port *intel_dig_port, + u8 *ri_prime) +{ + int ret; + ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_RI_PRIME, + ri_prime, DRM_HDCP_RI_LEN); + if (ret) + DRM_ERROR("Read Ri' over DDC failed (%d)\n", ret); + return ret; +} + +static +int intel_hdmi_hdcp_read_ksv_ready(struct intel_digital_port *intel_dig_port, + bool *ksv_ready) +{ + int ret; + u8 val; + + ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_BCAPS, &val, 1); + if (ret) { + DRM_ERROR("Read bcaps over DDC failed (%d)\n", ret); + return ret; + } + *ksv_ready = val & DRM_HDCP_DDC_BCAPS_KSV_FIFO_READY; + return 0; +} + +static +int intel_hdmi_hdcp_read_ksv_fifo(struct intel_digital_port *intel_dig_port, + int num_downstream, u8 *ksv_fifo) +{ + int ret; + ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_KSV_FIFO, + ksv_fifo, num_downstream * DRM_HDCP_KSV_LEN); + if (ret) { + DRM_ERROR("Read ksv fifo over DDC failed (%d)\n", ret); + return ret; + } + return 0; +} + +static +int intel_hdmi_hdcp_read_v_prime_part(struct intel_digital_port *intel_dig_port, + int i, u32 *part) +{ + int ret; + + if (i >= 
DRM_HDCP_V_PRIME_NUM_PARTS)
+		return -EINVAL;
+
+	ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_V_PRIME(i),
+				   part, DRM_HDCP_V_PRIME_PART_LEN);
+	if (ret)
+		DRM_ERROR("Read V'[%d] over DDC failed (%d)\n", i, ret);
+	return ret;
+}
+
+static
+int intel_hdmi_hdcp_toggle_signalling(struct intel_digital_port *intel_dig_port,
+				      bool enable)
+{
+	int ret;
+
+	if (!enable)
+		usleep_range(6, 60); /* Bspec says >= 6us */
+
+	ret = intel_ddi_toggle_hdcp_signalling(&intel_dig_port->base, enable);
+	if (ret) {
+		DRM_ERROR("%s HDCP signalling failed (%d)\n",
+			  enable ? "Enable" : "Disable", ret);
+		return ret;
+	}
+	return 0;
+}
+
+static
+bool intel_hdmi_hdcp_check_link(struct intel_digital_port *intel_dig_port)
+{
+	struct drm_i915_private *dev_priv =
+		intel_dig_port->base.base.dev->dev_private;
+	enum port port = intel_dig_port->base.port;
+	int ret;
+	union {
+		u32 reg;
+		u8 shim[DRM_HDCP_RI_LEN];
+	} ri;
+
+	ret = intel_hdmi_hdcp_read_ri_prime(intel_dig_port, ri.shim);
+	if (ret)
+		return false;
+
+	I915_WRITE(PORT_HDCP_RPRIME(port), ri.reg);
+
+	/* Wait for Ri prime match */
+	if (wait_for(I915_READ(PORT_HDCP_STATUS(port)) &
+		     (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) {
+		DRM_ERROR("Ri' mismatch detected, link check failed (%x)\n",
+			  I915_READ(PORT_HDCP_STATUS(port)));
+		return false;
+	}
+	return true;
+}
+
+static const struct intel_hdcp_shim intel_hdmi_hdcp_shim = {
+	.write_an_aksv = intel_hdmi_hdcp_write_an_aksv,
+	.read_bksv = intel_hdmi_hdcp_read_bksv,
+	.read_bstatus = intel_hdmi_hdcp_read_bstatus,
+	.repeater_present = intel_hdmi_hdcp_repeater_present,
+	.read_ri_prime = intel_hdmi_hdcp_read_ri_prime,
+	.read_ksv_ready = intel_hdmi_hdcp_read_ksv_ready,
+	.read_ksv_fifo = intel_hdmi_hdcp_read_ksv_fifo,
+	.read_v_prime_part = intel_hdmi_hdcp_read_v_prime_part,
+	.toggle_signalling = intel_hdmi_hdcp_toggle_signalling,
+	.check_link = intel_hdmi_hdcp_check_link,
+};
+
 static void intel_hdmi_prepare(struct intel_encoder *encoder,
 			       const struct intel_crtc_state *crtc_state)
 {
@@ -1314,9 +1557,6 @@ intel_hdmi_mode_valid(struct drm_connector *connector,
 	bool force_dvi =
 		READ_ONCE(to_intel_digital_connector_state(connector->state)->force_audio) == HDMI_AUDIO_OFF_DVI;
 
-	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
-		return MODE_NO_DBLESCAN;
-
 	clock = mode->clock;
 
 	if ((mode->flags & DRM_MODE_FLAG_3D_MASK) == DRM_MODE_FLAG_3D_FRAME_PACKING)
@@ -1567,7 +1807,10 @@ intel_hdmi_dp_dual_mode_detect(struct drm_connector *connector, bool has_edid)
 	 * there's nothing connected to the port.
 	 */
 	if (type == DRM_DP_DUAL_MODE_UNKNOWN) {
-		if (has_edid &&
+		/* An overridden EDID implies that we want this port for
+		 * testing. Make sure not to set limits for that port. 
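
The shim table above is the HDMI instance of a transport vtable: the core in intel_hdcp.c only ever calls through these hooks, and the DP code can supply DPCD-backed equivalents for the same slots. A toy sketch of the dispatch pattern (struct and function names here are hypothetical, not the driver's):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Transport-agnostic core: it neither knows nor cares whether the
 * receiver is reached over DDC (HDMI) or DPCD (DP). */
struct hdcp_ops {
	int (*read_bksv)(void *port, uint8_t bksv[5]);
	int (*repeater_present)(void *port, bool *present);
};

static int fake_read_bksv(void *port, uint8_t bksv[5])
{
	(void)port;
	memset(bksv, 0x0f, 4);	/* 16 ones... */
	bksv[4] = 0xf0;		/* ...plus 4: a valid 20-ones KSV */
	return 0;
}

static int fake_repeater_present(void *port, bool *present)
{
	(void)port;
	*present = false;
	return 0;
}

static int auth_part1(const struct hdcp_ops *ops, void *port)
{
	uint8_t bksv[5];
	bool repeater;
	int ret;

	ret = ops->read_bksv(port, bksv);
	if (ret)
		return ret;
	return ops->repeater_present(port, &repeater);
}

int main(void)
{
	const struct hdcp_ops hdmi_like = {
		.read_bksv = fake_read_bksv,
		.repeater_present = fake_repeater_present,
	};

	printf("auth_part1: %d\n", auth_part1(&hdmi_like, NULL));
	return 0;
}
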
+ */ + if (has_edid && !connector->override_edid && intel_bios_is_port_dp_dual_mode(dev_priv, port)) { DRM_DEBUG_KMS("Assuming DP dual mode adaptor presence based on VBT\n"); type = DRM_DP_DUAL_MODE_TYPE1_DVI; @@ -1932,6 +2175,9 @@ static u8 cnp_port_to_ddc_pin(struct drm_i915_private *dev_priv, case PORT_D: ddc_pin = GMBUS_PIN_4_CNP; break; + case PORT_F: + ddc_pin = GMBUS_PIN_3_BXT; + break; default: MISSING_CASE(port); ddc_pin = GMBUS_PIN_1_BXT; @@ -1940,6 +2186,37 @@ static u8 cnp_port_to_ddc_pin(struct drm_i915_private *dev_priv, return ddc_pin; } +static u8 icl_port_to_ddc_pin(struct drm_i915_private *dev_priv, enum port port) +{ + u8 ddc_pin; + + switch (port) { + case PORT_A: + ddc_pin = GMBUS_PIN_1_BXT; + break; + case PORT_B: + ddc_pin = GMBUS_PIN_2_BXT; + break; + case PORT_C: + ddc_pin = GMBUS_PIN_9_TC1_ICP; + break; + case PORT_D: + ddc_pin = GMBUS_PIN_10_TC2_ICP; + break; + case PORT_E: + ddc_pin = GMBUS_PIN_11_TC3_ICP; + break; + case PORT_F: + ddc_pin = GMBUS_PIN_12_TC4_ICP; + break; + default: + MISSING_CASE(port); + ddc_pin = GMBUS_PIN_2_BXT; + break; + } + return ddc_pin; +} + static u8 g4x_port_to_ddc_pin(struct drm_i915_private *dev_priv, enum port port) { @@ -1982,6 +2259,8 @@ static u8 intel_hdmi_ddc_pin(struct drm_i915_private *dev_priv, ddc_pin = bxt_port_to_ddc_pin(dev_priv, port); else if (HAS_PCH_CNP(dev_priv)) ddc_pin = cnp_port_to_ddc_pin(dev_priv, port); + else if (IS_ICELAKE(dev_priv)) + ddc_pin = icl_port_to_ddc_pin(dev_priv, port); else ddc_pin = g4x_port_to_ddc_pin(dev_priv, port); @@ -2052,7 +2331,7 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, if (WARN_ON(port == PORT_A)) return; - intel_encoder->hpd_pin = intel_hpd_pin(port); + intel_encoder->hpd_pin = intel_hpd_pin_default(dev_priv, port); if (HAS_DDI(dev_priv)) intel_connector->get_hw_state = intel_ddi_connector_get_hw_state; @@ -2061,6 +2340,13 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, intel_hdmi_add_properties(intel_hdmi, connector); + if (is_hdcp_supported(dev_priv, port)) { + int ret = intel_hdcp_init(intel_connector, + &intel_hdmi_hdcp_shim); + if (ret) + DRM_DEBUG_KMS("HDCP init failed, skipping.\n"); + } + intel_connector_attach_encoder(intel_connector, intel_encoder); intel_hdmi->attached_connector = intel_connector; @@ -2097,6 +2383,7 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, &intel_hdmi_enc_funcs, DRM_MODE_ENCODER_TMDS, "HDMI %c", port_name(port)); + intel_encoder->hotplug = intel_encoder_hotplug; intel_encoder->compute_config = intel_hdmi_compute_config; if (HAS_PCH_SPLIT(dev_priv)) { intel_encoder->disable = pch_disable_hdmi; diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index 875d5d218d5c..0e3d3e89d66a 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -78,12 +78,14 @@ /** * intel_hpd_port - return port hard associated with certain pin. + * @dev_priv: private driver data pointer * @pin: the hpd pin to get associated port * * Return port that is associatade with @pin and PORT_NONE if no port is * hard associated with that @pin. 
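
The lookup documented here and its inverse below both gain a dev_priv argument because of the CNL port F quirk: port F has no HPD pin of its own and shares HPD_PORT_E, so the pin/port mapping is no longer static. A distilled sketch of the two-way mapping, assuming simplified enums that count up from PORT_A/HPD_PORT_A as zero (the real driver's enums differ):

#include <stdio.h>

enum port { PORT_A, PORT_B, PORT_C, PORT_D, PORT_E, PORT_F, PORT_NONE };
enum hpd_pin { HPD_PORT_A, HPD_PORT_B, HPD_PORT_C, HPD_PORT_D, HPD_PORT_E,
	       HPD_NONE };

static enum port pin_to_port(int is_cnl_with_port_f, enum hpd_pin pin)
{
	if (pin > HPD_PORT_E)
		return PORT_NONE;
	if (pin == HPD_PORT_E && is_cnl_with_port_f)
		return PORT_F;	/* the shared pin reports as port F */
	return (enum port)pin;	/* otherwise A..E map 1:1 */
}

static enum hpd_pin port_to_pin(int is_cnl_with_port_f, enum port port)
{
	if (port == PORT_F)
		return is_cnl_with_port_f ? HPD_PORT_E : HPD_NONE;
	if (port > PORT_E)
		return HPD_NONE;
	return (enum hpd_pin)port;
}

int main(void)
{
	/* prints "4 5": the same pin resolves to PORT_E or PORT_F */
	printf("%d %d\n", pin_to_port(0, HPD_PORT_E),
	       pin_to_port(1, HPD_PORT_E));
	return 0;
}
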
*/ -enum port intel_hpd_pin_to_port(enum hpd_pin pin) +enum port intel_hpd_pin_to_port(struct drm_i915_private *dev_priv, + enum hpd_pin pin) { switch (pin) { case HPD_PORT_A: @@ -95,6 +97,8 @@ enum port intel_hpd_pin_to_port(enum hpd_pin pin) case HPD_PORT_D: return PORT_D; case HPD_PORT_E: + if (IS_CNL_WITH_PORT_F(dev_priv)) + return PORT_F; return PORT_E; default: return PORT_NONE; /* no port for this pin */ @@ -102,13 +106,17 @@ enum port intel_hpd_pin_to_port(enum hpd_pin pin) } /** - * intel_hpd_pin - return pin hard associated with certain port. + * intel_hpd_pin_default - return default pin associated with certain port. + * @dev_priv: private driver data pointer * @port: the hpd port to get associated pin * + * It is only valid and used by digital port encoder. + * * Return pin that is associatade with @port and HDP_NONE if no pin is * hard associated with that @port. */ -enum hpd_pin intel_hpd_pin(enum port port) +enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv, + enum port port) { switch (port) { case PORT_A: @@ -121,6 +129,9 @@ enum hpd_pin intel_hpd_pin(enum port port) return HPD_PORT_D; case PORT_E: return HPD_PORT_E; + case PORT_F: + if (IS_CNL_WITH_PORT_F(dev_priv)) + return HPD_PORT_E; default: MISSING_CASE(port); return HPD_NONE; @@ -263,24 +274,26 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) intel_runtime_pm_put(dev_priv); } -static bool intel_hpd_irq_event(struct drm_device *dev, - struct drm_connector *connector) +bool intel_encoder_hotplug(struct intel_encoder *encoder, + struct intel_connector *connector) { + struct drm_device *dev = connector->base.dev; enum drm_connector_status old_status; WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); - old_status = connector->status; + old_status = connector->base.status; - connector->status = drm_helper_probe_detect(connector, NULL, false); + connector->base.status = + drm_helper_probe_detect(&connector->base, NULL, false); - if (old_status == connector->status) + if (old_status == connector->base.status) return false; DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %s to %s\n", - connector->base.id, - connector->name, + connector->base.base.id, + connector->base.name, drm_get_connector_status_name(old_status), - drm_get_connector_status_name(connector->status)); + drm_get_connector_status_name(connector->base.status)); return true; } @@ -370,10 +383,9 @@ static void i915_hotplug_work_func(struct work_struct *work) if (hpd_event_bits & (1 << intel_encoder->hpd_pin)) { DRM_DEBUG_KMS("Connector %s (pin %i) received hotplug event.\n", connector->name, intel_encoder->hpd_pin); - if (intel_encoder->hot_plug) - intel_encoder->hot_plug(intel_encoder); - if (intel_hpd_irq_event(dev, connector)) - changed = true; + + changed |= intel_encoder->hotplug(intel_encoder, + intel_connector); } } drm_connector_list_iter_end(&conn_iter); @@ -417,7 +429,7 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, if (!(BIT(i) & pin_mask)) continue; - port = intel_hpd_pin_to_port(i); + port = intel_hpd_pin_to_port(dev_priv, i); is_dig_port = port != PORT_NONE && dev_priv->hotplug.irq_port[port]; diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 8ed05182f944..65e2afb9b955 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -27,160 +27,9 @@ #include "intel_huc.h" #include "i915_drv.h" -/** - * DOC: HuC Firmware - * - * Motivation: - * GEN9 introduces a new dedicated firmware for usage in media HEVC (High - * 
Efficiency Video Coding) operations. Userspace can use the firmware - * capabilities by adding HuC specific commands to batch buffers. - * - * Implementation: - * The same firmware loader is used as the GuC. However, the actual - * loading to HW is deferred until GEM initialization is done. - * - * Note that HuC firmware loading must be done before GuC loading. - */ - -#define BXT_HUC_FW_MAJOR 01 -#define BXT_HUC_FW_MINOR 07 -#define BXT_BLD_NUM 1398 - -#define SKL_HUC_FW_MAJOR 01 -#define SKL_HUC_FW_MINOR 07 -#define SKL_BLD_NUM 1398 - -#define KBL_HUC_FW_MAJOR 02 -#define KBL_HUC_FW_MINOR 00 -#define KBL_BLD_NUM 1810 - -#define HUC_FW_PATH(platform, major, minor, bld_num) \ - "i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \ - __stringify(minor) "_" __stringify(bld_num) ".bin" - -#define I915_SKL_HUC_UCODE HUC_FW_PATH(skl, SKL_HUC_FW_MAJOR, \ - SKL_HUC_FW_MINOR, SKL_BLD_NUM) -MODULE_FIRMWARE(I915_SKL_HUC_UCODE); - -#define I915_BXT_HUC_UCODE HUC_FW_PATH(bxt, BXT_HUC_FW_MAJOR, \ - BXT_HUC_FW_MINOR, BXT_BLD_NUM) -MODULE_FIRMWARE(I915_BXT_HUC_UCODE); - -#define I915_KBL_HUC_UCODE HUC_FW_PATH(kbl, KBL_HUC_FW_MAJOR, \ - KBL_HUC_FW_MINOR, KBL_BLD_NUM) -MODULE_FIRMWARE(I915_KBL_HUC_UCODE); - -static void huc_fw_select(struct intel_uc_fw *huc_fw) -{ - struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); - struct drm_i915_private *dev_priv = huc_to_i915(huc); - - GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC); - - if (!HAS_HUC(dev_priv)) - return; - - if (i915_modparams.huc_firmware_path) { - huc_fw->path = i915_modparams.huc_firmware_path; - huc_fw->major_ver_wanted = 0; - huc_fw->minor_ver_wanted = 0; - } else if (IS_SKYLAKE(dev_priv)) { - huc_fw->path = I915_SKL_HUC_UCODE; - huc_fw->major_ver_wanted = SKL_HUC_FW_MAJOR; - huc_fw->minor_ver_wanted = SKL_HUC_FW_MINOR; - } else if (IS_BROXTON(dev_priv)) { - huc_fw->path = I915_BXT_HUC_UCODE; - huc_fw->major_ver_wanted = BXT_HUC_FW_MAJOR; - huc_fw->minor_ver_wanted = BXT_HUC_FW_MINOR; - } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { - huc_fw->path = I915_KBL_HUC_UCODE; - huc_fw->major_ver_wanted = KBL_HUC_FW_MAJOR; - huc_fw->minor_ver_wanted = KBL_HUC_FW_MINOR; - } else { - DRM_WARN("%s: No firmware known for this platform!\n", - intel_uc_fw_type_repr(huc_fw->type)); - } -} - -/** - * intel_huc_init_early() - initializes HuC struct - * @huc: intel_huc struct - * - * On platforms with HuC selects firmware for uploading - */ void intel_huc_init_early(struct intel_huc *huc) { - struct intel_uc_fw *huc_fw = &huc->fw; - - intel_uc_fw_init(huc_fw, INTEL_UC_FW_TYPE_HUC); - huc_fw_select(huc_fw); -} - -/** - * huc_ucode_xfer() - DMA's the firmware - * @dev_priv: the drm_i915_private device - * - * Transfer the firmware image to RAM for execution by the microcontroller. - * - * Return: 0 on success, non-zero on failure - */ -static int huc_ucode_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) -{ - struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); - struct drm_i915_private *dev_priv = huc_to_i915(huc); - unsigned long offset = 0; - u32 size; - int ret; - - GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC); - - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - - /* Set the source address for the uCode */ - offset = guc_ggtt_offset(vma) + huc_fw->header_offset; - I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); - I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); - - /* Hardware doesn't look at destination address for HuC. 
Set it to 0, - * but still program the correct address space. - */ - I915_WRITE(DMA_ADDR_1_LOW, 0); - I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM); - - size = huc_fw->header_size + huc_fw->ucode_size; - I915_WRITE(DMA_COPY_SIZE, size); - - /* Start the DMA */ - I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(HUC_UKERNEL | START_DMA)); - - /* Wait for DMA to finish */ - ret = intel_wait_for_register_fw(dev_priv, DMA_CTRL, START_DMA, 0, 100); - - DRM_DEBUG_DRIVER("HuC DMA transfer wait over with ret %d\n", ret); - - /* Disable the bits once DMA is over */ - I915_WRITE(DMA_CTRL, _MASKED_BIT_DISABLE(HUC_UKERNEL)); - - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - - return ret; -} - -/** - * intel_huc_init_hw() - load HuC uCode to device - * @huc: intel_huc structure - * - * Called from intel_uc_init_hw() during driver loading and also after a GPU - * reset. Be note that HuC loading must be done before GuC loading. - * - * The firmware image should have already been fetched into memory by the - * earlier call to intel_uc_init_fw(), so here we need only check that - * is succeeded, and then transfer the image to the h/w. - * - */ -int intel_huc_init_hw(struct intel_huc *huc) -{ - return intel_uc_fw_upload(&huc->fw, huc_ucode_xfer); + intel_huc_fw_init_early(huc); } /** @@ -199,6 +48,7 @@ int intel_huc_auth(struct intel_huc *huc) struct drm_i915_private *i915 = huc_to_i915(huc); struct intel_guc *guc = &i915->guc; struct i915_vma *vma; + u32 status; int ret; if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) @@ -209,28 +59,35 @@ int intel_huc_auth(struct intel_huc *huc) if (IS_ERR(vma)) { ret = PTR_ERR(vma); DRM_ERROR("HuC: Failed to pin huc fw object %d\n", ret); - return ret; + goto fail; } ret = intel_guc_auth_huc(guc, guc_ggtt_offset(vma) + huc->fw.rsa_offset); if (ret) { DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret); - goto out; + goto fail_unpin; } /* Check authentication status, it should be done by now */ - ret = intel_wait_for_register(i915, - HUC_STATUS2, - HUC_FW_VERIFIED, - HUC_FW_VERIFIED, - 50); + ret = __intel_wait_for_register(i915, + HUC_STATUS2, + HUC_FW_VERIFIED, + HUC_FW_VERIFIED, + 2, 50, &status); if (ret) { - DRM_ERROR("HuC: Authentication failed %d\n", ret); - goto out; + DRM_ERROR("HuC: Firmware not verified %#x\n", status); + goto fail_unpin; } -out: i915_vma_unpin(vma); + return 0; + +fail_unpin: + i915_vma_unpin(vma); +fail: + huc->fw.load_status = INTEL_UC_FIRMWARE_FAIL; + + DRM_ERROR("HuC: Authentication failed %d\n", ret); return ret; } diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h index 40039db59e04..5d6e804f9771 100644 --- a/drivers/gpu/drm/i915/intel_huc.h +++ b/drivers/gpu/drm/i915/intel_huc.h @@ -26,6 +26,7 @@ #define _INTEL_HUC_H_ #include "intel_uc_fw.h" +#include "intel_huc_fw.h" struct intel_huc { /* Generic uC firmware management */ @@ -35,7 +36,6 @@ struct intel_huc { }; void intel_huc_init_early(struct intel_huc *huc); -int intel_huc_init_hw(struct intel_huc *huc); int intel_huc_auth(struct intel_huc *huc); #endif diff --git a/drivers/gpu/drm/i915/intel_huc_fw.c b/drivers/gpu/drm/i915/intel_huc_fw.c new file mode 100644 index 000000000000..c66afa9b989a --- /dev/null +++ b/drivers/gpu/drm/i915/intel_huc_fw.c @@ -0,0 +1,166 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#include "intel_huc_fw.h" +#include "i915_drv.h" + +/** + * DOC: HuC Firmware + * + * Motivation: + * GEN9 introduces a new dedicated firmware for usage in media HEVC (High + * 
Efficiency Video Coding) operations. Userspace can use the firmware + * capabilities by adding HuC specific commands to batch buffers. + * + * Implementation: + * The same firmware loader is used as the GuC. However, the actual + * loading to HW is deferred until GEM initialization is done. + * + * Note that HuC firmware loading must be done before GuC loading. + */ + +#define BXT_HUC_FW_MAJOR 01 +#define BXT_HUC_FW_MINOR 07 +#define BXT_BLD_NUM 1398 + +#define SKL_HUC_FW_MAJOR 01 +#define SKL_HUC_FW_MINOR 07 +#define SKL_BLD_NUM 1398 + +#define KBL_HUC_FW_MAJOR 02 +#define KBL_HUC_FW_MINOR 00 +#define KBL_BLD_NUM 1810 + +#define HUC_FW_PATH(platform, major, minor, bld_num) \ + "i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \ + __stringify(minor) "_" __stringify(bld_num) ".bin" + +#define I915_SKL_HUC_UCODE HUC_FW_PATH(skl, SKL_HUC_FW_MAJOR, \ + SKL_HUC_FW_MINOR, SKL_BLD_NUM) +MODULE_FIRMWARE(I915_SKL_HUC_UCODE); + +#define I915_BXT_HUC_UCODE HUC_FW_PATH(bxt, BXT_HUC_FW_MAJOR, \ + BXT_HUC_FW_MINOR, BXT_BLD_NUM) +MODULE_FIRMWARE(I915_BXT_HUC_UCODE); + +#define I915_KBL_HUC_UCODE HUC_FW_PATH(kbl, KBL_HUC_FW_MAJOR, \ + KBL_HUC_FW_MINOR, KBL_BLD_NUM) +MODULE_FIRMWARE(I915_KBL_HUC_UCODE); + +static void huc_fw_select(struct intel_uc_fw *huc_fw) +{ + struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); + struct drm_i915_private *dev_priv = huc_to_i915(huc); + + GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC); + + if (!HAS_HUC(dev_priv)) + return; + + if (i915_modparams.huc_firmware_path) { + huc_fw->path = i915_modparams.huc_firmware_path; + huc_fw->major_ver_wanted = 0; + huc_fw->minor_ver_wanted = 0; + } else if (IS_SKYLAKE(dev_priv)) { + huc_fw->path = I915_SKL_HUC_UCODE; + huc_fw->major_ver_wanted = SKL_HUC_FW_MAJOR; + huc_fw->minor_ver_wanted = SKL_HUC_FW_MINOR; + } else if (IS_BROXTON(dev_priv)) { + huc_fw->path = I915_BXT_HUC_UCODE; + huc_fw->major_ver_wanted = BXT_HUC_FW_MAJOR; + huc_fw->minor_ver_wanted = BXT_HUC_FW_MINOR; + } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { + huc_fw->path = I915_KBL_HUC_UCODE; + huc_fw->major_ver_wanted = KBL_HUC_FW_MAJOR; + huc_fw->minor_ver_wanted = KBL_HUC_FW_MINOR; + } else { + DRM_WARN("%s: No firmware known for this platform!\n", + intel_uc_fw_type_repr(huc_fw->type)); + } +} + +/** + * intel_huc_fw_init_early() - initializes HuC firmware struct + * @huc: intel_huc struct + * + * On platforms with HuC selects firmware for uploading + */ +void intel_huc_fw_init_early(struct intel_huc *huc) +{ + struct intel_uc_fw *huc_fw = &huc->fw; + + intel_uc_fw_init(huc_fw, INTEL_UC_FW_TYPE_HUC); + huc_fw_select(huc_fw); +} + +/** + * huc_fw_xfer() - DMA's the firmware + * @huc_fw: the firmware descriptor + * @vma: the firmware image (bound into the GGTT) + * + * Transfer the firmware image to RAM for execution by the microcontroller. 
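
The HUC_FW_PATH macro above builds the firmware blob name at compile time via double stringification. A stand-alone sketch of the expansion; note the 01/07 version tokens survive as literal text rather than being read as octal numbers, because they are stringified before any evaluation:

#include <stdio.h>

#define __stringify_1(x)	#x
#define __stringify(x)		__stringify_1(x)

#define HUC_FW_PATH(platform, major, minor, bld_num) \
	"i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \
	__stringify(minor) "_" __stringify(bld_num) ".bin"

int main(void)
{
	/* Prints: i915/skl_huc_ver01_07_1398.bin */
	puts(HUC_FW_PATH(skl, 01, 07, 1398));
	return 0;
}

The two-level __stringify() is the same trick as include/linux/stringify.h: the inner level lets macro arguments expand before the # operator freezes them into a string.
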
+ * + * Return: 0 on success, non-zero on failure + */ +static int huc_fw_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) +{ + struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); + struct drm_i915_private *dev_priv = huc_to_i915(huc); + unsigned long offset = 0; + u32 size; + int ret; + + GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC); + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + /* Set the source address for the uCode */ + offset = guc_ggtt_offset(vma) + huc_fw->header_offset; + I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); + I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); + + /* Hardware doesn't look at destination address for HuC. Set it to 0, + * but still program the correct address space. + */ + I915_WRITE(DMA_ADDR_1_LOW, 0); + I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM); + + size = huc_fw->header_size + huc_fw->ucode_size; + I915_WRITE(DMA_COPY_SIZE, size); + + /* Start the DMA */ + I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(HUC_UKERNEL | START_DMA)); + + /* Wait for DMA to finish */ + ret = intel_wait_for_register_fw(dev_priv, DMA_CTRL, START_DMA, 0, 100); + + DRM_DEBUG_DRIVER("HuC DMA transfer wait over with ret %d\n", ret); + + /* Disable the bits once DMA is over */ + I915_WRITE(DMA_CTRL, _MASKED_BIT_DISABLE(HUC_UKERNEL)); + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + + return ret; +} + +/** + * intel_huc_fw_upload() - load HuC uCode to device + * @huc: intel_huc structure + * + * Called from intel_uc_init_hw() during driver load, resume from sleep and + * after a GPU reset. Note that HuC must be loaded before GuC. + * + * The firmware image should have already been fetched into memory by the + * earlier call to intel_uc_init_fw(), so here we need to only check that + * fetch succeeded, and then transfer the image to the h/w. 
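
DMA_CTRL above is one of the masked registers: the upper 16 bits of a write select which of the lower 16 bits take effect, which is why HUC_UKERNEL | START_DMA can be set and HUC_UKERNEL later cleared without any read-modify-write. A small emulation of those semantics, using the same ((a) << 16 | (a)) encoding as i915's _MASKED_BIT_* helpers:

#include <stdint.h>
#include <stdio.h>

#define MASKED_BIT_ENABLE(a)	(((a) << 16) | (a))	/* _MASKED_BIT_ENABLE */
#define MASKED_BIT_DISABLE(a)	((a) << 16)		/* _MASKED_BIT_DISABLE */

/* What the HW does with a write to a masked register. */
static uint16_t masked_write(uint16_t reg, uint32_t val)
{
	uint16_t mask = val >> 16;

	return (reg & ~mask) | (val & mask);
}

int main(void)
{
	uint16_t dma_ctrl = 0;

	dma_ctrl = masked_write(dma_ctrl, MASKED_BIT_ENABLE(1u << 2));
	printf("after enable:  0x%04x\n", dma_ctrl);	/* 0x0004 */
	dma_ctrl = masked_write(dma_ctrl, MASKED_BIT_DISABLE(1u << 2));
	printf("after disable: 0x%04x\n", dma_ctrl);	/* 0x0000 */
	return 0;
}
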
+ * + * Return: non-zero code on error + */ +int intel_huc_fw_upload(struct intel_huc *huc) +{ + return intel_uc_fw_upload(&huc->fw, huc_fw_xfer); +} diff --git a/drivers/gpu/drm/i915/intel_huc_fw.h b/drivers/gpu/drm/i915/intel_huc_fw.h new file mode 100644 index 000000000000..8a00a0ebddc5 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_huc_fw.h @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#ifndef _INTEL_HUC_FW_H_ +#define _INTEL_HUC_FW_H_ + +struct intel_huc; + +void intel_huc_fw_init_early(struct intel_huc *huc); +int intel_huc_fw_upload(struct intel_huc *huc); + +#endif diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index ef9f91a0b0c9..e6875509bcd9 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -30,6 +30,7 @@ #include <linux/i2c-algo-bit.h> #include <linux/export.h> #include <drm/drmP.h> +#include <drm/drm_hdcp.h> #include "intel_drv.h" #include <drm/i915_drm.h> #include "i915_drv.h" @@ -75,11 +76,22 @@ static const struct gmbus_pin gmbus_pins_cnp[] = { [GMBUS_PIN_4_CNP] = { "dpd", GPIOE }, }; +static const struct gmbus_pin gmbus_pins_icp[] = { + [GMBUS_PIN_1_BXT] = { "dpa", GPIOA }, + [GMBUS_PIN_2_BXT] = { "dpb", GPIOB }, + [GMBUS_PIN_9_TC1_ICP] = { "tc1", GPIOC }, + [GMBUS_PIN_10_TC2_ICP] = { "tc2", GPIOD }, + [GMBUS_PIN_11_TC3_ICP] = { "tc3", GPIOE }, + [GMBUS_PIN_12_TC4_ICP] = { "tc4", GPIOF }, +}; + /* pin is expected to be valid */ static const struct gmbus_pin *get_gmbus_pin(struct drm_i915_private *dev_priv, unsigned int pin) { - if (HAS_PCH_CNP(dev_priv)) + if (HAS_PCH_ICP(dev_priv)) + return &gmbus_pins_icp[pin]; + else if (HAS_PCH_CNP(dev_priv)) return &gmbus_pins_cnp[pin]; else if (IS_GEN9_LP(dev_priv)) return &gmbus_pins_bxt[pin]; @@ -96,7 +108,9 @@ bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv, { unsigned int size; - if (HAS_PCH_CNP(dev_priv)) + if (HAS_PCH_ICP(dev_priv)) + size = ARRAY_SIZE(gmbus_pins_icp); + else if (HAS_PCH_CNP(dev_priv)) size = ARRAY_SIZE(gmbus_pins_cnp); else if (IS_GEN9_LP(dev_priv)) size = ARRAY_SIZE(gmbus_pins_bxt); @@ -402,7 +416,8 @@ gmbus_xfer_read(struct drm_i915_private *dev_priv, struct i2c_msg *msg, static int gmbus_xfer_write_chunk(struct drm_i915_private *dev_priv, - unsigned short addr, u8 *buf, unsigned int len) + unsigned short addr, u8 *buf, unsigned int len, + u32 gmbus1_index) { unsigned int chunk_size = len; u32 val, loop; @@ -415,7 +430,7 @@ gmbus_xfer_write_chunk(struct drm_i915_private *dev_priv, I915_WRITE_FW(GMBUS3, val); I915_WRITE_FW(GMBUS1, - GMBUS_CYCLE_WAIT | + gmbus1_index | GMBUS_CYCLE_WAIT | (chunk_size << GMBUS_BYTE_COUNT_SHIFT) | (addr << GMBUS_SLAVE_ADDR_SHIFT) | GMBUS_SLAVE_WRITE | GMBUS_SW_RDY); @@ -438,7 +453,8 @@ gmbus_xfer_write_chunk(struct drm_i915_private *dev_priv, } static int -gmbus_xfer_write(struct drm_i915_private *dev_priv, struct i2c_msg *msg) +gmbus_xfer_write(struct drm_i915_private *dev_priv, struct i2c_msg *msg, + u32 gmbus1_index) { u8 *buf = msg->buf; unsigned int tx_size = msg->len; @@ -448,7 +464,8 @@ gmbus_xfer_write(struct drm_i915_private *dev_priv, struct i2c_msg *msg) do { len = min(tx_size, GMBUS_BYTE_COUNT_MAX); - ret = gmbus_xfer_write_chunk(dev_priv, msg->addr, buf, len); + ret = gmbus_xfer_write_chunk(dev_priv, msg->addr, buf, len, + gmbus1_index); if (ret) return ret; @@ -460,21 +477,21 @@ gmbus_xfer_write(struct drm_i915_private *dev_priv, struct i2c_msg *msg) } /* - * The gmbus controller can combine a 1 or 2 byte write with a 
read that - * immediately follows it by using an "INDEX" cycle. + * The gmbus controller can combine a 1 or 2 byte write with another read/write + * that immediately follows it by using an "INDEX" cycle. */ static bool -gmbus_is_index_read(struct i2c_msg *msgs, int i, int num) +gmbus_is_index_xfer(struct i2c_msg *msgs, int i, int num) { return (i + 1 < num && msgs[i].addr == msgs[i + 1].addr && !(msgs[i].flags & I2C_M_RD) && (msgs[i].len == 1 || msgs[i].len == 2) && - (msgs[i + 1].flags & I2C_M_RD)); + msgs[i + 1].len > 0); } static int -gmbus_xfer_index_read(struct drm_i915_private *dev_priv, struct i2c_msg *msgs) +gmbus_index_xfer(struct drm_i915_private *dev_priv, struct i2c_msg *msgs) { u32 gmbus1_index = 0; u32 gmbus5 = 0; @@ -491,7 +508,10 @@ gmbus_xfer_index_read(struct drm_i915_private *dev_priv, struct i2c_msg *msgs) if (gmbus5) I915_WRITE_FW(GMBUS5, gmbus5); - ret = gmbus_xfer_read(dev_priv, &msgs[1], gmbus1_index); + if (msgs[1].flags & I2C_M_RD) + ret = gmbus_xfer_read(dev_priv, &msgs[1], gmbus1_index); + else + ret = gmbus_xfer_write(dev_priv, &msgs[1], gmbus1_index); /* Clear GMBUS5 after each index transfer */ if (gmbus5) @@ -501,7 +521,8 @@ gmbus_xfer_index_read(struct drm_i915_private *dev_priv, struct i2c_msg *msgs) } static int -do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) +do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num, + u32 gmbus0_source) { struct intel_gmbus *bus = container_of(adapter, struct intel_gmbus, @@ -518,17 +539,17 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) pch_gmbus_clock_gating(dev_priv, false); retry: - I915_WRITE_FW(GMBUS0, bus->reg0); + I915_WRITE_FW(GMBUS0, gmbus0_source | bus->reg0); for (; i < num; i += inc) { inc = 1; - if (gmbus_is_index_read(msgs, i, num)) { - ret = gmbus_xfer_index_read(dev_priv, &msgs[i]); - inc = 2; /* an index read is two msgs */ + if (gmbus_is_index_xfer(msgs, i, num)) { + ret = gmbus_index_xfer(dev_priv, &msgs[i]); + inc = 2; /* an index transmission is two msgs */ } else if (msgs[i].flags & I2C_M_RD) { ret = gmbus_xfer_read(dev_priv, &msgs[i], 0); } else { - ret = gmbus_xfer_write(dev_priv, &msgs[i]); + ret = gmbus_xfer_write(dev_priv, &msgs[i], 0); } if (!ret) @@ -643,7 +664,7 @@ gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) if (ret < 0) bus->force_bit &= ~GMBUS_FORCE_BIT_RETRY; } else { - ret = do_gmbus_xfer(adapter, msgs, num); + ret = do_gmbus_xfer(adapter, msgs, num, 0); if (ret == -EAGAIN) bus->force_bit |= GMBUS_FORCE_BIT_RETRY; } @@ -653,6 +674,45 @@ gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) return ret; } +int intel_gmbus_output_aksv(struct i2c_adapter *adapter) +{ + struct intel_gmbus *bus = container_of(adapter, struct intel_gmbus, + adapter); + struct drm_i915_private *dev_priv = bus->dev_priv; + int ret; + u8 cmd = DRM_HDCP_DDC_AKSV; + u8 buf[DRM_HDCP_KSV_LEN] = { 0 }; + struct i2c_msg msgs[] = { + { + .addr = DRM_HDCP_DDC_ADDR, + .flags = 0, + .len = sizeof(cmd), + .buf = &cmd, + }, + { + .addr = DRM_HDCP_DDC_ADDR, + .flags = 0, + .len = sizeof(buf), + .buf = buf, + } + }; + + intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); + mutex_lock(&dev_priv->gmbus_mutex); + + /* + * In order to output Aksv to the receiver, use an indexed write to + * pass the i2c command, and tell GMBUS to use the HW-provided value + * instead of sourcing GMBUS3 for the data. 
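
The INDEX cycle being generalized here covers exactly the message shape HDCP uses: a 1-2 byte offset write combined with a follow-on transfer to the same slave address. For comparison, this is how the same combined transaction (one I2C transaction with a repeated start) looks from user space through i2c-dev; the device path and error handling are illustrative, 0x3a is the HDCP DDC address:

#include <fcntl.h>
#include <linux/i2c.h>
#include <linux/i2c-dev.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>

/* Offset write + read in one I2C_RDWR ioctl -- the case that
 * gmbus_is_index_xfer() detects and turns into an INDEX cycle. */
static int ddc_read(int fd, uint8_t offset, uint8_t *buf, uint16_t len)
{
	struct i2c_msg msgs[2] = {
		{ .addr = 0x3a, .flags = 0,        .len = 1,   .buf = &offset },
		{ .addr = 0x3a, .flags = I2C_M_RD, .len = len, .buf = buf },
	};
	struct i2c_rdwr_ioctl_data xfer = { .msgs = msgs, .nmsgs = 2 };

	return ioctl(fd, I2C_RDWR, &xfer) == 2 ? 0 : -1;
}

int main(void)
{
	uint8_t bksv[5];
	int fd = open("/dev/i2c-0", O_RDWR); /* illustrative bus */

	if (fd >= 0 && ddc_read(fd, 0x00 /* Bksv offset */, bksv, 5) == 0)
		printf("bksv[0]=0x%02x\n", bksv[0]);
	return 0;
}
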
+ */ + ret = do_gmbus_xfer(adapter, msgs, ARRAY_SIZE(msgs), GMBUS_AKSV_SELECT); + + mutex_unlock(&dev_priv->gmbus_mutex); + intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS); + + return ret; +} + static u32 gmbus_func(struct i2c_adapter *adapter) { return i2c_bit_algo.functionality(adapter) & diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 5809b29044fc..6269750e2b54 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -74,7 +74,6 @@ static struct platform_device * lpe_audio_platdev_create(struct drm_i915_private *dev_priv) { - int ret; struct drm_device *dev = &dev_priv->drm; struct platform_device_info pinfo = {}; struct resource *rsc; @@ -119,24 +118,19 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv) spin_lock_init(&pdata->lpe_audio_slock); platdev = platform_device_register_full(&pinfo); + kfree(rsc); + kfree(pdata); + if (IS_ERR(platdev)) { - ret = PTR_ERR(platdev); DRM_ERROR("Failed to allocate LPE audio platform device\n"); - goto err; + return platdev; } - kfree(rsc); - pm_runtime_forbid(&platdev->dev); pm_runtime_set_active(&platdev->dev); pm_runtime_enable(&platdev->dev); return platdev; - -err: - kfree(rsc); - kfree(pdata); - return ERR_PTR(ret); } static void lpe_audio_platdev_destroy(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e0fca035ff78..697af5add78b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -137,6 +137,7 @@ #include <drm/i915_drm.h> #include "i915_drv.h" #include "i915_gem_render_state.h" +#include "intel_lrc_reg.h" #include "intel_mocs.h" #define RING_EXECLIST_QFULL (1 << 0x2) @@ -156,60 +157,10 @@ #define GEN8_CTX_STATUS_COMPLETED_MASK \ (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED) -#define CTX_LRI_HEADER_0 0x01 -#define CTX_CONTEXT_CONTROL 0x02 -#define CTX_RING_HEAD 0x04 -#define CTX_RING_TAIL 0x06 -#define CTX_RING_BUFFER_START 0x08 -#define CTX_RING_BUFFER_CONTROL 0x0a -#define CTX_BB_HEAD_U 0x0c -#define CTX_BB_HEAD_L 0x0e -#define CTX_BB_STATE 0x10 -#define CTX_SECOND_BB_HEAD_U 0x12 -#define CTX_SECOND_BB_HEAD_L 0x14 -#define CTX_SECOND_BB_STATE 0x16 -#define CTX_BB_PER_CTX_PTR 0x18 -#define CTX_RCS_INDIRECT_CTX 0x1a -#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c -#define CTX_LRI_HEADER_1 0x21 -#define CTX_CTX_TIMESTAMP 0x22 -#define CTX_PDP3_UDW 0x24 -#define CTX_PDP3_LDW 0x26 -#define CTX_PDP2_UDW 0x28 -#define CTX_PDP2_LDW 0x2a -#define CTX_PDP1_UDW 0x2c -#define CTX_PDP1_LDW 0x2e -#define CTX_PDP0_UDW 0x30 -#define CTX_PDP0_LDW 0x32 -#define CTX_LRI_HEADER_2 0x41 -#define CTX_R_PWR_CLK_STATE 0x42 -#define CTX_GPGPU_CSR_BASE_ADDRESS 0x44 - -#define CTX_REG(reg_state, pos, reg, val) do { \ - (reg_state)[(pos)+0] = i915_mmio_reg_offset(reg); \ - (reg_state)[(pos)+1] = (val); \ -} while (0) - -#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \ - const u64 _addr = i915_page_dir_dma_addr((ppgtt), (n)); \ - reg_state[CTX_PDP ## n ## _UDW+1] = upper_32_bits(_addr); \ - reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \ -} while (0) - -#define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \ - reg_state[CTX_PDP0_UDW + 1] = upper_32_bits(px_dma(&ppgtt->pml4)); \ - reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \ -} while (0) - -#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17 -#define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26 -#define GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x19 - /* Typical size of 
the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ #define WA_TAIL_DWORDS 2 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS) -#define PREEMPT_ID 0x1 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); @@ -218,6 +169,23 @@ static void execlists_init_reg_state(u32 *reg_state, struct intel_engine_cs *engine, struct intel_ring *ring); +static inline struct i915_priolist *to_priolist(struct rb_node *rb) +{ + return rb_entry(rb, struct i915_priolist, node); +} + +static inline int rq_prio(const struct i915_request *rq) +{ + return rq->priotree.priority; +} + +static inline bool need_preempt(const struct intel_engine_cs *engine, + const struct i915_request *last, + int prio) +{ + return engine->i915->preempt_context && prio > max(rq_prio(last), 0); +} + /** * intel_lr_context_descriptor_update() - calculate & cache the descriptor * descriptor for a pinned context @@ -236,6 +204,18 @@ static void execlists_init_reg_state(u32 *reg_state, * bits 32-52: ctx ID, a globally unique tag * bits 53-54: mbz, reserved for use by hardware * bits 55-63: group ID, currently unused and set to 0 + * + * Starting from Gen11, the upper dword of the descriptor has a new format: + * + * bits 32-36: reserved + * bits 37-47: SW context ID + * bits 48:53: engine instance + * bit 54: mbz, reserved for use by hardware + * bits 55-60: SW counter + * bits 61-63: engine class + * + * engine info, SW context ID and SW counter need to form a unique number + * (Context ID) per lrc. */ static void intel_lr_context_descriptor_update(struct i915_gem_context *ctx, @@ -244,12 +224,32 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, struct intel_context *ce = &ctx->engine[engine->id]; u64 desc; - BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (1<<GEN8_CTX_ID_WIDTH)); + BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); + BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH))); desc = ctx->desc_template; /* bits 0-11 */ + GEM_BUG_ON(desc & GENMASK_ULL(63, 12)); + desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE; /* bits 12-31 */ - desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ + GEM_BUG_ON(desc & GENMASK_ULL(63, 32)); + + if (INTEL_GEN(ctx->i915) >= 11) { + GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); + desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT; + /* bits 37-47 */ + + desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; + /* bits 48-53 */ + + /* TODO: decide what to do with SW counter (bits 55-60) */ + + desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT; + /* bits 61-63 */ + } else { + GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH)); + desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ + } ce->lrc_desc = desc; } @@ -273,7 +273,7 @@ find_priolist: parent = &execlists->queue.rb_node; while (*parent) { rb = *parent; - p = rb_entry(rb, typeof(*p), node); + p = to_priolist(rb); if (prio > p->priority) { parent = &rb->rb_left; } else if (prio < p->priority) { @@ -313,10 +313,10 @@ find_priolist: if (first) execlists->first = &p->node; - return ptr_pack_bits(p, first, 1); + return p; } -static void unwind_wa_tail(struct drm_i915_gem_request *rq) +static void unwind_wa_tail(struct i915_request *rq) { rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); assert_ring_tail_valid(rq->ring, rq->tail); @@ -324,7 +324,7 @@ static void unwind_wa_tail(struct drm_i915_gem_request *rq) static void __unwind_incomplete_requests(struct 
intel_engine_cs *engine) { - struct drm_i915_gem_request *rq, *rn; + struct i915_request *rq, *rn; struct i915_priolist *uninitialized_var(p); int last_prio = I915_PRIORITY_INVALID; @@ -333,20 +333,16 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) list_for_each_entry_safe_reverse(rq, rn, &engine->timeline->requests, link) { - if (i915_gem_request_completed(rq)) + if (i915_request_completed(rq)) return; - __i915_gem_request_unsubmit(rq); + __i915_request_unsubmit(rq); unwind_wa_tail(rq); - GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID); - if (rq->priotree.priority != last_prio) { - p = lookup_priolist(engine, - &rq->priotree, - rq->priotree.priority); - p = ptr_mask_bits(p, 1); - - last_prio = rq->priotree.priority; + GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); + if (rq_prio(rq) != last_prio) { + last_prio = rq_prio(rq); + p = lookup_priolist(engine, &rq->priotree, last_prio); } list_add(&rq->priotree.link, &p->requests); @@ -365,8 +361,7 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) } static inline void -execlists_context_status_change(struct drm_i915_gem_request *rq, - unsigned long status) +execlists_context_status_change(struct i915_request *rq, unsigned long status) { /* * Only used when GVT-g is enabled now. When GVT-g is disabled, @@ -380,14 +375,14 @@ execlists_context_status_change(struct drm_i915_gem_request *rq, } static inline void -execlists_context_schedule_in(struct drm_i915_gem_request *rq) +execlists_context_schedule_in(struct i915_request *rq) { execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(rq->engine); } static inline void -execlists_context_schedule_out(struct drm_i915_gem_request *rq) +execlists_context_schedule_out(struct i915_request *rq) { intel_engine_context_out(rq->engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); @@ -402,7 +397,7 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) ASSIGN_CTX_PDP(ppgtt, reg_state, 0); } -static u64 execlists_update_context(struct drm_i915_gem_request *rq) +static u64 execlists_update_context(struct i915_request *rq) { struct intel_context *ce = &rq->ctx->engine[rq->engine->id]; struct i915_hw_ppgtt *ppgtt = @@ -422,19 +417,31 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) return ce->lrc_desc; } -static inline void elsp_write(u64 desc, u32 __iomem *elsp) +static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) { - writel(upper_32_bits(desc), elsp); - writel(lower_32_bits(desc), elsp); + if (execlists->ctrl_reg) { + writel(lower_32_bits(desc), execlists->submit_reg + port * 2); + writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1); + } else { + writel(upper_32_bits(desc), execlists->submit_reg); + writel(lower_32_bits(desc), execlists->submit_reg); + } } static void execlists_submit_ports(struct intel_engine_cs *engine) { - struct execlist_port *port = engine->execlists.port; + struct intel_engine_execlists *execlists = &engine->execlists; + struct execlist_port *port = execlists->port; unsigned int n; - for (n = execlists_num_ports(&engine->execlists); n--; ) { - struct drm_i915_gem_request *rq; + /* + * ELSQ note: the submit queue is not cleared after being submitted + * to the HW so we need to make sure we always clean it up. This is + * currently ensured by the fact that we always write the same number + * of elsq entries, keep this in mind before changing the loop below. 
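
The dword ordering in write_desc() differs between the two paths: the newer per-port submit queue (ctrl_reg set) takes plain lower-then-upper writes into its slots, while the legacy ELSP is a FIFO that consumes upper-then-lower. A print-based sketch of the two write sequences (register names are just labels here):

#include <stdint.h>
#include <stdio.h>

static void reg_write(const char *reg, uint32_t val)
{
	printf("%-12s <- 0x%08x\n", reg, val);
}

static void write_desc(int has_elsq, uint64_t desc, unsigned int port)
{
	char name[16];

	if (has_elsq) {		/* submit queue: per-port slot, lower first */
		snprintf(name, sizeof(name), "ELSQ[%u]+0", port);
		reg_write(name, (uint32_t)desc);
		snprintf(name, sizeof(name), "ELSQ[%u]+1", port);
		reg_write(name, (uint32_t)(desc >> 32));
	} else {		/* legacy ELSP FIFO: upper dword first */
		reg_write("ELSP", (uint32_t)(desc >> 32));
		reg_write("ELSP", (uint32_t)desc);
	}
}

int main(void)
{
	write_desc(1, 0x1122334455667788ull, 0);
	write_desc(0, 0x1122334455667788ull, 0);
	return 0;
}
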
+ */ + for (n = execlists_num_ports(execlists); n--; ) { + struct i915_request *rq; unsigned int count; u64 desc; @@ -447,18 +454,24 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x\n", + GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x, prio=%d\n", engine->name, n, port[n].context_id, count, - rq->global_seqno); + rq->global_seqno, + rq_prio(rq)); } else { GEM_BUG_ON(!n); desc = 0; } - elsp_write(desc, engine->execlists.elsp); + write_desc(execlists, desc, n); } - execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK); + + /* we need to manually load the submit queue */ + if (execlists->ctrl_reg) + writel(EL_CTRL_LOAD, execlists->ctrl_reg); + + execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); } static bool ctx_single_port_submission(const struct i915_gem_context *ctx) @@ -479,37 +492,47 @@ static bool can_merge_ctx(const struct i915_gem_context *prev, return true; } -static void port_assign(struct execlist_port *port, - struct drm_i915_gem_request *rq) +static void port_assign(struct execlist_port *port, struct i915_request *rq) { GEM_BUG_ON(rq == port_request(port)); if (port_isset(port)) - i915_gem_request_put(port_request(port)); + i915_request_put(port_request(port)); - port_set(port, port_pack(i915_gem_request_get(rq), port_count(port))); + port_set(port, port_pack(i915_request_get(rq), port_count(port))); } static void inject_preempt_context(struct intel_engine_cs *engine) { + struct intel_engine_execlists *execlists = &engine->execlists; struct intel_context *ce = &engine->i915->preempt_context->engine[engine->id]; unsigned int n; - GEM_BUG_ON(engine->i915->preempt_context->hw_id != PREEMPT_ID); - GEM_BUG_ON(!IS_ALIGNED(ce->ring->size, WA_TAIL_BYTES)); - - memset(ce->ring->vaddr + ce->ring->tail, 0, WA_TAIL_BYTES); - ce->ring->tail += WA_TAIL_BYTES; - ce->ring->tail &= (ce->ring->size - 1); - ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail; + GEM_BUG_ON(execlists->preempt_complete_status != + upper_32_bits(ce->lrc_desc)); + GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] & + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | + CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) != + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | + CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)); + /* + * Switch to our empty preempt context so + * the state of the GPU is known (idle). 
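For readers comparing the two submission interfaces touched above, a condensed sketch (helper name hypothetical, register semantics as described by this patch): the legacy ELSP is a single register fed the upper dword then the lower dword of each descriptor, ports written in reverse so port 0 lands last, while the Gen11 ELSQ slots behave like plain memory and execute nothing until EL_CTRL_LOAD is written to the control register.

/* Hypothetical condensation of the two paths in write_desc() above. */
static void sketch_submit_descs(struct intel_engine_execlists *execlists,
				const u64 *desc, unsigned int num_ports)
{
	unsigned int n;

	for (n = num_ports; n--; ) {
		if (execlists->ctrl_reg) {
			/* ELSQ: per-port dword pair, low then high */
			writel(lower_32_bits(desc[n]),
			       execlists->submit_reg + n * 2);
			writel(upper_32_bits(desc[n]),
			       execlists->submit_reg + n * 2 + 1);
		} else {
			/* ELSP: one register, high then low per port */
			writel(upper_32_bits(desc[n]), execlists->submit_reg);
			writel(lower_32_bits(desc[n]), execlists->submit_reg);
		}
	}

	/* ELSQ contents stay inert until the queue is explicitly loaded */
	if (execlists->ctrl_reg)
		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
}

This is also why the ELSQ note above insists that every slot is always written: a stale descriptor left in the queue would otherwise be resubmitted by the next EL_CTRL_LOAD.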
+ */ GEM_TRACE("%s\n", engine->name); - for (n = execlists_num_ports(&engine->execlists); --n; ) - elsp_write(0, engine->execlists.elsp); + for (n = execlists_num_ports(execlists); --n; ) + write_desc(execlists, 0, n); + + write_desc(execlists, ce->lrc_desc, n); + + /* we need to manually load the submit queue */ + if (execlists->ctrl_reg) + writel(EL_CTRL_LOAD, execlists->ctrl_reg); - elsp_write(ce->lrc_desc, engine->execlists.elsp); execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK); + execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT); } static void execlists_dequeue(struct intel_engine_cs *engine) @@ -518,7 +541,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct execlist_port *port = execlists->port; const struct execlist_port * const last_port = &execlists->port[execlists->port_mask]; - struct drm_i915_gem_request *last = port_request(port); + struct i915_request *last = port_request(port); struct rb_node *rb; bool submit = false; @@ -546,8 +569,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) spin_lock_irq(&engine->timeline->lock); rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); - if (!rb) - goto unlock; if (last) { /* @@ -570,55 +591,49 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) goto unlock; - if (HAS_LOGICAL_RING_PREEMPTION(engine->i915) && - rb_entry(rb, struct i915_priolist, node)->priority > - max(last->priotree.priority, 0)) { - /* - * Switch to our empty preempt context so - * the state of the GPU is known (idle). - */ + if (need_preempt(engine, last, execlists->queue_priority)) { inject_preempt_context(engine); - execlists_set_active(execlists, - EXECLISTS_ACTIVE_PREEMPT); goto unlock; - } else { - /* - * In theory, we could coalesce more requests onto - * the second port (the first port is active, with - * no preemptions pending). However, that means we - * then have to deal with the possible lite-restore - * of the second port (as we submit the ELSP, there - * may be a context-switch) but also we may complete - * the resubmission before the context-switch. Ergo, - * coalescing onto the second port will cause a - * preemption event, but we cannot predict whether - * that will affect port[0] or port[1]. - * - * If the second port is already active, we can wait - * until the next context-switch before contemplating - * new requests. The GPU will be busy and we should be - * able to resubmit the new ELSP before it idles, - * avoiding pipeline bubbles (momentary pauses where - * the driver is unable to keep up the supply of new - * work). - */ - if (port_count(&port[1])) - goto unlock; - - /* WaIdleLiteRestore:bdw,skl - * Apply the wa NOOPs to prevent - * ring:HEAD == req:TAIL as we resubmit the - * request. See gen8_emit_breadcrumb() for - * where we prepare the padding after the - * end of the request. - */ - last->tail = last->wa_tail; } + + /* + * In theory, we could coalesce more requests onto + * the second port (the first port is active, with + * no preemptions pending). However, that means we + * then have to deal with the possible lite-restore + * of the second port (as we submit the ELSP, there + * may be a context-switch) but also we may complete + * the resubmission before the context-switch. Ergo, + * coalescing onto the second port will cause a + * preemption event, but we cannot predict whether + * that will affect port[0] or port[1]. 
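A quick worked example of the need_preempt() check feeding the branch above (inputs are illustrative; the arithmetic is exactly the helper's, as defined earlier in this file):

/* Same test as need_preempt(), with illustrative inputs. */
static bool sketch_would_preempt(int queue_prio, int last_prio,
				 bool have_preempt_ctx)
{
	return have_preempt_ctx && queue_prio > max(last_prio, 0);
}

/*
 * sketch_would_preempt(50, -100, true) -> true: inject the preempt context.
 * sketch_would_preempt(0, 0, true) -> false: default priority never bumps
 * an executing request, so we fall through and try to coalesce instead.
 */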
+ * + * If the second port is already active, we can wait + * until the next context-switch before contemplating + * new requests. The GPU will be busy and we should be + * able to resubmit the new ELSP before it idles, + * avoiding pipeline bubbles (momentary pauses where + * the driver is unable to keep up the supply of new + * work). However, we have to double check that the + * priorities of the ports haven't been switched. + */ + if (port_count(&port[1])) + goto unlock; + + /* + * WaIdleLiteRestore:bdw,skl + * Apply the wa NOOPs to prevent + * ring:HEAD == rq:TAIL as we resubmit the + * request. See gen8_emit_breadcrumb() for + * where we prepare the padding after the + * end of the request. + */ + last->tail = last->wa_tail; } - do { - struct i915_priolist *p = rb_entry(rb, typeof(*p), node); - struct drm_i915_gem_request *rq, *rn; + while (rb) { + struct i915_priolist *p = to_priolist(rb); + struct i915_request *rq, *rn; list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { /* @@ -668,8 +683,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } INIT_LIST_HEAD(&rq->priotree.link); - __i915_gem_request_submit(rq); - trace_i915_gem_request_in(rq, port_index(port, execlists)); + __i915_request_submit(rq); + trace_i915_request_in(rq, port_index(port, execlists)); last = rq; submit = true; } @@ -679,11 +694,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine) INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); - } while (rb); + } done: + execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN; execlists->first = rb; if (submit) port_assign(port, last); + + /* We must always keep the beast fed if we have work piled up */ + GEM_BUG_ON(execlists->first && !port_isset(execlists->port)); + unlock: spin_unlock_irq(&engine->timeline->lock); @@ -691,6 +711,9 @@ unlock: execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); execlists_submit_ports(engine); } + + GEM_BUG_ON(port_isset(execlists->port) && + !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); } void @@ -700,12 +723,17 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) unsigned int num_ports = execlists_num_ports(execlists); while (num_ports-- && port_isset(port)) { - struct drm_i915_gem_request *rq = port_request(port); + struct i915_request *rq = port_request(port); GEM_BUG_ON(!execlists->active); intel_engine_context_out(rq->engine); - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED); - i915_gem_request_put(rq); + + execlists_context_status_change(rq, + i915_request_completed(rq) ? + INTEL_CONTEXT_SCHEDULE_OUT : + INTEL_CONTEXT_SCHEDULE_PREEMPTED); + + i915_request_put(rq); memset(port, 0, sizeof(*port)); port++; @@ -715,34 +743,50 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) static void execlists_cancel_requests(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct drm_i915_gem_request *rq, *rn; + struct i915_request *rq, *rn; struct rb_node *rb; unsigned long flags; GEM_TRACE("%s\n", engine->name); - spin_lock_irqsave(&engine->timeline->lock, flags); + /* + * Before we call engine->cancel_requests(), we should have exclusive + * access to the submission state. This is arranged for us by the + * caller disabling the interrupt generation, the tasklet and other + * threads that may then access the same state, giving us a free hand + * to reset state. 
However, we still need to let lockdep be aware that + * we know this state may be accessed in hardirq context, so we + * disable the irq around this manipulation and we want to keep + * the spinlock focused on its duties and not accidentally conflate + * coverage to the submission's irq state. (Similarly, although we + * shouldn't need to disable irq around the manipulation of the + * submission's irq state, we also wish to remind ourselves that + * it is irq state.) + */ + local_irq_save(flags); /* Cancel the requests on the HW and clear the ELSP tracker. */ execlists_cancel_port_requests(execlists); + spin_lock(&engine->timeline->lock); + /* Mark all executing requests as skipped. */ list_for_each_entry(rq, &engine->timeline->requests, link) { GEM_BUG_ON(!rq->global_seqno); - if (!i915_gem_request_completed(rq)) + if (!i915_request_completed(rq)) dma_fence_set_error(&rq->fence, -EIO); } /* Flush the queued requests to the timeline list (for retiring). */ rb = execlists->first; while (rb) { - struct i915_priolist *p = rb_entry(rb, typeof(*p), node); + struct i915_priolist *p = to_priolist(rb); list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { INIT_LIST_HEAD(&rq->priotree.link); dma_fence_set_error(&rq->fence, -EIO); - __i915_gem_request_submit(rq); + __i915_request_submit(rq); } rb = rb_next(rb); @@ -754,11 +798,13 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Remaining _unready_ requests will be nop'ed when submitted */ - + execlists->queue_priority = INT_MIN; execlists->queue = RB_ROOT; execlists->first = NULL; GEM_BUG_ON(port_isset(execlists->port)); + spin_unlock(&engine->timeline->lock); + /* * The port is checked prior to scheduling a tasklet, but * just in case we have suspended the tasklet to do the @@ -770,7 +816,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Mark all CS interrupts as complete */ execlists->active = 0; - spin_unlock_irqrestore(&engine->timeline->lock, flags); + local_irq_restore(flags); } /* @@ -783,8 +829,10 @@ static void execlists_submission_tasklet(unsigned long data) struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port * const port = execlists->port; struct drm_i915_private *dev_priv = engine->i915; + bool fw = false; - /* We can skip acquiring intel_runtime_pm_get() here as it was taken + /* + * We can skip acquiring intel_runtime_pm_get() here as it was taken * on our behalf by the request (see i915_gem_mark_busy()) and it will * not be relinquished until the device is idle (see * i915_gem_idle_work_handler()). As a precaution, we make sure @@ -793,9 +841,8 @@ static void execlists_submission_tasklet(unsigned long data) */ GEM_BUG_ON(!dev_priv->gt.awake); - intel_uncore_forcewake_get(dev_priv, execlists->fw_domains); - - /* Prefer doing test_and_clear_bit() as a two stage operation to avoid + /* + * Prefer doing test_and_clear_bit() as a two stage operation to avoid * imposing the cost of a locked atomic transaction when submitting a * new request (outside of the context-switch interrupt). */ @@ -811,18 +858,17 @@ static void execlists_submission_tasklet(unsigned long data) execlists->csb_head = -1; /* force mmio read of CSB ptrs */ } - /* The write will be ordered by the uncached read (itself - * a memory barrier), so we do not need another in the form - * of a locked instruction. The race between the interrupt - * handler and the split test/clear is harmless as we order - * our clear before the CSB read. 
If the interrupt arrived - * first between the test and the clear, we read the updated - * CSB and clear the bit. If the interrupt arrives as we read - * the CSB or later (i.e. after we had cleared the bit) the bit - * is set and we do a new loop. - */ - __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + /* Clear before reading to catch new interrupts */ + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + smp_mb__after_atomic(); + if (unlikely(execlists->csb_head == -1)) { /* following a reset */ + if (!fw) { + intel_uncore_forcewake_get(dev_priv, + execlists->fw_domains); + fw = true; + } + head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); tail = GEN8_CSB_WRITE_PTR(head); head = GEN8_CSB_READ_PTR(head); @@ -835,13 +881,13 @@ static void execlists_submission_tasklet(unsigned long data) head = execlists->csb_head; tail = READ_ONCE(buf[write_idx]); } - GEM_TRACE("%s cs-irq head=%d [%d], tail=%d [%d]\n", + GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n", engine->name, - head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), - tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))))); + head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?", + tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?"); while (head != tail) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; unsigned int status; unsigned int count; @@ -886,7 +932,7 @@ static void execlists_submission_tasklet(unsigned long data) GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE); if (status & GEN8_CTX_STATUS_COMPLETE && - buf[2*head + 1] == PREEMPT_ID) { + buf[2*head + 1] == execlists->preempt_complete_status) { GEM_TRACE("%s preempt-idle\n", engine->name); execlists_cancel_port_requests(execlists); @@ -907,23 +953,28 @@ static void execlists_submission_tasklet(unsigned long data) GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); - /* Check the context/desc id for this event matches */ - GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); - rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n", + GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n", engine->name, port->context_id, count, - rq ? rq->global_seqno : 0); + rq ? rq->global_seqno : 0, + rq ? 
rq_prio(rq) : 0); + + /* Check the context/desc id for this event matches */ + GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); + GEM_BUG_ON(count == 0); if (--count == 0) { GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); GEM_BUG_ON(port_isset(&port[1]) && !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); - GEM_BUG_ON(!i915_gem_request_completed(rq)); + GEM_BUG_ON(!i915_request_completed(rq)); execlists_context_schedule_out(rq); - trace_i915_gem_request_out(rq); - i915_gem_request_put(rq); + trace_i915_request_out(rq); + i915_request_put(rq); + + GEM_TRACE("%s completed ctx=%d\n", + engine->name, port->context_id); execlists_port_complete(execlists, port); } else { @@ -948,21 +999,26 @@ static void execlists_submission_tasklet(unsigned long data) if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)) execlists_dequeue(engine); - intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); + if (fw) + intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); } -static void insert_request(struct intel_engine_cs *engine, - struct i915_priotree *pt, - int prio) +static void queue_request(struct intel_engine_cs *engine, + struct i915_priotree *pt, + int prio) { - struct i915_priolist *p = lookup_priolist(engine, pt, prio); + list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests); +} - list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests); - if (ptr_unmask_bits(p, 1)) +static void submit_queue(struct intel_engine_cs *engine, int prio) +{ + if (prio > engine->execlists.queue_priority) { + engine->execlists.queue_priority = prio; tasklet_hi_schedule(&engine->execlists.tasklet); + } } -static void execlists_submit_request(struct drm_i915_gem_request *request) +static void execlists_submit_request(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; unsigned long flags; @@ -970,7 +1026,8 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) /* Will be called from irq-context when using foreign fences. 
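One design note worth drawing out of the two helpers above: queue_priority, refreshed at the end of execlists_dequeue(), caches the best priority still waiting (INT_MIN once the queue drains), so submit_queue() can skip scheduling the tasklet unless the new request actually beats everything already queued. A sketch of that gate, restating the patch's logic with illustrative cases:

/* Restatement of the submit_queue() gate above, for illustration only. */
static void sketch_submit_queue(struct intel_engine_cs *engine, int prio)
{
	/*
	 * queue_priority == INT_MIN: the queue was empty, any request kicks.
	 * Otherwise only a strictly higher priority can change what the
	 * tasklet would dequeue next, so lesser requests just wait for the
	 * next dequeue pass.
	 */
	if (prio > engine->execlists.queue_priority) {
		engine->execlists.queue_priority = prio;
		tasklet_hi_schedule(&engine->execlists.tasklet);
	}
}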
*/ spin_lock_irqsave(&engine->timeline->lock, flags); - insert_request(engine, &request->priotree, request->priotree.priority); + queue_request(engine, &request->priotree, rq_prio(request)); + submit_queue(engine, rq_prio(request)); GEM_BUG_ON(!engine->execlists.first); GEM_BUG_ON(list_empty(&request->priotree.link)); @@ -978,9 +1035,9 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) spin_unlock_irqrestore(&engine->timeline->lock, flags); } -static struct drm_i915_gem_request *pt_to_request(struct i915_priotree *pt) +static struct i915_request *pt_to_request(struct i915_priotree *pt) { - return container_of(pt, struct drm_i915_gem_request, priotree); + return container_of(pt, struct i915_request, priotree); } static struct intel_engine_cs * @@ -998,7 +1055,7 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) return engine; } -static void execlists_schedule(struct drm_i915_gem_request *request, int prio) +static void execlists_schedule(struct i915_request *request, int prio) { struct intel_engine_cs *engine; struct i915_dependency *dep, *p; @@ -1007,7 +1064,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) GEM_BUG_ON(prio == I915_PRIORITY_INVALID); - if (i915_gem_request_completed(request)) + if (i915_request_completed(request)) return; if (prio <= READ_ONCE(request->priotree.priority)) @@ -1019,13 +1076,14 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) stack.signaler = &request->priotree; list_add(&stack.dfs_link, &dfs); - /* Recursively bump all dependent priorities to match the new request. + /* + * Recursively bump all dependent priorities to match the new request. * * A naive approach would be to use recursion: * static void update_priorities(struct i915_priotree *pt, prio) { * list_for_each_entry(dep, &pt->signalers_list, signal_link) * update_priorities(dep->signal, prio) - * insert_request(pt); + * queue_request(pt); * } * but that may have unlimited recursion depth and so runs a very * real risk of overrunning the kernel stack. Instead, we build @@ -1036,27 +1094,29 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) * end result is a topological list of requests in reverse order, the * last element in the list is the request we must execute first. */ - list_for_each_entry_safe(dep, p, &dfs, dfs_link) { + list_for_each_entry(dep, &dfs, dfs_link) { struct i915_priotree *pt = dep->signaler; - /* Within an engine, there can be no cycle, but we may + /* + * Within an engine, there can be no cycle, but we may * refer to the same dependency chain multiple times * (redundant dependencies are not eliminated) and across * engines. */ list_for_each_entry(p, &pt->signalers_list, signal_link) { - if (i915_gem_request_completed(pt_to_request(p->signaler))) + GEM_BUG_ON(p == dep); /* no cycles! */ + + if (i915_priotree_signaled(p->signaler)) continue; GEM_BUG_ON(p->signaler->priority < pt->priority); if (prio > READ_ONCE(p->signaler->priority)) list_move_tail(&p->dfs_link, &dfs); } - - list_safe_reset_next(dep, p, dfs_link); } - /* If we didn't need to bump any existing priorities, and we haven't + /* + * If we didn't need to bump any existing priorities, and we haven't * yet submitted this request (i.e. there is no potential race with * execlists_submit_request()), we can set our own priority and skip * acquiring the engine locks. 
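Stripped of the driver specifics, the bounded-stack traversal described in the comment above is a classic worklist rewrite of recursion: append unvisited children to the tail of the list while walking it, and the list itself ends up recording the order. A generic sketch under those assumptions (types hypothetical; each node's dfs_link is assumed INIT_LIST_HEAD'd while unvisited so membership can be tested with list_empty()):

#include <linux/list.h>

struct sketch_node {
	struct list_head dfs_link;	/* empty until the node is queued */
	struct list_head children;	/* list of struct sketch_edge */
};

struct sketch_edge {
	struct list_head link;		/* in parent->children */
	struct sketch_node *child;
};

/* Worklist DFS: the list grows at the tail as we walk it, no recursion. */
static void sketch_walk(struct sketch_node *root,
			void (*visit)(struct sketch_node *))
{
	struct sketch_node *node;
	LIST_HEAD(dfs);

	list_add(&root->dfs_link, &dfs);
	list_for_each_entry(node, &dfs, dfs_link) {
		struct sketch_edge *e;

		list_for_each_entry(e, &node->children, link)
			if (list_empty(&e->child->dfs_link))
				list_add_tail(&e->child->dfs_link, &dfs);
	}

	/* walked in reverse, the list gives a topological order */
	list_for_each_entry_reverse(node, &dfs, dfs_link)
		visit(node);
}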
@@ -1086,8 +1146,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) pt->priority = prio; if (!list_empty(&pt->link)) { __list_del_entry(&pt->link); - insert_request(engine, pt, prio); + queue_request(engine, pt, prio); } + submit_queue(engine, prio); } spin_unlock_irq(&engine->timeline->lock); @@ -1130,11 +1191,9 @@ execlists_context_pin(struct intel_engine_cs *engine, goto out; GEM_BUG_ON(!ce->pin_count); /* no overflow please! */ - if (!ce->state) { - ret = execlists_context_deferred_alloc(ctx, engine); - if (ret) - goto err; - } + ret = execlists_context_deferred_alloc(ctx, engine); + if (ret) + goto err; GEM_BUG_ON(!ce->state); ret = __context_pin(ctx, ce->state); @@ -1191,7 +1250,7 @@ static void execlists_context_unpin(struct intel_engine_cs *engine, i915_gem_context_put(ctx); } -static int execlists_request_alloc(struct drm_i915_gem_request *request) +static int execlists_request_alloc(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; struct intel_context *ce = &request->ctx->engine[engine->id]; @@ -1368,6 +1427,40 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) return batch; } +static u32 * +gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) +{ + int i; + + /* + * WaPipeControlBefore3DStateSamplePattern: cnl + * + * Ensure the engine is idle prior to programming a + * 3DSTATE_SAMPLE_PATTERN during a context restore. + */ + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_CS_STALL, + 0); + /* + * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for + * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in + * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is + * confusing. Since gen8_emit_pipe_control() already advances the + * batch by 6 dwords, we advance the other 10 here, completing a + * cacheline. It's not clear if the workaround requires this padding + * before other commands, or if it's just the regular padding we would + * already have for the workaround bb, so leave it here for now. 
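The dword accounting in the comment above, spelled out as a worked check (assuming CACHELINE_BYTES is 64 on these parts): gen8_emit_pipe_control() advances the batch by 6 dwords, the workaround text counts the PIPE_CONTROL as 4 dwords plus 12 dwords of zeros for a 16-dword total, and 6 plus the 10 MI_NOOPs reaches the same 16 dwords, exactly one cacheline.

/* Illustrative compile-time check of the padding arithmetic above. */
static void sketch_gen10_wa_bb_size_check(void)
{
	/* 6 (PIPE_CONTROL as emitted) + 10 (MI_NOOP) = 16 dwords = 64 bytes */
	BUILD_BUG_ON((6 + 10) * sizeof(u32) != 64);
}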
+ */ + for (i = 0; i < 10; i++) + *batch++ = MI_NOOP; + + /* Pad to end of cacheline */ + while ((unsigned long)batch % CACHELINE_BYTES) + *batch++ = MI_NOOP; + + return batch; +} + #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE) static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) @@ -1416,12 +1509,14 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) unsigned int i; int ret; - if (WARN_ON(engine->id != RCS || !engine->scratch)) + if (GEM_WARN_ON(engine->id != RCS)) return -EINVAL; switch (INTEL_GEN(engine->i915)) { case 10: - return 0; + wa_bb_fn[0] = gen10_init_indirectctx_bb; + wa_bb_fn[1] = NULL; + break; case 9: wa_bb_fn[0] = gen9_init_indirectctx_bb; wa_bb_fn[1] = NULL; @@ -1451,7 +1546,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) */ for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) { wa_bb[i]->offset = batch_ptr - batch; - if (WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, CACHELINE_BYTES))) { + if (GEM_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, + CACHELINE_BYTES))) { ret = -EINVAL; break; } @@ -1477,47 +1573,48 @@ static u8 gtiir[] = { [VECS] = 3, }; -static int gen8_init_common_ring(struct intel_engine_cs *engine) +static void enable_execlists(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - struct intel_engine_execlists * const execlists = &engine->execlists; - int ret; - ret = intel_mocs_init_engine(engine); - if (ret) - return ret; + I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff); - intel_engine_reset_breadcrumbs(engine); - intel_engine_init_hangcheck(engine); + /* + * Make sure we're not enabling the new 12-deep CSB + * FIFO as that requires a slightly updated handling + * in the ctx switch irq. Since we're currently + * using only 2 elements of the enhanced execlists, the + * deeper FIFO is not needed and it's not worth adding + * more statements to the irq handler to support it. + */ + if (INTEL_GEN(dev_priv) >= 11) + I915_WRITE(RING_MODE_GEN7(engine), + _MASKED_BIT_DISABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); + else + I915_WRITE(RING_MODE_GEN7(engine), + _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); - I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff); - I915_WRITE(RING_MODE_GEN7(engine), - _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); I915_WRITE(RING_HWS_PGA(engine->mmio_base), engine->status_page.ggtt_offset); POSTING_READ(RING_HWS_PGA(engine->mmio_base)); - DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name); + /* Following the reset, we need to reload the CSB read/write pointers */ + engine->execlists.csb_head = -1; +} - GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir)); +static int gen8_init_common_ring(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + int ret; - /* - * Clear any pending interrupt state. - * - * We do it twice out of paranoia that some of the IIR are double - * buffered, and if we only reset it once there may still be - * an interrupt pending. 
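The _MASKED_BIT_ENABLE()/_MASKED_BIT_DISABLE() writes in enable_execlists() above target masked registers, where the upper 16 bits of the write select which of the lower 16 bits may change. The expansions below are my reading of the driver's macros, not part of this patch, but they show why a single write suffices with no read-modify-write cycle:

/* Sketch of the masked-register idiom (hypothetical macro names). */
#define SKETCH_MASKED_BIT_ENABLE(a)	(((a) << 16) | (a))
#define SKETCH_MASKED_BIT_DISABLE(a)	((a) << 16)

/*
 * Example: assuming GFX_RUN_LIST_ENABLE is bit 15, enabling it writes
 * 0x80008000 (mask bit 31 unlocks data bit 15) and disabling writes
 * 0x80000000; every unmasked bit is left untouched by the hardware.
 */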
- */ - I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), - GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); - I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), - GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - execlists->csb_head = -1; - execlists->active = 0; + ret = intel_mocs_init_engine(engine); + if (ret) + return ret; + + intel_engine_reset_breadcrumbs(engine); + intel_engine_init_hangcheck(engine); - execlists->elsp = - dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + enable_execlists(engine); /* After a GPU reset, we may have requests to replay */ if (execlists->first) @@ -1559,8 +1656,33 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) return init_workarounds_ring(engine); } +static void reset_irq(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int i; + + GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir)); + + /* + * Clear any pending interrupt state. + * + * We do it twice out of paranoia that some of the IIR are double + * buffered, and if we only reset it once there may still be + * an interrupt pending. + */ + for (i = 0; i < 2; i++) { + I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), + GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); + POSTING_READ(GEN8_GT_IIR(gtiir[engine->id])); + } + GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) & + (GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift)); + + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); +} + static void reset_common_ring(struct intel_engine_cs *engine, - struct drm_i915_gem_request *request) + struct i915_request *request) { struct intel_engine_execlists * const execlists = &engine->execlists; struct intel_context *ce; @@ -1568,7 +1690,11 @@ static void reset_common_ring(struct intel_engine_cs *engine, GEM_TRACE("%s seqno=%x\n", engine->name, request ? request->global_seqno : 0); - spin_lock_irqsave(&engine->timeline->lock, flags); + + /* See execlists_cancel_requests() for the irq/spinlock split. */ + local_irq_save(flags); + + reset_irq(engine); /* * Catch up with any missed context-switch interrupts. @@ -1582,11 +1708,17 @@ static void reset_common_ring(struct intel_engine_cs *engine, execlists_cancel_port_requests(execlists); /* Push back any incomplete requests for replay after the reset. */ + spin_lock(&engine->timeline->lock); __unwind_incomplete_requests(engine); + spin_unlock(&engine->timeline->lock); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + /* Mark all CS interrupts as complete */ + execlists->active = 0; - /* If the request was innocent, we leave the request in the ELSP + local_irq_restore(flags); + + /* + * If the request was innocent, we leave the request in the ELSP * and will try to replay it on restarting. The context image may * have been corrupted by the reset, in which case we may have * to service a new GPU hang, but more likely we can continue on @@ -1599,7 +1731,8 @@ static void reset_common_ring(struct intel_engine_cs *engine, if (!request || request->fence.error != -EIO) return; - /* We want a simple context + ring to execute the breadcrumb update. + /* + * We want a simple context + ring to execute the breadcrumb update. * We cannot rely on the context being intact across the GPU hang, * so clear it and rebuild just what we need for the breadcrumb. 
* All pending requests for this context will be zapped, and any @@ -1622,15 +1755,15 @@ static void reset_common_ring(struct intel_engine_cs *engine, unwind_wa_tail(request); } -static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) +static int intel_logical_ring_emit_pdps(struct i915_request *rq) { - struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; - struct intel_engine_cs *engine = req->engine; + struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt; + struct intel_engine_cs *engine = rq->engine; const int num_lri_cmds = GEN8_3LVL_PDPES * 2; u32 *cs; int i; - cs = intel_ring_begin(req, num_lri_cmds * 2 + 2); + cs = intel_ring_begin(rq, num_lri_cmds * 2 + 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1645,12 +1778,12 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) } *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } -static int gen8_emit_bb_start(struct drm_i915_gem_request *req, +static int gen8_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, const unsigned int flags) { @@ -1663,18 +1796,18 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, * it is unsafe in case of lite-restore (because the ctx is * not idle). PML4 is allocated during ppgtt init so this is * not needed in 48-bit.*/ - if (req->ctx->ppgtt && - (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings) && - !i915_vm_is_48bit(&req->ctx->ppgtt->base) && - !intel_vgpu_active(req->i915)) { - ret = intel_logical_ring_emit_pdps(req); + if (rq->ctx->ppgtt && + (intel_engine_flag(rq->engine) & rq->ctx->ppgtt->pd_dirty_rings) && + !i915_vm_is_48bit(&rq->ctx->ppgtt->base) && + !intel_vgpu_active(rq->i915)) { + ret = intel_logical_ring_emit_pdps(rq); if (ret) return ret; - req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine); + rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine); } - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1703,7 +1836,7 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -1722,7 +1855,7 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) I915_WRITE_IMR(engine, ~engine->irq_keep_mask); } -static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) +static int gen8_emit_flush(struct i915_request *request, u32 mode) { u32 cmd, *cs; @@ -1754,7 +1887,7 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) return 0; } -static int gen8_emit_flush_render(struct drm_i915_gem_request *request, +static int gen8_emit_flush_render(struct i915_request *request, u32 mode) { struct intel_engine_cs *engine = request->engine; @@ -1829,7 +1962,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, * used as a workaround for not being allowed to do lite * restore with HEAD==TAIL (WaIdleLiteRestore). */ -static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs) +static void gen8_emit_wa_tail(struct i915_request *request, u32 *cs) { /* Ensure there's always at least one preemption point per-request. 
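As a worked example of the ring-space sizing in intel_logical_ring_emit_pdps() above, assuming GEN8_3LVL_PDPES is 4 page-directory pointers: num_lri_cmds is then 8 register writes, and the num_lri_cmds * 2 + 2 = 18 dwords requested from intel_ring_begin() lay out as one LRI header, eight (register, value) pairs and a terminating NOOP:

/*
 * dword  0     MI_LOAD_REGISTER_IMM(8) | MI_LRI_FORCE_POSTED
 * dwords 1-2   PDP3_UDW register, upper_32_bits(pd_daddr[3])
 * dwords 3-4   PDP3_LDW register, lower_32_bits(pd_daddr[3])
 * ...          same (reg, value) pairs for PDP2, PDP1 and PDP0
 * dword  17    MI_NOOP
 */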
*/ *cs++ = MI_ARB_CHECK; @@ -1837,7 +1970,7 @@ static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs) request->wa_tail = intel_ring_offset(request, cs); } -static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) +static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) { /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); @@ -1853,8 +1986,7 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) } static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; -static void gen8_emit_breadcrumb_rcs(struct drm_i915_gem_request *request, - u32 *cs) +static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) { /* We're using qword write, seqno should be aligned to 8 bytes. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); @@ -1870,15 +2002,15 @@ static void gen8_emit_breadcrumb_rcs(struct drm_i915_gem_request *request, } static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS; -static int gen8_init_rcs_context(struct drm_i915_gem_request *req) +static int gen8_init_rcs_context(struct i915_request *rq) { int ret; - ret = intel_ring_workarounds_emit(req); + ret = intel_ring_workarounds_emit(rq); if (ret) return ret; - ret = intel_rcs_context_init_mocs(req); + ret = intel_rcs_context_init_mocs(rq); /* * Failing to program the MOCS is non-fatal. The system will not * run at peak performance. So generate an error and carry on. @@ -1886,7 +2018,7 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req) if (ret) DRM_ERROR("MOCS failed to program: expect performance issues.\n"); - return i915_gem_render_state_emit(req); + return i915_gem_render_state_emit(rq); } /** @@ -1917,6 +2049,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) intel_engine_cleanup_common(engine); lrc_destroy_wa_ctx(engine); + engine->i915 = NULL; dev_priv->engine[engine->id] = NULL; kfree(engine); @@ -1933,6 +2066,12 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine) engine->unpark = NULL; engine->flags |= I915_ENGINE_SUPPORTS_STATS; + + engine->i915->caps.scheduler = + I915_SCHEDULER_CAP_ENABLED | + I915_SCHEDULER_CAP_PRIORITY; + if (engine->i915->preempt_context) + engine->i915->caps.scheduler |= I915_SCHEDULER_CAP_PREEMPTION; } static void @@ -1953,8 +2092,17 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->set_default_submission = execlists_set_default_submission; - engine->irq_enable = gen8_logical_ring_enable_irq; - engine->irq_disable = gen8_logical_ring_disable_irq; + if (INTEL_GEN(engine->i915) < 11) { + engine->irq_enable = gen8_logical_ring_enable_irq; + engine->irq_disable = gen8_logical_ring_disable_irq; + } else { + /* + * TODO: On Gen11 interrupt masks need to be clear + * to allow C6 entry. Keep interrupts enabled at + * all times and take the hit of generating extra interrupts + * until a more refined solution exists. 
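The caps.scheduler bits published above are the userspace-visible contract; a sketch of how a client might probe them, assuming the I915_PARAM_HAS_SCHEDULER getparam and the I915_SCHEDULER_CAP_* flags from the uapi headers accompanying this series (names are my reading of the uapi, not quoted from this hunk):

#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Hypothetical userspace probe for engine preemption support. */
static int sketch_has_preemption(int drm_fd)
{
	int caps = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HAS_SCHEDULER,
		.value = &caps,
	};

	if (ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return 0;	/* old kernel: no scheduler caps at all */

	return caps & I915_SCHEDULER_CAP_PREEMPTION;
}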
+ */ + } engine->emit_bb_start = gen8_emit_bb_start; } @@ -2006,6 +2154,21 @@ static int logical_ring_init(struct intel_engine_cs *engine) if (ret) goto error; + if (HAS_LOGICAL_RING_ELSQ(engine->i915)) { + engine->execlists.submit_reg = engine->i915->regs + + i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(engine)); + engine->execlists.ctrl_reg = engine->i915->regs + + i915_mmio_reg_offset(RING_EXECLIST_CONTROL(engine)); + } else { + engine->execlists.submit_reg = engine->i915->regs + + i915_mmio_reg_offset(RING_ELSP(engine)); + } + + engine->execlists.preempt_complete_status = ~0u; + if (engine->i915->preempt_context) + engine->execlists.preempt_complete_status = + upper_32_bits(engine->i915->preempt_context->engine[engine->id].lrc_desc); + return 0; error: @@ -2085,7 +2248,7 @@ make_rpcs(struct drm_i915_private *dev_priv) if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) { rpcs |= GEN8_RPCS_SS_CNT_ENABLE; - rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) << + rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) << GEN8_RPCS_SS_CNT_SHIFT; rpcs |= GEN8_RPCS_ENABLE; } @@ -2109,6 +2272,10 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) default: MISSING_CASE(INTEL_GEN(engine->i915)); /* fall through */ + case 11: + indirect_ctx_offset = + GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + break; case 10: indirect_ctx_offset = GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; @@ -2147,6 +2314,8 @@ static void execlists_init_reg_state(u32 *regs, MI_LRI_FORCE_POSTED; CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine), + _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | + CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT) | _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | (HAS_RESOURCE_STREAMER(dev_priv) ? CTX_CTRL_RS_CTX_ENABLE : 0))); @@ -2266,6 +2435,10 @@ populate_lr_context(struct i915_gem_context *ctx, if (!engine->default_state) regs[CTX_CONTEXT_CONTROL + 1] |= _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + if (ctx == ctx->i915->preempt_context && INTEL_GEN(engine->i915) < 11) + regs[CTX_CONTEXT_CONTROL + 1] |= + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | + CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT); i915_gem_object_unpin_map(ctx_obj); @@ -2282,7 +2455,8 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_ring *ring; int ret; - WARN_ON(ce->state); + if (ce->state) + return 0; context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 6d4f9b995a11..59d7b86012e9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -37,10 +37,14 @@ #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH (1 << 3) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) #define CTX_CTRL_RS_CTX_ENABLE (1 << 1) +#define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2) #define RING_CONTEXT_STATUS_BUF_BASE(engine) _MMIO((engine)->mmio_base + 0x370) #define RING_CONTEXT_STATUS_BUF_LO(engine, i) _MMIO((engine)->mmio_base + 0x370 + (i) * 8) #define RING_CONTEXT_STATUS_BUF_HI(engine, i) _MMIO((engine)->mmio_base + 0x370 + (i) * 8 + 4) #define RING_CONTEXT_STATUS_PTR(engine) _MMIO((engine)->mmio_base + 0x3a0) +#define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base + 0x510) +#define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 0x550) +#define EL_CTRL_LOAD (1 << 0) /* The docs specify that the write pointer wraps around after 5h, "After status * is written out to the last available status QW at offset 
5h, this pointer diff --git a/drivers/gpu/drm/i915/intel_lrc_reg.h b/drivers/gpu/drm/i915/intel_lrc_reg.h new file mode 100644 index 000000000000..169a2239d6c7 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_lrc_reg.h @@ -0,0 +1,68 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#ifndef _INTEL_LRC_REG_H_ +#define _INTEL_LRC_REG_H_ + +#include <linux/types.h> + +/* GEN8+ Reg State Context */ +#define CTX_LRI_HEADER_0 0x01 +#define CTX_CONTEXT_CONTROL 0x02 +#define CTX_RING_HEAD 0x04 +#define CTX_RING_TAIL 0x06 +#define CTX_RING_BUFFER_START 0x08 +#define CTX_RING_BUFFER_CONTROL 0x0a +#define CTX_BB_HEAD_U 0x0c +#define CTX_BB_HEAD_L 0x0e +#define CTX_BB_STATE 0x10 +#define CTX_SECOND_BB_HEAD_U 0x12 +#define CTX_SECOND_BB_HEAD_L 0x14 +#define CTX_SECOND_BB_STATE 0x16 +#define CTX_BB_PER_CTX_PTR 0x18 +#define CTX_RCS_INDIRECT_CTX 0x1a +#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c +#define CTX_LRI_HEADER_1 0x21 +#define CTX_CTX_TIMESTAMP 0x22 +#define CTX_PDP3_UDW 0x24 +#define CTX_PDP3_LDW 0x26 +#define CTX_PDP2_UDW 0x28 +#define CTX_PDP2_LDW 0x2a +#define CTX_PDP1_UDW 0x2c +#define CTX_PDP1_LDW 0x2e +#define CTX_PDP0_UDW 0x30 +#define CTX_PDP0_LDW 0x32 +#define CTX_LRI_HEADER_2 0x41 +#define CTX_R_PWR_CLK_STATE 0x42 +#define CTX_GPGPU_CSR_BASE_ADDRESS 0x44 + +#define CTX_REG(reg_state, pos, reg, val) do { \ + u32 *reg_state__ = (reg_state); \ + const u32 pos__ = (pos); \ + (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \ + (reg_state__)[(pos__) + 1] = (val); \ +} while (0) + +#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \ + u32 *reg_state__ = (reg_state); \ + const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \ + (reg_state__)[CTX_PDP ## n ## _UDW + 1] = upper_32_bits(addr__); \ + (reg_state__)[CTX_PDP ## n ## _LDW + 1] = lower_32_bits(addr__); \ +} while (0) + +#define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \ + u32 *reg_state__ = (reg_state); \ + const u64 addr__ = px_dma(&ppgtt->pml4); \ + (reg_state__)[CTX_PDP0_UDW + 1] = upper_32_bits(addr__); \ + (reg_state__)[CTX_PDP0_LDW + 1] = lower_32_bits(addr__); \ +} while (0) + +#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17 +#define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26 +#define GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x19 +#define GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x1A + +#endif /* _INTEL_LRC_REG_H_ */ diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index dcbc786479f9..8ae8f42f430a 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -167,11 +167,10 @@ static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon) { struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); unsigned long start = jiffies; while (1) { - if (intel_digital_port_connected(dev_priv, dig_port)) { + if (intel_digital_port_connected(&dig_port->base)) { DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u ms\n", jiffies_to_msecs(jiffies - start)); return; diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index ef80499113ee..d35d2d50f595 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -189,7 +189,7 @@ static void intel_lvds_pps_get_hw_state(struct drm_i915_private *dev_priv, /* Convert from 100ms to 100us units */ pps->t4 = val * 1000; - if (INTEL_INFO(dev_priv)->gen <= 4 && + if 
(INTEL_GEN(dev_priv) <= 4 && pps->t1_t2 == 0 && pps->t5 == 0 && pps->t3 == 0 && pps->tx == 0) { DRM_DEBUG_KMS("Panel power timings uninitialized, " "setting defaults\n"); @@ -268,7 +268,9 @@ static void intel_pre_enable_lvds(struct intel_encoder *encoder, /* set the corresponding LVDS_BORDER bit */ temp &= ~LVDS_BORDER_ENABLE; temp |= pipe_config->gmch_pfit.lvds_border_bits; - /* Set the B0-B3 data pairs corresponding to whether we're going to + + /* + * Set the B0-B3 data pairs corresponding to whether we're going to * set the DPLLs for dual-channel mode or not. */ if (lvds_encoder->is_dual_link) @@ -276,7 +278,8 @@ static void intel_pre_enable_lvds(struct intel_encoder *encoder, else temp &= ~(LVDS_B0B3_POWER_UP | LVDS_CLKB_POWER_UP); - /* It would be nice to set 24 vs 18-bit mode (LVDS_A3_POWER_UP) + /* + * It would be nice to set 24 vs 18-bit mode (LVDS_A3_POWER_UP) * appropriately here, but we need to look more thoroughly into how * panels behave in the two modes. For now, let's just maintain the * value we got from the BIOS. @@ -284,12 +287,16 @@ static void intel_pre_enable_lvds(struct intel_encoder *encoder, temp &= ~LVDS_A3_POWER_MASK; temp |= lvds_encoder->a3_power; - /* Set the dithering flag on LVDS as needed, note that there is no + /* + * Set the dithering flag on LVDS as needed, note that there is no * special lvds dither control bit on pch-split platforms, dithering is - * only controlled through the PIPECONF reg. */ + * only controlled through the PIPECONF reg. + */ if (IS_GEN4(dev_priv)) { - /* Bspec wording suggests that LVDS port dithering only exists - * for 18bpp panels. */ + /* + * Bspec wording suggests that LVDS port dithering only exists + * for 18bpp panels. + */ if (pipe_config->dither && pipe_config->pipe_bpp == 18) temp |= LVDS_ENABLE_DITHER; else @@ -304,7 +311,7 @@ static void intel_pre_enable_lvds(struct intel_encoder *encoder, I915_WRITE(lvds_encoder->reg, temp); } -/** +/* * Sets the power state for the panel. */ static void intel_enable_lvds(struct intel_encoder *encoder, @@ -441,7 +448,7 @@ static bool intel_lvds_compute_config(struct intel_encoder *intel_encoder, return true; } -/** +/* * Detect the LVDS connection. * * Since LVDS doesn't have hotplug, we use the lid as a proxy. Open means @@ -464,7 +471,7 @@ intel_lvds_detect(struct drm_connector *connector, bool force) return connector_status_connected; } -/** +/* * Return the list of DDC modes if available, or the BIOS fixed mode otherwise. */ static int intel_lvds_get_modes(struct drm_connector *connector) @@ -893,7 +900,8 @@ static bool compute_is_dual_link_lvds(struct intel_lvds_encoder *lvds_encoder) if (dmi_check_system(intel_dual_link_lvds)) return true; - /* BIOS should set the proper LVDS register value at boot, but + /* + * BIOS should set the proper LVDS register value at boot, but * in reality, it doesn't set the value when the lid is closed; * we need to check "the value to be set" in VBT when LVDS * register is uninitialized. @@ -907,13 +915,17 @@ static bool compute_is_dual_link_lvds(struct intel_lvds_encoder *lvds_encoder) static bool intel_lvds_supported(struct drm_i915_private *dev_priv) { - /* With the introduction of the PCH we gained a dedicated - * LVDS presence pin, use it. */ + /* + * With the introduction of the PCH we gained a dedicated + * LVDS presence pin, use it. 
+ */ if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) return true; - /* Otherwise LVDS was only attached to mobile products, - * except for the inglorious 830gm */ + /* + * Otherwise LVDS was only attached to mobile products, + * except for the inglorious 830gm + */ if (INTEL_GEN(dev_priv) <= 4 && IS_MOBILE(dev_priv) && !IS_I830(dev_priv)) return true; @@ -923,7 +935,7 @@ static bool intel_lvds_supported(struct drm_i915_private *dev_priv) /** * intel_lvds_init - setup LVDS connectors on this device - * @dev: drm device + * @dev_priv: i915 device * * Create the connector, register the LVDS DDC bus, and try to figure out what * modes we can display on the LVDS panel (if present). diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index f4c46b0b8f0a..c0b34b7943b9 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -187,7 +187,7 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, table->table = broxton_mocs_table; result = true; } else { - WARN_ONCE(INTEL_INFO(dev_priv)->gen >= 9, + WARN_ONCE(INTEL_GEN(dev_priv) >= 9, "Platform that should have a MOCS table does not.\n"); } @@ -265,7 +265,7 @@ int intel_mocs_init_engine(struct intel_engine_cs *engine) /** * emit_mocs_control_table() - emit the mocs control table - * @req: Request to set up the MOCS table for. + * @rq: Request to set up the MOCS table for. * @table: The values to program into the control regs. * * This function simply emits a MI_LOAD_REGISTER_IMM command for the @@ -273,17 +273,17 @@ int intel_mocs_init_engine(struct intel_engine_cs *engine) * * Return: 0 on success, otherwise the error status. */ -static int emit_mocs_control_table(struct drm_i915_gem_request *req, +static int emit_mocs_control_table(struct i915_request *rq, const struct drm_i915_mocs_table *table) { - enum intel_engine_id engine = req->engine->id; + enum intel_engine_id engine = rq->engine->id; unsigned int index; u32 *cs; if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) return -ENODEV; - cs = intel_ring_begin(req, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); + cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -308,7 +308,7 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req, } *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -323,7 +323,7 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, /** * emit_mocs_l3cc_table() - emit the mocs control table - * @req: Request to set up the MOCS table for. + * @rq: Request to set up the MOCS table for. * @table: The values to program into the control regs. * * This function simply emits a MI_LOAD_REGISTER_IMM command for the @@ -332,7 +332,7 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, * * Return: 0 on success, otherwise the error status. 
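The MOCS programming above follows the same LRI shape as the PDP example earlier, which is where the 2 + 2 * GEN9_NUM_MOCS_ENTRIES ring allocation comes from: one header, one (register, value) pair for each of the N control registers, and a closing NOOP.

/*
 * dword 0          MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES)
 * dwords 1..2N     N (mocs control register, value) pairs
 * dword 2N + 1     MI_NOOP
 * total            2 + 2 * N dwords, N = GEN9_NUM_MOCS_ENTRIES
 */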
*/ -static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, +static int emit_mocs_l3cc_table(struct i915_request *rq, const struct drm_i915_mocs_table *table) { unsigned int i; @@ -341,7 +341,7 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) return -ENODEV; - cs = intel_ring_begin(req, 2 + GEN9_NUM_MOCS_ENTRIES); + cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -370,7 +370,7 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, } *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -417,7 +417,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) /** * intel_rcs_context_init_mocs() - program the MOCS register. - * @req: Request to set up the MOCS tables for. + * @rq: Request to set up the MOCS tables for. * * This function will emit a batch buffer with the values required for * programming the MOCS register values for all the currently supported @@ -431,19 +431,19 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) * * Return: 0 on success, otherwise the error status. */ -int intel_rcs_context_init_mocs(struct drm_i915_gem_request *req) +int intel_rcs_context_init_mocs(struct i915_request *rq) { struct drm_i915_mocs_table t; int ret; - if (get_mocs_settings(req->i915, &t)) { + if (get_mocs_settings(rq->i915, &t)) { /* Program the RCS control registers */ - ret = emit_mocs_control_table(req, &t); + ret = emit_mocs_control_table(rq, &t); if (ret) return ret; /* Now program the l3cc registers */ - ret = emit_mocs_l3cc_table(req, &t); + ret = emit_mocs_l3cc_table(rq, &t); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/intel_mocs.h b/drivers/gpu/drm/i915/intel_mocs.h index ce4a5dfa5f94..d1751f91c1a4 100644 --- a/drivers/gpu/drm/i915/intel_mocs.h +++ b/drivers/gpu/drm/i915/intel_mocs.h @@ -52,7 +52,7 @@ #include <drm/drmP.h> #include "i915_drv.h" -int intel_rcs_context_init_mocs(struct drm_i915_gem_request *req); +int intel_rcs_context_init_mocs(struct i915_request *rq); void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv); int intel_mocs_init_engine(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_modes.c b/drivers/gpu/drm/i915/intel_modes.c index 4e43f873c889..b39846613e3c 100644 --- a/drivers/gpu/drm/i915/intel_modes.c +++ b/drivers/gpu/drm/i915/intel_modes.c @@ -30,21 +30,6 @@ #include "intel_drv.h" #include "i915_drv.h" -static void intel_connector_update_eld_conn_type(struct drm_connector *connector) -{ - u8 conn_type; - - if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort || - connector->connector_type == DRM_MODE_CONNECTOR_eDP) { - conn_type = DRM_ELD_CONN_TYPE_DP; - } else { - conn_type = DRM_ELD_CONN_TYPE_HDMI; - } - - connector->eld[DRM_ELD_SAD_COUNT_CONN_TYPE] &= ~DRM_ELD_CONN_TYPE_MASK; - connector->eld[DRM_ELD_SAD_COUNT_CONN_TYPE] |= conn_type; -} - /** * intel_connector_update_modes - update connector from edid * @connector: DRM connector device to use @@ -58,8 +43,6 @@ int intel_connector_update_modes(struct drm_connector *connector, drm_mode_connector_update_edid_property(connector, edid); ret = drm_add_edid_modes(connector, edid); - intel_connector_update_eld_conn_type(connector); - return ret; } diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 41e9465d44a8..36671a937fa4 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ 
b/drivers/gpu/drm/i915/intel_overlay.c @@ -234,50 +234,50 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay, } static void intel_overlay_submit_request(struct intel_overlay *overlay, - struct drm_i915_gem_request *req, + struct i915_request *rq, i915_gem_retire_fn retire) { GEM_BUG_ON(i915_gem_active_peek(&overlay->last_flip, &overlay->i915->drm.struct_mutex)); i915_gem_active_set_retire_fn(&overlay->last_flip, retire, &overlay->i915->drm.struct_mutex); - i915_gem_active_set(&overlay->last_flip, req); - i915_add_request(req); + i915_gem_active_set(&overlay->last_flip, rq); + i915_request_add(rq); } static int intel_overlay_do_wait_request(struct intel_overlay *overlay, - struct drm_i915_gem_request *req, + struct i915_request *rq, i915_gem_retire_fn retire) { - intel_overlay_submit_request(overlay, req, retire); + intel_overlay_submit_request(overlay, rq, retire); return i915_gem_active_retire(&overlay->last_flip, &overlay->i915->drm.struct_mutex); } -static struct drm_i915_gem_request *alloc_request(struct intel_overlay *overlay) +static struct i915_request *alloc_request(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; struct intel_engine_cs *engine = dev_priv->engine[RCS]; - return i915_gem_request_alloc(engine, dev_priv->kernel_context); + return i915_request_alloc(engine, dev_priv->kernel_context); } /* overlay needs to be disable in OCMD reg */ static int intel_overlay_on(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; - struct drm_i915_gem_request *req; + struct i915_request *rq; u32 *cs; WARN_ON(overlay->active); - req = alloc_request(overlay); - if (IS_ERR(req)) - return PTR_ERR(req); + rq = alloc_request(overlay); + if (IS_ERR(rq)) + return PTR_ERR(rq); - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) { - i915_add_request(req); + i915_request_add(rq); return PTR_ERR(cs); } @@ -290,9 +290,9 @@ static int intel_overlay_on(struct intel_overlay *overlay) *cs++ = overlay->flip_addr | OFC_UPDATE; *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); - return intel_overlay_do_wait_request(overlay, req, NULL); + return intel_overlay_do_wait_request(overlay, rq, NULL); } static void intel_overlay_flip_prepare(struct intel_overlay *overlay, @@ -322,7 +322,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay, bool load_polyphase_filter) { struct drm_i915_private *dev_priv = overlay->i915; - struct drm_i915_gem_request *req; + struct i915_request *rq; u32 flip_addr = overlay->flip_addr; u32 tmp, *cs; @@ -336,23 +336,23 @@ static int intel_overlay_continue(struct intel_overlay *overlay, if (tmp & (1 << 17)) DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp); - req = alloc_request(overlay); - if (IS_ERR(req)) - return PTR_ERR(req); + rq = alloc_request(overlay); + if (IS_ERR(rq)) + return PTR_ERR(rq); - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) { - i915_add_request(req); + i915_request_add(rq); return PTR_ERR(cs); } *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE; *cs++ = flip_addr; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); intel_overlay_flip_prepare(overlay, vma); - intel_overlay_submit_request(overlay, req, NULL); + intel_overlay_submit_request(overlay, rq, NULL); return 0; } @@ -373,7 +373,7 @@ static void intel_overlay_release_old_vma(struct intel_overlay *overlay) } static void intel_overlay_release_old_vid_tail(struct 
i915_gem_active *active, - struct drm_i915_gem_request *req) + struct i915_request *rq) { struct intel_overlay *overlay = container_of(active, typeof(*overlay), last_flip); @@ -382,7 +382,7 @@ static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active, } static void intel_overlay_off_tail(struct i915_gem_active *active, - struct drm_i915_gem_request *req) + struct i915_request *rq) { struct intel_overlay *overlay = container_of(active, typeof(*overlay), last_flip); @@ -401,7 +401,7 @@ static void intel_overlay_off_tail(struct i915_gem_active *active, /* overlay needs to be disabled in OCMD reg */ static int intel_overlay_off(struct intel_overlay *overlay) { - struct drm_i915_gem_request *req; + struct i915_request *rq; u32 *cs, flip_addr = overlay->flip_addr; WARN_ON(!overlay->active); @@ -412,13 +412,13 @@ static int intel_overlay_off(struct intel_overlay *overlay) * of the hw. Do it in both cases */ flip_addr |= OFC_UPDATE; - req = alloc_request(overlay); - if (IS_ERR(req)) - return PTR_ERR(req); + rq = alloc_request(overlay); + if (IS_ERR(rq)) + return PTR_ERR(rq); - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) { - i915_add_request(req); + i915_request_add(rq); return PTR_ERR(cs); } @@ -432,11 +432,11 @@ static int intel_overlay_off(struct intel_overlay *overlay) *cs++ = flip_addr; *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); intel_overlay_flip_prepare(overlay, NULL); - return intel_overlay_do_wait_request(overlay, req, + return intel_overlay_do_wait_request(overlay, rq, intel_overlay_off_tail); } @@ -468,23 +468,23 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { /* synchronous slowpath */ - struct drm_i915_gem_request *req; + struct i915_request *rq; - req = alloc_request(overlay); - if (IS_ERR(req)) - return PTR_ERR(req); + rq = alloc_request(overlay); + if (IS_ERR(rq)) + return PTR_ERR(rq); - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) { - i915_add_request(req); + i915_request_add(rq); return PTR_ERR(cs); } *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); - ret = intel_overlay_do_wait_request(overlay, req, + ret = intel_overlay_do_wait_request(overlay, rq, intel_overlay_release_old_vid_tail); if (ret) return ret; @@ -801,7 +801,8 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, atomic_inc(&dev_priv->gpu_error.pending_fb_pin); - vma = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL); + vma = i915_gem_object_pin_to_display_plane(new_bo, + 0, NULL, PIN_MAPPABLE); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto out_pin_section; diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index fa6831f8c004..41d00b1603e3 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -397,8 +397,11 @@ intel_panel_detect(struct drm_i915_private *dev_priv) /** * scale - scale values from one range to another - * * @source_val: value in range [@source_min..@source_max] + * @source_min: minimum legal value for @source_val + * @source_max: maximum legal value for @source_val + * @target_min: corresponding target value for @source_min + * @target_max: corresponding target value for @source_max * * Return @source_val in range [@source_min..@source_max] scaled to range * 
[@target_min..@target_max]. @@ -416,8 +419,9 @@ static uint32_t scale(uint32_t source_val, source_val = clamp(source_val, source_min, source_max); /* avoid overflows */ - target_val = DIV_ROUND_CLOSEST_ULL((uint64_t)(source_val - source_min) * - (target_max - target_min), source_max - source_min); + target_val = mul_u32_u32(source_val - source_min, + target_max - target_min); + target_val = DIV_ROUND_CLOSEST_ULL(target_val, source_max - source_min); target_val += target_min; return target_val; @@ -497,7 +501,7 @@ static u32 i9xx_get_backlight(struct intel_connector *connector) u32 val; val = I915_READ(BLC_PWM_CTL) & BACKLIGHT_DUTY_CYCLE_MASK; - if (INTEL_INFO(dev_priv)->gen < 4) + if (INTEL_GEN(dev_priv) < 4) val >>= 1; if (panel->backlight.combination_mode) { @@ -1719,9 +1723,9 @@ cnp_setup_backlight(struct intel_connector *connector, enum pipe unused) u32 pwm_ctl, val; /* - * CNP has the BXT implementation of backlight, but with only - * one controller. Future platforms could have multiple controllers - * so let's make this extensible and prepared for the future. + * CNP has the BXT implementation of backlight, but with only one + * controller. TODO: ICP has multiple controllers but we only use + * controller 0 for now. */ panel->backlight.controller = 0; @@ -1865,7 +1869,7 @@ intel_panel_init_backlight_funcs(struct intel_panel *panel) panel->backlight.set = bxt_set_backlight; panel->backlight.get = bxt_get_backlight; panel->backlight.hz_to_pwm = bxt_hz_to_pwm; - } else if (HAS_PCH_CNP(dev_priv)) { + } else if (HAS_PCH_CNP(dev_priv) || HAS_PCH_ICP(dev_priv)) { panel->backlight.setup = cnp_setup_backlight; panel->backlight.enable = cnp_enable_backlight; panel->backlight.disable = cnp_disable_backlight; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 1a6e699e19e0..b8da4dcdd584 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -729,6 +729,7 @@ static unsigned int intel_wm_method2(unsigned int pixel_rate, * intel_calculate_wm - calculate watermark level * @pixel_rate: pixel clock * @wm: chip FIFO params + * @fifo_size: size of the FIFO buffer * @cpp: bytes per pixel * @latency_ns: memory latency for the platform * @@ -2916,10 +2917,6 @@ static void intel_fixup_cur_wm_latency(struct drm_i915_private *dev_priv, /* ILK cursor LP0 latency is 1300 ns */ if (IS_GEN5(dev_priv)) wm[0] = 13; - - /* WaDoubleCursorLP3Latency:ivb */ - if (IS_IVYBRIDGE(dev_priv)) - wm[3] *= 2; } int ilk_wm_max_level(const struct drm_i915_private *dev_priv) @@ -3694,11 +3691,18 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) struct intel_crtc_state *cstate; enum pipe pipe; int level, latency; - int sagv_block_time_us = IS_GEN9(dev_priv) ? 
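The scale() hunk above exists because (source_val - source_min) * (target_max - target_min) can overflow 32 bits, so the multiply is widened to 64 bits before the rounding division. A hedged userspace equivalent (function name invented; assumes source_max > source_min, as the clamp implies):

#include <stdint.h>
#include <stdio.h>

static uint32_t scale_range(uint32_t val, uint32_t smin, uint32_t smax,
                            uint32_t tmin, uint32_t tmax)
{
    uint64_t num;

    if (val < smin)
        val = smin;
    if (val > smax)
        val = smax;

    num = (uint64_t)(val - smin) * (tmax - tmin); /* mul_u32_u32() analogue */
    /* DIV_ROUND_CLOSEST_ULL() analogue */
    return tmin + (uint32_t)((num + (smax - smin) / 2) / (smax - smin));
}

int main(void)
{
    /* e.g. user brightness 128/255 onto a 0..937500 PWM duty range */
    printf("%u\n", scale_range(128, 0, 255, 0, 937500)); /* 470588 */
    return 0;
}

For this input the old and new forms agree; the difference only shows once the 32-bit product would wrap.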
30 : 20; + int sagv_block_time_us; if (!intel_has_sagv(dev_priv)) return false; + if (IS_GEN9(dev_priv)) + sagv_block_time_us = 30; + else if (IS_GEN10(dev_priv)) + sagv_block_time_us = 20; + else + sagv_block_time_us = 10; + /* * SKL+ workaround: bspec recommends we disable the SAGV when we have * more then one pipe enabled @@ -3778,7 +3782,8 @@ skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, ddb_size = INTEL_INFO(dev_priv)->ddb_size; WARN_ON(ddb_size == 0); - ddb_size -= 4; /* 4 blocks for bypass path allocation */ + if (INTEL_GEN(dev_priv) < 11) + ddb_size -= 4; /* 4 blocks for bypass path allocation */ /* * If the state doesn't change the active CRTC's, then there's @@ -4311,7 +4316,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, */ static uint_fixed_16_16_t skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate, - uint8_t cpp, uint32_t latency) + uint8_t cpp, uint32_t latency, uint32_t dbuf_block_size) { uint32_t wm_intermediate_val; uint_fixed_16_16_t ret; @@ -4320,7 +4325,7 @@ skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate, return FP_16_16_MAX; wm_intermediate_val = latency * pixel_rate * cpp; - ret = div_fixed16(wm_intermediate_val, 1000 * 512); + ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size); if (INTEL_GEN(dev_priv) >= 10) ret = add_fixed16_u32(ret, 1); @@ -4430,6 +4435,12 @@ skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv, wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate); + if (INTEL_GEN(dev_priv) >= 11 && + fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 8) + wp->dbuf_block_size = 256; + else + wp->dbuf_block_size = 512; + if (drm_rotation_90_or_270(pstate->rotation)) { switch (wp->cpp) { @@ -4456,7 +4467,8 @@ skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv, wp->plane_bytes_per_line = wp->width * wp->cpp; if (wp->y_tiled) { interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line * - wp->y_min_scanlines, 512); + wp->y_min_scanlines, + wp->dbuf_block_size); if (INTEL_GEN(dev_priv) >= 10) interm_pbpl++; @@ -4464,10 +4476,12 @@ skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv, wp->plane_blocks_per_line = div_fixed16(interm_pbpl, wp->y_min_scanlines); } else if (wp->x_tiled && IS_GEN9(dev_priv)) { - interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512); + interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, + wp->dbuf_block_size); wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl); } else { - interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512) + 1; + interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, + wp->dbuf_block_size) + 1; wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl); } @@ -4497,6 +4511,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, struct intel_atomic_state *state = to_intel_atomic_state(cstate->base.state); bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); + uint32_t min_disp_buf_needed; if (latency == 0 || !intel_wm_plane_visible(cstate, intel_pstate)) { @@ -4514,7 +4529,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, latency += 15; method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate, - wp->cpp, latency); + wp->cpp, latency, wp->dbuf_block_size); method2 = skl_wm_method2(wp->plane_pixel_rate, cstate->base.adjusted_mode.crtc_htotal, latency, @@ -4524,7 +4539,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, selected_result = max_fixed16(method2, wp->y_tile_minimum); } else { 
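The SAGV hunk above replaces the two-way GEN9 ternary with explicit per-generation block times. A trivial sketch of the resulting selection (illustrative only; the 10us default for newer parts is taken straight from the hunk):

#include <stdio.h>

static int sagv_block_time_us(int gen)
{
    if (gen == 9)
        return 30;
    if (gen == 10)
        return 20;
    return 10; /* gen11+ per the patch */
}

int main(void)
{
    for (int gen = 9; gen <= 11; gen++)
        printf("gen%d: %dus\n", gen, sagv_block_time_us(gen));
    return 0;
}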
if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal / - 512 < 1) && (wp->plane_bytes_per_line / 512 < 1)) + wp->dbuf_block_size < 1) && + (wp->plane_bytes_per_line / wp->dbuf_block_size < 1)) selected_result = method2; else if (ddb_allocation >= fixed16_to_u32_round_up(wp->plane_blocks_per_line)) selected_result = method1; @@ -4554,7 +4570,32 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, } } - if (res_blocks >= ddb_allocation || res_lines > 31) { + if (INTEL_GEN(dev_priv) >= 11) { + if (wp->y_tiled) { + uint32_t extra_lines; + uint_fixed_16_16_t fp_min_disp_buf_needed; + + if (res_lines % wp->y_min_scanlines == 0) + extra_lines = wp->y_min_scanlines; + else + extra_lines = wp->y_min_scanlines * 2 - + res_lines % wp->y_min_scanlines; + + fp_min_disp_buf_needed = mul_u32_fixed16(res_lines + + extra_lines, + wp->plane_blocks_per_line); + min_disp_buf_needed = fixed16_to_u32_round_up( + fp_min_disp_buf_needed); + } else { + min_disp_buf_needed = DIV_ROUND_UP(res_blocks * 11, 10); + } + } else { + min_disp_buf_needed = res_blocks; + } + + if ((level > 0 && res_lines > 31) || + res_blocks >= ddb_allocation || + min_disp_buf_needed >= ddb_allocation) { *enabled = false; /* @@ -4574,8 +4615,9 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, } } + /* The number of lines is ignored for the level 0 watermark. */ + *out_lines = level ? res_lines : 0; *out_blocks = res_blocks; - *out_lines = res_lines; *enabled = true; return 0; @@ -4667,6 +4709,7 @@ static void skl_compute_transition_wm(struct intel_crtc_state *cstate, if (!dev_priv->ipc_enabled) goto exit; + trans_min = 0; if (INTEL_GEN(dev_priv) >= 10) trans_min = 4; @@ -4790,8 +4833,10 @@ static void skl_write_plane_wm(struct intel_crtc *intel_crtc, skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id), &ddb->plane[pipe][plane_id]); - skl_ddb_entry_write(dev_priv, PLANE_NV12_BUF_CFG(pipe, plane_id), - &ddb->y_plane[pipe][plane_id]); + if (INTEL_GEN(dev_priv) < 11) + skl_ddb_entry_write(dev_priv, + PLANE_NV12_BUF_CFG(pipe, plane_id), + &ddb->y_plane[pipe][plane_id]); } static void skl_write_cursor_wm(struct intel_crtc *intel_crtc, @@ -5819,6 +5864,7 @@ void ilk_wm_get_hw_state(struct drm_device *dev) /** * intel_update_watermarks - update FIFO watermark values based on current modes + * @crtc: the #intel_crtc on which to compute the WM * * Calculate watermark values for the various WM regs based on current mode * and plane configuration. @@ -6314,7 +6360,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) mutex_unlock(&dev_priv->pcu_lock); } -void gen6_rps_boost(struct drm_i915_gem_request *rq, +void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps_client) { struct intel_rps *rps = &rq->i915->gt_pm.rps; @@ -6327,12 +6373,15 @@ void gen6_rps_boost(struct drm_i915_gem_request *rq, if (!rps->enabled) return; + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) + return; + + /* Serializes with i915_request_retire() */ boost = false; spin_lock_irqsave(&rq->lock, flags); - if (!rq->waitboost && !i915_gem_request_completed(rq)) { - atomic_inc(&rps->num_waiters); + if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) { + boost = !atomic_fetch_inc(&rps->num_waiters); rq->waitboost = true; - boost = true; } spin_unlock_irqrestore(&rq->lock, flags); if (!boost) @@ -6626,9 +6675,29 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RC_SLEEP, 0); - /* 2c: Program Coarse Power Gating Policies. 
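The gen6_rps_boost() change above is subtle: atomic_fetch_inc() returns the old counter value, so only the 0 -> 1 transition of num_waiters now triggers the actual boost, instead of every not-yet-boosted request doing so. A small C11 sketch of that first-waiter test (names invented):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int num_waiters;

/* Returns true only for the waiter that takes the count from 0 to 1. */
static bool add_waiter(void)
{
    return atomic_fetch_add(&num_waiters, 1) == 0;
}

int main(void)
{
    printf("first: %d\n", add_waiter());  /* 1: request the boost */
    printf("second: %d\n", add_waiter()); /* 0: already boosted */
    return 0;
}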
*/ - I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 25); - I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25); + /* + * 2c: Program Coarse Power Gating Policies. + * + * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we + * use instead is a more conservative estimate for the maximum time + * it takes us to service a CS interrupt and submit a new ELSP - that + * is the time which the GPU is idle waiting for the CPU to select the + * next request to execute. If the idle hysteresis is less than that + * interrupt service latency, the hardware will automatically gate + * the power well and we will then incur the wake up cost on top of + * the service latency. A similar guide from intel_pstate is that we + * do not want the enable hysteresis to be less than the wakeup latency. + * + * igt/gem_exec_nop/sequential provides a rough estimate for the + * service latency, and puts it around 10us for Broadwell (and other + * big core) and around 40us for Broxton (and other low power cores). + * [Note that for legacy ringbuffer submission, this is less than 1us!] + * However, the wakeup latency on Broxton is closer to 100us. To be + * conservative, we have to factor in a context switch on top (due + * to ksoftirqd). + */ + I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); + I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); /* 3a: Enable RC6 */ I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ @@ -6646,7 +6715,7 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) /* * 3b: Enable Coarse Power Gating only when RC6 is enabled. - * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6. + * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6. */ if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) I915_WRITE(GEN9_PG_ENABLE, 0); @@ -6873,7 +6942,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) * No floor required for ring frequency on SKL. */ ring_freq = gpu_freq; - } else if (INTEL_INFO(dev_priv)->gen >= 8) { + } else if (INTEL_GEN(dev_priv) >= 8) { /* max(2 * GT, DDR). 
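For the hysteresis hunk above, the register counts in roughly 1280ns units per the comment, so the old value of 25 is about 32us, well below the ~40-100us service and wakeup latencies the comment cites, while the new 250 is about 320us. The arithmetic, as a quick sanity check:

#include <stdio.h>

int main(void)
{
    const unsigned int unit_ns = 1280; /* per the comment in the hunk above */
    const unsigned int vals[] = { 25, 250 };

    for (int i = 0; i < 2; i++)
        printf("%u units = %u us\n", vals[i], vals[i] * unit_ns / 1000);
    return 0; /* prints 32 us and 320 us */
}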
NB: GT is 50MHz units */ ring_freq = max(min_ring_freq, gpu_freq); } else if (IS_HASWELL(dev_priv)) { @@ -7484,7 +7553,7 @@ unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) { unsigned long val; - if (INTEL_INFO(dev_priv)->gen != 5) + if (!IS_GEN5(dev_priv)) return 0; spin_lock_irq(&mchdev_lock); @@ -7568,7 +7637,7 @@ static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) void i915_update_gfx_val(struct drm_i915_private *dev_priv) { - if (INTEL_INFO(dev_priv)->gen != 5) + if (!IS_GEN5(dev_priv)) return; spin_lock_irq(&mchdev_lock); @@ -7619,7 +7688,7 @@ unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) { unsigned long val; - if (INTEL_INFO(dev_priv)->gen != 5) + if (!IS_GEN5(dev_priv)) return 0; spin_lock_irq(&mchdev_lock); @@ -7957,7 +8026,10 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */ intel_disable_gt_powersave(dev_priv); - gen6_reset_rps_interrupts(dev_priv); + if (INTEL_GEN(dev_priv) < 11) + gen6_reset_rps_interrupts(dev_priv); + else + WARN_ON_ONCE(1); } static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) @@ -8070,6 +8142,8 @@ static void intel_enable_rps(struct drm_i915_private *dev_priv) cherryview_enable_rps(dev_priv); } else if (IS_VALLEYVIEW(dev_priv)) { valleyview_enable_rps(dev_priv); + } else if (WARN_ON_ONCE(INTEL_GEN(dev_priv) >= 11)) { + /* TODO */ } else if (INTEL_GEN(dev_priv) >= 9) { gen9_enable_rps(dev_priv); } else if (IS_BROADWELL(dev_priv)) { @@ -8418,7 +8492,7 @@ static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) if (!HAS_PCH_CNP(dev_priv)) return; - /* Display WA #1181: cnp */ + /* Display WA #1181 WaSouthDisplayDisablePWMCGEGating: cnp */ I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) | CNP_PWM_CGE_GATING_DISABLE); } @@ -8448,7 +8522,13 @@ static void cnl_init_clock_gating(struct drm_i915_private *dev_priv) val |= SARBUNIT_CLKGATE_DIS; I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val); + /* Wa_2201832410:cnl */ + val = I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE); + val |= GWUNIT_CLKGATE_DIS; + I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE, val); + /* WaDisableVFclkgate:cnl */ + /* WaVFUnitClockGatingDisable:cnl */ val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE); val |= VFUNIT_CLKGATE_DIS; I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE, val); @@ -9150,7 +9230,8 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val } int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, - u32 mbox, u32 val, int timeout_us) + u32 mbox, u32 val, + int fast_timeout_us, int slow_timeout_ms) { int status; @@ -9173,7 +9254,8 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, if (__intel_wait_for_register_fw(dev_priv, GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, - timeout_us, 0, NULL)) { + fast_timeout_us, slow_timeout_ms, + NULL)) { DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n", val, mbox, __builtin_return_address(0)); return -ETIMEDOUT; @@ -9348,15 +9430,16 @@ static u64 vlv_residency_raw(struct drm_i915_private *dev_priv, const i915_reg_t reg) { u32 lower, upper, tmp; - unsigned long flags; int loop = 2; - /* The register accessed do not need forcewake. We borrow + /* + * The registers accessed do not need forcewake. We borrow * uncore lock to prevent concurrent access to range reg. 
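sandybridge_pcode_write_timeout() above now takes separate fast and slow timeouts, matching __intel_wait_for_register_fw()'s busy-poll-then-sleep structure. A hedged userspace sketch of that two-phase wait shape (all names invented; a real version would spin with microsecond delays and then sleep between polls rather than count iterations):

#include <stdbool.h>
#include <stdio.h>

typedef bool (*poll_fn)(void *ctx);

static int wait_two_phase(poll_fn done, void *ctx,
                          unsigned int fast_us, unsigned int slow_ms)
{
    for (unsigned int i = 0; i < fast_us; i++) /* ~1us busy spins, notionally */
        if (done(ctx))
            return 0;
    for (unsigned int i = 0; i < slow_ms; i++) /* ~1ms sleeps, notionally */
        if (done(ctx))
            return 0;
    return -1; /* -ETIMEDOUT analogue */
}

static bool ready_after_ticks(void *ctx)
{
    return --*(int *)ctx <= 0;
}

int main(void)
{
    int ticks = 3;

    printf("%d\n", wait_two_phase(ready_after_ticks, &ticks, 500, 250));
    return 0; /* prints 0: completed in the fast phase */
}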
*/ - spin_lock_irqsave(&dev_priv->uncore.lock, flags); + lockdep_assert_held(&dev_priv->uncore.lock); - /* vlv and chv residency counters are 40 bits in width. + /* + * vlv and chv residency counters are 40 bits in width. * With a control bit, we can choose between upper or lower * 32bit window into this counter. * @@ -9380,29 +9463,49 @@ static u64 vlv_residency_raw(struct drm_i915_private *dev_priv, upper = I915_READ_FW(reg); } while (upper != tmp && --loop); - /* Everywhere else we always use VLV_COUNTER_CONTROL with the + /* + * Everywhere else we always use VLV_COUNTER_CONTROL with the * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set * now. */ - spin_unlock_irqrestore(&dev_priv->uncore.lock, flags); - return lower | (u64)upper << 8; } u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv, const i915_reg_t reg) { - u64 time_hw; + u64 time_hw, prev_hw, overflow_hw; + unsigned int fw_domains; + unsigned long flags; + unsigned int i; u32 mul, div; if (!HAS_RC6(dev_priv)) return 0; + /* + * Store previous hw counter values for counter wrap-around handling. + * + * There are only four interesting registers and they live next to each + * other so we can use the relative address, compared to the smallest + * one as the index into driver storage. + */ + i = (i915_mmio_reg_offset(reg) - + i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32); + if (WARN_ON_ONCE(i >= ARRAY_SIZE(dev_priv->gt_pm.rc6.cur_residency))) + return 0; + + fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ); + + spin_lock_irqsave(&dev_priv->uncore.lock, flags); + intel_uncore_forcewake_get__locked(dev_priv, fw_domains); + /* On VLV and CHV, residency time is in CZ units rather than 1.28us */ if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { mul = 1000000; div = dev_priv->czclk_freq; + overflow_hw = BIT_ULL(40); time_hw = vlv_residency_raw(dev_priv, reg); } else { /* 833.33ns units on Gen9LP, 1.28us elsewhere. */ @@ -9414,10 +9517,33 @@ u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv, div = 1; } - time_hw = I915_READ(reg); + overflow_hw = BIT_ULL(32); + time_hw = I915_READ_FW(reg); } - return DIV_ROUND_UP_ULL(time_hw * mul, div); + /* + * Counter wrap handling. + * + * But relying on a sufficient frequency of queries otherwise counters + * can still wrap. + */ + prev_hw = dev_priv->gt_pm.rc6.prev_hw_residency[i]; + dev_priv->gt_pm.rc6.prev_hw_residency[i] = time_hw; + + /* RC6 delta from last sample. */ + if (time_hw >= prev_hw) + time_hw -= prev_hw; + else + time_hw += overflow_hw - prev_hw; + + /* Add delta to RC6 extended raw driver copy. */ + time_hw += dev_priv->gt_pm.rc6.cur_residency[i]; + dev_priv->gt_pm.rc6.cur_residency[i] = time_hw; + + intel_uncore_forcewake_put__locked(dev_priv, fw_domains); + spin_unlock_irqrestore(&dev_priv->uncore.lock, flags); + + return mul_u64_u32_div(time_hw, mul, div); } u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 2e32615eeada..23175c5c4a50 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -56,12 +56,109 @@ #include "intel_drv.h" #include "i915_drv.h" -static bool is_edp_psr(struct intel_dp *intel_dp) +static inline enum intel_display_power_domain +psr_aux_domain(struct intel_dp *intel_dp) { - if (!intel_dp_is_edp(intel_dp)) + /* CNL HW requires corresponding AUX IOs to be powered up for PSR. 
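The RC6 residency rework above extends a 32-bit (40-bit on VLV/CHV) hardware counter in software by accumulating deltas between samples, which stays correct as long as sampling happens at least once per wrap period, exactly the caveat the new comment states. A compact, runnable model of that accumulation (struct and names invented):

#include <stdint.h>
#include <stdio.h>

struct rc6_state {
    uint64_t prev_hw; /* last raw hardware sample */
    uint64_t cur;     /* extended software residency */
};

static uint64_t rc6_update(struct rc6_state *s, uint64_t time_hw,
                           uint64_t overflow_hw)
{
    uint64_t delta;

    if (time_hw >= s->prev_hw)
        delta = time_hw - s->prev_hw;
    else
        delta = time_hw + overflow_hw - s->prev_hw; /* wrapped once */

    s->prev_hw = time_hw;
    s->cur += delta;
    return s->cur;
}

int main(void)
{
    const uint64_t overflow = 1ull << 32; /* BIT_ULL(32) analogue */
    struct rc6_state s = { .prev_hw = overflow - 100, .cur = 0 };

    /* Counter wrapped between samples: 100 units before + 50 after. */
    printf("%llu\n", (unsigned long long)rc6_update(&s, 50, overflow));
    return 0; /* prints 150 */
}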
+ * However, for non-A AUX ports the corresponding non-EDP transcoders + * would have already enabled power well 2 and DC_OFF. This means we can + * acquire a wider POWER_DOMAIN_AUX_{B,C,D,F} reference instead of a + * specific AUX_IO reference without powering up any extra wells. + * Note that PSR is enabled only on Port A even though this function + * returns the correct domain for other ports too. + */ + return intel_dp->aux_ch == AUX_CH_A ? POWER_DOMAIN_AUX_IO_A : + intel_dp->aux_power_domain; +} + +static void psr_aux_io_power_get(struct intel_dp *intel_dp) +{ + struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); + + if (INTEL_GEN(dev_priv) < 10) + return; + + intel_display_power_get(dev_priv, psr_aux_domain(intel_dp)); +} + +static void psr_aux_io_power_put(struct intel_dp *intel_dp) +{ + struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); + + if (INTEL_GEN(dev_priv) < 10) + return; + + intel_display_power_put(dev_priv, psr_aux_domain(intel_dp)); +} + +static bool intel_dp_get_y_cord_status(struct intel_dp *intel_dp) +{ + uint8_t psr_caps = 0; + + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_CAPS, &psr_caps) != 1) + return false; + return psr_caps & DP_PSR2_SU_Y_COORDINATE_REQUIRED; +} + +static bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp) +{ + uint8_t dprx = 0; + + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_DPRX_FEATURE_ENUMERATION_LIST, + &dprx) != 1) return false; + return dprx & DP_VSC_SDP_EXT_FOR_COLORIMETRY_SUPPORTED; +} + +static bool intel_dp_get_alpm_status(struct intel_dp *intel_dp) +{ + uint8_t alpm_caps = 0; - return intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED; + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_RECEIVER_ALPM_CAP, + &alpm_caps) != 1) + return false; + return alpm_caps & DP_ALPM_CAP; +} + +void intel_psr_init_dpcd(struct intel_dp *intel_dp) +{ + struct drm_i915_private *dev_priv = + to_i915(dp_to_dig_port(intel_dp)->base.base.dev); + + drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, intel_dp->psr_dpcd, + sizeof(intel_dp->psr_dpcd)); + + if (intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED) { + dev_priv->psr.sink_support = true; + DRM_DEBUG_KMS("Detected EDP PSR Panel.\n"); + } + + if (INTEL_GEN(dev_priv) >= 9 && + (intel_dp->psr_dpcd[0] & DP_PSR2_IS_SUPPORTED)) { + uint8_t frame_sync_cap; + + dev_priv->psr.sink_support = true; + if (drm_dp_dpcd_readb(&intel_dp->aux, + DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP, + &frame_sync_cap) != 1) + frame_sync_cap = 0; + dev_priv->psr.aux_frame_sync = frame_sync_cap & DP_AUX_FRAME_SYNC_CAP; + /* PSR2 needs frame sync as well */ + dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync; + DRM_DEBUG_KMS("PSR2 %s on sink", + dev_priv->psr.psr2_support ? 
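The three sink-capability helpers above share one shape: read a single DPCD byte, treat a short or failed read as "capability absent", then test a bit. A generic userspace rendering of that pattern (the callback, address, and mask here are stand-ins, not real DPCD definitions):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef int (*dpcd_readb_fn)(uint32_t addr, uint8_t *val);

static bool dpcd_cap_set(dpcd_readb_fn readb, uint32_t addr, uint8_t mask)
{
    uint8_t v = 0;

    if (readb(addr, &v) != 1) /* short/failed read: assume unsupported */
        return false;
    return (v & mask) != 0;
}

static int fake_readb(uint32_t addr, uint8_t *val)
{
    *val = (addr == 0x2e) ? 0x01 : 0x00; /* pretend one cap bit is set */
    return 1;
}

int main(void)
{
    printf("cap: %d\n", dpcd_cap_set(fake_readb, 0x2e, 0x01)); /* 1 */
    return 0;
}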
"supported" : "not supported"); + + if (dev_priv->psr.psr2_support) { + dev_priv->psr.y_cord_support = + intel_dp_get_y_cord_status(intel_dp); + dev_priv->psr.colorimetry_support = + intel_dp_get_colorimetry_status(intel_dp); + dev_priv->psr.alpm = + intel_dp_get_alpm_status(intel_dp); + } + } } static bool vlv_is_psr_active_on_pipe(struct drm_device *dev, int pipe) @@ -134,7 +231,7 @@ static void vlv_psr_enable_sink(struct intel_dp *intel_dp) static i915_reg_t psr_aux_ctl_reg(struct drm_i915_private *dev_priv, enum port port) { - if (INTEL_INFO(dev_priv)->gen >= 9) + if (INTEL_GEN(dev_priv) >= 9) return DP_AUX_CH_CTL(port); else return EDP_PSR_AUX_CTL; @@ -143,7 +240,7 @@ static i915_reg_t psr_aux_ctl_reg(struct drm_i915_private *dev_priv, static i915_reg_t psr_aux_data_reg(struct drm_i915_private *dev_priv, enum port port, int index) { - if (INTEL_INFO(dev_priv)->gen >= 9) + if (INTEL_GEN(dev_priv) >= 9) return DP_AUX_CH_DATA(port, index); else return EDP_PSR_AUX_DATA(index); @@ -349,6 +446,50 @@ static void hsw_psr_activate(struct intel_dp *intel_dp) hsw_activate_psr1(intel_dp); } +static bool intel_psr2_config_valid(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state) +{ + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); + int crtc_hdisplay = crtc_state->base.adjusted_mode.crtc_hdisplay; + int crtc_vdisplay = crtc_state->base.adjusted_mode.crtc_vdisplay; + int psr_max_h = 0, psr_max_v = 0; + + /* + * FIXME psr2_support is messed up. It's both computed + * dynamically during PSR enable, and extracted from sink + * caps during eDP detection. + */ + if (!dev_priv->psr.psr2_support) + return false; + + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { + psr_max_h = 4096; + psr_max_v = 2304; + } else if (IS_GEN9(dev_priv)) { + psr_max_h = 3640; + psr_max_v = 2304; + } + + if (crtc_hdisplay > psr_max_h || crtc_vdisplay > psr_max_v) { + DRM_DEBUG_KMS("PSR2 not enabled, resolution %dx%d > max supported %dx%d\n", + crtc_hdisplay, crtc_vdisplay, + psr_max_h, psr_max_v); + return false; + } + + /* + * FIXME:enable psr2 only for y-cordinate psr2 panels + * After gtc implementation , remove this restriction. + */ + if (!dev_priv->psr.y_cord_support) { + DRM_DEBUG_KMS("PSR2 not enabled, panel does not support Y coordinate\n"); + return false; + } + + return true; +} + void intel_psr_compute_config(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state) { @@ -358,10 +499,7 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, &crtc_state->base.adjusted_mode; int psr_setup_time; - if (!HAS_PSR(dev_priv)) - return; - - if (!is_edp_psr(intel_dp)) + if (!CAN_PSR(dev_priv)) return; if (!i915_modparams.enable_psr) { @@ -414,34 +552,14 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, return; } - /* - * FIXME psr2_support is messed up. It's both computed - * dynamically during PSR enable, and extracted from sink - * caps during eDP detection. - */ - if (!dev_priv->psr.psr2_support) { - crtc_state->has_psr = true; - return; - } - - /* PSR2 is restricted to work with panel resolutions upto 3200x2000 */ - if (adjusted_mode->crtc_hdisplay > 3200 || - adjusted_mode->crtc_vdisplay > 2000) { - DRM_DEBUG_KMS("PSR2 disabled, panel resolution too big\n"); - return; - } - - /* - * FIXME:enable psr2 only for y-cordinate psr2 panels - * After gtc implementation , remove this restriction. 
- */ - if (!dev_priv->psr.y_cord_support) { - DRM_DEBUG_KMS("PSR2 disabled, panel does not support Y coordinate\n"); + if (!(intel_dp->edp_dpcd[1] & DP_EDP_SET_POWER_CAP)) { + DRM_DEBUG_KMS("PSR condition failed: panel lacks power state control\n"); return; } crtc_state->has_psr = true; - crtc_state->has_psr2 = true; + crtc_state->has_psr2 = intel_psr2_config_valid(intel_dp, crtc_state); + DRM_DEBUG_KMS("Enabling PSR%s\n", crtc_state->has_psr2 ? "2" : ""); } static void intel_psr_activate(struct intel_dp *intel_dp) @@ -470,13 +588,15 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp, enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; u32 chicken; + psr_aux_io_power_get(intel_dp); + if (dev_priv->psr.psr2_support) { chicken = PSR2_VSC_ENABLE_PROG_HEADER; if (dev_priv->psr.y_cord_support) chicken |= PSR2_ADD_VERTICAL_LINE_COUNT; I915_WRITE(CHICKEN_TRANS(cpu_transcoder), chicken); - I915_WRITE(EDP_PSR_DEBUG_CTL, + I915_WRITE(EDP_PSR_DEBUG, EDP_PSR_DEBUG_MASK_MEMUP | EDP_PSR_DEBUG_MASK_HPD | EDP_PSR_DEBUG_MASK_LPSP | @@ -490,7 +610,7 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp, * preventing other hw tracking issues now we can rely * on frontbuffer tracking. */ - I915_WRITE(EDP_PSR_DEBUG_CTL, + I915_WRITE(EDP_PSR_DEBUG, EDP_PSR_DEBUG_MASK_MEMUP | EDP_PSR_DEBUG_MASK_HPD | EDP_PSR_DEBUG_MASK_LPSP); @@ -514,6 +634,9 @@ void intel_psr_enable(struct intel_dp *intel_dp, if (!crtc_state->has_psr) return; + if (WARN_ON(!CAN_PSR(dev_priv))) + return; + WARN_ON(dev_priv->drrs.dp); mutex_lock(&dev_priv->psr.lock); if (dev_priv->psr.enabled) { @@ -522,8 +645,6 @@ void intel_psr_enable(struct intel_dp *intel_dp, } dev_priv->psr.psr2_support = crtc_state->has_psr2; - dev_priv->psr.source_ok = true; - dev_priv->psr.busy_frontbuffer_bits = 0; dev_priv->psr.setup_vsc(intel_dp, crtc_state); @@ -599,7 +720,7 @@ static void hsw_psr_disable(struct intel_dp *intel_dp, 0); if (dev_priv->psr.psr2_support) { - psr_status = EDP_PSR2_STATUS_CTL; + psr_status = EDP_PSR2_STATUS; psr_status_mask = EDP_PSR2_STATUS_STATE_MASK; I915_WRITE(EDP_PSR2_CTL, @@ -607,7 +728,7 @@ static void hsw_psr_disable(struct intel_dp *intel_dp, ~(EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE)); } else { - psr_status = EDP_PSR_STATUS_CTL; + psr_status = EDP_PSR_STATUS; psr_status_mask = EDP_PSR_STATUS_STATE_MASK; I915_WRITE(EDP_PSR_CTL, @@ -627,6 +748,8 @@ static void hsw_psr_disable(struct intel_dp *intel_dp, else WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE); } + + psr_aux_io_power_put(intel_dp); } /** @@ -646,6 +769,9 @@ void intel_psr_disable(struct intel_dp *intel_dp, if (!old_crtc_state->has_psr) return; + if (WARN_ON(!CAN_PSR(dev_priv))) + return; + mutex_lock(&dev_priv->psr.lock); if (!dev_priv->psr.enabled) { mutex_unlock(&dev_priv->psr.lock); @@ -679,19 +805,19 @@ static void intel_psr_work(struct work_struct *work) if (HAS_DDI(dev_priv)) { if (dev_priv->psr.psr2_support) { if (intel_wait_for_register(dev_priv, - EDP_PSR2_STATUS_CTL, - EDP_PSR2_STATUS_STATE_MASK, - 0, - 50)) { + EDP_PSR2_STATUS, + EDP_PSR2_STATUS_STATE_MASK, + 0, + 50)) { DRM_ERROR("Timed out waiting for PSR2 Idle for re-enable\n"); return; } } else { if (intel_wait_for_register(dev_priv, - EDP_PSR_STATUS_CTL, - EDP_PSR_STATUS_STATE_MASK, - 0, - 50)) { + EDP_PSR_STATUS, + EDP_PSR_STATUS_STATE_MASK, + 0, + 50)) { DRM_ERROR("Timed out waiting for PSR Idle for re-enable\n"); return; } @@ -796,7 +922,7 @@ void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, enum pipe pipe; u32 val; - if (!HAS_PSR(dev_priv)) + if 
(!CAN_PSR(dev_priv)) return; /* @@ -845,7 +971,7 @@ void intel_psr_invalidate(struct drm_i915_private *dev_priv, struct drm_crtc *crtc; enum pipe pipe; - if (!HAS_PSR(dev_priv)) + if (!CAN_PSR(dev_priv)) return; mutex_lock(&dev_priv->psr.lock); @@ -885,7 +1011,7 @@ void intel_psr_flush(struct drm_i915_private *dev_priv, struct drm_crtc *crtc; enum pipe pipe; - if (!HAS_PSR(dev_priv)) + if (!CAN_PSR(dev_priv)) return; mutex_lock(&dev_priv->psr.lock); @@ -926,6 +1052,9 @@ void intel_psr_init(struct drm_i915_private *dev_priv) dev_priv->psr_mmio_base = IS_HASWELL(dev_priv) ? HSW_EDP_PSR_BASE : BDW_EDP_PSR_BASE; + if (!dev_priv->psr.sink_support) + return; + /* Per platform default: all disabled. */ if (i915_modparams.enable_psr == -1) i915_modparams.enable_psr = 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e2085820b586..1d599524a759 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -66,7 +66,7 @@ unsigned int intel_ring_update_space(struct intel_ring *ring) } static int -gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) +gen2_render_ring_flush(struct i915_request *rq, u32 mode) { u32 cmd, *cs; @@ -75,19 +75,19 @@ gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) if (mode & EMIT_INVALIDATE) cmd |= MI_READ_FLUSH; - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = cmd; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) +gen4_render_ring_flush(struct i915_request *rq, u32 mode) { u32 cmd, *cs; @@ -122,22 +122,22 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) cmd = MI_FLUSH; if (mode & EMIT_INVALIDATE) { cmd |= MI_EXE_FLUSH; - if (IS_G4X(req->i915) || IS_GEN5(req->i915)) + if (IS_G4X(rq->i915) || IS_GEN5(rq->i915)) cmd |= MI_INVALIDATE_ISP; } - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = cmd; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } -/** +/* * Emits a PIPE_CONTROL with a non-zero post-sync operation, for * implementing two workarounds on gen6. From section 1.4.7.1 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: @@ -175,13 +175,13 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) * really our business. That leaves only stall at scoreboard. 
*/ static int -intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) +intel_emit_post_sync_nonzero_flush(struct i915_request *rq) { u32 scratch_addr = - i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; + i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; u32 *cs; - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -191,9 +191,9 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) *cs++ = 0; /* low dword */ *cs++ = 0; /* high dword */ *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -203,21 +203,21 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) *cs++ = 0; *cs++ = 0; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) +gen6_render_ring_flush(struct i915_request *rq, u32 mode) { u32 scratch_addr = - i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; + i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; u32 *cs, flags = 0; int ret; /* Force SNB workarounds for PIPE_CONTROL flushes */ - ret = intel_emit_post_sync_nonzero_flush(req); + ret = intel_emit_post_sync_nonzero_flush(rq); if (ret) return ret; @@ -247,7 +247,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; } - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -255,17 +255,17 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) *cs++ = flags; *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; *cs++ = 0; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) +gen7_render_ring_cs_stall_wa(struct i915_request *rq) { u32 *cs; - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -273,16 +273,16 @@ gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; *cs++ = 0; *cs++ = 0; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) +gen7_render_ring_flush(struct i915_request *rq, u32 mode) { u32 scratch_addr = - i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; + i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; u32 *cs, flags = 0; /* @@ -324,10 +324,10 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) /* Workaround: we must issue a pipe_control with CS-stall bit * set before a pipe_control command that has the state cache * invalidate bit set. 
*/ - gen7_render_ring_cs_stall_wa(req); + gen7_render_ring_cs_stall_wa(rq); } - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -335,7 +335,7 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) *cs++ = flags; *cs++ = scratch_addr; *cs++ = 0; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -453,13 +453,13 @@ static int init_ring_common(struct intel_engine_cs *engine) if (!stop_ring(engine)) { /* G45 ring initialization often fails to reset head to zero */ - DRM_DEBUG_KMS("%s head not reset to zero " - "ctl %08x head %08x tail %08x start %08x\n", - engine->name, - I915_READ_CTL(engine), - I915_READ_HEAD(engine), - I915_READ_TAIL(engine), - I915_READ_START(engine)); + DRM_DEBUG_DRIVER("%s head not reset to zero " + "ctl %08x head %08x tail %08x start %08x\n", + engine->name, + I915_READ_CTL(engine), + I915_READ_HEAD(engine), + I915_READ_TAIL(engine), + I915_READ_START(engine)); if (!stop_ring(engine)) { DRM_ERROR("failed to set %s head to zero " @@ -492,8 +492,8 @@ static int init_ring_common(struct intel_engine_cs *engine) /* WaClearRingBufHeadRegAtInit:ctg,elk */ if (I915_READ_HEAD(engine)) - DRM_DEBUG("%s initialization failed [head=%08x], fudging\n", - engine->name, I915_READ_HEAD(engine)); + DRM_DEBUG_DRIVER("%s initialization failed [head=%08x], fudging\n", + engine->name, I915_READ_HEAD(engine)); intel_ring_update_space(ring); I915_WRITE_HEAD(engine, ring->head); @@ -531,7 +531,7 @@ out: } static void reset_ring_common(struct intel_engine_cs *engine, - struct drm_i915_gem_request *request) + struct i915_request *request) { /* * RC6 must be prevented until the reset is complete and the engine @@ -595,15 +595,15 @@ static void reset_ring_common(struct intel_engine_cs *engine, } } -static int intel_rcs_ctx_init(struct drm_i915_gem_request *req) +static int intel_rcs_ctx_init(struct i915_request *rq) { int ret; - ret = intel_ring_workarounds_emit(req); + ret = intel_ring_workarounds_emit(rq); if (ret != 0) return ret; - ret = i915_gem_render_state_emit(req); + ret = i915_gem_render_state_emit(rq); if (ret) return ret; @@ -655,15 +655,15 @@ static int init_render_ring(struct intel_engine_cs *engine) if (IS_GEN(dev_priv, 6, 7)) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - if (INTEL_INFO(dev_priv)->gen >= 6) + if (INTEL_GEN(dev_priv) >= 6) I915_WRITE_IMR(engine, ~engine->irq_keep_mask); return init_workarounds_ring(engine); } -static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs) +static u32 *gen6_signal(struct i915_request *rq, u32 *cs) { - struct drm_i915_private *dev_priv = req->i915; + struct drm_i915_private *dev_priv = rq->i915; struct intel_engine_cs *engine; enum intel_engine_id id; int num_rings = 0; @@ -674,11 +674,11 @@ static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs) if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK)) continue; - mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id]; + mbox_reg = rq->engine->semaphore.mbox.signal[engine->hw_id]; if (i915_mmio_reg_valid(mbox_reg)) { *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = i915_mmio_reg_offset(mbox_reg); - *cs++ = req->global_seqno; + *cs++ = rq->global_seqno; num_rings++; } } @@ -690,7 +690,7 @@ static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs) static void cancel_requests(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *request; + struct i915_request *request; unsigned long flags; spin_lock_irqsave(&engine->timeline->lock, flags); @@ -698,7 
+698,7 @@ static void cancel_requests(struct intel_engine_cs *engine) /* Mark all submitted requests as skipped. */ list_for_each_entry(request, &engine->timeline->requests, link) { GEM_BUG_ON(!request->global_seqno); - if (!i915_gem_request_completed(request)) + if (!i915_request_completed(request)) dma_fence_set_error(&request->fence, -EIO); } /* Remaining _unready_ requests will be nop'ed when submitted */ @@ -706,56 +706,46 @@ static void cancel_requests(struct intel_engine_cs *engine) spin_unlock_irqrestore(&engine->timeline->lock, flags); } -static void i9xx_submit_request(struct drm_i915_gem_request *request) +static void i9xx_submit_request(struct i915_request *request) { struct drm_i915_private *dev_priv = request->i915; - i915_gem_request_submit(request); + i915_request_submit(request); I915_WRITE_TAIL(request->engine, intel_ring_set_tail(request->ring, request->tail)); } -static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) +static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) { *cs++ = MI_STORE_DWORD_INDEX; *cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; - *cs++ = req->global_seqno; + *cs++ = rq->global_seqno; *cs++ = MI_USER_INTERRUPT; - req->tail = intel_ring_offset(req, cs); - assert_ring_tail_valid(req->ring, req->tail); + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); } static const int i9xx_emit_breadcrumb_sz = 4; -/** - * gen6_sema_emit_breadcrumb - Update the semaphore mailbox registers - * - * @request - request to write to the ring - * - * Update the mailbox registers in the *other* rings with the current seqno. - * This acts like a signal in the canonical semaphore. - */ -static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) +static void gen6_sema_emit_breadcrumb(struct i915_request *rq, u32 *cs) { - return i9xx_emit_breadcrumb(req, - req->engine->semaphore.signal(req, cs)); + return i9xx_emit_breadcrumb(rq, rq->engine->semaphore.signal(rq, cs)); } static int -gen6_ring_sync_to(struct drm_i915_gem_request *req, - struct drm_i915_gem_request *signal) +gen6_ring_sync_to(struct i915_request *rq, struct i915_request *signal) { u32 dw1 = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER; - u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->hw_id]; + u32 wait_mbox = signal->engine->semaphore.mbox.wait[rq->engine->hw_id]; u32 *cs; WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -767,7 +757,7 @@ gen6_ring_sync_to(struct drm_i915_gem_request *req, *cs++ = signal->global_seqno - 1; *cs++ = 0; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -866,17 +856,17 @@ i8xx_irq_disable(struct intel_engine_cs *engine) } static int -bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) +bsd_ring_flush(struct i915_request *rq, u32 mode) { u32 *cs; - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = MI_FLUSH; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -919,20 +909,20 @@ hsw_vebox_irq_disable(struct intel_engine_cs *engine) } static int -i965_emit_bb_start(struct drm_i915_gem_request *req, +i965_emit_bb_start(struct i915_request *rq, u64 offset, u32 length, unsigned int dispatch_flags) { u32 *cs; - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); 
*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965); *cs++ = offset; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -942,13 +932,13 @@ i965_emit_bb_start(struct drm_i915_gem_request *req, #define I830_TLB_ENTRIES (2) #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) static int -i830_emit_bb_start(struct drm_i915_gem_request *req, +i830_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, unsigned int dispatch_flags) { - u32 *cs, cs_offset = i915_ggtt_offset(req->engine->scratch); + u32 *cs, cs_offset = i915_ggtt_offset(rq->engine->scratch); - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -959,13 +949,13 @@ i830_emit_bb_start(struct drm_i915_gem_request *req, *cs++ = cs_offset; *cs++ = 0xdeadbeef; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { if (len > I830_BATCH_LIMIT) return -ENOSPC; - cs = intel_ring_begin(req, 6 + 2); + cs = intel_ring_begin(rq, 6 + 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -982,39 +972,39 @@ i830_emit_bb_start(struct drm_i915_gem_request *req, *cs++ = MI_FLUSH; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); /* ... and execute it. */ offset = cs_offset; } - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE); - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -i915_emit_bb_start(struct drm_i915_gem_request *req, +i915_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, unsigned int dispatch_flags) { u32 *cs; - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 
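The i830 path above works around batches being corrupted by TLB-entry prefetch: unless the batch is pinned in the safe region, it is first copied into a scratch buffer (via a GPU blit in the real code) and executed from there, failing with -ENOSPC past I830_BATCH_LIMIT. A loose userspace analogue, with memcpy standing in for the blit:

#include <stdio.h>
#include <string.h>

#define BATCH_LIMIT (256 * 1024) /* mirrors I830_BATCH_LIMIT */

static const void *batch_exec_addr(const void *batch, size_t len, int pinned,
                                   void *scratch, size_t scratch_len)
{
    if (pinned)
        return batch; /* safe to run in place */
    if (len > BATCH_LIMIT || len > scratch_len)
        return NULL; /* -ENOSPC analogue */
    memcpy(scratch, batch, len); /* the SRC_COPY blit in the real code */
    return scratch;
}

int main(void)
{
    char batch[16] = "cmds", scratch[32];
    const char *run = batch_exec_addr(batch, sizeof(batch), 0,
                                      scratch, sizeof(scratch));

    printf("executing from %s copy\n", run == scratch ? "scratch" : "pinned");
    return 0;
}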
0 : MI_BATCH_NON_SECURE); - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -1385,7 +1375,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) intel_ring_reset(engine->buffer, 0); } -static inline int mi_set_context(struct drm_i915_gem_request *rq, u32 flags) +static inline int mi_set_context(struct i915_request *rq, u32 flags) { struct drm_i915_private *i915 = rq->i915; struct intel_engine_cs *engine = rq->engine; @@ -1471,7 +1461,7 @@ static inline int mi_set_context(struct drm_i915_gem_request *rq, u32 flags) return 0; } -static int remap_l3(struct drm_i915_gem_request *rq, int slice) +static int remap_l3(struct i915_request *rq, int slice) { u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice]; int i; @@ -1499,7 +1489,7 @@ static int remap_l3(struct drm_i915_gem_request *rq, int slice) return 0; } -static int switch_context(struct drm_i915_gem_request *rq) +static int switch_context(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; struct i915_gem_context *to_ctx = rq->ctx; @@ -1569,7 +1559,7 @@ err: return ret; } -static int ring_request_alloc(struct drm_i915_gem_request *request) +static int ring_request_alloc(struct i915_request *request) { int ret; @@ -1595,7 +1585,7 @@ static int ring_request_alloc(struct drm_i915_gem_request *request) static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) { - struct drm_i915_gem_request *target; + struct i915_request *target; long timeout; lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex); @@ -1613,13 +1603,13 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) if (WARN_ON(&target->ring_link == &ring->request_list)) return -ENOSPC; - timeout = i915_wait_request(target, + timeout = i915_request_wait(target, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); if (timeout < 0) return timeout; - i915_gem_request_retire_upto(target); + i915_request_retire_upto(target); intel_ring_update_space(ring); GEM_BUG_ON(ring->space < bytes); @@ -1642,10 +1632,9 @@ int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes) return 0; } -u32 *intel_ring_begin(struct drm_i915_gem_request *req, - unsigned int num_dwords) +u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) { - struct intel_ring *ring = req->ring; + struct intel_ring *ring = rq->ring; const unsigned int remain_usable = ring->effective_size - ring->emit; const unsigned int bytes = num_dwords * sizeof(u32); unsigned int need_wrap = 0; @@ -1655,7 +1644,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, /* Packets must be qword aligned. */ GEM_BUG_ON(num_dwords & 1); - total_bytes = bytes + req->reserved_space; + total_bytes = bytes + rq->reserved_space; GEM_BUG_ON(total_bytes > ring->effective_size); if (unlikely(total_bytes > remain_usable)) { @@ -1676,7 +1665,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, * wrap and only need to effectively wait for the * reserved size from the start of ringbuffer. */ - total_bytes = req->reserved_space + remain_actual; + total_bytes = rq->reserved_space + remain_actual; } } @@ -1690,9 +1679,9 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, * overallocation and the assumption is that then we never need * to wait (which has the risk of failing with EINTR). * - * See also i915_gem_request_alloc() and i915_add_request(). + * See also i915_request_alloc() and i915_request_add(). 
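intel_ring_begin() above has to reserve room for both the caller's dwords and the per-request reserved_space, and if that total does not fit before the end of the buffer it pads to the end and wraps back to offset 0. A minimal sketch of just the wrap decision (field names invented; the effective-size handling is omitted):

#include <stdbool.h>
#include <stdio.h>

struct toy_ring {
    unsigned int size, emit;
};

static bool needs_wrap(const struct toy_ring *r,
                       unsigned int bytes, unsigned int reserved)
{
    unsigned int remain_usable = r->size - r->emit;

    return bytes + reserved > remain_usable; /* must pad to end and wrap */
}

int main(void)
{
    struct toy_ring r = { .size = 4096, .emit = 4000 };

    printf("%d\n", needs_wrap(&r, 64, 32)); /* 0: 96 bytes fit exactly */
    printf("%d\n", needs_wrap(&r, 96, 32)); /* 1: must wrap first */
    return 0;
}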
*/ - GEM_BUG_ON(!req->reserved_space); + GEM_BUG_ON(!rq->reserved_space); ret = wait_for_space(ring, total_bytes); if (unlikely(ret)) @@ -1721,29 +1710,28 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, } /* Align the ring tail to a cacheline boundary */ -int intel_ring_cacheline_align(struct drm_i915_gem_request *req) +int intel_ring_cacheline_align(struct i915_request *rq) { - int num_dwords = - (req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); + int num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); u32 *cs; if (num_dwords == 0) return 0; - num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords; - cs = intel_ring_begin(req, num_dwords); + num_dwords = CACHELINE_BYTES / sizeof(u32) - num_dwords; + cs = intel_ring_begin(rq, num_dwords); if (IS_ERR(cs)) return PTR_ERR(cs); while (num_dwords--) *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } -static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) +static void gen6_bsd_submit_request(struct i915_request *request) { struct drm_i915_private *dev_priv = request->i915; @@ -1780,11 +1768,11 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) +static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode) { u32 cmd, *cs; - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1810,18 +1798,18 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = 0; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -hsw_emit_bb_start(struct drm_i915_gem_request *req, +hsw_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, unsigned int dispatch_flags) { u32 *cs; - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1831,19 +1819,19 @@ hsw_emit_bb_start(struct drm_i915_gem_request *req, MI_BATCH_RESOURCE_STREAMER : 0); /* bit0-7 is the length on GEN6+ */ *cs++ = offset; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -gen6_emit_bb_start(struct drm_i915_gem_request *req, +gen6_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, unsigned int dispatch_flags) { u32 *cs; - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1851,18 +1839,18 @@ gen6_emit_bb_start(struct drm_i915_gem_request *req, 0 : MI_BATCH_NON_SECURE_I965); /* bit0-7 is the length on GEN6+ */ *cs++ = offset; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } /* Blitter support (SandyBridge+) */ -static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) +static int gen6_ring_flush(struct i915_request *rq, u32 mode) { u32 cmd, *cs; - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1887,7 +1875,7 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = 0; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a0e7a6c2a57c..0320c2c4cfba 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ 
b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -3,10 +3,12 @@ #define _INTEL_RINGBUFFER_H_ #include <linux/hashtable.h> + #include "i915_gem_batch_pool.h" -#include "i915_gem_request.h" #include "i915_gem_timeline.h" + #include "i915_pmu.h" +#include "i915_request.h" #include "i915_selftest.h" struct drm_printer; @@ -90,7 +92,7 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) #define instdone_subslice_mask(dev_priv__) \ (INTEL_GEN(dev_priv__) == 7 ? \ - 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask) + 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0]) #define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ for ((slice__) = 0, (subslice__) = 0; \ @@ -115,7 +117,7 @@ struct intel_engine_hangcheck { unsigned long action_timestamp; int deadlock; struct intel_instdone instdone; - struct drm_i915_gem_request *active_request; + struct i915_request *active_request; bool stalled; }; @@ -156,7 +158,10 @@ struct i915_ctx_workarounds { struct i915_vma *vma; }; -struct drm_i915_gem_request; +struct i915_request; + +#define I915_MAX_VCS 4 +#define I915_MAX_VECS 2 /* * Engine IDs definitions. @@ -167,8 +172,12 @@ enum intel_engine_id { BCS, VCS, VCS2, + VCS3, + VCS4, #define _VCS(n) (VCS + (n)) - VECS + VECS, + VECS2 +#define _VECS(n) (VECS + (n)) }; struct i915_priolist { @@ -200,9 +209,17 @@ struct intel_engine_execlists { bool no_priolist; /** - * @elsp: the ExecList Submission Port register + * @submit_reg: gen-specific execlist submission register + * set to the ExecList Submission Port (elsp) register pre-Gen11 and to + * the ExecList Submission Queue Contents register array for Gen11+ */ - u32 __iomem *elsp; + u32 __iomem *submit_reg; + + /** + * @ctrl_reg: the enhanced execlists control register, used to load the + * submit queue on the HW and to request preemptions to idle + */ + u32 __iomem *ctrl_reg; /** * @port: execlist port states @@ -218,7 +235,7 @@ struct intel_engine_execlists { /** * @request_count: combined request and submission count */ - struct drm_i915_gem_request *request_count; + struct i915_request *request_count; #define EXECLIST_COUNT_BITS 2 #define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) #define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) @@ -256,6 +273,16 @@ struct intel_engine_execlists { unsigned int port_mask; /** + * @queue_priority: Highest pending priority. + * + * When we add requests into the queue, or adjust the priority of + * executing requests, we compute the maximum priority of those + * pending requests. We can then use this value to determine if + * we need to preempt the executing requests to service the queue. 
+ */ + int queue_priority; + + /** * @queue: queue of requests, in priority lists */ struct rb_root queue; @@ -279,6 +306,11 @@ struct intel_engine_execlists { * @csb_use_mmio: access csb through mmio, instead of hwsp */ bool csb_use_mmio; + + /** + * @preempt_complete_status: expected CSB upon completing preemption + */ + u32 preempt_complete_status; }; #define INTEL_ENGINE_CS_MAX_NAME 8 @@ -332,9 +364,9 @@ struct intel_engine_cs { spinlock_t rb_lock; /* protects the rb and wraps irq_lock */ struct rb_root waiters; /* sorted by retirement, priority */ - struct rb_root signals; /* sorted by retirement */ + struct list_head signals; /* sorted by retirement */ struct task_struct *signaler; /* used for fence signalling */ - struct drm_i915_gem_request __rcu *first_signal; + struct timer_list fake_irq; /* used after a missed interrupt */ struct timer_list hangcheck; /* detect missed interrupts */ @@ -386,7 +418,7 @@ struct intel_engine_cs { int (*init_hw)(struct intel_engine_cs *engine); void (*reset_hw)(struct intel_engine_cs *engine, - struct drm_i915_gem_request *req); + struct i915_request *rq); void (*park)(struct intel_engine_cs *engine); void (*unpark)(struct intel_engine_cs *engine); @@ -397,22 +429,20 @@ struct intel_engine_cs { struct i915_gem_context *ctx); void (*context_unpin)(struct intel_engine_cs *engine, struct i915_gem_context *ctx); - int (*request_alloc)(struct drm_i915_gem_request *req); - int (*init_context)(struct drm_i915_gem_request *req); + int (*request_alloc)(struct i915_request *rq); + int (*init_context)(struct i915_request *rq); - int (*emit_flush)(struct drm_i915_gem_request *request, - u32 mode); + int (*emit_flush)(struct i915_request *request, u32 mode); #define EMIT_INVALIDATE BIT(0) #define EMIT_FLUSH BIT(1) #define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH) - int (*emit_bb_start)(struct drm_i915_gem_request *req, + int (*emit_bb_start)(struct i915_request *rq, u64 offset, u32 length, unsigned int dispatch_flags); #define I915_DISPATCH_SECURE BIT(0) #define I915_DISPATCH_PINNED BIT(1) #define I915_DISPATCH_RS BIT(2) - void (*emit_breadcrumb)(struct drm_i915_gem_request *req, - u32 *cs); + void (*emit_breadcrumb)(struct i915_request *rq, u32 *cs); int emit_breadcrumb_sz; /* Pass the request to the hardware queue (e.g. directly into @@ -421,7 +451,7 @@ struct intel_engine_cs { * This is called from an atomic context with irqs disabled; must * be irq safe. */ - void (*submit_request)(struct drm_i915_gem_request *req); + void (*submit_request)(struct i915_request *rq); /* Call when the priority on a request has changed and it and its * dependencies may need rescheduling. Note the request itself may @@ -429,8 +459,7 @@ struct intel_engine_cs { * * Called under the struct_mutex. */ - void (*schedule)(struct drm_i915_gem_request *request, - int priority); + void (*schedule)(struct i915_request *request, int priority); /* * Cancel all requests on the hardware, or queued for execution. 
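The new @queue_priority field above caches the highest pending priority so the submission path can decide whether to preempt without walking the whole queue; conceptually the check reduces to a single comparison. A hedged sketch of that reduction (priority values arbitrary):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative only: compare the cached highest pending priority against
 * the priority of the currently executing request. */
static bool need_preempt(int queue_priority, int active_priority)
{
    return queue_priority > active_priority;
}

int main(void)
{
    printf("%d\n", need_preempt(1024, 0)); /* 1: pending work outranks active */
    printf("%d\n", need_preempt(0, 0));    /* 0: equal priority, keep going */
    return 0;
}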
@@ -498,9 +527,9 @@ struct intel_engine_cs { } mbox; /* AKA wait() */ - int (*sync_to)(struct drm_i915_gem_request *req, - struct drm_i915_gem_request *signal); - u32 *(*signal)(struct drm_i915_gem_request *req, u32 *cs); + int (*sync_to)(struct i915_request *rq, + struct i915_request *signal); + u32 *(*signal)(struct i915_request *rq, u32 *cs); } semaphore; struct intel_engine_execlists execlists; @@ -654,7 +683,7 @@ intel_engine_flag(const struct intel_engine_cs *engine) } static inline u32 -intel_read_status_page(struct intel_engine_cs *engine, int reg) +intel_read_status_page(const struct intel_engine_cs *engine, int reg) { /* Ensure that the compiler doesn't optimize away the load. */ return READ_ONCE(engine->status_page.page_addr[reg]); @@ -721,14 +750,12 @@ void intel_engine_cleanup(struct intel_engine_cs *engine); void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); -int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); +int __must_check intel_ring_cacheline_align(struct i915_request *rq); int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes); -u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, - unsigned int n); +u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n); -static inline void -intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) +static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) { /* Dummy function. * @@ -738,22 +765,20 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) * reserved for the command packet (i.e. the value passed to * intel_ring_begin()). */ - GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs); + GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs); } -static inline u32 -intel_ring_wrap(const struct intel_ring *ring, u32 pos) +static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) { return pos & (ring->size - 1); } -static inline u32 -intel_ring_offset(const struct drm_i915_gem_request *req, void *addr) +static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr) { /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ - u32 offset = addr - req->ring->vaddr; - GEM_BUG_ON(offset > req->ring->size); - return intel_ring_wrap(req->ring, offset); + u32 offset = addr - rq->ring->vaddr; + GEM_BUG_ON(offset > rq->ring->size); + return intel_ring_wrap(rq->ring, offset); } static inline void @@ -791,7 +816,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) { /* Whilst writes to the tail are strictly order, there is no * serialisation between readers and the writers. The tail may be - * read by i915_gem_request_retire() just as it is being updated + * read by i915_request_retire() just as it is being updated * by execlists, as although the breadcrumb is complete, the context * switch hasn't been seen. 
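intel_ring_wrap() above masks with (size - 1), which is only equivalent to a modulo when the ring size is a power of two, an invariant the ring allocation is assumed to uphold. A quick demonstration:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t ring_wrap(uint32_t size, uint32_t pos)
{
    assert((size & (size - 1)) == 0); /* power-of-two ring assumed */
    return pos & (size - 1);
}

int main(void)
{
    printf("%u\n", ring_wrap(4096, 4100)); /* 4: wrapped past the end */
    printf("%u\n", ring_wrap(4096, 100));  /* 100: unchanged */
    return 0;
}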
@@ -812,8 +837,8 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);

-u64 intel_engine_get_active_head(struct intel_engine_cs *engine);
-u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine);
+u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
+u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);

static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
{
@@ -833,7 +858,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
}

int init_workarounds_ring(struct intel_engine_cs *engine);
-int intel_ring_workarounds_emit(struct drm_i915_gem_request *req);
+int intel_ring_workarounds_emit(struct i915_request *rq);

void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone);
@@ -861,7 +886,7 @@ static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);

static inline void intel_wait_init(struct intel_wait *wait,
-				   struct drm_i915_gem_request *rq)
+				   struct i915_request *rq)
{
	wait->tsk = current;
	wait->request = rq;
@@ -887,9 +912,9 @@ intel_wait_update_seqno(struct intel_wait *wait, u32 seqno)

static inline bool
intel_wait_update_request(struct intel_wait *wait,
-			  const struct drm_i915_gem_request *rq)
+			  const struct i915_request *rq)
{
-	return intel_wait_update_seqno(wait, i915_gem_request_global_seqno(rq));
+	return intel_wait_update_seqno(wait, i915_request_global_seqno(rq));
}

static inline bool
@@ -900,9 +925,9 @@ intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno)

static inline bool
intel_wait_check_request(const struct intel_wait *wait,
-			 const struct drm_i915_gem_request *rq)
+			 const struct i915_request *rq)
{
-	return intel_wait_check_seqno(wait, i915_gem_request_global_seqno(rq));
+	return intel_wait_check_seqno(wait, i915_request_global_seqno(rq));
}

static inline bool intel_wait_complete(const struct intel_wait *wait)
@@ -914,9 +939,8 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
			   struct intel_wait *wait);
void intel_engine_remove_wait(struct intel_engine_cs *engine,
			      struct intel_wait *wait);
-void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
-				   bool wakeup);
-void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);
+void intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
+void intel_engine_cancel_signaling(struct i915_request *request);

static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
{
@@ -935,7 +959,6 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
-bool intel_breadcrumbs_busy(struct intel_engine_cs *engine);

static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index d758da6156a8..53ea564f971e 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -94,6 +94,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain)
		return "PORT_DDI_D_LANES";
	case POWER_DOMAIN_PORT_DDI_E_LANES:
		return "PORT_DDI_E_LANES";
+	case POWER_DOMAIN_PORT_DDI_F_LANES:
+		return "PORT_DDI_F_LANES";
"PORT_DDI_F_LANES"; case POWER_DOMAIN_PORT_DDI_A_IO: return "PORT_DDI_A_IO"; case POWER_DOMAIN_PORT_DDI_B_IO: @@ -104,6 +106,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "PORT_DDI_D_IO"; case POWER_DOMAIN_PORT_DDI_E_IO: return "PORT_DDI_E_IO"; + case POWER_DOMAIN_PORT_DDI_F_IO: + return "PORT_DDI_F_IO"; case POWER_DOMAIN_PORT_DSI: return "PORT_DSI"; case POWER_DOMAIN_PORT_CRT: @@ -124,6 +128,10 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "AUX_C"; case POWER_DOMAIN_AUX_D: return "AUX_D"; + case POWER_DOMAIN_AUX_F: + return "AUX_F"; + case POWER_DOMAIN_AUX_IO_A: + return "AUX_IO_A"; case POWER_DOMAIN_GMBUS: return "GMBUS"; case POWER_DOMAIN_INIT: @@ -390,6 +398,15 @@ static void hsw_power_well_enable(struct drm_i915_private *dev_priv, I915_WRITE(HSW_PWR_WELL_CTL_DRIVER(id), val | HSW_PWR_WELL_CTL_REQ(id)); hsw_wait_for_power_well_enable(dev_priv, power_well); + /* Display WA #1178: cnl */ + if (IS_CANNONLAKE(dev_priv) && + (id == CNL_DISP_PW_AUX_B || id == CNL_DISP_PW_AUX_C || + id == CNL_DISP_PW_AUX_D || id == CNL_DISP_PW_AUX_F)) { + val = I915_READ(CNL_AUX_ANAOVRD1(id)); + val |= CNL_AUX_ANAOVRD1_ENABLE | CNL_AUX_ANAOVRD1_LDO_BYPASS; + I915_WRITE(CNL_AUX_ANAOVRD1(id), val); + } + if (wait_fuses) gen9_wait_for_power_well_fuses(dev_priv, pg); @@ -1816,9 +1833,11 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_F_LANES) | \ BIT_ULL(POWER_DOMAIN_AUX_B) | \ BIT_ULL(POWER_DOMAIN_AUX_C) | \ BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_AUX_F) | \ BIT_ULL(POWER_DOMAIN_AUDIO) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ BIT_ULL(POWER_DOMAIN_INIT)) @@ -1836,6 +1855,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_INIT)) #define CNL_DISPLAY_AUX_A_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_AUX_IO_A) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define CNL_DISPLAY_AUX_B_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_B) | \ @@ -1846,8 +1866,15 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, #define CNL_DISPLAY_AUX_D_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_D) | \ BIT_ULL(POWER_DOMAIN_INIT)) +#define CNL_DISPLAY_AUX_F_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_F) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define CNL_DISPLAY_DDI_F_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_F_IO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define CNL_DISPLAY_DC_OFF_POWER_DOMAINS ( \ CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_GT_IRQ) | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ BIT_ULL(POWER_DOMAIN_INIT)) @@ -2395,6 +2422,18 @@ static struct i915_power_well cnl_power_wells[] = { .ops = &hsw_power_well_ops, .id = SKL_DISP_PW_DDI_D, }, + { + .name = "DDI F IO power well", + .domains = CNL_DISPLAY_DDI_F_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = CNL_DISP_PW_DDI_F, + }, + { + .name = "AUX F", + .domains = CNL_DISPLAY_AUX_F_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = CNL_DISP_PW_AUX_F, + }, }; static int @@ -2510,6 +2549,16 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) set_power_wells(power_domains, skl_power_wells); } else if (IS_CANNONLAKE(dev_priv)) { set_power_wells(power_domains, cnl_power_wells); + + /* + * DDI and Aux IO are getting enabled for all ports + * regardless the presence or use. 
@@ -2600,6 +2649,48 @@ static void gen9_dbuf_disable(struct drm_i915_private *dev_priv)
		DRM_ERROR("DBuf power disable timeout!\n");
}

+/*
+ * TODO: we shouldn't always enable DBUF_CTL_S2, we should only enable it when
+ * needed and keep it disabled as much as possible.
+ */
+static void icl_dbuf_enable(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) | DBUF_POWER_REQUEST);
+	I915_WRITE(DBUF_CTL_S2, I915_READ(DBUF_CTL_S2) | DBUF_POWER_REQUEST);
+	POSTING_READ(DBUF_CTL_S2);
+
+	udelay(10);
+
+	if (!(I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) ||
+	    !(I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE))
+		DRM_ERROR("DBuf power enable timeout\n");
+}
+
+static void icl_dbuf_disable(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) & ~DBUF_POWER_REQUEST);
+	I915_WRITE(DBUF_CTL_S2, I915_READ(DBUF_CTL_S2) & ~DBUF_POWER_REQUEST);
+	POSTING_READ(DBUF_CTL_S2);
+
+	udelay(10);
+
+	if ((I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) ||
+	    (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE))
+		DRM_ERROR("DBuf power disable timeout!\n");
+}
+
+static void icl_mbus_init(struct drm_i915_private *dev_priv)
+{
+	uint32_t val;
+
+	val = MBUS_ABOX_BT_CREDIT_POOL1(16) |
+	      MBUS_ABOX_BT_CREDIT_POOL2(16) |
+	      MBUS_ABOX_B_CREDIT(1) |
+	      MBUS_ABOX_BW_CREDIT(1);
+
+	I915_WRITE(MBUS_ABOX_CTL, val);
+}
+
 static void skl_display_core_init(struct drm_i915_private *dev_priv,
				   bool resume)
{
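Aside (illustration only, not part of the patch): icl_dbuf_enable()/icl_dbuf_disable() above follow a request/state handshake: write the REQUEST bit, wait a fixed 10 us rather than polling, then verify the STATE bit once. A standalone model of the enable path with the MMIO access stubbed out:

#include <stdint.h>
#include <stdio.h>

#define DBUF_POWER_REQUEST (1u << 31)
#define DBUF_POWER_STATE   (1u << 30)

static uint32_t fake_reg;	/* stand-in for the MMIO register */

/* pretend the hardware acks the request immediately */
static void write_reg(uint32_t v)
{
	fake_reg = (v & DBUF_POWER_REQUEST) ? (v | DBUF_POWER_STATE)
					    : (v & ~DBUF_POWER_STATE);
}

static uint32_t read_reg(void) { return fake_reg; }
static void udelay(unsigned int us) { (void)us; /* busy-wait in real code */ }

static int dbuf_enable(void)
{
	write_reg(read_reg() | DBUF_POWER_REQUEST);
	udelay(10);	/* fixed settle time, deliberately not a poll loop */

	if (!(read_reg() & DBUF_POWER_STATE)) {
		fprintf(stderr, "DBuf power enable timeout\n");
		return -1;
	}
	return 0;
}

int main(void) { return dbuf_enable(); }

Note the design choice mirrored from the patch: a single fixed delay plus one status check, trading a tight poll loop for simplicity because the hardware is specified to settle well within the delay.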
@@ -2748,12 +2839,19 @@ static const struct cnl_procmon {
	{ .dw1 = 0x00440000, .dw9 = 0x9A00AB25, .dw10 = 0x8AE38FF1, },
};

-static void cnl_set_procmon_ref_values(struct drm_i915_private *dev_priv)
+/*
+ * CNL has just one set of registers, while ICL has two sets: one for port A and
+ * the other for port B. The CNL registers are equivalent to the ICL port A
+ * registers, which is why we call the ICL macros even though the function has
+ * CNL in its name.
+ */
+static void cnl_set_procmon_ref_values(struct drm_i915_private *dev_priv,
+				       enum port port)
{
	const struct cnl_procmon *procmon;
	u32 val;

-	val = I915_READ(CNL_PORT_COMP_DW3);
+	val = I915_READ(ICL_PORT_COMP_DW3(port));
	switch (val & (PROCESS_INFO_MASK | VOLTAGE_INFO_MASK)) {
	default:
		MISSING_CASE(val);
@@ -2774,13 +2872,13 @@ static void cnl_set_procmon_ref_values(struct drm_i915_private *dev_priv)
		break;
	}

-	val = I915_READ(CNL_PORT_COMP_DW1);
+	val = I915_READ(ICL_PORT_COMP_DW1(port));
	val &= ~((0xff << 16) | 0xff);
	val |= procmon->dw1;
-	I915_WRITE(CNL_PORT_COMP_DW1, val);
+	I915_WRITE(ICL_PORT_COMP_DW1(port), val);

-	I915_WRITE(CNL_PORT_COMP_DW9, procmon->dw9);
-	I915_WRITE(CNL_PORT_COMP_DW10, procmon->dw10);
+	I915_WRITE(ICL_PORT_COMP_DW9(port), procmon->dw9);
+	I915_WRITE(ICL_PORT_COMP_DW10(port), procmon->dw10);
}

static void cnl_display_core_init(struct drm_i915_private *dev_priv, bool resume)
@@ -2801,7 +2899,8 @@ static void cnl_display_core_init(struct drm_i915_private *dev_priv, bool resume
	val &= ~CNL_COMP_PWR_DOWN;
	I915_WRITE(CHICKEN_MISC_2, val);

-	cnl_set_procmon_ref_values(dev_priv);
+	/* Dummy PORT_A to get the correct CNL register from the ICL macro */
+	cnl_set_procmon_ref_values(dev_priv, PORT_A);

	val = I915_READ(CNL_PORT_COMP_DW0);
	val |= COMP_INIT;
@@ -2865,6 +2964,80 @@ static void cnl_display_core_uninit(struct drm_i915_private *dev_priv)
	I915_WRITE(CHICKEN_MISC_2, val);
}

+static void icl_display_core_init(struct drm_i915_private *dev_priv,
+				  bool resume)
+{
+	enum port port;
+	u32 val;
+
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+
+	/* 1. Enable PCH reset handshake. */
+	val = I915_READ(HSW_NDE_RSTWRN_OPT);
+	val |= RESET_PCH_HANDSHAKE_ENABLE;
+	I915_WRITE(HSW_NDE_RSTWRN_OPT, val);
+
+	for (port = PORT_A; port <= PORT_B; port++) {
+		/* 2. Enable DDI combo PHY comp. */
+		val = I915_READ(ICL_PHY_MISC(port));
+		val &= ~ICL_PHY_MISC_DE_IO_COMP_PWR_DOWN;
+		I915_WRITE(ICL_PHY_MISC(port), val);
+
+		cnl_set_procmon_ref_values(dev_priv, port);
+
+		val = I915_READ(ICL_PORT_COMP_DW0(port));
+		val |= COMP_INIT;
+		I915_WRITE(ICL_PORT_COMP_DW0(port), val);
+
+		/* 3. Set power down enable. */
+		val = I915_READ(ICL_PORT_CL_DW5(port));
+		val |= CL_POWER_DOWN_ENABLE;
+		I915_WRITE(ICL_PORT_CL_DW5(port), val);
+	}
+
+	/* 4. Enable power well 1 (PG1) and aux IO power. */
+	/* FIXME: ICL power wells code not here yet. */
+
+	/* 5. Enable CDCLK. */
+	icl_init_cdclk(dev_priv);
+
+	/* 6. Enable DBUF. */
+	icl_dbuf_enable(dev_priv);
+
+	/* 7. Setup MBUS. */
+	icl_mbus_init(dev_priv);
+
+	/* 8. CHICKEN_DCPR_1 */
+	I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) |
+					CNL_DDI_CLOCK_REG_ACCESS_ON);
+}
+
+static void icl_display_core_uninit(struct drm_i915_private *dev_priv)
+{
+	enum port port;
+	u32 val;
+
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+
+	/* 1. Disable all display engine functions -> already done */
+
+	/* 2. Disable DBUF */
+	icl_dbuf_disable(dev_priv);
+
+	/* 3. Disable CD clock */
+	icl_uninit_cdclk(dev_priv);
+
+	/* 4. Disable Power Well 1 (PG1) and Aux IO Power */
+	/* FIXME: ICL power wells code not here yet. */
+
+	/* 5. Disable Comp */
+	for (port = PORT_A; port <= PORT_B; port++) {
+		val = I915_READ(ICL_PHY_MISC(port));
+		val |= ICL_PHY_MISC_DE_IO_COMP_PWR_DOWN;
+		I915_WRITE(ICL_PHY_MISC(port), val);
+	}
+}
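Aside (illustration only, not part of the patch): note how icl_display_core_uninit() step 5 mirrors icl_display_core_init() step 2: comp is powered up by *clearing* a power-down bit per combo PHY and powered down by setting it again, looping over ports A..B. A standalone sketch of that symmetry (all names below are stand-ins):

#include <stdint.h>

#define COMP_PWR_DOWN (1u << 23)	/* invented bit position */

static uint32_t phy_misc[2];	/* fake ICL_PHY_MISC(port) registers */

static void phy_comp_power(int port, int enable)
{
	if (enable)
		phy_misc[port] &= ~COMP_PWR_DOWN;	/* power comp up */
	else
		phy_misc[port] |= COMP_PWR_DOWN;	/* power comp down */
}

static void display_core_init(void)
{
	for (int port = 0; port < 2; port++)
		phy_comp_power(port, 1);
}

static void display_core_uninit(void)
{
	/* teardown mirrors bring-up */
	for (int port = 0; port < 2; port++)
		phy_comp_power(port, 0);
}

int main(void)
{
	display_core_init();
	display_core_uninit();
	return 0;
}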
 static void chv_phy_control_init(struct drm_i915_private *dev_priv)
{
	struct i915_power_well *cmn_bc =
@@ -2997,7 +3170,9 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume)

	power_domains->initializing = true;

-	if (IS_CANNONLAKE(dev_priv)) {
+	if (IS_ICELAKE(dev_priv)) {
+		icl_display_core_init(dev_priv, resume);
+	} else if (IS_CANNONLAKE(dev_priv)) {
		cnl_display_core_init(dev_priv, resume);
	} else if (IS_GEN9_BC(dev_priv)) {
		skl_display_core_init(dev_priv, resume);
@@ -3038,7 +3213,9 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv)
	if (!i915_modparams.disable_power_well)
		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);

-	if (IS_CANNONLAKE(dev_priv))
+	if (IS_ICELAKE(dev_priv))
+		icl_display_core_uninit(dev_priv);
+	else if (IS_CANNONLAKE(dev_priv))
		cnl_display_core_uninit(dev_priv);
	else if (IS_GEN9_BC(dev_priv))
		skl_display_core_uninit(dev_priv);
@@ -3154,18 +3331,19 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
 * @dev_priv: i915 device instance
 *
 * This function grabs a device-level runtime pm reference if the device is
- * already in use and ensures that it is powered up.
+ * already in use and ensures that it is powered up. It is illegal to try
+ * to access the HW should intel_runtime_pm_get_if_in_use() report failure.
 *
 * Any runtime pm reference obtained by this function must have a symmetric
 * call to intel_runtime_pm_put() to release the reference again.
+ *
+ * Returns: True if the wakeref was acquired, or False otherwise.
 */
bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
{
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-	struct device *kdev = &pdev->dev;
-
	if (IS_ENABLED(CONFIG_PM)) {
-		int ret = pm_runtime_get_if_in_use(kdev);
+		struct pci_dev *pdev = dev_priv->drm.pdev;
+		struct device *kdev = &pdev->dev;

		/*
		 * In cases runtime PM is disabled by the RPM core and we get
@@ -3173,9 +3351,7 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
		 * function, since the power state is undefined. This applies
		 * atm to the late/early system suspend/resume handlers.
		 */
-		WARN_ONCE(ret < 0,
-			  "pm_runtime_get_if_in_use() failed: %d\n", ret);
-		if (ret <= 0)
+		if (pm_runtime_get_if_in_use(kdev) <= 0)
			return false;
	}
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 2b8764897d68..96e213ec202d 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -214,7 +214,7 @@ static bool
intel_sdvo_create_enhance_property(struct intel_sdvo *intel_sdvo,
				   struct intel_sdvo_connector *intel_sdvo_connector);

-/**
+/*
 * Writes the SDVOB or SDVOC with the given value, but always writes both
 * SDVOB and SDVOC to work around apparent hardware issues (according to
 * comments in the BIOS).
@@ -250,10 +250,10 @@ static void intel_sdvo_write_sdvox(struct intel_sdvo *intel_sdvo, u32 val)
	 * writing them only once doesn't appear to 'stick'.
	 * The BIOS does this too. Yay, magic
	 */
-	for (i = 0; i < 2; i++)
-	{
+	for (i = 0; i < 2; i++) {
		I915_WRITE(GEN3_SDVOB, bval);
		POSTING_READ(GEN3_SDVOB);
+
		I915_WRITE(GEN3_SDVOC, cval);
		POSTING_READ(GEN3_SDVOC);
	}
@@ -643,7 +643,7 @@ static bool intel_sdvo_set_target_input(struct intel_sdvo *intel_sdvo)
				    &targets, sizeof(targets));
}

-/**
+/*
 * Return whether each input is trained.
 *
 * This function is making an assumption about the layout of the response,
@@ -1061,8 +1061,10 @@ intel_sdvo_set_output_timings_from_mode(struct intel_sdvo *intel_sdvo,
	return true;
}

-/* Asks the sdvo controller for the preferred input mode given the output mode.
- * Unfortunately we have to set up the full output mode to do that. */
+/*
+ * Asks the sdvo controller for the preferred input mode given the output mode.
+ * Unfortunately we have to set up the full output mode to do that.
+ */
static bool
intel_sdvo_get_preferred_input_mode(struct intel_sdvo *intel_sdvo,
				    const struct drm_display_mode *mode,
@@ -1095,8 +1097,10 @@ static void i9xx_adjust_sdvo_tv_clock(struct intel_crtc_state *pipe_config)
	unsigned dotclock = pipe_config->port_clock;
	struct dpll *clock = &pipe_config->dpll;

-	/* SDVO TV has fixed PLL values depend on its clock range,
-	   this mirrors vbios setting. */
+	/*
+	 * SDVO TV has fixed PLL values that depend on its clock range;
+	 * this mirrors the VBIOS setting.
+	 */
	if (dotclock >= 100000 && dotclock < 140500) {
		clock->p1 = 2;
		clock->p2 = 10;
@@ -1132,7 +1136,8 @@ static bool intel_sdvo_compute_config(struct intel_encoder *encoder,
	if (HAS_PCH_SPLIT(to_i915(encoder->base.dev)))
		pipe_config->has_pch_encoder = true;

-	/* We need to construct preferred input timings based on our
+	/*
+	 * We need to construct preferred input timings based on our
	 * output timings. To do that, we have to set the output
	 * timings, even though this isn't really the right place in
	 * the sequence to do it. Oh well.
@@ -1155,7 +1160,8 @@ static bool intel_sdvo_compute_config(struct intel_encoder *encoder,
						   adjusted_mode);
	}

-	/* Make the CRTC code factor in the SDVO pixel multiplier. The
+	/*
+	 * Make the CRTC code factor in the SDVO pixel multiplier. The
	 * SDVO device will factor out the multiplier during mode_set.
	 */
	pipe_config->pixel_multiplier =
@@ -1169,9 +1175,12 @@ static bool intel_sdvo_compute_config(struct intel_encoder *encoder,
		pipe_config->has_audio = true;

	if (intel_sdvo_state->base.broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) {
-		/* See CEA-861-E - 5.1 Default Encoding Parameters */
-		/* FIXME: This bit is only valid when using TMDS encoding and 8
-		 * bit per color mode. */
+		/*
+		 * See CEA-861-E - 5.1 Default Encoding Parameters
+		 *
+		 * FIXME: This bit is only valid when using TMDS encoding and 8
+		 * bit per color mode.
+		 */
		if (pipe_config->has_hdmi_sink &&
		    drm_match_cea_mode(adjusted_mode) > 1)
			pipe_config->limited_color_range = true;
@@ -1272,7 +1281,8 @@ static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder,

	intel_sdvo_update_props(intel_sdvo, sdvo_state);

-	/* First, set the input mapping for the first input to our controlled
+	/*
+	 * First, set the input mapping for the first input to our controlled
	 * output. This is only correct if we're a single-input device, in
	 * which case the first input is the output from the appropriate SDVO
	 * channel on the motherboard. In a two-input device, the first input
@@ -1435,8 +1445,10 @@ static void intel_sdvo_get_config(struct intel_encoder *encoder,

	ret = intel_sdvo_get_input_timing(intel_sdvo, &dtd);
	if (!ret) {
-		/* Some sdvo encoders are not spec compliant and don't
-		 * implement the mandatory get_timings function. */
+		/*
+		 * Some sdvo encoders are not spec compliant and don't
+		 * implement the mandatory get_timings function.
+		 */
		DRM_DEBUG_DRIVER("failed to retrieve SDVO DTD\n");
		pipe_config->quirks |= PIPE_CONFIG_QUIRK_MODE_SYNC_FLAGS;
	} else {
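Aside (illustration only, not part of the patch): the pixel multiplier mentioned in intel_sdvo_compute_config() above exists because the SDVO link wants its clock in roughly the 100-200 MHz window; slower modes are sent at 2x or 4x and the encoder divides the multiplier back out during mode_set. A standalone sketch, assuming the usual 100/50 MHz thresholds (not taken from this patch):

#include <assert.h>

static int sdvo_pixel_multiplier(int dotclock_khz)
{
	if (dotclock_khz >= 100000)
		return 1;	/* already inside the 100-200 MHz window */
	else if (dotclock_khz >= 50000)
		return 2;	/* 2x lands inside the window */
	else
		return 4;	/* 4x for anything slower */
}

int main(void)
{
	assert(sdvo_pixel_multiplier(148500) == 1);	/* 1080p */
	assert(sdvo_pixel_multiplier(65000) == 2);	/* 1024x768 */
	assert(sdvo_pixel_multiplier(25175) == 4);	/* 640x480 */
	return 0;
}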
@@ -1585,7 +1597,9 @@ static void intel_enable_sdvo(struct intel_encoder *encoder,
		intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
	success = intel_sdvo_get_trained_inputs(intel_sdvo, &input1, &input2);

-	/* Warn if the device reported failure to sync.
+	/*
+	 * Warn if the device reported failure to sync.
+	 *
	 * A lot of SDVO devices fail to notify of sync, but it's
	 * a given that if the status is a success, we succeeded.
	 */
@@ -1607,9 +1621,6 @@ intel_sdvo_mode_valid(struct drm_connector *connector,
	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector);
	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;

-	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
-		return MODE_NO_DBLESCAN;
-
	if (intel_sdvo->pixel_clock_min > mode->clock)
		return MODE_CLOCK_LOW;
@@ -1675,8 +1686,10 @@ static uint16_t intel_sdvo_get_hotplug_support(struct intel_sdvo *intel_sdvo)
	if (!I915_HAS_HOTPLUG(dev_priv))
		return 0;

-	/* HW Erratum: SDVO Hotplug is broken on all i945G chips, there's noise
-	 * on the line. */
+	/*
+	 * HW Erratum: SDVO Hotplug is broken on all i945G chips; there's noise
+	 * on the line.
+	 */
	if (IS_I945G(dev_priv) || IS_I945GM(dev_priv))
		return 0;
@@ -1692,7 +1705,15 @@ static void intel_sdvo_enable_hotplug(struct intel_encoder *encoder)
	struct intel_sdvo *intel_sdvo = to_sdvo(encoder);

	intel_sdvo_write_cmd(intel_sdvo, SDVO_CMD_SET_ACTIVE_HOT_PLUG,
-			&intel_sdvo->hotplug_active, 2);
+			     &intel_sdvo->hotplug_active, 2);
+}
+
+static bool intel_sdvo_hotplug(struct intel_encoder *encoder,
+			       struct intel_connector *connector)
+{
+	intel_sdvo_enable_hotplug(encoder);
+
+	return intel_encoder_hotplug(encoder, connector);
}

static bool
@@ -1960,7 +1981,8 @@ static void intel_sdvo_get_tv_modes(struct drm_connector *connector)
	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
		      connector->base.id, connector->name);

-	/* Read the list of supported input resolutions for the selected TV
+	/*
+	 * Read the list of supported input resolutions for the selected TV
	 * format.
	 */
	format_map = 1 << conn_state->tv.mode;
@@ -2271,7 +2293,8 @@ intel_sdvo_guess_ddc_bus(struct intel_sdvo *sdvo)
	uint16_t mask = 0;
	unsigned int num_bits;

-	/* Make a mask of outputs less than or equal to our own priority in the
+	/*
+	 * Make a mask of outputs less than or equal to our own priority in the
	 * list.
	 */
	switch (sdvo->controlled_output) {
@@ -2301,7 +2324,7 @@ intel_sdvo_guess_ddc_bus(struct intel_sdvo *sdvo)
	sdvo->ddc_bus = 1 << num_bits;
}

-/**
+/*
 * Choose the appropriate DDC bus for control bus switch command for this
 * SDVO output based on the controlled output.
 *
@@ -2345,9 +2368,11 @@ intel_sdvo_select_i2c_bus(struct drm_i915_private *dev_priv,

	sdvo->i2c = intel_gmbus_get_adapter(dev_priv, pin);

-	/* With gmbus we should be able to drive sdvo i2c at 2MHz, but somehow
+	/*
+	 * With gmbus we should be able to drive sdvo i2c at 2MHz, but somehow
	 * our code totally fails once we start using gmbus. Hence fall back to
-	 * bit banging for now. */
+	 * bit banging for now.
+	 */
	intel_gmbus_force_bit(sdvo->i2c, true);
}

@@ -2382,7 +2407,8 @@ intel_sdvo_get_slave_addr(struct drm_i915_private *dev_priv,
	if (my_mapping->slave_addr)
		return my_mapping->slave_addr;

-	/* If the BIOS only described a different SDVO device, use the
+	/*
+	 * If the BIOS only described a different SDVO device, use the
	 * address that it isn't using.
	 */
	if (other_mapping->slave_addr) {
@@ -2392,7 +2418,8 @@ intel_sdvo_get_slave_addr(struct drm_i915_private *dev_priv,
		return 0x70;
	}

-	/* No SDVO device info is found for another DVO port,
+	/*
+	 * No SDVO device info is found for another DVO port,
	 * so use mapping assumption we had before BIOS parsing.
	 */
	if (sdvo->port == PORT_B)
@@ -2493,10 +2520,11 @@ intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device)
	if (intel_sdvo_get_hotplug_support(intel_sdvo) &
	    intel_sdvo_connector->output_flag) {
		intel_sdvo->hotplug_active |= intel_sdvo_connector->output_flag;
-		/* Some SDVO devices have one-shot hotplug interrupts.
+		/*
+		 * Some SDVO devices have one-shot hotplug interrupts.
		 * Ensure that they get re-enabled when an interrupt happens.
		 */
-		intel_encoder->hot_plug = intel_sdvo_enable_hotplug;
+		intel_encoder->hotplug = intel_sdvo_hotplug;
		intel_sdvo_enable_hotplug(intel_encoder);
	} else {
		intel_connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT;
@@ -2792,7 +2820,7 @@ intel_sdvo_create_enhance_property_tv(struct intel_sdvo *intel_sdvo,
		to_intel_sdvo_connector_state(conn_state);
	uint16_t response, data_value[2];

-	/* when horizontal overscan is supported, Add the left/right property */
+	/* when horizontal overscan is supported, add the left/right property */
	if (enhancements.overscan_h) {
		if (!intel_sdvo_get_value(intel_sdvo,
					  SDVO_CMD_GET_MAX_OVERSCAN_H,
@@ -3077,7 +3105,8 @@ bool intel_sdvo_init(struct drm_i915_private *dev_priv,
		goto err_output;
	}

-	/* Only enable the hotplug irq if we need it, to work around noisy
+	/*
+	 * Only enable the hotplug irq if we need it, to work around noisy
	 * hotplug lines.
	 */
	if (intel_sdvo->hotplug_active) {
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index dd485f59eb1d..dbdcf85032df 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -41,8 +41,7 @@
#include <drm/i915_drm.h>
#include "i915_drv.h"

-static bool
-format_is_yuv(uint32_t format)
+bool intel_format_is_yuv(u32 format)
{
	switch (format) {
	case DRM_FORMAT_YUYV:
@@ -266,6 +265,7 @@ skl_update_plane(struct intel_plane *plane,
	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
		I915_WRITE_FW(PLANE_COLOR_CTL(pipe, plane_id),
			      plane_state->color_ctl);
+
	if (key->flags) {
		I915_WRITE_FW(PLANE_KEYVAL(pipe, plane_id), key->min_value);
		I915_WRITE_FW(PLANE_KEYMAX(pipe, plane_id), key->max_value);
@@ -346,44 +346,103 @@ skl_plane_get_hw_state(struct intel_plane *plane)
}

static void
-chv_update_csc(struct intel_plane *plane, uint32_t format)
+chv_update_csc(const struct intel_plane_state *plane_state)
{
+	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	const struct drm_framebuffer *fb = plane_state->base.fb;
	enum plane_id plane_id = plane->id;
-
-	/* Seems RGB data bypasses the CSC always */
-	if (!format_is_yuv(format))
-		return;
-
	/*
-	 * BT.601 limited range YCbCr -> full range RGB
+	 * |r|   | c0 c1 c2 |   |cr|
+	 * |g| = | c3 c4 c5 | x |y |
+	 * |b|   | c6 c7 c8 |   |cb|
	 *
-	 * |r|   |  6537 4769     0|   |cr |
-	 * |g| = | -3330 4769 -1605| x |y-64|
-	 * |b|   |     0 4769  8263|   |cb |
+	 * Coefficients are s3.12.
	 *
-	 * Cb and Cr apparently come in as signed already, so no
-	 * need for any offset. For Y we need to remove the offset.
+	 * Cb and Cr apparently come in as signed already, and
+	 * we always get full range data in on account of CLRC0/1.
	 */
-	I915_WRITE_FW(SPCSCYGOFF(plane_id), SPCSC_OOFF(0) | SPCSC_IOFF(-64));
+	static const s16 csc_matrix[][9] = {
+		/* BT.601 full range YCbCr -> full range RGB */
+		[DRM_COLOR_YCBCR_BT601] = {
+			 5743, 4096,     0,
+			-2925, 4096, -1410,
+			    0, 4096,  7258,
+		},
+		/* BT.709 full range YCbCr -> full range RGB */
+		[DRM_COLOR_YCBCR_BT709] = {
+			 6450, 4096,    0,
+			-1917, 4096, -767,
+			    0, 4096, 7601,
+		},
+	};
+	const s16 *csc = csc_matrix[plane_state->base.color_encoding];
+
+	/* Seems RGB data bypasses the CSC always */
+	if (!intel_format_is_yuv(fb->format->format))
+		return;
+
+	I915_WRITE_FW(SPCSCYGOFF(plane_id), SPCSC_OOFF(0) | SPCSC_IOFF(0));
	I915_WRITE_FW(SPCSCCBOFF(plane_id), SPCSC_OOFF(0) | SPCSC_IOFF(0));
	I915_WRITE_FW(SPCSCCROFF(plane_id), SPCSC_OOFF(0) | SPCSC_IOFF(0));

-	I915_WRITE_FW(SPCSCC01(plane_id), SPCSC_C1(4769) | SPCSC_C0(6537));
-	I915_WRITE_FW(SPCSCC23(plane_id), SPCSC_C1(-3330) | SPCSC_C0(0));
-	I915_WRITE_FW(SPCSCC45(plane_id), SPCSC_C1(-1605) | SPCSC_C0(4769));
-	I915_WRITE_FW(SPCSCC67(plane_id), SPCSC_C1(4769) | SPCSC_C0(0));
-	I915_WRITE_FW(SPCSCC8(plane_id), SPCSC_C0(8263));
+	I915_WRITE_FW(SPCSCC01(plane_id), SPCSC_C1(csc[1]) | SPCSC_C0(csc[0]));
+	I915_WRITE_FW(SPCSCC23(plane_id), SPCSC_C1(csc[3]) | SPCSC_C0(csc[2]));
+	I915_WRITE_FW(SPCSCC45(plane_id), SPCSC_C1(csc[5]) | SPCSC_C0(csc[4]));
+	I915_WRITE_FW(SPCSCC67(plane_id), SPCSC_C1(csc[7]) | SPCSC_C0(csc[6]));
+	I915_WRITE_FW(SPCSCC8(plane_id), SPCSC_C0(csc[8]));

-	I915_WRITE_FW(SPCSCYGICLAMP(plane_id), SPCSC_IMAX(940) | SPCSC_IMIN(64));
-	I915_WRITE_FW(SPCSCCBICLAMP(plane_id), SPCSC_IMAX(448) | SPCSC_IMIN(-448));
-	I915_WRITE_FW(SPCSCCRICLAMP(plane_id), SPCSC_IMAX(448) | SPCSC_IMIN(-448));
+	I915_WRITE_FW(SPCSCYGICLAMP(plane_id), SPCSC_IMAX(1023) | SPCSC_IMIN(0));
+	I915_WRITE_FW(SPCSCCBICLAMP(plane_id), SPCSC_IMAX(512) | SPCSC_IMIN(-512));
+	I915_WRITE_FW(SPCSCCRICLAMP(plane_id), SPCSC_IMAX(512) | SPCSC_IMIN(-512));

	I915_WRITE_FW(SPCSCYGOCLAMP(plane_id), SPCSC_OMAX(1023) | SPCSC_OMIN(0));
	I915_WRITE_FW(SPCSCCBOCLAMP(plane_id), SPCSC_OMAX(1023) | SPCSC_OMIN(0));
	I915_WRITE_FW(SPCSCCROCLAMP(plane_id), SPCSC_OMAX(1023) | SPCSC_OMIN(0));
}
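Aside (illustration only, not part of the patch): the csc_matrix[] coefficients above are s3.12 fixed point, so 4096 represents 1.0 (and 5743/4096 is the familiar 1.402 BT.601 Cr-to-R weight). A standalone sketch of applying one matrix row and clamping to a 10-bit output range:

#include <stdint.h>
#include <stdio.h>

static int clamp(int v, int lo, int hi)
{
	return v < lo ? lo : v > hi ? hi : v;
}

/* one row of |r| = |c0 c1 c2| x |cr y cb|, coefficients in s3.12 */
static int csc_row(const int16_t c[3], int cr, int y, int cb)
{
	int32_t acc = c[0] * cr + c[1] * y + c[2] * cb;

	return clamp(acc >> 12, 0, 1023);	/* 10 bpc output clamp */
}

int main(void)
{
	/* BT.601 full-range R row from the patch: 1.402*Cr + 1.0*Y */
	const int16_t r_row[3] = { 5743, 4096, 0 };

	/* white: Y = 1023, Cb = Cr = 0 (chroma is already zero-centred) */
	printf("R(white) = %d\n", csc_row(r_row, 0, 1023, 0));
	return 0;
}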
+#define SIN_0 0
+#define COS_0 1
+
+static void
+vlv_update_clrc(const struct intel_plane_state *plane_state)
+{
+	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	const struct drm_framebuffer *fb = plane_state->base.fb;
+	enum pipe pipe = plane->pipe;
+	enum plane_id plane_id = plane->id;
+	int contrast, brightness, sh_scale, sh_sin, sh_cos;
+
+	if (intel_format_is_yuv(fb->format->format) &&
+	    plane_state->base.color_range == DRM_COLOR_YCBCR_LIMITED_RANGE) {
+		/*
+		 * Expand limited range to full range:
+		 * Contrast is applied first and is used to expand Y range.
+		 * Brightness is applied second and is used to remove the
+		 * offset from Y. Saturation/hue is used to expand CbCr range.
+		 */
+		contrast = DIV_ROUND_CLOSEST(255 << 6, 235 - 16);
+		brightness = -DIV_ROUND_CLOSEST(16 * 255, 235 - 16);
+		sh_scale = DIV_ROUND_CLOSEST(128 << 7, 240 - 128);
+		sh_sin = SIN_0 * sh_scale;
+		sh_cos = COS_0 * sh_scale;
+	} else {
+		/* Pass-through everything. */
+		contrast = 1 << 6;
+		brightness = 0;
+		sh_scale = 1 << 7;
+		sh_sin = SIN_0 * sh_scale;
+		sh_cos = COS_0 * sh_scale;
+	}
+
+	/* FIXME these registers are single buffered :( */
+	I915_WRITE_FW(SPCLRC0(pipe, plane_id),
+		      SP_CONTRAST(contrast) | SP_BRIGHTNESS(brightness));
+	I915_WRITE_FW(SPCLRC1(pipe, plane_id),
+		      SP_SH_SIN(sh_sin) | SP_SH_COS(sh_cos));
+}
+
 static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state,
			   const struct intel_plane_state *plane_state)
{
@@ -433,6 +492,9 @@ static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state,
		return 0;
	}

+	if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709)
+		sprctl |= SP_YUV_FORMAT_BT709;
+
	if (fb->modifier == I915_FORMAT_MOD_X_TILED)
		sprctl |= SP_TILED;
@@ -477,8 +539,10 @@ vlv_update_plane(struct intel_plane *plane,

	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);

+	vlv_update_clrc(plane_state);
+
	if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_B)
-		chv_update_csc(plane, fb->format->format);
+		chv_update_csc(plane_state);

	if (key->flags) {
		I915_WRITE_FW(SPKEYMINVAL(pipe, plane_id), key->min_value);
@@ -584,6 +648,12 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state,
		return 0;
	}

+	if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709)
+		sprctl |= SPRITE_YUV_TO_RGB_CSC_FORMAT_BT709;
+
+	if (plane_state->base.color_range == DRM_COLOR_YCBCR_FULL_RANGE)
+		sprctl |= SPRITE_YUV_RANGE_CORRECTION_DISABLE;
+
	if (fb->modifier == I915_FORMAT_MOD_X_TILED)
		sprctl |= SPRITE_TILED;
@@ -740,6 +810,12 @@ static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state,
		return 0;
	}

+	if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709)
+		dvscntr |= DVS_YUV_FORMAT_BT709;
+
+	if (plane_state->base.color_range == DRM_COLOR_YCBCR_FULL_RANGE)
+		dvscntr |= DVS_YUV_RANGE_CORRECTION_DISABLE;
+
	if (fb->modifier == I915_FORMAT_MOD_X_TILED)
		dvscntr |= DVS_TILED;
@@ -864,7 +940,8 @@ intel_check_sprite_plane(struct intel_plane *plane,
	uint32_t src_x, src_y, src_w, src_h;
	struct drm_rect *src = &state->base.src;
	struct drm_rect *dst = &state->base.dst;
-	const struct drm_rect *clip = &state->clip;
+	struct drm_rect clip = {};
+	int max_stride = INTEL_GEN(dev_priv) >= 9 ?
32768 : 16384; int hscale, vscale; int max_scale, min_scale; bool can_scale; @@ -885,7 +962,7 @@ intel_check_sprite_plane(struct intel_plane *plane, } /* FIXME check all gen limits */ - if (fb->width < 3 || fb->height < 3 || fb->pitches[0] > 16384) { + if (fb->width < 3 || fb->height < 3 || fb->pitches[0] > max_stride) { DRM_DEBUG_KMS("Unsuitable framebuffer for plane\n"); return -EINVAL; } @@ -893,7 +970,7 @@ intel_check_sprite_plane(struct intel_plane *plane, /* setup can_scale, min_scale, max_scale */ if (INTEL_GEN(dev_priv) >= 9) { /* use scaler when colorkey is not required */ - if (state->ckey.flags == I915_SET_COLORKEY_NONE) { + if (!state->ckey.flags) { can_scale = 1; min_scale = 1; max_scale = skl_max_scale(crtc, crtc_state); @@ -922,7 +999,11 @@ intel_check_sprite_plane(struct intel_plane *plane, vscale = drm_rect_calc_vscale_relaxed(src, dst, min_scale, max_scale); BUG_ON(vscale < 0); - state->base.visible = drm_rect_clip_scaled(src, dst, clip, hscale, vscale); + if (crtc_state->base.enable) + drm_mode_get_hv_timing(&crtc_state->base.mode, + &clip.x2, &clip.y2); + + state->base.visible = drm_rect_clip_scaled(src, dst, &clip, hscale, vscale); crtc_x = dst->x1; crtc_y = dst->y1; @@ -974,7 +1055,7 @@ intel_check_sprite_plane(struct intel_plane *plane, src_y = src->y1 >> 16; src_h = drm_rect_height(src) >> 16; - if (format_is_yuv(fb->format->format)) { + if (intel_format_is_yuv(fb->format->format)) { src_x &= ~1; src_w &= ~1; @@ -1027,7 +1108,7 @@ intel_check_sprite_plane(struct intel_plane *plane, dst->y2 = crtc_y + crtc_h; if (INTEL_GEN(dev_priv) >= 9) { - ret = skl_check_plane_surface(state); + ret = skl_check_plane_surface(crtc_state, state); if (ret) return ret; @@ -1058,8 +1139,8 @@ intel_check_sprite_plane(struct intel_plane *plane, return 0; } -int intel_sprite_set_colorkey(struct drm_device *dev, void *data, - struct drm_file *file_priv) +int intel_sprite_set_colorkey_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) { struct drm_i915_private *dev_priv = to_i915(dev); struct drm_intel_sprite_colorkey *set = data; @@ -1069,6 +1150,12 @@ int intel_sprite_set_colorkey(struct drm_device *dev, void *data, struct drm_modeset_acquire_ctx ctx; int ret = 0; + /* ignore the pointless "none" flag */ + set->flags &= ~I915_SET_COLORKEY_NONE; + + if (set->flags & ~(I915_SET_COLORKEY_DESTINATION | I915_SET_COLORKEY_SOURCE)) + return -EINVAL; + /* Make sure we don't try to enable both src & dest simultaneously */ if ((set->flags & (I915_SET_COLORKEY_DESTINATION | I915_SET_COLORKEY_SOURCE)) == (I915_SET_COLORKEY_DESTINATION | I915_SET_COLORKEY_SOURCE)) return -EINVAL; @@ -1161,18 +1248,27 @@ static uint32_t skl_plane_formats[] = { DRM_FORMAT_VYUY, }; -static const uint64_t skl_plane_format_modifiers[] = { +static const uint64_t skl_plane_format_modifiers_noccs[] = { + I915_FORMAT_MOD_Yf_TILED, + I915_FORMAT_MOD_Y_TILED, + I915_FORMAT_MOD_X_TILED, + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID +}; + +static const uint64_t skl_plane_format_modifiers_ccs[] = { + I915_FORMAT_MOD_Yf_TILED_CCS, + I915_FORMAT_MOD_Y_TILED_CCS, + I915_FORMAT_MOD_Yf_TILED, + I915_FORMAT_MOD_Y_TILED, I915_FORMAT_MOD_X_TILED, DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID }; -static bool g4x_sprite_plane_format_mod_supported(struct drm_plane *plane, - uint32_t format, - uint64_t modifier) +static bool g4x_mod_supported(uint32_t format, uint64_t modifier) { switch (format) { - case DRM_FORMAT_XBGR8888: case DRM_FORMAT_XRGB8888: case DRM_FORMAT_YUYV: case DRM_FORMAT_YVYU: @@ -1187,22 +1283,38 @@ 
static bool g4x_sprite_plane_format_mod_supported(struct drm_plane *plane, } } -static bool vlv_sprite_plane_format_mod_supported(struct drm_plane *plane, - uint32_t format, - uint64_t modifier) +static bool snb_mod_supported(uint32_t format, uint64_t modifier) { switch (format) { + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_XBGR8888: case DRM_FORMAT_YUYV: case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: case DRM_FORMAT_VYUY: + if (modifier == DRM_FORMAT_MOD_LINEAR || + modifier == I915_FORMAT_MOD_X_TILED) + return true; + /* fall through */ + default: + return false; + } +} + +static bool vlv_mod_supported(uint32_t format, uint64_t modifier) +{ + switch (format) { case DRM_FORMAT_RGB565: - case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ABGR8888: case DRM_FORMAT_ARGB8888: + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_XRGB8888: case DRM_FORMAT_XBGR2101010: case DRM_FORMAT_ABGR2101010: - case DRM_FORMAT_XBGR8888: - case DRM_FORMAT_ABGR8888: + case DRM_FORMAT_YUYV: + case DRM_FORMAT_YVYU: + case DRM_FORMAT_UYVY: + case DRM_FORMAT_VYUY: if (modifier == DRM_FORMAT_MOD_LINEAR || modifier == I915_FORMAT_MOD_X_TILED) return true; @@ -1212,16 +1324,17 @@ static bool vlv_sprite_plane_format_mod_supported(struct drm_plane *plane, } } -static bool skl_sprite_plane_format_mod_supported(struct drm_plane *plane, - uint32_t format, - uint64_t modifier) +static bool skl_mod_supported(uint32_t format, uint64_t modifier) { - /* This is the same as primary plane since SKL has universal planes */ switch (format) { case DRM_FORMAT_XRGB8888: case DRM_FORMAT_XBGR8888: case DRM_FORMAT_ARGB8888: case DRM_FORMAT_ABGR8888: + if (modifier == I915_FORMAT_MOD_Yf_TILED_CCS || + modifier == I915_FORMAT_MOD_Y_TILED_CCS) + return true; + /* fall through */ case DRM_FORMAT_RGB565: case DRM_FORMAT_XRGB2101010: case DRM_FORMAT_XBGR2101010: @@ -1257,13 +1370,13 @@ static bool intel_sprite_plane_format_mod_supported(struct drm_plane *plane, return false; if (INTEL_GEN(dev_priv) >= 9) - return skl_sprite_plane_format_mod_supported(plane, format, modifier); + return skl_mod_supported(format, modifier); else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - return vlv_sprite_plane_format_mod_supported(plane, format, modifier); + return vlv_mod_supported(format, modifier); + else if (INTEL_GEN(dev_priv) >= 6) + return snb_mod_supported(format, modifier); else - return g4x_sprite_plane_format_mod_supported(plane, format, modifier); - - unreachable(); + return g4x_mod_supported(format, modifier); } static const struct drm_plane_funcs intel_sprite_plane_funcs = { @@ -1277,6 +1390,23 @@ static const struct drm_plane_funcs intel_sprite_plane_funcs = { .format_mod_supported = intel_sprite_plane_format_mod_supported, }; +bool skl_plane_has_ccs(struct drm_i915_private *dev_priv, + enum pipe pipe, enum plane_id plane_id) +{ + if (plane_id == PLANE_CURSOR) + return false; + + if (INTEL_GEN(dev_priv) >= 10) + return true; + + if (IS_GEMINILAKE(dev_priv)) + return pipe != PIPE_C; + + return pipe != PIPE_C && + (plane_id == PLANE_PRIMARY || + plane_id == PLANE_SPRITE0); +} + struct intel_plane * intel_sprite_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe, int plane) @@ -1303,7 +1433,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, } intel_plane->base.state = &state->base; - if (INTEL_GEN(dev_priv) >= 10) { + if (INTEL_GEN(dev_priv) >= 9) { intel_plane->can_scale = true; state->scaler_id = -1; @@ -1313,18 +1443,11 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, plane_formats = skl_plane_formats; 
num_plane_formats = ARRAY_SIZE(skl_plane_formats); - modifiers = skl_plane_format_modifiers; - } else if (INTEL_GEN(dev_priv) >= 9) { - intel_plane->can_scale = true; - state->scaler_id = -1; - - intel_plane->update_plane = skl_update_plane; - intel_plane->disable_plane = skl_disable_plane; - intel_plane->get_hw_state = skl_plane_get_hw_state; - plane_formats = skl_plane_formats; - num_plane_formats = ARRAY_SIZE(skl_plane_formats); - modifiers = skl_plane_format_modifiers; + if (skl_plane_has_ccs(dev_priv, pipe, PLANE_SPRITE0 + plane)) + modifiers = skl_plane_format_modifiers_ccs; + else + modifiers = skl_plane_format_modifiers_noccs; } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { intel_plane->can_scale = false; intel_plane->max_downscale = 1; @@ -1386,7 +1509,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->pipe = pipe; intel_plane->i9xx_plane = plane; intel_plane->id = PLANE_SPRITE0 + plane; - intel_plane->frontbuffer_bit = INTEL_FRONTBUFFER_SPRITE(pipe, plane); + intel_plane->frontbuffer_bit = INTEL_FRONTBUFFER(pipe, intel_plane->id); intel_plane->check_plane = intel_check_sprite_plane; possible_crtcs = (1 << pipe); @@ -1412,6 +1535,14 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, DRM_MODE_ROTATE_0, supported_rotations); + drm_plane_create_color_properties(&intel_plane->base, + BIT(DRM_COLOR_YCBCR_BT601) | + BIT(DRM_COLOR_YCBCR_BT709), + BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) | + BIT(DRM_COLOR_YCBCR_FULL_RANGE), + DRM_COLOR_YCBCR_BT709, + DRM_COLOR_YCBCR_LIMITED_RANGE); + drm_plane_helper_add(&intel_plane->base, &intel_plane_helper_funcs); return intel_plane; diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index b3dabc219e6a..885fc3809f7f 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -43,7 +43,6 @@ enum tv_margin { TV_MARGIN_RIGHT, TV_MARGIN_BOTTOM }; -/** Private structure for the integrated TV support */ struct intel_tv { struct intel_encoder base; @@ -370,12 +369,11 @@ struct tv_mode { * The constants below were all computed using a 107.520MHz clock */ -/** +/* * Register programming values for TV modes. * * These values account for -1s required. */ - static const struct tv_mode tv_modes[] = { { .name = "NTSC-M", @@ -1126,14 +1124,6 @@ static const struct drm_display_mode reported_modes[] = { }, }; -/** - * Detects TV presence by checking for load. - * - * Requires that the current pipe's DPLL is active. - - * \return true if TV is connected. - * \return false if TV is disconnected. - */ static int intel_tv_detect_type(struct intel_tv *intel_tv, struct drm_connector *connector) @@ -1259,12 +1249,6 @@ static void intel_tv_find_better_format(struct drm_connector *connector) connector->state->tv.mode = i; } -/** - * Detect the TV connection. - * - * Currently this always returns CONNECTOR_STATUS_UNKNOWN, as we need to be sure - * we have a pipe programmed in order to probe the TV. - */ static int intel_tv_detect(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx, @@ -1339,13 +1323,6 @@ intel_tv_choose_preferred_modes(const struct tv_mode *tv_mode, } } -/** - * Stub get_modes function. - * - * This should probably return a set of fixed modes, unless we can figure out - * how to probe modes off of TV connections. 
- */
-
static int
intel_tv_get_modes(struct drm_connector *connector)
{
@@ -1512,7 +1489,8 @@ intel_tv_init(struct drm_i915_private *dev_priv)
	connector = &intel_connector->base;
	state = connector->state;

-	/* The documentation, for the older chipsets at least, recommend
+	/*
+	 * The documentation, for the older chipsets at least, recommends
	 * using a polling method rather than hotplug detection for TVs.
	 * This is because in order to perform the hotplug detection, the PLLs
	 * for the TV must be kept alive increasing power drain and starving
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index d82ca0f438f5..e5bf0d37bf43 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -27,6 +27,8 @@
#include "intel_guc.h"
#include "i915_drv.h"

+static void guc_free_load_err_log(struct intel_guc *guc);
+
/* Reset GuC providing us with fresh state for both GuC and HuC.
 */
static int __intel_uc_reset_hw(struct drm_i915_private *dev_priv)
@@ -65,6 +67,21 @@ static int __get_platform_enable_guc(struct drm_i915_private *dev_priv)
	return enable_guc;
}

+static int __get_default_guc_log_level(struct drm_i915_private *dev_priv)
+{
+	int guc_log_level = 0; /* disabled */
+
+	/* Enable if we're running on platform with GuC and debug config */
+	if (HAS_GUC(dev_priv) && intel_uc_is_using_guc() &&
+	    (IS_ENABLED(CONFIG_DRM_I915_DEBUG) ||
+	     IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)))
+		guc_log_level = 1 + GUC_LOG_VERBOSITY_MAX;
+
+	/* Any platform specific fine-tuning can be done here */
+
+	return guc_log_level;
+}
+
/**
 * intel_uc_sanitize_options - sanitize uC related modparam options
 * @dev_priv: device private
@@ -74,6 +91,13 @@ static int __get_platform_enable_guc(struct drm_i915_private *dev_priv)
 * modparam varies between platforms and it is hardcoded in driver code.
 * Any other modparam value is only monitored against availability of the
 * related hardware or firmware definitions.
+ *
+ * In case of the "guc_log_level" option, this function will attempt to
+ * modify it only if it was initially set to "auto(-1)", or if the initial
+ * value was "enable(1..4)" on platforms without the GuC. The default value
+ * for this modparam varies between platforms and is usually "disable(0)",
+ * unless GuC is enabled on the given platform and the driver is compiled
+ * with a debug config, in which case it defaults to "enable(1..4)".
 */
void intel_uc_sanitize_options(struct drm_i915_private *dev_priv)
{
@@ -91,22 +115,48 @@ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv)

	/* Verify GuC firmware availability */
	if (intel_uc_is_using_guc() && !intel_uc_fw_is_selected(guc_fw)) {
-		DRM_WARN("Incompatible option detected: enable_guc=%d, %s!\n",
-			 i915_modparams.enable_guc,
+		DRM_WARN("Incompatible option detected: %s=%d, %s!\n",
+			 "enable_guc", i915_modparams.enable_guc,
			 !HAS_GUC(dev_priv) ? "no GuC hardware" :
					      "no GuC firmware");
	}

	/* Verify HuC firmware availability */
	if (intel_uc_is_using_huc() && !intel_uc_fw_is_selected(huc_fw)) {
-		DRM_WARN("Incompatible option detected: enable_guc=%d, %s!\n",
-			 i915_modparams.enable_guc,
+		DRM_WARN("Incompatible option detected: %s=%d, %s!\n",
+			 "enable_guc", i915_modparams.enable_guc,
			 !HAS_HUC(dev_priv) ?
"no HuC hardware" : "no HuC firmware"); } + /* A negative value means "use platform/config default" */ + if (i915_modparams.guc_log_level < 0) + i915_modparams.guc_log_level = + __get_default_guc_log_level(dev_priv); + + if (i915_modparams.guc_log_level > 0 && !intel_uc_is_using_guc()) { + DRM_WARN("Incompatible option detected: %s=%d, %s!\n", + "guc_log_level", i915_modparams.guc_log_level, + !HAS_GUC(dev_priv) ? "no GuC hardware" : + "GuC not enabled"); + i915_modparams.guc_log_level = 0; + } + + if (i915_modparams.guc_log_level > 1 + GUC_LOG_VERBOSITY_MAX) { + DRM_WARN("Incompatible option detected: %s=%d, %s!\n", + "guc_log_level", i915_modparams.guc_log_level, + "verbosity too high"); + i915_modparams.guc_log_level = 1 + GUC_LOG_VERBOSITY_MAX; + } + + DRM_DEBUG_DRIVER("guc_log_level=%d (enabled:%s verbosity:%d)\n", + i915_modparams.guc_log_level, + yesno(i915_modparams.guc_log_level), + i915_modparams.guc_log_level - 1); + /* Make sure that sanitization was done */ GEM_BUG_ON(i915_modparams.enable_guc < 0); + GEM_BUG_ON(i915_modparams.guc_log_level < 0); } void intel_uc_init_early(struct drm_i915_private *dev_priv) @@ -135,6 +185,8 @@ void intel_uc_fini_fw(struct drm_i915_private *dev_priv) if (USES_HUC(dev_priv)) intel_uc_fw_fini(&dev_priv->huc.fw); + + guc_free_load_err_log(&dev_priv->guc); } /** @@ -152,7 +204,7 @@ void intel_uc_init_mmio(struct drm_i915_private *dev_priv) static void guc_capture_load_err_log(struct intel_guc *guc) { - if (!guc->log.vma || i915_modparams.guc_log_level < 0) + if (!guc->log.vma || !i915_modparams.guc_log_level) return; if (!guc->load_err_log) @@ -188,28 +240,44 @@ static void guc_disable_communication(struct intel_guc *guc) guc->send = intel_guc_send_nop; } -int intel_uc_init_wq(struct drm_i915_private *dev_priv) +int intel_uc_init_misc(struct drm_i915_private *dev_priv) { + struct intel_guc *guc = &dev_priv->guc; int ret; if (!USES_GUC(dev_priv)) return 0; - ret = intel_guc_init_wq(&dev_priv->guc); + ret = intel_guc_init_wq(guc); if (ret) { DRM_ERROR("Couldn't allocate workqueues for GuC\n"); - return ret; + goto err; + } + + ret = intel_guc_log_relay_create(guc); + if (ret) { + DRM_ERROR("Couldn't allocate relay for GuC log\n"); + goto err_relay; } return 0; + +err_relay: + intel_guc_fini_wq(guc); +err: + return ret; } -void intel_uc_fini_wq(struct drm_i915_private *dev_priv) +void intel_uc_fini_misc(struct drm_i915_private *dev_priv) { + struct intel_guc *guc = &dev_priv->guc; + if (!USES_GUC(dev_priv)) return; - intel_guc_fini_wq(&dev_priv->guc); + intel_guc_fini_wq(guc); + + intel_guc_log_relay_destroy(guc); } int intel_uc_init(struct drm_i915_private *dev_priv) @@ -293,7 +361,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) goto err_out; if (USES_HUC(dev_priv)) { - ret = intel_huc_init_hw(huc); + ret = intel_huc_fw_upload(huc); if (ret) goto err_out; } @@ -322,7 +390,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) } if (USES_GUC_SUBMISSION(dev_priv)) { - if (i915_modparams.guc_log_level >= 0) + if (i915_modparams.guc_log_level) gen9_enable_guc_interrupts(dev_priv); ret = intel_guc_submission_enable(guc); @@ -364,8 +432,6 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; - guc_free_load_err_log(guc); - if (!USES_GUC(dev_priv)) return; @@ -379,3 +445,48 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv) if (USES_GUC_SUBMISSION(dev_priv)) gen9_disable_guc_interrupts(dev_priv); } + +int intel_uc_suspend(struct drm_i915_private *i915) +{ + struct intel_guc *guc = 
&i915->guc; + int err; + + if (!USES_GUC(i915)) + return 0; + + if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) + return 0; + + err = intel_guc_suspend(guc); + if (err) { + DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err); + return err; + } + + gen9_disable_guc_interrupts(i915); + + return 0; +} + +int intel_uc_resume(struct drm_i915_private *i915) +{ + struct intel_guc *guc = &i915->guc; + int err; + + if (!USES_GUC(i915)) + return 0; + + if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) + return 0; + + if (i915_modparams.guc_log_level) + gen9_enable_guc_interrupts(i915); + + err = intel_guc_resume(guc); + if (err) { + DRM_DEBUG_DRIVER("Failed to resume GuC, err=%d", err); + return err; + } + + return 0; +} diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 8a7249722ef1..f76d51d1ce70 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -33,12 +33,14 @@ void intel_uc_init_early(struct drm_i915_private *dev_priv); void intel_uc_init_mmio(struct drm_i915_private *dev_priv); void intel_uc_init_fw(struct drm_i915_private *dev_priv); void intel_uc_fini_fw(struct drm_i915_private *dev_priv); -int intel_uc_init_wq(struct drm_i915_private *dev_priv); -void intel_uc_fini_wq(struct drm_i915_private *dev_priv); +int intel_uc_init_misc(struct drm_i915_private *dev_priv); +void intel_uc_fini_misc(struct drm_i915_private *dev_priv); int intel_uc_init_hw(struct drm_i915_private *dev_priv); void intel_uc_fini_hw(struct drm_i915_private *dev_priv); int intel_uc_init(struct drm_i915_private *dev_priv); void intel_uc_fini(struct drm_i915_private *dev_priv); +int intel_uc_suspend(struct drm_i915_private *dev_priv); +int intel_uc_resume(struct drm_i915_private *dev_priv); static inline bool intel_uc_is_using_guc(void) { diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c index 784eff9cdfc8..3ec0ce505b76 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/intel_uc_fw.c @@ -197,11 +197,12 @@ fail: /** * intel_uc_fw_upload - load uC firmware using custom loader - * * @uc_fw: uC firmware - * @loader: custom uC firmware loader function + * @xfer: custom uC firmware loader function * * Loads uC firmware using custom loader and updates internal flags. + * + * Return: 0 on success, non-zero on failure. */ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, int (*xfer)(struct intel_uc_fw *uc_fw, diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 89547b614aa6..4df7c2ef8576 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -37,6 +37,12 @@ static const char * const forcewake_domain_names[] = { "render", "blitter", "media", + "vdbox0", + "vdbox1", + "vdbox2", + "vdbox3", + "vebox0", + "vebox1", }; const char * @@ -774,6 +780,9 @@ void assert_forcewakes_active(struct drm_i915_private *dev_priv, /* We give fast paths for the really cool registers */ #define NEEDS_FORCE_WAKE(reg) ((reg) < 0x40000) +#define GEN11_NEEDS_FORCE_WAKE(reg) \ + ((reg) < 0x40000 || ((reg) >= 0x1c0000 && (reg) < 0x1dc000)) + #define __gen6_reg_read_fw_domains(offset) \ ({ \ enum forcewake_domains __fwd; \ @@ -826,6 +835,14 @@ find_fw_domain(struct drm_i915_private *dev_priv, u32 offset) if (!entry) return 0; + /* + * The list of FW domains depends on the SKU in gen11+ so we + * can't determine it statically. We use FORCEWAKE_ALL and + * translate it here to the list of available domains. 
+	if (entry->domains == FORCEWAKE_ALL)
+		return dev_priv->uncore.fw_domains;
+
	WARN(entry->domains & ~dev_priv->uncore.fw_domains,
	     "Uninitialized forcewake domain(s) 0x%x accessed at 0x%x\n",
	     entry->domains & ~dev_priv->uncore.fw_domains, offset);
@@ -860,6 +877,14 @@ static const struct intel_forcewake_range __vlv_fw_ranges[] = {
	__fwd; \
})

+#define __gen11_fwtable_reg_read_fw_domains(offset) \
+({ \
+	enum forcewake_domains __fwd = 0; \
+	if (GEN11_NEEDS_FORCE_WAKE((offset))) \
+		__fwd = find_fw_domain(dev_priv, offset); \
+	__fwd; \
+})
+
/* *Must* be sorted by offset! See intel_shadow_table_check(). */
static const i915_reg_t gen8_shadowed_regs[] = {
	RING_TAIL(RENDER_RING_BASE),	/* 0x2000 (base) */
@@ -871,6 +896,20 @@ static const i915_reg_t gen8_shadowed_regs[] = {
	/* TODO: Other registers are not yet used */
};

+static const i915_reg_t gen11_shadowed_regs[] = {
+	RING_TAIL(RENDER_RING_BASE),		/* 0x2000 (base) */
+	GEN6_RPNSWREQ,				/* 0xA008 */
+	GEN6_RC_VIDEO_FREQ,			/* 0xA00C */
+	RING_TAIL(BLT_RING_BASE),		/* 0x22000 (base) */
+	RING_TAIL(GEN11_BSD_RING_BASE),		/* 0x1C0000 (base) */
+	RING_TAIL(GEN11_BSD2_RING_BASE),	/* 0x1C4000 (base) */
+	RING_TAIL(GEN11_VEBOX_RING_BASE),	/* 0x1C8000 (base) */
+	RING_TAIL(GEN11_BSD3_RING_BASE),	/* 0x1D0000 (base) */
+	RING_TAIL(GEN11_BSD4_RING_BASE),	/* 0x1D4000 (base) */
+	RING_TAIL(GEN11_VEBOX2_RING_BASE),	/* 0x1D8000 (base) */
+	/* TODO: Other registers are not yet used */
+};
+
static int mmio_reg_cmp(u32 key, const i915_reg_t *reg)
{
	u32 offset = i915_mmio_reg_offset(*reg);
@@ -883,14 +922,17 @@ static int mmio_reg_cmp(u32 key, const i915_reg_t *reg)
		return 0;
}

-static bool is_gen8_shadowed(u32 offset)
-{
-	const i915_reg_t *regs = gen8_shadowed_regs;
-
-	return BSEARCH(offset, regs, ARRAY_SIZE(gen8_shadowed_regs),
-		       mmio_reg_cmp);
+#define __is_genX_shadowed(x) \
+static bool is_gen##x##_shadowed(u32 offset) \
+{ \
+	const i915_reg_t *regs = gen##x##_shadowed_regs; \
+	return BSEARCH(offset, regs, ARRAY_SIZE(gen##x##_shadowed_regs), \
+		       mmio_reg_cmp); \
}

+__is_genX_shadowed(8)
+__is_genX_shadowed(11)
+
#define __gen8_reg_write_fw_domains(offset) \
({ \
	enum forcewake_domains __fwd; \
@@ -929,6 +971,14 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = {
	__fwd; \
})

+#define __gen11_fwtable_reg_write_fw_domains(offset) \
+({ \
+	enum forcewake_domains __fwd = 0; \
+	if (GEN11_NEEDS_FORCE_WAKE((offset)) && !is_gen11_shadowed(offset)) \
+		__fwd = find_fw_domain(dev_priv, offset); \
+	__fwd; \
+})
+
/* *Must* be sorted by offset ranges! See intel_fw_table_check(). */
static const struct intel_forcewake_range __gen9_fw_ranges[] = {
	GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_BLITTER),
@@ -965,6 +1015,40 @@ static const struct intel_forcewake_range __gen9_fw_ranges[] = {
	GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_MEDIA),
};

+/* *Must* be sorted by offset ranges! See intel_fw_table_check().
*/ +static const struct intel_forcewake_range __gen11_fw_ranges[] = { + GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xb00, 0x1fff, 0), /* uncore range */ + GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x2700, 0x2fff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x3000, 0x3fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x4000, 0x51ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x5200, 0x7fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8000, 0x813f, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8140, 0x815f, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8160, 0x82ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8300, 0x84ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8500, 0x8bff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8c00, 0x8cff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8d00, 0x93ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x9400, 0x97ff, FORCEWAKE_ALL), + GEN_FW_RANGE(0x9800, 0xafff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xb000, 0xb47f, FORCEWAKE_RENDER), + GEN_FW_RANGE(0xb480, 0xdfff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xe000, 0xe8ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0xe900, 0x243ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x24400, 0x247ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x24800, 0x3ffff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x40000, 0x1bffff, 0), + GEN_FW_RANGE(0x1c0000, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0), + GEN_FW_RANGE(0x1c4000, 0x1c7fff, FORCEWAKE_MEDIA_VDBOX1), + GEN_FW_RANGE(0x1c8000, 0x1cbfff, FORCEWAKE_MEDIA_VEBOX0), + GEN_FW_RANGE(0x1cc000, 0x1cffff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x1d0000, 0x1d3fff, FORCEWAKE_MEDIA_VDBOX2), + GEN_FW_RANGE(0x1d4000, 0x1d7fff, FORCEWAKE_MEDIA_VDBOX3), + GEN_FW_RANGE(0x1d8000, 0x1dbfff, FORCEWAKE_MEDIA_VEBOX1) +}; + static void ilk_dummy_write(struct drm_i915_private *dev_priv) { @@ -1095,7 +1179,12 @@ func##_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { } #define __gen6_read(x) __gen_read(gen6, x) #define __fwtable_read(x) __gen_read(fwtable, x) +#define __gen11_fwtable_read(x) __gen_read(gen11_fwtable, x) +__gen11_fwtable_read(8) +__gen11_fwtable_read(16) +__gen11_fwtable_read(32) +__gen11_fwtable_read(64) __fwtable_read(8) __fwtable_read(16) __fwtable_read(32) @@ -1105,6 +1194,7 @@ __gen6_read(16) __gen6_read(32) __gen6_read(64) +#undef __gen11_fwtable_read #undef __fwtable_read #undef __gen6_read #undef GEN6_READ_FOOTER @@ -1181,7 +1271,11 @@ func##_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, boo } #define __gen8_write(x) __gen_write(gen8, x) #define __fwtable_write(x) __gen_write(fwtable, x) +#define __gen11_fwtable_write(x) __gen_write(gen11_fwtable, x) +__gen11_fwtable_write(8) +__gen11_fwtable_write(16) +__gen11_fwtable_write(32) __fwtable_write(8) __fwtable_write(16) __fwtable_write(32) @@ -1192,6 +1286,7 @@ __gen6_write(8) __gen6_write(16) __gen6_write(32) +#undef __gen11_fwtable_write #undef __fwtable_write #undef __gen8_write #undef __gen6_write @@ -1240,6 +1335,13 @@ static void fw_domain_init(struct drm_i915_private *dev_priv, BUILD_BUG_ON(FORCEWAKE_RENDER != (1 << FW_DOMAIN_ID_RENDER)); BUILD_BUG_ON(FORCEWAKE_BLITTER != (1 << FW_DOMAIN_ID_BLITTER)); BUILD_BUG_ON(FORCEWAKE_MEDIA != (1 << FW_DOMAIN_ID_MEDIA)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX0 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX0)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX1 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX1)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX2 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX2)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX3 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX3)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX0 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX0)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX1 != (1 << 
FW_DOMAIN_ID_MEDIA_VEBOX1)); + d->mask = BIT(domain_id); @@ -1267,7 +1369,34 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) dev_priv->uncore.fw_clear = _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL); } - if (INTEL_GEN(dev_priv) >= 9) { + if (INTEL_GEN(dev_priv) >= 11) { + int i; + + dev_priv->uncore.funcs.force_wake_get = fw_domains_get; + dev_priv->uncore.funcs.force_wake_put = fw_domains_put; + fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, + FORCEWAKE_RENDER_GEN9, + FORCEWAKE_ACK_RENDER_GEN9); + fw_domain_init(dev_priv, FW_DOMAIN_ID_BLITTER, + FORCEWAKE_BLITTER_GEN9, + FORCEWAKE_ACK_BLITTER_GEN9); + for (i = 0; i < I915_MAX_VCS; i++) { + if (!HAS_ENGINE(dev_priv, _VCS(i))) + continue; + + fw_domain_init(dev_priv, FW_DOMAIN_ID_MEDIA_VDBOX0 + i, + FORCEWAKE_MEDIA_VDBOX_GEN11(i), + FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(i)); + } + for (i = 0; i < I915_MAX_VECS; i++) { + if (!HAS_ENGINE(dev_priv, _VECS(i))) + continue; + + fw_domain_init(dev_priv, FW_DOMAIN_ID_MEDIA_VEBOX0 + i, + FORCEWAKE_MEDIA_VEBOX_GEN11(i), + FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(i)); + } + } else if (IS_GEN9(dev_priv) || IS_GEN10(dev_priv)) { dev_priv->uncore.funcs.force_wake_get = fw_domains_get_with_fallback; dev_priv->uncore.funcs.force_wake_put = fw_domains_put; @@ -1422,10 +1551,14 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen8); ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen6); } - } else { + } else if (IS_GEN(dev_priv, 9, 10)) { ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, fwtable); ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable); + } else { + ASSIGN_FW_DOMAINS_TABLE(__gen11_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen11_fwtable); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen11_fwtable); } iosf_mbi_register_pmic_bus_access_notifier( @@ -1452,7 +1585,7 @@ static const struct reg_whitelist { } reg_read_whitelist[] = { { .offset_ldw = RING_TIMESTAMP(RENDER_RING_BASE), .offset_udw = RING_TIMESTAMP_UDW(RENDER_RING_BASE), - .gen_mask = INTEL_GEN_MASK(4, 10), + .gen_mask = INTEL_GEN_MASK(4, 11), .size = 8 } }; @@ -1522,9 +1655,11 @@ static void gen3_stop_engine(struct intel_engine_cs *engine) engine->name); I915_WRITE_FW(RING_HEAD(base), I915_READ_FW(RING_TAIL(base))); + POSTING_READ_FW(RING_HEAD(base)); /* paranoia */ I915_WRITE_FW(RING_HEAD(base), 0); I915_WRITE_FW(RING_TAIL(base), 0); + POSTING_READ_FW(RING_TAIL(base)); /* The ring must be empty before it is disabled */ I915_WRITE_FW(RING_CTL(base), 0); @@ -1548,24 +1683,31 @@ static void i915_stop_engines(struct drm_i915_private *dev_priv, gen3_stop_engine(engine); } -static bool i915_reset_complete(struct pci_dev *pdev) +static bool i915_in_reset(struct pci_dev *pdev) { u8 gdrst; pci_read_config_byte(pdev, I915_GDRST, &gdrst); - return (gdrst & GRDOM_RESET_STATUS) == 0; + return gdrst & GRDOM_RESET_STATUS; } static int i915_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) { struct pci_dev *pdev = dev_priv->drm.pdev; + int err; - /* assert reset for at least 20 usec */ + /* Assert reset for at least 20 usec, and wait for acknowledgement. */ pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); usleep_range(50, 200); + err = wait_for(i915_in_reset(pdev), 500); + + /* Clear the reset request. 
*/ pci_write_config_byte(pdev, I915_GDRST, 0); + usleep_range(50, 200); + if (!err) + err = wait_for(!i915_in_reset(pdev), 500); - return wait_for(i915_reset_complete(pdev), 500); + return err; } static bool g4x_reset_complete(struct pci_dev *pdev) @@ -1767,12 +1909,14 @@ int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, } /** - * intel_wait_for_register - wait until register matches expected state + * __intel_wait_for_register - wait until register matches expected state * @dev_priv: the i915 device * @reg: the register to read * @mask: mask to apply to register value * @value: expected value - * @timeout_ms: timeout in millisecond + * @fast_timeout_us: fast timeout in microseconds for atomic/tight wait + * @slow_timeout_ms: slow timeout in milliseconds + * @out_value: optional placeholder to hold register value * * This routine waits until the target register @reg contains the expected * @value after applying the @mask, i.e. it waits until :: @@ -1783,14 +1927,17 @@ int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, * * Returns 0 if the register matches the desired condition, or -ETIMEDOUT. */ -int intel_wait_for_register(struct drm_i915_private *dev_priv, +int __intel_wait_for_register(struct drm_i915_private *dev_priv, i915_reg_t reg, u32 mask, u32 value, - unsigned int timeout_ms) + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms, + u32 *out_value) { unsigned fw = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ); + u32 reg_value; int ret; might_sleep(); @@ -1800,14 +1947,18 @@ int intel_wait_for_register(struct drm_i915_private *dev_priv, ret = __intel_wait_for_register_fw(dev_priv, reg, mask, value, - 2, 0, NULL); + fast_timeout_us, 0, &reg_value); intel_uncore_forcewake_put__locked(dev_priv, fw); spin_unlock_irq(&dev_priv->uncore.lock); if (ret) - ret = wait_for((I915_READ_NOTRACE(reg) & mask) == value, - timeout_ms); + ret = __wait_for(reg_value = I915_READ_NOTRACE(reg), + (reg_value & mask) == value, + slow_timeout_ms * 1000, 10, 1000); + + if (out_value) + *out_value = reg_value; return ret; } @@ -1865,9 +2016,9 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv) if (!i915_modparams.reset) return NULL; - if (INTEL_INFO(dev_priv)->gen >= 8) + if (INTEL_GEN(dev_priv) >= 8) return gen8_reset_engines; - else if (INTEL_INFO(dev_priv)->gen >= 6) + else if (INTEL_GEN(dev_priv) >= 6) return gen6_reset_engines; else if (IS_GEN5(dev_priv)) return ironlake_do_reset; @@ -1875,7 +2026,7 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv) return g4x_do_reset; else if (IS_G33(dev_priv) || IS_PINEVIEW(dev_priv)) return g33_do_reset; - else if (INTEL_INFO(dev_priv)->gen >= 3) + else if (INTEL_GEN(dev_priv) >= 3) return i915_do_reset; else return NULL; @@ -1936,8 +2087,7 @@ int intel_reset_guc(struct drm_i915_private *dev_priv) { int ret; - if (!HAS_GUC(dev_priv)) - return -EINVAL; + GEM_BUG_ON(!HAS_GUC(dev_priv)); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); ret = gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC); @@ -1977,7 +2127,9 @@ intel_uncore_forcewake_for_read(struct drm_i915_private *dev_priv, u32 offset = i915_mmio_reg_offset(reg); enum forcewake_domains fw_domains; - if (HAS_FWTABLE(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 11) { + fw_domains = __gen11_fwtable_reg_read_fw_domains(offset); + } else if (HAS_FWTABLE(dev_priv)) { fw_domains = __fwtable_reg_read_fw_domains(offset); } else if (INTEL_GEN(dev_priv) >= 6) { fw_domains = __gen6_reg_read_fw_domains(offset); @@ -1998,7 +2150,9
@@ intel_uncore_forcewake_for_write(struct drm_i915_private *dev_priv, u32 offset = i915_mmio_reg_offset(reg); enum forcewake_domains fw_domains; - if (HAS_FWTABLE(dev_priv) && !IS_VALLEYVIEW(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 11) { + fw_domains = __gen11_fwtable_reg_write_fw_domains(offset); + } else if (HAS_FWTABLE(dev_priv) && !IS_VALLEYVIEW(dev_priv)) { fw_domains = __fwtable_reg_write_fw_domains(offset); } else if (IS_GEN8(dev_priv)) { fw_domains = __gen8_reg_write_fw_domains(offset); diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 9ce079b5dd0d..dfdf444e4bcc 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -37,17 +37,28 @@ enum forcewake_domain_id { FW_DOMAIN_ID_RENDER = 0, FW_DOMAIN_ID_BLITTER, FW_DOMAIN_ID_MEDIA, + FW_DOMAIN_ID_MEDIA_VDBOX0, + FW_DOMAIN_ID_MEDIA_VDBOX1, + FW_DOMAIN_ID_MEDIA_VDBOX2, + FW_DOMAIN_ID_MEDIA_VDBOX3, + FW_DOMAIN_ID_MEDIA_VEBOX0, + FW_DOMAIN_ID_MEDIA_VEBOX1, FW_DOMAIN_ID_COUNT }; enum forcewake_domains { - FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER), - FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER), - FORCEWAKE_MEDIA = BIT(FW_DOMAIN_ID_MEDIA), - FORCEWAKE_ALL = (FORCEWAKE_RENDER | - FORCEWAKE_BLITTER | - FORCEWAKE_MEDIA) + FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER), + FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER), + FORCEWAKE_MEDIA = BIT(FW_DOMAIN_ID_MEDIA), + FORCEWAKE_MEDIA_VDBOX0 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX0), + FORCEWAKE_MEDIA_VDBOX1 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX1), + FORCEWAKE_MEDIA_VDBOX2 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX2), + FORCEWAKE_MEDIA_VDBOX3 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX3), + FORCEWAKE_MEDIA_VEBOX0 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX0), + FORCEWAKE_MEDIA_VEBOX1 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX1), + + FORCEWAKE_ALL = BIT(FW_DOMAIN_ID_COUNT) - 1 }; struct intel_uncore_funcs { @@ -163,11 +174,23 @@ void intel_uncore_forcewake_put__locked(struct drm_i915_private *dev_priv, void intel_uncore_forcewake_user_get(struct drm_i915_private *dev_priv); void intel_uncore_forcewake_user_put(struct drm_i915_private *dev_priv); +int __intel_wait_for_register(struct drm_i915_private *dev_priv, + i915_reg_t reg, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms, + u32 *out_value); +static inline int intel_wait_for_register(struct drm_i915_private *dev_priv, i915_reg_t reg, u32 mask, u32 value, - unsigned int timeout_ms); + unsigned int timeout_ms) +{ + return __intel_wait_for_register(dev_priv, reg, mask, value, 2, + timeout_ms, NULL); +} int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, i915_reg_t reg, u32 mask, @@ -186,4 +209,9 @@ int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, 2, timeout_ms, NULL); } +#define raw_reg_read(base, reg) \ + readl(base + i915_mmio_reg_offset(reg)) +#define raw_reg_write(base, reg, value) \ + writel(value, base + i915_mmio_reg_offset(reg)) + #endif /* !__INTEL_UNCORE_H__ */ diff --git a/drivers/gpu/drm/i915/intel_vbt_defs.h b/drivers/gpu/drm/i915/intel_vbt_defs.h index 98dff6058d3c..458468237b5f 100644 --- a/drivers/gpu/drm/i915/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/intel_vbt_defs.h @@ -227,7 +227,7 @@ struct bdb_general_features { #define DEVICE_TYPE_COMPOSITE_OUTPUT (1 << 9) #define DEVICE_TYPE_DUAL_CHANNEL (1 << 8) #define DEVICE_TYPE_HIGH_SPEED_LINK (1 << 6) -#define DEVICE_TYPE_LVDS_SINGALING (1 << 5) +#define DEVICE_TYPE_LVDS_SIGNALING (1 << 5) #define DEVICE_TYPE_TMDS_DVI_SIGNALING (1 << 4) #define DEVICE_TYPE_VIDEO_SIGNALING (1 << 3) #define 
DEVICE_TYPE_DISPLAYPORT_OUTPUT (1 << 2) @@ -243,7 +243,7 @@ struct bdb_general_features { DEVICE_TYPE_MIPI_OUTPUT | \ DEVICE_TYPE_COMPOSITE_OUTPUT | \ DEVICE_TYPE_DUAL_CHANNEL | \ - DEVICE_TYPE_LVDS_SINGALING | \ + DEVICE_TYPE_LVDS_SIGNALING | \ DEVICE_TYPE_TMDS_DVI_SIGNALING | \ DEVICE_TYPE_VIDEO_SIGNALING | \ DEVICE_TYPE_DISPLAYPORT_OUTPUT | \ @@ -253,7 +253,7 @@ struct bdb_general_features { (DEVICE_TYPE_INTERNAL_CONNECTOR | \ DEVICE_TYPE_MIPI_OUTPUT | \ DEVICE_TYPE_COMPOSITE_OUTPUT | \ - DEVICE_TYPE_LVDS_SINGALING | \ + DEVICE_TYPE_LVDS_SIGNALING | \ DEVICE_TYPE_TMDS_DVI_SIGNALING | \ DEVICE_TYPE_VIDEO_SIGNALING | \ DEVICE_TYPE_DISPLAYPORT_OUTPUT | \ @@ -299,6 +299,8 @@ struct bdb_general_features { #define DVO_PORT_DPA 10 #define DVO_PORT_DPE 11 /* 193 */ #define DVO_PORT_HDMIE 12 /* 193 */ +#define DVO_PORT_DPF 13 /* N/A */ +#define DVO_PORT_HDMIF 14 /* N/A */ #define DVO_PORT_MIPIA 21 /* 171 */ #define DVO_PORT_MIPIB 22 /* 171 */ #define DVO_PORT_MIPIC 23 /* 171 */ @@ -318,6 +320,11 @@ enum vbt_gmbus_ddi { DDC_BUS_DDI_F, }; +#define VBT_DP_MAX_LINK_RATE_HBR3 0 +#define VBT_DP_MAX_LINK_RATE_HBR2 1 +#define VBT_DP_MAX_LINK_RATE_HBR 2 +#define VBT_DP_MAX_LINK_RATE_LBR 3 + /* * The child device config, aka the display device data structure, provides a * description of a port and its configuration on the platform. diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c index a2632df39173..391f3d9ffdf1 100644 --- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c @@ -129,8 +129,8 @@ huge_gem_object(struct drm_i915_private *i915, drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); i915_gem_object_init(obj, &huge_ops); - obj->base.read_domains = I915_GEM_DOMAIN_CPU; - obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; + obj->write_domain = I915_GEM_DOMAIN_CPU; cache_level = HAS_LLC(i915) ? 
I915_CACHE_LLC : I915_CACHE_NONE; i915_gem_object_set_cache_coherency(obj, cache_level); obj->scratch = phys_size; diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index 2ea69394f428..05bbef363fff 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -178,8 +178,8 @@ huge_pages_object(struct drm_i915_private *i915, drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &huge_page_ops); - obj->base.write_domain = I915_GEM_DOMAIN_CPU; - obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; obj->cache_level = I915_CACHE_NONE; obj->mm.page_mask = page_mask; @@ -329,8 +329,8 @@ fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) else i915_gem_object_init(obj, &fake_ops); - obj->base.write_domain = I915_GEM_DOMAIN_CPU; - obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; obj->cache_level = I915_CACHE_NONE; return obj; @@ -964,7 +964,7 @@ static int gpu_write(struct i915_vma *vma, u32 dword, u32 value) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_vma *batch; int flags = 0; int err; @@ -975,7 +975,7 @@ static int gpu_write(struct i915_vma *vma, if (err) return err; - rq = i915_gem_request_alloc(engine, ctx); + rq = i915_request_alloc(engine, ctx); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -1003,7 +1003,7 @@ static int gpu_write(struct i915_vma *vma, reservation_object_unlock(vma->resv); err_request: - __i915_add_request(rq, err == 0); + __i915_request_add(rq, err == 0); return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index 7a0d1e17c1ad..340a98c0c804 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -178,7 +178,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, u32 v) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_vma *vma; u32 *cs; int err; @@ -191,7 +191,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return PTR_ERR(vma); - rq = i915_gem_request_alloc(i915->engine[RCS], i915->kernel_context); + rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); @@ -199,7 +199,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) { - __i915_add_request(rq, false); + __i915_request_add(rq, false); i915_vma_unpin(vma); return PTR_ERR(cs); } @@ -229,7 +229,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, reservation_object_add_excl_fence(obj->resv, &rq->fence); reservation_object_unlock(obj->resv); - __i915_add_request(rq, true); + __i915_request_add(rq, true); return 0; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 56a803d11916..7ecaed50d0b9 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -114,7 +114,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj, struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_address_space *vm = ctx->ppgtt ? 
&ctx->ppgtt->base : &i915->ggtt.base; - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_vma *vma; struct i915_vma *batch; unsigned int flags; @@ -152,7 +152,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj, goto err_vma; } - rq = i915_gem_request_alloc(engine, ctx); + rq = i915_request_alloc(engine, ctx); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_batch; @@ -180,12 +180,12 @@ static int gpu_fill(struct drm_i915_gem_object *obj, reservation_object_add_excl_fence(obj->resv, &rq->fence); reservation_object_unlock(obj->resv); - __i915_add_request(rq, true); + __i915_request_add(rq, true); return 0; err_request: - __i915_add_request(rq, false); + __i915_request_add(rq, false); err_batch: i915_vma_unpin(batch); err_vma: @@ -215,8 +215,8 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) } i915_gem_obj_finish_shmem_access(obj); - obj->base.read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; - obj->base.write_domain = 0; + obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; + obj->write_domain = 0; return 0; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index e1ddad635d73..ab9d7bee0aae 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -407,7 +407,7 @@ static int igt_evict_contexts(void *arg) mutex_lock(&i915->drm.struct_mutex); onstack_fence_init(&fence); do { - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_gem_context *ctx; ctx = live_context(i915, file); @@ -416,7 +416,7 @@ static int igt_evict_contexts(void *arg) /* We will need some GGTT space for the rq's context */ igt_evict_ctl.fail_if_busy = true; - rq = i915_gem_request_alloc(engine, ctx); + rq = i915_request_alloc(engine, ctx); igt_evict_ctl.fail_if_busy = false; if (IS_ERR(rq)) { @@ -437,7 +437,7 @@ static int igt_evict_contexts(void *arg) if (err < 0) break; - i915_add_request(rq); + i915_request_add(rq); count++; err = 0; } while(1); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 4a28d713a7d8..f7dc926f4ef1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -113,8 +113,8 @@ fake_dma_object(struct drm_i915_private *i915, u64 size) drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &fake_ops); - obj->base.write_domain = I915_GEM_DOMAIN_CPU; - obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; obj->cache_level = I915_CACHE_NONE; /* Preallocate the "backing storage" */ @@ -885,6 +885,84 @@ static int shrink_hole(struct drm_i915_private *i915, return err; } +static int shrink_boom(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + unsigned int sizes[] = { SZ_2M, SZ_1G }; + struct drm_i915_gem_object *purge; + struct drm_i915_gem_object *explode; + int err; + int i; + + /* + * Catch the case which shrink_hole seems to miss. The setup here + * requires invoking the shrinker as we do the alloc_pt/alloc_pd, while + * ensuring that all vma associated with the respective pd/pdp are + * unpinned at the time.
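+ * The SZ_2M and SZ_1G sizes below presumably correspond to one full page table and one full page directory of the 4-level ppgtt (512 PTEs / 512 PDEs with 4K pages), so each pass stresses a different level of the alloc_pt/alloc_pd chain while the shrinker fires.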
+ */ + + for (i = 0; i < ARRAY_SIZE(sizes); ++i) { + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + unsigned int size = sizes[i]; + struct i915_vma *vma; + + purge = fake_dma_object(i915, size); + if (IS_ERR(purge)) + return PTR_ERR(purge); + + vma = i915_vma_instance(purge, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_purge; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto err_purge; + + /* Should now be ripe for purging */ + i915_vma_unpin(vma); + + explode = fake_dma_object(i915, size); + if (IS_ERR(explode)) { + err = PTR_ERR(explode); + goto err_purge; + } + + vm->fault_attr.probability = 100; + vm->fault_attr.interval = 1; + atomic_set(&vm->fault_attr.times, -1); + + vma = i915_vma_instance(explode, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_explode; + } + + err = i915_vma_pin(vma, 0, 0, flags | size); + if (err) + goto err_explode; + + i915_vma_unpin(vma); + + i915_gem_object_put(purge); + i915_gem_object_put(explode); + + memset(&vm->fault_attr, 0, sizeof(vm->fault_attr)); + } + + return 0; + +err_explode: + i915_gem_object_put(explode); +err_purge: + i915_gem_object_put(purge); + memset(&vm->fault_attr, 0, sizeof(vm->fault_attr)); + return err; +} + static int exercise_ppgtt(struct drm_i915_private *dev_priv, int (*func)(struct drm_i915_private *i915, struct i915_address_space *vm, @@ -953,6 +1031,11 @@ static int igt_ppgtt_shrink(void *arg) return exercise_ppgtt(arg, shrink_hole); } +static int igt_ppgtt_shrink_boom(void *arg) +{ + return exercise_ppgtt(arg, shrink_boom); +} + static int sort_holes(void *priv, struct list_head *A, struct list_head *B) { struct drm_mm_node *a = list_entry(A, typeof(*a), hole_stack); @@ -1052,35 +1135,38 @@ static int igt_ggtt_page(void *arg) memset(&tmp, 0, sizeof(tmp)); err = drm_mm_insert_node_in_range(&ggtt->base.mm, &tmp, - 1024 * PAGE_SIZE, 0, + count * PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 0, ggtt->mappable_end, DRM_MM_INSERT_LOW); if (err) goto out_unpin; + intel_runtime_pm_get(i915); + + for (n = 0; n < count; n++) { + u64 offset = tmp.start + n * PAGE_SIZE; + + ggtt->base.insert_page(&ggtt->base, + i915_gem_object_get_dma_address(obj, 0), + offset, I915_CACHE_NONE, 0); + } + order = i915_random_order(count, &prng); if (!order) { err = -ENOMEM; goto out_remove; } - intel_runtime_pm_get(i915); for (n = 0; n < count; n++) { u64 offset = tmp.start + order[n] * PAGE_SIZE; u32 __iomem *vaddr; - ggtt->base.insert_page(&ggtt->base, - i915_gem_object_get_dma_address(obj, 0), - offset, I915_CACHE_NONE, 0); - vaddr = io_mapping_map_atomic_wc(&ggtt->iomap, offset); iowrite32(n, vaddr + n); io_mapping_unmap_atomic(vaddr); - - wmb(); - ggtt->base.clear_range(&ggtt->base, offset, PAGE_SIZE); } + i915_gem_flush_ggtt_writes(i915); i915_random_reorder(order, count, &prng); for (n = 0; n < count; n++) { @@ -1088,16 +1174,10 @@ static int igt_ggtt_page(void *arg) u32 __iomem *vaddr; u32 val; - ggtt->base.insert_page(&ggtt->base, - i915_gem_object_get_dma_address(obj, 0), - offset, I915_CACHE_NONE, 0); - vaddr = io_mapping_map_atomic_wc(&ggtt->iomap, offset); val = ioread32(vaddr + n); io_mapping_unmap_atomic(vaddr); - ggtt->base.clear_range(&ggtt->base, offset, PAGE_SIZE); - if (val != n) { pr_err("insert page failed: found %d, expected %d\n", val, n); @@ -1105,10 +1185,11 @@ static int igt_ggtt_page(void *arg) break; } } - intel_runtime_pm_put(i915); kfree(order); out_remove: + ggtt->base.clear_range(&ggtt->base, tmp.start, tmp.size); + intel_runtime_pm_put(i915); drm_mm_remove_node(&tmp); 
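For reference, the reworked igt_ggtt_page above reduces to the following shape: the scratch PTEs are installed once up front, the per-write wmb()/clear_range() pair is replaced by a single i915_gem_flush_ggtt_writes() between the write and read passes, and the whole range is torn down with one clear_range() in the unwind path. A condensed sketch reusing the helpers visible in the hunk (the shuffled readback loop is elided); not a drop-in replacement:

	/* One-time PTE setup for every scratch slot. */
	for (n = 0; n < count; n++)
		ggtt->base.insert_page(&ggtt->base,
				       i915_gem_object_get_dma_address(obj, 0),
				       tmp.start + n * PAGE_SIZE,
				       I915_CACHE_NONE, 0);

	/* Tagged writes through a short-lived atomic WC mapping. */
	for (n = 0; n < count; n++) {
		u64 offset = tmp.start + order[n] * PAGE_SIZE;
		u32 __iomem *vaddr;

		vaddr = io_mapping_map_atomic_wc(&ggtt->iomap, offset);
		iowrite32(n, vaddr + n);
		io_mapping_unmap_atomic(vaddr);
	}

	/* One barrier for all the WC writes, instead of a wmb() per page. */
	i915_gem_flush_ggtt_writes(i915);

	/* ... random-order readback and value checking as in the hunk ... */

	/* Single teardown of the scratch range on the way out. */
	ggtt->base.clear_range(&ggtt->base, tmp.start, tmp.size);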
out_unpin: i915_gem_object_unpin_pages(obj); @@ -1579,6 +1660,7 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ppgtt_pot), SUBTEST(igt_ppgtt_fill), SUBTEST(igt_ppgtt_shrink), + SUBTEST(igt_ppgtt_shrink_boom), SUBTEST(igt_ggtt_lowlevel), SUBTEST(igt_ggtt_drunk), SUBTEST(igt_ggtt_walk), diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index f32aa6bb79e2..fbdb2419d418 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -212,8 +212,11 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, return -EINTR; err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); - if (err) + if (err) { + pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n", + tile->tiling, tile->stride, err); return err; + } GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); @@ -230,13 +233,16 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, GEM_BUG_ON(view.partial.size > nreal); err = i915_gem_object_set_to_gtt_domain(obj, true); - if (err) + if (err) { + pr_err("Failed to flush to GTT write domain; err=%d\n", + err); return err; + } vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); if (IS_ERR(vma)) { - pr_err("Failed to pin partial view: offset=%lu\n", - page); + pr_err("Failed to pin partial view: offset=%lu; err=%d\n", + page, (int)PTR_ERR(vma)); return PTR_ERR(vma); } @@ -246,8 +252,8 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, io = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); if (IS_ERR(io)) { - pr_err("Failed to iomap partial view: offset=%lu\n", - page); + pr_err("Failed to iomap partial view: offset=%lu; err=%d\n", + page, (int)PTR_ERR(io)); return PTR_ERR(io); } @@ -430,7 +436,7 @@ out: static int make_obj_busy(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_vma *vma; int err; @@ -442,14 +448,14 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) if (err) return err; - rq = i915_gem_request_alloc(i915->engine[RCS], i915->kernel_context); + rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); } i915_vma_move_to_active(vma, rq, 0); - i915_add_request(rq); + i915_request_add(rq); i915_gem_object_set_active_reference(obj); i915_vma_unpin(vma); diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 088f45bc6199..9c76f0305b6a 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -11,7 +11,7 @@ */ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(uncore, intel_uncore_live_selftests) -selftest(requests, i915_gem_request_live_selftests) +selftest(requests, i915_request_live_selftests) selftest(objects, i915_gem_object_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 19c6fce837df..9a48aa441743 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -16,7 +16,7 @@ 
selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) selftest(timelines, i915_gem_timeline_mock_selftests) -selftest(requests, i915_gem_request_mock_selftests) +selftest(requests, i915_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) selftest(dmabuf, i915_gem_dmabuf_mock_selftests) selftest(vma, i915_vma_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c index 2088ae57aa89..1f415ce47018 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.c +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -57,7 +57,8 @@ unsigned int *i915_random_order(unsigned int count, struct rnd_state *state) { unsigned int *order, i; - order = kmalloc_array(count, sizeof(*order), GFP_KERNEL | __GFP_NOWARN); + order = kmalloc_array(count, sizeof(*order), + GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!order) return order; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 647bf2bbd799..94bc2e1898a4 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -32,7 +32,7 @@ static int igt_add_request(void *arg) { struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *request; + struct i915_request *request; int err = -ENOMEM; /* Basic preliminary test to create a request and let it loose! */ @@ -44,7 +44,7 @@ static int igt_add_request(void *arg) if (!request) goto out_unlock; - i915_add_request(request); + i915_request_add(request); err = 0; out_unlock: @@ -56,7 +56,7 @@ static int igt_wait_request(void *arg) { const long T = HZ / 4; struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *request; + struct i915_request *request; int err = -EINVAL; /* Submit a request, then wait upon it */ @@ -68,49 +68,49 @@ static int igt_wait_request(void *arg) goto out_unlock; } - if (i915_wait_request(request, I915_WAIT_LOCKED, 0) != -ETIME) { + if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) { pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n"); goto out_unlock; } - if (i915_wait_request(request, I915_WAIT_LOCKED, T) != -ETIME) { + if (i915_request_wait(request, I915_WAIT_LOCKED, T) != -ETIME) { pr_err("request wait succeeded (expected timeout before submit!)\n"); goto out_unlock; } - if (i915_gem_request_completed(request)) { + if (i915_request_completed(request)) { pr_err("request completed before submit!!\n"); goto out_unlock; } - i915_add_request(request); + i915_request_add(request); - if (i915_wait_request(request, I915_WAIT_LOCKED, 0) != -ETIME) { + if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) { pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n"); goto out_unlock; } - if (i915_gem_request_completed(request)) { + if (i915_request_completed(request)) { pr_err("request completed immediately!\n"); goto out_unlock; } - if (i915_wait_request(request, I915_WAIT_LOCKED, T / 2) != -ETIME) { + if (i915_request_wait(request, I915_WAIT_LOCKED, T / 2) != -ETIME) { pr_err("request wait succeeded (expected timeout!)\n"); goto out_unlock; } - if (i915_wait_request(request, I915_WAIT_LOCKED, T) == -ETIME) { + if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) { pr_err("request wait timed out!\n"); goto out_unlock; } - if (!i915_gem_request_completed(request)) { + if 
(!i915_request_completed(request)) { pr_err("request not complete after waiting!\n"); goto out_unlock; } - if (i915_wait_request(request, I915_WAIT_LOCKED, T) == -ETIME) { + if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) { pr_err("request wait timed out when already complete!\n"); goto out_unlock; } @@ -126,7 +126,7 @@ static int igt_fence_wait(void *arg) { const long T = HZ / 4; struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *request; + struct i915_request *request; int err = -EINVAL; /* Submit a request, treat it as a fence and wait upon it */ @@ -145,7 +145,7 @@ static int igt_fence_wait(void *arg) } mutex_lock(&i915->drm.struct_mutex); - i915_add_request(request); + i915_request_add(request); mutex_unlock(&i915->drm.struct_mutex); if (dma_fence_is_signaled(&request->fence)) { @@ -185,7 +185,7 @@ out_locked: static int igt_request_rewind(void *arg) { struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *request, *vip; + struct i915_request *request, *vip; struct i915_gem_context *ctx[2]; int err = -EINVAL; @@ -197,8 +197,8 @@ static int igt_request_rewind(void *arg) goto err_context_0; } - i915_gem_request_get(request); - i915_add_request(request); + i915_request_get(request); + i915_request_add(request); ctx[1] = mock_context(i915, "B"); vip = mock_request(i915->engine[RCS], ctx[1], 0); @@ -210,35 +210,35 @@ static int igt_request_rewind(void *arg) /* Simulate preemption by manual reordering */ if (!mock_cancel_request(request)) { pr_err("failed to cancel request (already executed)!\n"); - i915_add_request(vip); + i915_request_add(vip); goto err_context_1; } - i915_gem_request_get(vip); - i915_add_request(vip); + i915_request_get(vip); + i915_request_add(vip); rcu_read_lock(); request->engine->submit_request(request); rcu_read_unlock(); mutex_unlock(&i915->drm.struct_mutex); - if (i915_wait_request(vip, 0, HZ) == -ETIME) { + if (i915_request_wait(vip, 0, HZ) == -ETIME) { pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n", vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS])); goto err; } - if (i915_gem_request_completed(request)) { + if (i915_request_completed(request)) { pr_err("low priority request already completed\n"); goto err; } err = 0; err: - i915_gem_request_put(vip); + i915_request_put(vip); mutex_lock(&i915->drm.struct_mutex); err_context_1: mock_context_close(ctx[1]); - i915_gem_request_put(request); + i915_request_put(request); err_context_0: mock_context_close(ctx[0]); mock_device_flush(i915); @@ -246,7 +246,7 @@ err_context_0: return err; } -int i915_gem_request_mock_selftests(void) +int i915_request_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_add_request), @@ -303,7 +303,7 @@ static int end_live_test(struct live_test *t) { struct drm_i915_private *i915 = t->i915; - i915_gem_retire_requests(i915); + i915_retire_requests(i915); if (wait_for(intel_engines_are_idle(i915), 10)) { pr_err("%s(%s): GPU not idle\n", t->func, t->name); @@ -343,7 +343,7 @@ static int live_nop_request(void *arg) for_each_engine(engine, i915, id) { IGT_TIMEOUT(end_time); - struct drm_i915_gem_request *request; + struct i915_request *request; unsigned long n, prime; ktime_t times[2] = {}; @@ -355,8 +355,8 @@ static int live_nop_request(void *arg) times[1] = ktime_get_raw(); for (n = 0; n < prime; n++) { - request = i915_gem_request_alloc(engine, - i915->kernel_context); + request = i915_request_alloc(engine, + i915->kernel_context); if (IS_ERR(request)) { err = 
PTR_ERR(request); goto out_unlock; @@ -375,9 +375,9 @@ static int live_nop_request(void *arg) * for latency. */ - i915_add_request(request); + i915_request_add(request); } - i915_wait_request(request, + i915_request_wait(request, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); @@ -447,15 +447,14 @@ err: return ERR_PTR(err); } -static struct drm_i915_gem_request * +static struct i915_request * empty_request(struct intel_engine_cs *engine, struct i915_vma *batch) { - struct drm_i915_gem_request *request; + struct i915_request *request; int err; - request = i915_gem_request_alloc(engine, - engine->i915->kernel_context); + request = i915_request_alloc(engine, engine->i915->kernel_context); if (IS_ERR(request)) return request; @@ -467,7 +466,7 @@ empty_request(struct intel_engine_cs *engine, goto out_request; out_request: - __i915_add_request(request, err == 0); + __i915_request_add(request, err == 0); return err ? ERR_PTR(err) : request; } @@ -495,7 +494,7 @@ static int live_empty_request(void *arg) for_each_engine(engine, i915, id) { IGT_TIMEOUT(end_time); - struct drm_i915_gem_request *request; + struct i915_request *request; unsigned long n, prime; ktime_t times[2] = {}; @@ -509,7 +508,7 @@ static int live_empty_request(void *arg) err = PTR_ERR(request); goto out_batch; } - i915_wait_request(request, + i915_request_wait(request, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); @@ -523,7 +522,7 @@ static int live_empty_request(void *arg) goto out_batch; } } - i915_wait_request(request, + i915_request_wait(request, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); @@ -633,7 +632,7 @@ static int live_all_engines(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; - struct drm_i915_gem_request *request[I915_NUM_ENGINES]; + struct i915_request *request[I915_NUM_ENGINES]; struct i915_vma *batch; struct live_test t; unsigned int id; @@ -658,8 +657,7 @@ static int live_all_engines(void *arg) } for_each_engine(engine, i915, id) { - request[id] = i915_gem_request_alloc(engine, - i915->kernel_context); + request[id] = i915_request_alloc(engine, i915->kernel_context); if (IS_ERR(request[id])) { err = PTR_ERR(request[id]); pr_err("%s: Request allocation failed with err=%d\n", @@ -680,12 +678,12 @@ static int live_all_engines(void *arg) } i915_vma_move_to_active(batch, request[id], 0); - i915_gem_request_get(request[id]); - i915_add_request(request[id]); + i915_request_get(request[id]); + i915_request_add(request[id]); } for_each_engine(engine, i915, id) { - if (i915_gem_request_completed(request[id])) { + if (i915_request_completed(request[id])) { pr_err("%s(%s): request completed too early!\n", __func__, engine->name); err = -EINVAL; @@ -702,7 +700,7 @@ static int live_all_engines(void *arg) for_each_engine(engine, i915, id) { long timeout; - timeout = i915_wait_request(request[id], + timeout = i915_request_wait(request[id], I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); if (timeout < 0) { @@ -712,8 +710,8 @@ static int live_all_engines(void *arg) goto out_request; } - GEM_BUG_ON(!i915_gem_request_completed(request[id])); - i915_gem_request_put(request[id]); + GEM_BUG_ON(!i915_request_completed(request[id])); + i915_request_put(request[id]); request[id] = NULL; } @@ -722,7 +720,7 @@ static int live_all_engines(void *arg) out_request: for_each_engine(engine, i915, id) if (request[id]) - i915_gem_request_put(request[id]); + i915_request_put(request[id]); i915_vma_unpin(batch); i915_vma_put(batch); out_unlock: @@ -733,8 +731,8 @@ out_unlock: static int live_sequential_engines(void *arg) { struct 
drm_i915_private *i915 = arg; - struct drm_i915_gem_request *request[I915_NUM_ENGINES] = {}; - struct drm_i915_gem_request *prev = NULL; + struct i915_request *request[I915_NUM_ENGINES] = {}; + struct i915_request *prev = NULL; struct intel_engine_cs *engine; struct live_test t; unsigned int id; @@ -763,8 +761,7 @@ static int live_sequential_engines(void *arg) goto out_unlock; } - request[id] = i915_gem_request_alloc(engine, - i915->kernel_context); + request[id] = i915_request_alloc(engine, i915->kernel_context); if (IS_ERR(request[id])) { err = PTR_ERR(request[id]); pr_err("%s: Request allocation failed for %s with err=%d\n", @@ -773,10 +770,10 @@ static int live_sequential_engines(void *arg) } if (prev) { - err = i915_gem_request_await_dma_fence(request[id], - &prev->fence); + err = i915_request_await_dma_fence(request[id], + &prev->fence); if (err) { - i915_add_request(request[id]); + i915_request_add(request[id]); pr_err("%s: Request await failed for %s with err=%d\n", __func__, engine->name, err); goto out_request; @@ -794,8 +791,8 @@ static int live_sequential_engines(void *arg) i915_gem_object_set_active_reference(batch->obj); i915_vma_get(batch); - i915_gem_request_get(request[id]); - i915_add_request(request[id]); + i915_request_get(request[id]); + i915_request_add(request[id]); prev = request[id]; } @@ -803,7 +800,7 @@ static int live_sequential_engines(void *arg) for_each_engine(engine, i915, id) { long timeout; - if (i915_gem_request_completed(request[id])) { + if (i915_request_completed(request[id])) { pr_err("%s(%s): request completed too early!\n", __func__, engine->name); err = -EINVAL; @@ -817,7 +814,7 @@ static int live_sequential_engines(void *arg) goto out_request; } - timeout = i915_wait_request(request[id], + timeout = i915_request_wait(request[id], I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); if (timeout < 0) { @@ -827,7 +824,7 @@ static int live_sequential_engines(void *arg) goto out_request; } - GEM_BUG_ON(!i915_gem_request_completed(request[id])); + GEM_BUG_ON(!i915_request_completed(request[id])); } err = end_live_test(&t); @@ -849,14 +846,14 @@ out_request: } i915_vma_put(request[id]->batch); - i915_gem_request_put(request[id]); + i915_request_put(request[id]); } out_unlock: mutex_unlock(&i915->drm.struct_mutex); return err; } -int i915_gem_request_live_selftests(struct drm_i915_private *i915) +int i915_request_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_nop_request), diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c index ea01d0fe3ace..570e325af93e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c @@ -606,6 +606,139 @@ err: return -EINVAL; } +static const char *mock_name(struct dma_fence *fence) +{ + return "mock"; +} + +static bool mock_enable_signaling(struct dma_fence *fence) +{ + return true; +} + +static const struct dma_fence_ops mock_fence_ops = { + .get_driver_name = mock_name, + .get_timeline_name = mock_name, + .enable_signaling = mock_enable_signaling, + .wait = dma_fence_default_wait, + .release = dma_fence_free, +}; + +static DEFINE_SPINLOCK(mock_fence_lock); + +static struct dma_fence *alloc_dma_fence(void) +{ + struct dma_fence *dma; + + dma = kmalloc(sizeof(*dma), GFP_KERNEL); + if (dma) + dma_fence_init(dma, &mock_fence_ops, &mock_fence_lock, 0, 0); + + return dma; +} + +static struct i915_sw_fence * +wrap_dma_fence(struct dma_fence *dma, unsigned long delay) +{ + 
struct i915_sw_fence *fence; + int err; + + fence = alloc_fence(); + if (!fence) + return ERR_PTR(-ENOMEM); + + err = i915_sw_fence_await_dma_fence(fence, dma, delay, GFP_NOWAIT); + i915_sw_fence_commit(fence); + if (err < 0) { + free_fence(fence); + return ERR_PTR(err); + } + + return fence; +} + +static int test_dma_fence(void *arg) +{ + struct i915_sw_fence *timeout = NULL, *not = NULL; + unsigned long delay = i915_selftest.timeout_jiffies; + unsigned long end, sleep; + struct dma_fence *dma; + int err; + + dma = alloc_dma_fence(); + if (!dma) + return -ENOMEM; + + timeout = wrap_dma_fence(dma, delay); + if (IS_ERR(timeout)) { + err = PTR_ERR(timeout); + goto err; + } + + not = wrap_dma_fence(dma, 0); + if (IS_ERR(not)) { + err = PTR_ERR(not); + goto err; + } + + err = -EINVAL; + if (i915_sw_fence_done(timeout) || i915_sw_fence_done(not)) { + pr_err("Fences immediately signaled\n"); + goto err; + } + + /* We round the timeout for the fence up to the next second */ + end = round_jiffies_up(jiffies + delay); + + sleep = jiffies_to_usecs(delay) / 3; + usleep_range(sleep, 2 * sleep); + if (time_after(jiffies, end)) { + pr_debug("Slept too long, delay=%lu, (target=%lu, now=%lu) skipping\n", + delay, end, jiffies); + goto skip; + } + + if (i915_sw_fence_done(timeout) || i915_sw_fence_done(not)) { + pr_err("Fences signaled too early\n"); + goto err; + } + + if (!wait_event_timeout(timeout->wait, + i915_sw_fence_done(timeout), + 2 * (end - jiffies) + 1)) { + pr_err("Timeout fence unsignaled!\n"); + goto err; + } + + if (i915_sw_fence_done(not)) { + pr_err("No timeout fence signaled!\n"); + goto err; + } + +skip: + dma_fence_signal(dma); + + if (!i915_sw_fence_done(timeout) || !i915_sw_fence_done(not)) { + pr_err("Fences unsignaled\n"); + goto err; + } + + free_fence(not); + free_fence(timeout); + dma_fence_put(dma); + + return 0; + +err: + dma_fence_signal(dma); + if (!IS_ERR_OR_NULL(timeout)) + free_fence(timeout); + if (!IS_ERR_OR_NULL(not)) + free_fence(not); + dma_fence_put(dma); + return err; +} + int i915_sw_fence_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -618,6 +751,7 @@ int i915_sw_fence_mock_selftests(void) SUBTEST(test_chain), SUBTEST(test_ipc), SUBTEST(test_timer), + SUBTEST(test_dma_fence), }; return i915_subtests(tests, NULL); diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c index 3f9016466dea..fb74e2cf8a0a 100644 --- a/drivers/gpu/drm/i915/selftests/intel_guc.c +++ b/drivers/gpu/drm/i915/selftests/intel_guc.c @@ -87,7 +87,7 @@ static int validate_client(struct intel_guc_client *client, static bool client_doorbell_in_sync(struct intel_guc_client *client) { - return doorbell_ok(client->guc, client->doorbell_id); + return !client || doorbell_ok(client->guc, client->doorbell_id); } /* @@ -137,7 +137,6 @@ static int igt_guc_clients(void *args) goto unlock; } GEM_BUG_ON(!guc->execbuf_client); - GEM_BUG_ON(!guc->preempt_client); err = validate_client(guc->execbuf_client, GUC_CLIENT_PRIORITY_KMD_NORMAL, false); @@ -146,16 +145,18 @@ static int igt_guc_clients(void *args) goto out; } - err = validate_client(guc->preempt_client, - GUC_CLIENT_PRIORITY_KMD_HIGH, true); - if (err) { - pr_err("preempt client validation failed\n"); - goto out; + if (guc->preempt_client) { + err = validate_client(guc->preempt_client, + GUC_CLIENT_PRIORITY_KMD_HIGH, true); + if (err) { + pr_err("preempt client validation failed\n"); + goto out; + } } /* each client should now have reserved a doorbell */ if 
(!has_doorbell(guc->execbuf_client) || - !has_doorbell(guc->preempt_client)) { + (guc->preempt_client && !has_doorbell(guc->preempt_client))) { pr_err("guc_clients_create didn't reserve doorbells\n"); err = -EINVAL; goto out; @@ -224,7 +225,8 @@ out: * clients during unload. */ destroy_doorbell(guc->execbuf_client); - destroy_doorbell(guc->preempt_client); + if (guc->preempt_client) + destroy_doorbell(guc->preempt_client); guc_clients_destroy(guc); guc_clients_create(guc); guc_clients_doorbell_init(guc); diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index d1f91a533afa..df7898c8edcb 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -33,6 +33,7 @@ struct hang { struct drm_i915_private *i915; struct drm_i915_gem_object *hws; struct drm_i915_gem_object *obj; + struct i915_gem_context *ctx; u32 *seqno; u32 *batch; }; @@ -45,9 +46,15 @@ static int hang_init(struct hang *h, struct drm_i915_private *i915) memset(h, 0, sizeof(*h)); h->i915 = i915; + h->ctx = kernel_context(i915); + if (IS_ERR(h->ctx)) + return PTR_ERR(h->ctx); + h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(h->hws)) - return PTR_ERR(h->hws); + if (IS_ERR(h->hws)) { + err = PTR_ERR(h->hws); + goto err_ctx; + } h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(h->obj)) { @@ -79,17 +86,19 @@ err_obj: i915_gem_object_put(h->obj); err_hws: i915_gem_object_put(h->hws); +err_ctx: + kernel_context_close(h->ctx); return err; } static u64 hws_address(const struct i915_vma *hws, - const struct drm_i915_gem_request *rq) + const struct i915_request *rq) { return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context); } static int emit_recurse_batch(struct hang *h, - struct drm_i915_gem_request *rq) + struct i915_request *rq) { struct drm_i915_private *i915 = h->i915; struct i915_address_space *vm = rq->ctx->ppgtt ? 
&rq->ctx->ppgtt->base : &i915->ggtt.base; @@ -195,12 +204,10 @@ unpin_vma: return err; } -static struct drm_i915_gem_request * -hang_create_request(struct hang *h, - struct intel_engine_cs *engine, - struct i915_gem_context *ctx) +static struct i915_request * +hang_create_request(struct hang *h, struct intel_engine_cs *engine) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; int err; if (i915_gem_object_is_active(h->obj)) { @@ -225,25 +232,76 @@ hang_create_request(struct hang *h, h->batch = vaddr; } - rq = i915_gem_request_alloc(engine, ctx); + rq = i915_request_alloc(engine, h->ctx); if (IS_ERR(rq)) return rq; err = emit_recurse_batch(h, rq); if (err) { - __i915_add_request(rq, false); + __i915_request_add(rq, false); return ERR_PTR(err); } return rq; } -static u32 hws_seqno(const struct hang *h, - const struct drm_i915_gem_request *rq) +static u32 hws_seqno(const struct hang *h, const struct i915_request *rq) { return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]); } +struct wedge_me { + struct delayed_work work; + struct drm_i915_private *i915; + const void *symbol; +}; + +static void wedge_me(struct work_struct *work) +{ + struct wedge_me *w = container_of(work, typeof(*w), work.work); + + pr_err("%pS timed out, cancelling all further testing.\n", + w->symbol); + i915_gem_set_wedged(w->i915); +} + +static void __init_wedge(struct wedge_me *w, + struct drm_i915_private *i915, + long timeout, + const void *symbol) +{ + w->i915 = i915; + w->symbol = symbol; + + INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me); + schedule_delayed_work(&w->work, timeout); +} + +static void __fini_wedge(struct wedge_me *w) +{ + cancel_delayed_work_sync(&w->work); + destroy_delayed_work_on_stack(&w->work); + w->i915 = NULL; +} + +#define wedge_on_timeout(W, DEV, TIMEOUT) \ + for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \ + (W)->i915; \ + __fini_wedge((W))) + +static noinline int +flush_test(struct drm_i915_private *i915, unsigned int flags) +{ + struct wedge_me w; + + cond_resched(); + + wedge_on_timeout(&w, i915, HZ) + i915_gem_wait_for_idle(i915, flags); + + return i915_terminally_wedged(&i915->gpu_error) ? 
-EIO : 0; +} + static void hang_fini(struct hang *h) { *h->batch = MI_BATCH_BUFFER_END; @@ -255,10 +313,12 @@ static void hang_fini(struct hang *h) i915_gem_object_unpin_map(h->hws); i915_gem_object_put(h->hws); - i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED); + kernel_context_close(h->ctx); + + flush_test(h->i915, I915_WAIT_LOCKED); } -static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq) +static bool wait_for_hang(struct hang *h, struct i915_request *rq) { return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), rq->fence.seqno), @@ -271,7 +331,7 @@ static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq) static int igt_hang_sanitycheck(void *arg) { struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct intel_engine_cs *engine; enum intel_engine_id id; struct hang h; @@ -290,7 +350,7 @@ static int igt_hang_sanitycheck(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - rq = hang_create_request(&h, engine, i915->kernel_context); + rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); pr_err("Failed to create request for %s, err=%d\n", @@ -298,17 +358,17 @@ static int igt_hang_sanitycheck(void *arg) goto fini; } - i915_gem_request_get(rq); + i915_request_get(rq); *h.batch = MI_BATCH_BUFFER_END; i915_gem_chipset_flush(i915); - __i915_add_request(rq, true); + __i915_request_add(rq, true); - timeout = i915_wait_request(rq, + timeout = i915_request_wait(rq, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); - i915_gem_request_put(rq); + i915_request_put(rq); if (timeout < 0) { err = timeout; @@ -424,19 +484,18 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { if (active) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; mutex_lock(&i915->drm.struct_mutex); - rq = hang_create_request(&h, engine, - i915->kernel_context); + rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); mutex_unlock(&i915->drm.struct_mutex); break; } - i915_gem_request_get(rq); - __i915_add_request(rq, true); + i915_request_get(rq); + __i915_request_add(rq, true); mutex_unlock(&i915->drm.struct_mutex); if (!wait_for_hang(&h, rq)) { @@ -447,12 +506,12 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) intel_engine_dump(engine, &p, "%s\n", engine->name); - i915_gem_request_put(rq); + i915_request_put(rq); err = -EIO; break; } - i915_gem_request_put(rq); + i915_request_put(rq); } engine->hangcheck.stalled = true; @@ -487,7 +546,9 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) if (err) break; - cond_resched(); + err = flush_test(i915, 0); + if (err) + break; } if (i915_terminally_wedged(&i915->gpu_error)) @@ -515,7 +576,7 @@ static int igt_reset_active_engine(void *arg) static int active_engine(void *data) { struct intel_engine_cs *engine = data; - struct drm_i915_gem_request *rq[2] = {}; + struct i915_request *rq[2] = {}; struct i915_gem_context *ctx[2]; struct drm_file *file; unsigned long count = 0; @@ -544,29 +605,29 @@ static int active_engine(void *data) while (!kthread_should_stop()) { unsigned int idx = count++ & 1; - struct drm_i915_gem_request *old = rq[idx]; - struct drm_i915_gem_request *new; + struct i915_request *old = rq[idx]; + struct i915_request *new; mutex_lock(&engine->i915->drm.struct_mutex); - new = i915_gem_request_alloc(engine, ctx[idx]); + new = i915_request_alloc(engine, ctx[idx]); if (IS_ERR(new)) { 
mutex_unlock(&engine->i915->drm.struct_mutex); err = PTR_ERR(new); break; } - rq[idx] = i915_gem_request_get(new); - i915_add_request(new); + rq[idx] = i915_request_get(new); + i915_request_add(new); mutex_unlock(&engine->i915->drm.struct_mutex); if (old) { - i915_wait_request(old, 0, MAX_SCHEDULE_TIMEOUT); - i915_gem_request_put(old); + i915_request_wait(old, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(old); } } for (count = 0; count < ARRAY_SIZE(rq); count++) - i915_gem_request_put(rq[count]); + i915_request_put(rq[count]); err_file: mock_file_free(engine->i915, file); @@ -630,19 +691,18 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { if (active) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; mutex_lock(&i915->drm.struct_mutex); - rq = hang_create_request(&h, engine, - i915->kernel_context); + rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); mutex_unlock(&i915->drm.struct_mutex); break; } - i915_gem_request_get(rq); - __i915_add_request(rq, true); + i915_request_get(rq); + __i915_request_add(rq, true); mutex_unlock(&i915->drm.struct_mutex); if (!wait_for_hang(&h, rq)) { @@ -653,12 +713,12 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, intel_engine_dump(engine, &p, "%s\n", engine->name); - i915_gem_request_put(rq); + i915_request_put(rq); err = -EIO; break; } - i915_gem_request_put(rq); + i915_request_put(rq); } engine->hangcheck.stalled = true; @@ -726,7 +786,9 @@ unwind: if (err) break; - cond_resched(); + err = flush_test(i915, 0); + if (err) + break; } if (i915_terminally_wedged(&i915->gpu_error)) @@ -751,7 +813,7 @@ static int igt_reset_active_engine_others(void *arg) return __igt_reset_engine_others(arg, true); } -static u32 fake_hangcheck(struct drm_i915_gem_request *rq) +static u32 fake_hangcheck(struct i915_request *rq) { u32 reset_count; @@ -769,7 +831,7 @@ static u32 fake_hangcheck(struct drm_i915_gem_request *rq) static int igt_wait_reset(void *arg) { struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *rq; + struct i915_request *rq; unsigned int reset_count; struct hang h; long timeout; @@ -787,14 +849,14 @@ static int igt_wait_reset(void *arg) if (err) goto unlock; - rq = hang_create_request(&h, i915->engine[RCS], i915->kernel_context); + rq = hang_create_request(&h, i915->engine[RCS]); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto fini; } - i915_gem_request_get(rq); - __i915_add_request(rq, true); + i915_request_get(rq); + __i915_request_add(rq, true); if (!wait_for_hang(&h, rq)) { struct drm_printer p = drm_info_printer(i915->drm.dev); @@ -812,9 +874,9 @@ static int igt_wait_reset(void *arg) reset_count = fake_hangcheck(rq); - timeout = i915_wait_request(rq, I915_WAIT_LOCKED, 10); + timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10); if (timeout < 0) { - pr_err("i915_wait_request failed on a stuck request: err=%ld\n", + pr_err("i915_request_wait failed on a stuck request: err=%ld\n", timeout); err = timeout; goto out_rq; @@ -828,7 +890,7 @@ static int igt_wait_reset(void *arg) } out_rq: - i915_gem_request_put(rq); + i915_request_put(rq); fini: hang_fini(&h); unlock: @@ -859,37 +921,35 @@ static int igt_reset_queue(void *arg) goto unlock; for_each_engine(engine, i915, id) { - struct drm_i915_gem_request *prev; + struct i915_request *prev; IGT_TIMEOUT(end_time); unsigned int count; if (!intel_engine_can_store_dword(engine)) continue; - prev = hang_create_request(&h, engine, i915->kernel_context); + prev = 
hang_create_request(&h, engine);
 		if (IS_ERR(prev)) {
 			err = PTR_ERR(prev);
 			goto fini;
 		}

-		i915_gem_request_get(prev);
-		__i915_add_request(prev, true);
+		i915_request_get(prev);
+		__i915_request_add(prev, true);

 		count = 0;
 		do {
-			struct drm_i915_gem_request *rq;
+			struct i915_request *rq;
 			unsigned int reset_count;

-			rq = hang_create_request(&h,
-						 engine,
-						 i915->kernel_context);
+			rq = hang_create_request(&h, engine);
 			if (IS_ERR(rq)) {
 				err = PTR_ERR(rq);
 				goto fini;
 			}

-			i915_gem_request_get(rq);
-			__i915_add_request(rq, true);
+			i915_request_get(rq);
+			__i915_request_add(rq, true);

 			if (!wait_for_hang(&h, prev)) {
 				struct drm_printer p = drm_info_printer(i915->drm.dev);
@@ -899,8 +959,8 @@ static int igt_reset_queue(void *arg)
 				intel_engine_dump(prev->engine, &p,
 						  "%s\n", prev->engine->name);

-				i915_gem_request_put(rq);
-				i915_gem_request_put(prev);
+				i915_request_put(rq);
+				i915_request_put(prev);

 				i915_reset(i915, 0);
 				i915_gem_set_wedged(i915);
@@ -919,8 +979,8 @@
 			if (prev->fence.error != -EIO) {
 				pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
 				       prev->fence.error);
-				i915_gem_request_put(rq);
-				i915_gem_request_put(prev);
+				i915_request_put(rq);
+				i915_request_put(prev);
 				err = -EINVAL;
 				goto fini;
 			}
@@ -928,21 +988,21 @@
 			if (rq->fence.error) {
 				pr_err("Fence error status not zero [%d] after unrelated reset\n",
 				       rq->fence.error);
-				i915_gem_request_put(rq);
-				i915_gem_request_put(prev);
+				i915_request_put(rq);
+				i915_request_put(prev);
 				err = -EINVAL;
 				goto fini;
 			}

 			if (i915_reset_count(&i915->gpu_error) == reset_count) {
 				pr_err("No GPU reset recorded!\n");
-				i915_gem_request_put(rq);
-				i915_gem_request_put(prev);
+				i915_request_put(rq);
+				i915_request_put(prev);
 				err = -EINVAL;
 				goto fini;
 			}

-			i915_gem_request_put(prev);
+			i915_request_put(prev);
 			prev = rq;
 			count++;
 		} while (time_before(jiffies, end_time));
@@ -951,7 +1011,11 @@
 		*h.batch = MI_BATCH_BUFFER_END;
 		i915_gem_chipset_flush(i915);

-		i915_gem_request_put(prev);
+		i915_request_put(prev);
+
+		err = flush_test(i915, I915_WAIT_LOCKED);
+		if (err)
+			break;
 	}

 fini:
@@ -971,7 +1035,7 @@ static int igt_handle_error(void *arg)
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine = i915->engine[RCS];
 	struct hang h;
-	struct drm_i915_gem_request *rq;
+	struct i915_request *rq;
 	struct i915_gpu_state *error;
 	int err;

@@ -989,14 +1053,14 @@ static int igt_handle_error(void *arg)
 	if (err)
 		goto err_unlock;

-	rq = hang_create_request(&h, engine, i915->kernel_context);
+	rq = hang_create_request(&h, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_fini;
 	}

-	i915_gem_request_get(rq);
-	__i915_add_request(rq, true);
+	i915_request_get(rq);
+	__i915_request_add(rq, true);

 	if (!wait_for_hang(&h, rq)) {
 		struct drm_printer p = drm_info_printer(i915->drm.dev);
@@ -1033,7 +1097,7 @@
 	}

 err_request:
-	i915_gem_request_put(rq);
+	i915_request_put(rq);
 err_fini:
 	hang_fini(&h);
 err_unlock:
diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c
index 2f6367643171..f76f2597df5c 100644
--- a/drivers/gpu/drm/i915/selftests/intel_uncore.c
+++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c
@@ -61,20 +61,30 @@ static int intel_fw_table_check(const struct intel_forcewake_range *ranges,

 static int intel_shadow_table_check(void)
 {
-	const i915_reg_t *reg = gen8_shadowed_regs;
-	unsigned int i;
+	struct {
+		const i915_reg_t *regs;
+		unsigned int size;
+	} reg_lists[] = {
+		{ gen8_shadowed_regs, ARRAY_SIZE(gen8_shadowed_regs) },
+		{ gen11_shadowed_regs, ARRAY_SIZE(gen11_shadowed_regs) },
+	};
+	const i915_reg_t *reg;
+	unsigned int i, j;
 	s32 prev;

-	for (i = 0, prev = -1; i < ARRAY_SIZE(gen8_shadowed_regs); i++, reg++) {
-		u32 offset = i915_mmio_reg_offset(*reg);
+	for (j = 0; j < ARRAY_SIZE(reg_lists); ++j) {
+		reg = reg_lists[j].regs;
+		for (i = 0, prev = -1; i < reg_lists[j].size; i++, reg++) {
+			u32 offset = i915_mmio_reg_offset(*reg);

-		if (prev >= (s32)offset) {
-			pr_err("%s: entry[%d]:(%x) is before previous (%x)\n",
-			       __func__, i, offset, prev);
-			return -EINVAL;
-		}
+			if (prev >= (s32)offset) {
+				pr_err("%s: entry[%d]:(%x) is before previous (%x)\n",
+				       __func__, i, offset, prev);
+				return -EINVAL;
+			}

-		prev = offset;
+			prev = offset;
+		}
 	}

 	return 0;
@@ -90,6 +100,7 @@ int intel_uncore_mock_selftests(void)
 		{ __vlv_fw_ranges, ARRAY_SIZE(__vlv_fw_ranges), false },
 		{ __chv_fw_ranges, ARRAY_SIZE(__chv_fw_ranges), false },
 		{ __gen9_fw_ranges, ARRAY_SIZE(__gen9_fw_ranges), true },
+		{ __gen11_fw_ranges, ARRAY_SIZE(__gen11_fw_ranges), true },
 	};
 	int err, i;

diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index bbf80d42e793..501becc47c0c 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -92,3 +92,14 @@ live_context(struct drm_i915_private *i915, struct drm_file *file)

 	return i915_gem_create_context(i915, file->driver_priv);
 }
+
+struct i915_gem_context *
+kernel_context(struct drm_i915_private *i915)
+{
+	return i915_gem_context_create_kernel(i915, I915_PRIORITY_NORMAL);
+}
+
+void kernel_context_close(struct i915_gem_context *ctx)
+{
+	context_close(ctx);
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.h b/drivers/gpu/drm/i915/selftests/mock_context.h
index 2f432c03d413..29b9d60a158b 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.h
+++ b/drivers/gpu/drm/i915/selftests/mock_context.h
@@ -36,4 +36,7 @@ void mock_context_close(struct i915_gem_context *ctx);
 struct i915_gem_context *
 live_context(struct drm_i915_private *i915, struct drm_file *file);

+struct i915_gem_context *kernel_context(struct drm_i915_private *i915);
+void kernel_context_close(struct i915_gem_context *ctx);
+
 #endif /* !__MOCK_CONTEXT_H */
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 55c0e2c15782..78a89efa1119 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -81,7 +81,7 @@ static void mock_context_unpin(struct intel_engine_cs *engine,
 	i915_gem_context_put(ctx);
 }

-static int mock_request_alloc(struct drm_i915_gem_request *request)
+static int mock_request_alloc(struct i915_request *request)
 {
 	struct mock_request *mock = container_of(request, typeof(*mock), base);

@@ -91,24 +91,24 @@ static int mock_request_alloc(struct drm_i915_gem_request *request)
 	return 0;
 }

-static int mock_emit_flush(struct drm_i915_gem_request *request,
+static int mock_emit_flush(struct i915_request *request,
 			   unsigned int flags)
 {
 	return 0;
 }

-static void mock_emit_breadcrumb(struct drm_i915_gem_request *request,
+static void mock_emit_breadcrumb(struct i915_request *request,
 				 u32 *flags)
 {
 }

-static void mock_submit_request(struct drm_i915_gem_request *request)
+static void mock_submit_request(struct i915_request *request)
 {
 	struct mock_request *mock = container_of(request, typeof(*mock), base);
 	struct mock_engine *engine =
 		container_of(request->engine, typeof(*engine), base);

-	i915_gem_request_submit(request);
+	i915_request_submit(request);
 	GEM_BUG_ON(!request->global_seqno);

 	spin_lock_irq(&engine->hw_lock);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 1bc61f3f76fc..e6d4b882599a 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -43,7 +43,7 @@ void mock_device_flush(struct drm_i915_private *i915)
 	for_each_engine(engine, i915, id)
 		mock_engine_flush(engine);

-	i915_gem_retire_requests(i915);
+	i915_retire_requests(i915);
 }

 static void mock_device_release(struct drm_device *dev)
@@ -243,16 +243,10 @@ struct drm_i915_private *mock_gem_device(void)
 	if (!i915->kernel_context)
 		goto err_engine;

-	i915->preempt_context = mock_context(i915, NULL);
-	if (!i915->preempt_context)
-		goto err_kernel_context;
-
 	WARN_ON(i915_gemfs_init(i915));

 	return i915;

-err_kernel_context:
-	i915_gem_context_put(i915->kernel_context);
 err_engine:
 	for_each_engine(engine, i915, id)
 		mock_engine_free(engine);
diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c
index 8097e3693ec4..0dc29e242597 100644
--- a/drivers/gpu/drm/i915/selftests/mock_request.c
+++ b/drivers/gpu/drm/i915/selftests/mock_request.c
@@ -25,16 +25,16 @@
 #include "mock_engine.h"
 #include "mock_request.h"

-struct drm_i915_gem_request *
+struct i915_request *
 mock_request(struct intel_engine_cs *engine,
 	     struct i915_gem_context *context,
 	     unsigned long delay)
 {
-	struct drm_i915_gem_request *request;
+	struct i915_request *request;
 	struct mock_request *mock;

 	/* NB the i915->requests slab cache is enlarged to fit mock_request */
-	request = i915_gem_request_alloc(engine, context);
+	request = i915_request_alloc(engine, context);
 	if (IS_ERR(request))
 		return NULL;

@@ -44,7 +44,7 @@ mock_request(struct intel_engine_cs *engine,
 	return &mock->base;
 }

-bool mock_cancel_request(struct drm_i915_gem_request *request)
+bool mock_cancel_request(struct i915_request *request)
 {
 	struct mock_request *mock = container_of(request, typeof(*mock), base);
 	struct mock_engine *engine =
@@ -57,7 +57,7 @@ bool mock_cancel_request(struct drm_i915_gem_request *request)
 	spin_unlock_irq(&engine->hw_lock);

 	if (was_queued)
-		i915_gem_request_unsubmit(request);
+		i915_request_unsubmit(request);

 	return was_queued;
 }
diff --git a/drivers/gpu/drm/i915/selftests/mock_request.h b/drivers/gpu/drm/i915/selftests/mock_request.h
index 4dea74c8e96d..995fb728380c 100644
--- a/drivers/gpu/drm/i915/selftests/mock_request.h
+++ b/drivers/gpu/drm/i915/selftests/mock_request.h
@@ -27,20 +27,20 @@

 #include <linux/list.h>

-#include "../i915_gem_request.h"
+#include "../i915_request.h"

 struct mock_request {
-	struct drm_i915_gem_request base;
+	struct i915_request base;
 	struct list_head link;
 	unsigned long delay;
 };

-struct drm_i915_gem_request *
+struct i915_request *
 mock_request(struct intel_engine_cs *engine,
 	     struct i915_gem_context *context,
 	     unsigned long delay);

-bool mock_cancel_request(struct drm_i915_gem_request *request);
+bool mock_cancel_request(struct i915_request *request);

 #endif /* !__MOCK_REQUEST__ */