diff options
Diffstat (limited to 'drivers/gpu/drm/xe')
174 files changed, 7703 insertions, 2975 deletions
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 628c245c4822..edfd812e0f41 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -12,32 +12,15 @@ subdir-ccflags-$(CONFIG_DRM_XE_WERROR) += -Werror subdir-ccflags-y += -I$(obj) -I$(src) # generated sources -hostprogs := xe_gen_wa_oob +hostprogs := xe_gen_wa_oob generated_oob := $(obj)/generated/xe_wa_oob.c $(obj)/generated/xe_wa_oob.h - quiet_cmd_wa_oob = GEN $(notdir $(generated_oob)) cmd_wa_oob = mkdir -p $(@D); $^ $(generated_oob) - $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ $(src)/xe_wa_oob.rules $(call cmd,wa_oob) -uses_generated_oob := \ - $(obj)/xe_ggtt.o \ - $(obj)/xe_gsc.o \ - $(obj)/xe_gt.o \ - $(obj)/xe_guc.o \ - $(obj)/xe_guc_ads.o \ - $(obj)/xe_guc_pc.o \ - $(obj)/xe_migrate.o \ - $(obj)/xe_ring_ops.o \ - $(obj)/xe_vm.o \ - $(obj)/xe_wa.o \ - $(obj)/xe_ttm_stolen_mgr.o - -$(uses_generated_oob): $(generated_oob) - # Please keep these build lists sorted! # core driver code @@ -45,7 +28,6 @@ $(uses_generated_oob): $(generated_oob) xe-y += xe_bb.o \ xe_bo.o \ xe_bo_evict.o \ - xe_debugfs.o \ xe_devcoredump.o \ xe_device.o \ xe_device_sysfs.o \ @@ -58,12 +40,12 @@ xe-y += xe_bb.o \ xe_ggtt.o \ xe_gpu_scheduler.o \ xe_gsc.o \ + xe_gsc_debugfs.o \ xe_gsc_proxy.o \ xe_gsc_submit.o \ xe_gt.o \ xe_gt_ccs_mode.o \ xe_gt_clock.o \ - xe_gt_debugfs.o \ xe_gt_freq.o \ xe_gt_idle.o \ xe_gt_mcr.o \ @@ -76,7 +58,6 @@ xe-y += xe_bb.o \ xe_guc_ads.o \ xe_guc_ct.o \ xe_guc_db_mgr.o \ - xe_guc_debugfs.o \ xe_guc_hwconfig.o \ xe_guc_id_mgr.o \ xe_guc_klv_helpers.o \ @@ -86,9 +67,9 @@ xe-y += xe_bb.o \ xe_heci_gsc.o \ xe_hw_engine.o \ xe_hw_engine_class_sysfs.o \ + xe_hw_engine_group.o \ xe_hw_fence.o \ xe_huc.o \ - xe_huc_debugfs.o \ xe_irq.o \ xe_lrc.o \ xe_migrate.o \ @@ -124,7 +105,6 @@ xe-y += xe_bb.o \ xe_ttm_vram_mgr.o \ xe_tuning.o \ xe_uc.o \ - xe_uc_debugfs.o \ xe_uc_fw.o \ xe_vm.o \ xe_vram.o \ @@ -141,7 +121,6 @@ xe-$(CONFIG_HWMON) += xe_hwmon.o # graphics virtualization (SR-IOV) support xe-y += \ xe_gt_sriov_vf.o \ - xe_gt_sriov_vf_debugfs.o \ xe_guc_relay.o \ xe_memirq.o \ xe_sriov.o @@ -150,7 +129,6 @@ xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf.o \ xe_gt_sriov_pf_config.o \ xe_gt_sriov_pf_control.o \ - xe_gt_sriov_pf_debugfs.o \ xe_gt_sriov_pf_monitor.o \ xe_gt_sriov_pf_policy.o \ xe_gt_sriov_pf_service.o \ @@ -192,6 +170,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ display/xe_display.o \ display/xe_display_misc.o \ display/xe_display_rps.o \ + display/xe_display_wa.o \ display/xe_dsb_buffer.o \ display/xe_fb_pin.o \ display/xe_hdcp_gsc.o \ @@ -297,6 +276,16 @@ ifeq ($(CONFIG_DRM_FBDEV_EMULATION),y) endif ifeq ($(CONFIG_DEBUG_FS),y) + xe-y += xe_debugfs.o \ + xe_gt_debugfs.o \ + xe_gt_sriov_vf_debugfs.o \ + xe_gt_stats.o \ + xe_guc_debugfs.o \ + xe_huc_debugfs.o \ + xe_uc_debugfs.o + + xe-$(CONFIG_PCI_IOV) += xe_gt_sriov_pf_debugfs.o + xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_display_debugfs.o \ i915-display/intel_display_debugfs_params.o \ @@ -320,3 +309,6 @@ quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@) $(obj)/%.hdrtest: $(src)/%.h FORCE $(call if_changed_dep,hdrtest) + +uses_generated_oob := $(addprefix $(obj)/, $(xe-y)) +$(uses_generated_oob): $(obj)/generated/xe_wa_oob.h diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 8f9f60b28306..6b30743a2f6c 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -351,6 +351,7 @@ enum xe_guc_klv_ids { GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING = 0x9005, GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE = 0x9007, GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008, + GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009, }; #endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 2feedddf1e40..f27a2c75b56d 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -21,11 +21,6 @@ static inline struct drm_i915_private *to_i915(const struct drm_device *dev) return container_of(dev, struct drm_i915_private, drm); } -static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) -{ - return dev_get_drvdata(kdev); -} - #define IS_PLATFORM(xe, x) ((xe)->info.platform == x) #define INTEL_INFO(dev_priv) (&((dev_priv)->info)) #define IS_I830(dev_priv) (dev_priv && 0) @@ -80,14 +75,9 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #define IS_MOBILE(xe) (xe && 0) -#define HAS_GMD_ID(xe) GRAPHICS_VERx100(xe) >= 1270 - -/* Workarounds not handled yet */ -#define IS_DISPLAY_STEP(xe, first, last) ({u8 __step = (xe)->info.step.display; first <= __step && __step <= last; }) - -#define IS_LP(xe) (0) -#define IS_GEN9_LP(xe) (0) -#define IS_GEN9_BC(xe) (0) +#define IS_LP(xe) ((xe) && 0) +#define IS_GEN9_LP(xe) ((xe) && 0) +#define IS_GEN9_BC(xe) ((xe) && 0) #define IS_TIGERLAKE_UY(xe) (xe && 0) #define IS_COMETLAKE_ULX(xe) (xe && 0) @@ -115,9 +105,6 @@ struct i915_sched_attr { }; #define i915_gem_fence_wait_priority(fence, attr) do { (void) attr; } while (0) -#define pdev_to_i915 pdev_to_xe_device -#define RUNTIME_INFO(xe) (&(xe)->info.i915_runtime) - #define FORCEWAKE_ALL XE_FORCEWAKE_ALL #ifdef CONFIG_ARM64 diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h index a20d2638ea7a..bdae8392e125 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h @@ -7,7 +7,8 @@ #define I915_VMA_H #include <uapi/drm/i915_drm.h> -#include <drm/drm_mm.h> + +#include "xe_ggtt_types.h" /* We don't want these from i915_drm.h in case of Xe */ #undef I915_TILING_X @@ -19,7 +20,7 @@ struct xe_bo; struct i915_vma { struct xe_bo *bo, *dpt; - struct drm_mm_node node; + struct xe_ggtt_node *node; }; #define i915_ggtt_clear_scanout(bo) do { } while (0) @@ -28,7 +29,7 @@ struct i915_vma { static inline u32 i915_ggtt_offset(const struct i915_vma *vma) { - return vma->node.start; + return vma->node->base.start; } #endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h index 0c47661bdc6a..a473aa6697d0 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h @@ -13,7 +13,7 @@ static inline int snb_pcode_write_timeout(struct intel_uncore *uncore, u32 mbox, u32 val, int fast_timeout_us, int slow_timeout_ms) { - return xe_pcode_write_timeout(__compat_uncore_to_gt(uncore), mbox, val, + return xe_pcode_write_timeout(__compat_uncore_to_tile(uncore), mbox, val, slow_timeout_ms ?: 1); } @@ -21,13 +21,13 @@ static inline int snb_pcode_write(struct intel_uncore *uncore, u32 mbox, u32 val) { - return xe_pcode_write(__compat_uncore_to_gt(uncore), mbox, val); + return xe_pcode_write(__compat_uncore_to_tile(uncore), mbox, val); } static inline int snb_pcode_read(struct intel_uncore *uncore, u32 mbox, u32 *val, u32 *val1) { - return xe_pcode_read(__compat_uncore_to_gt(uncore), mbox, val, val1); + return xe_pcode_read(__compat_uncore_to_tile(uncore), mbox, val, val1); } static inline int @@ -35,7 +35,7 @@ skl_pcode_request(struct intel_uncore *uncore, u32 mbox, u32 request, u32 reply_mask, u32 reply, int timeout_base_ms) { - return xe_pcode_request(__compat_uncore_to_gt(uncore), mbox, request, reply_mask, reply, + return xe_pcode_request(__compat_uncore_to_tile(uncore), mbox, request, reply_mask, reply, timeout_base_ms); } diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h index 0006ef812346..2cf13a572ab0 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h @@ -6,15 +6,9 @@ #ifndef __INTEL_STEP_H__ #define __INTEL_STEP_H__ -#include "xe_device_types.h" #include "xe_step.h" -#define intel_display_step_name xe_display_step_name - -static inline -const char *xe_display_step_name(struct xe_device *xe) -{ - return xe_step_name(xe->info.step.display); -} +#define intel_step xe_step +#define intel_step_name xe_step_name #endif /* __INTEL_STEP_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h index 083c4da2ea41..eb5b5f0e4bd9 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h @@ -17,6 +17,13 @@ static inline struct xe_gt *__compat_uncore_to_gt(struct intel_uncore *uncore) return xe_root_mmio_gt(xe); } +static inline struct xe_tile *__compat_uncore_to_tile(struct intel_uncore *uncore) +{ + struct xe_device *xe = container_of(uncore, struct xe_device, uncore); + + return xe_device_get_root_tile(xe); +} + static inline u32 intel_uncore_read(struct intel_uncore *uncore, i915_reg_t i915_reg) { diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c index f835492f73fb..63ce97cc4cfe 100644 --- a/drivers/gpu/drm/xe/display/intel_fb_bo.c +++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c @@ -7,6 +7,7 @@ #include <drm/ttm/ttm_bo.h> #include "intel_display_types.h" +#include "intel_fb.h" #include "intel_fb_bo.h" #include "xe_bo.h" @@ -28,6 +29,14 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, struct xe_device *xe = to_xe_device(bo->ttm.base.dev); int ret; + /* + * Some modifiers require physical alignment of 64KiB VRAM pages; + * require that the BO in those cases is created correctly. + */ + if (XE_IOCTL_DBG(xe, intel_fb_needs_64k_phys(mode_cmd->modifier[0]) && + !(bo->flags & XE_BO_FLAG_NEEDS_64K))) + return -EINVAL; + xe_bo_get(bo); ret = ttm_bo_reserve(&bo->ttm, true, false, NULL); diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index 816ad13821a8..99499d6c0256 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -8,8 +8,10 @@ #include "intel_display_types.h" #include "intel_fbdev_fb.h" #include "xe_bo.h" -#include "xe_gt.h" #include "xe_ttm_stolen_mgr.h" +#include "xe_wa.h" + +#include <generated/xe_wa_oob.h> struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, struct drm_fb_helper_surface_size *sizes) @@ -37,7 +39,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, size = PAGE_ALIGN(size); obj = ERR_PTR(-ENODEV); - if (!IS_DGFX(xe)) { + if (!IS_DGFX(xe) && !XE_WA(xe_root_mmio_gt(xe), 22019338487_display)) { obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, size, ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | @@ -48,6 +50,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, else drm_info(&xe->drm, "Allocated fbdev into stolen failed: %li\n", PTR_ERR(obj)); } + if (IS_ERR(obj)) { obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, size, ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 8b83dcff72e1..c6e0c8d77a70 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -10,7 +10,7 @@ #include <drm/drm_drv.h> #include <drm/drm_managed.h> -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> #include "soc/intel_dram.h" #include "i915_drv.h" /* FIXME: HAS_DISPLAY() depends on this */ @@ -46,7 +46,7 @@ static bool has_display(struct xe_device *xe) */ bool xe_display_driver_probe_defer(struct pci_dev *pdev) { - if (!xe_modparam.enable_display) + if (!xe_modparam.probe_display) return 0; return intel_display_driver_probe_defer(pdev); @@ -62,7 +62,7 @@ bool xe_display_driver_probe_defer(struct pci_dev *pdev) */ void xe_display_driver_set_hooks(struct drm_driver *driver) { - if (!xe_modparam.enable_display) + if (!xe_modparam.probe_display) return; driver->driver_features |= DRIVER_MODESET | DRIVER_ATOMIC; @@ -104,7 +104,7 @@ static void xe_display_fini_nommio(struct drm_device *dev, void *dummy) { struct xe_device *xe = to_xe_device(dev); - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_power_domains_cleanup(xe); @@ -112,7 +112,7 @@ static void xe_display_fini_nommio(struct drm_device *dev, void *dummy) int xe_display_init_nommio(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return 0; /* Fake uncore lock */ @@ -127,24 +127,27 @@ int xe_display_init_nommio(struct xe_device *xe) static void xe_display_fini_noirq(void *arg) { struct xe_device *xe = arg; + struct intel_display *display = &xe->display; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_driver_remove_noirq(xe); + intel_opregion_cleanup(display); } int xe_display_init_noirq(struct xe_device *xe) { + struct intel_display *display = &xe->display; int err; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return 0; intel_display_driver_early_probe(xe); /* Early display init.. */ - intel_opregion_setup(xe); + intel_opregion_setup(display); /* * Fill the dram structure to get the system dram info. This will be @@ -157,8 +160,10 @@ int xe_display_init_noirq(struct xe_device *xe) intel_display_device_info_runtime_init(xe); err = intel_display_driver_probe_noirq(xe); - if (err) + if (err) { + intel_opregion_cleanup(display); return err; + } return devm_add_action_or_reset(xe->drm.dev, xe_display_fini_noirq, xe); } @@ -167,7 +172,7 @@ static void xe_display_fini_noaccel(void *arg) { struct xe_device *xe = arg; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_driver_remove_nogem(xe); @@ -177,7 +182,7 @@ int xe_display_init_noaccel(struct xe_device *xe) { int err; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return 0; err = intel_display_driver_probe_nogem(xe); @@ -189,7 +194,7 @@ int xe_display_init_noaccel(struct xe_device *xe) int xe_display_init(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return 0; return intel_display_driver_probe(xe); @@ -197,7 +202,7 @@ int xe_display_init(struct xe_device *xe) void xe_display_fini(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_hpd_poll_fini(xe); @@ -208,7 +213,7 @@ void xe_display_fini(struct xe_device *xe) void xe_display_register(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_driver_register(xe); @@ -218,7 +223,7 @@ void xe_display_register(struct xe_device *xe) void xe_display_unregister(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_unregister_dsm_handler(); @@ -228,7 +233,7 @@ void xe_display_unregister(struct xe_device *xe) void xe_display_driver_remove(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_driver_remove(xe); @@ -238,7 +243,7 @@ void xe_display_driver_remove(struct xe_device *xe) void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; if (master_ctl & DISPLAY_IRQ) @@ -247,16 +252,18 @@ void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) { - if (!xe->info.enable_display) + struct intel_display *display = &xe->display; + + if (!xe->info.probe_display) return; if (gu_misc_iir & GU_MISC_GSE) - intel_opregion_asle_intr(xe); + intel_opregion_asle_intr(display); } void xe_display_irq_reset(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; gen11_display_irq_reset(xe); @@ -264,7 +271,7 @@ void xe_display_irq_reset(struct xe_device *xe) void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; if (gt->info.id == XE_GT0) @@ -280,10 +287,33 @@ static bool suspend_to_idle(void) return false; } -void xe_display_pm_suspend(struct xe_device *xe, bool runtime) +static void xe_display_flush_cleanup_work(struct xe_device *xe) +{ + struct intel_crtc *crtc; + + for_each_intel_crtc(&xe->drm, crtc) { + struct drm_crtc_commit *commit; + + spin_lock(&crtc->base.commit_lock); + commit = list_first_entry_or_null(&crtc->base.commit_list, + struct drm_crtc_commit, commit_entry); + if (commit) + drm_crtc_commit_get(commit); + spin_unlock(&crtc->base.commit_lock); + + if (commit) { + wait_for_completion(&commit->cleanup_done); + drm_crtc_commit_put(commit); + } + } +} + +/* TODO: System and runtime suspend/resume sequences will be sanitized as a follow-up. */ +static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime) { + struct intel_display *display = &xe->display; bool s2idle = suspend_to_idle(); - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; /* @@ -291,29 +321,54 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) * properly. */ intel_power_domains_disable(xe); - if (has_display(xe)) + intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); + if (!runtime && has_display(xe)) { drm_kms_helper_poll_disable(&xe->drm); - - if (!runtime) + intel_display_driver_disable_user_access(xe); intel_display_driver_suspend(xe); + } + + xe_display_flush_cleanup_work(xe); intel_dp_mst_suspend(xe); intel_hpd_cancel_work(xe); - intel_encoder_suspend_all(&xe->display); - - intel_opregion_suspend(xe, s2idle ? PCI_D1 : PCI_D3cold); + if (!runtime && has_display(xe)) { + intel_display_driver_suspend_access(xe); + intel_encoder_suspend_all(&xe->display); + } - intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); + intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold); intel_dmc_suspend(xe); + + if (runtime && has_display(xe)) + intel_hpd_poll_enable(xe); +} + +void xe_display_pm_suspend(struct xe_device *xe) +{ + __xe_display_pm_suspend(xe, false); +} + +void xe_display_pm_runtime_suspend(struct xe_device *xe) +{ + if (!xe->info.probe_display) + return; + + if (xe->d3cold.allowed) { + __xe_display_pm_suspend(xe, true); + return; + } + + intel_hpd_poll_enable(xe); } void xe_display_pm_suspend_late(struct xe_device *xe) { bool s2idle = suspend_to_idle(); - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_power_domains_suspend(xe, s2idle); @@ -323,7 +378,7 @@ void xe_display_pm_suspend_late(struct xe_device *xe) void xe_display_pm_resume_early(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_power_resume_early(xe); @@ -331,9 +386,11 @@ void xe_display_pm_resume_early(struct xe_device *xe) intel_power_domains_resume(xe); } -void xe_display_pm_resume(struct xe_device *xe, bool runtime) +static void __xe_display_pm_resume(struct xe_device *xe, bool runtime) { - if (!xe->info.enable_display) + struct intel_display *display = &xe->display; + + if (!xe->info.probe_display) return; intel_dmc_resume(xe); @@ -344,22 +401,47 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime) intel_display_driver_init_hw(xe); intel_hpd_init(xe); + if (!runtime && has_display(xe)) + intel_display_driver_resume_access(xe); + /* MST sideband requires HPD interrupts enabled */ intel_dp_mst_resume(xe); - if (!runtime) + if (!runtime && has_display(xe)) { intel_display_driver_resume(xe); + drm_kms_helper_poll_enable(&xe->drm); + intel_display_driver_enable_user_access(xe); + } - intel_hpd_poll_disable(xe); if (has_display(xe)) - drm_kms_helper_poll_enable(&xe->drm); + intel_hpd_poll_disable(xe); - intel_opregion_resume(xe); + intel_opregion_resume(display); intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false); intel_power_domains_enable(xe); } +void xe_display_pm_resume(struct xe_device *xe) +{ + __xe_display_pm_resume(xe, false); +} + +void xe_display_pm_runtime_resume(struct xe_device *xe) +{ + if (!xe->info.probe_display) + return; + + if (xe->d3cold.allowed) { + __xe_display_pm_resume(xe, true); + return; + } + + intel_hpd_init(xe); + intel_hpd_poll_disable(xe); +} + + static void display_device_remove(struct drm_device *dev, void *arg) { struct xe_device *xe = arg; @@ -371,7 +453,7 @@ int xe_display_probe(struct xe_device *xe) { int err; - if (!xe->info.enable_display) + if (!xe->info.probe_display) goto no_display; intel_display_device_probe(xe); @@ -384,7 +466,7 @@ int xe_display_probe(struct xe_device *xe) return 0; no_display: - xe->info.enable_display = false; + xe->info.probe_display = false; unset_display_features(xe); return 0; } diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h index 000fb5799df5..bed55fd26f30 100644 --- a/drivers/gpu/drm/xe/display/xe_display.h +++ b/drivers/gpu/drm/xe/display/xe_display.h @@ -34,10 +34,12 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir); void xe_display_irq_reset(struct xe_device *xe); void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt); -void xe_display_pm_suspend(struct xe_device *xe, bool runtime); +void xe_display_pm_suspend(struct xe_device *xe); void xe_display_pm_suspend_late(struct xe_device *xe); void xe_display_pm_resume_early(struct xe_device *xe); -void xe_display_pm_resume(struct xe_device *xe, bool runtime); +void xe_display_pm_resume(struct xe_device *xe); +void xe_display_pm_runtime_suspend(struct xe_device *xe); +void xe_display_pm_runtime_resume(struct xe_device *xe); #else @@ -63,10 +65,12 @@ static inline void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) static inline void xe_display_irq_reset(struct xe_device *xe) {} static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {} -static inline void xe_display_pm_suspend(struct xe_device *xe, bool runtime) {} +static inline void xe_display_pm_suspend(struct xe_device *xe) {} static inline void xe_display_pm_suspend_late(struct xe_device *xe) {} static inline void xe_display_pm_resume_early(struct xe_device *xe) {} -static inline void xe_display_pm_resume(struct xe_device *xe, bool runtime) {} +static inline void xe_display_pm_resume(struct xe_device *xe) {} +static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {} +static inline void xe_display_pm_runtime_resume(struct xe_device *xe) {} #endif /* CONFIG_DRM_XE_DISPLAY */ #endif /* _XE_DISPLAY_H_ */ diff --git a/drivers/gpu/drm/xe/display/xe_display_wa.c b/drivers/gpu/drm/xe/display/xe_display_wa.c new file mode 100644 index 000000000000..68e3d1959ad6 --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_display_wa.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include "intel_display_wa.h" + +#include "xe_device.h" +#include "xe_wa.h" + +#include <generated/xe_wa_oob.h> + +bool intel_display_needs_wa_16023588340(struct drm_i915_private *i915) +{ + return XE_WA(xe_root_mmio_gt(i915), 16023588340); +} diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index 9e860c61f4b3..f99d901a3214 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -7,7 +7,8 @@ #include "intel_display_types.h" #include "intel_dsb_buffer.h" #include "xe_bo.h" -#include "xe_gt.h" +#include "xe_device.h" +#include "xe_device_types.h" u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) { @@ -16,7 +17,10 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val) { + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; + iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val); + xe_device_l2_flush(xe); } u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) @@ -26,9 +30,12 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size) { + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; + WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf)); iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size); + xe_device_l2_flush(xe); } bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 423f367c7065..b58fc4ba2aac 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -10,8 +10,8 @@ #include "intel_fb.h" #include "intel_fb_pin.h" #include "xe_bo.h" +#include "xe_device.h" #include "xe_ggtt.h" -#include "xe_gt.h" #include "xe_pm.h" static void @@ -203,21 +203,28 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) align = max_t(u32, align, SZ_64K); - if (bo->ggtt_node.size && view->type == I915_GTT_VIEW_NORMAL) { + if (bo->ggtt_node && view->type == I915_GTT_VIEW_NORMAL) { vma->node = bo->ggtt_node; } else if (view->type == I915_GTT_VIEW_NORMAL) { u32 x, size = bo->ttm.base.size; - ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size, - align, 0); - if (ret) + vma->node = xe_ggtt_node_init(ggtt); + if (IS_ERR(vma->node)) { + ret = PTR_ERR(vma->node); goto out_unlock; + } + + ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0); + if (ret) { + xe_ggtt_node_fini(vma->node); + goto out_unlock; + } for (x = 0; x < size; x += XE_PAGE_SIZE) { u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x, xe->pat.idx[XE_CACHE_NONE]); - ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node.start + x, pte); + ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node->base.start + x, pte); } } else { u32 i, ggtt_ofs; @@ -226,12 +233,19 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, /* display seems to use tiles instead of bytes here, so convert it back.. */ u32 size = intel_rotation_info_size(rot_info) * XE_PAGE_SIZE; - ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size, - align, 0); - if (ret) + vma->node = xe_ggtt_node_init(ggtt); + if (IS_ERR(vma->node)) { + ret = PTR_ERR(vma->node); + goto out_unlock; + } + + ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0); + if (ret) { + xe_ggtt_node_fini(vma->node); goto out_unlock; + } - ggtt_ofs = vma->node.start; + ggtt_ofs = vma->node->base.start; for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++) write_ggtt_rotated(bo, ggtt, &ggtt_ofs, @@ -304,6 +318,8 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, if (ret) goto err_unpin; + /* Ensure DPT writes are flushed */ + xe_device_l2_flush(xe); return vma; err_unpin: @@ -317,14 +333,11 @@ err: static void __xe_unpin_fb_vma(struct i915_vma *vma) { - struct xe_device *xe = to_xe_device(vma->bo->ttm.base.dev); - struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; - if (vma->dpt) xe_bo_unpin_map_no_vm(vma->dpt); - else if (!drm_mm_node_allocated(&vma->bo->ggtt_node) || - vma->bo->ggtt_node.start != vma->node.start) - xe_ggtt_remove_node(ggtt, &vma->node, false); + else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node) || + vma->bo->ggtt_node->base.start != vma->node->base.start) + xe_ggtt_node_remove(vma->node, false); ttm_bo_reserve(&vma->bo->ttm, false, false, NULL); ttm_bo_unpin(&vma->bo->ttm); @@ -374,8 +387,8 @@ void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) } /* - * For Xe introduce dummy intel_dpt_create which just return NULL and - * intel_dpt_destroy which does nothing. + * For Xe introduce dummy intel_dpt_create which just return NULL, + * intel_dpt_destroy which does nothing, and fake intel_dpt_ofsset returning 0; */ struct i915_address_space *intel_dpt_create(struct intel_framebuffer *fb) { @@ -386,3 +399,8 @@ void intel_dpt_destroy(struct i915_address_space *vm) { return; } + +u64 intel_dpt_offset(struct i915_vma *dpt_vma) +{ + return 0; +} diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 990285aa9b26..6619a40aed15 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -16,7 +16,6 @@ #include "xe_force_wake.h" #include "xe_gsc_proxy.h" #include "xe_gsc_submit.h" -#include "xe_gt.h" #include "xe_map.h" #include "xe_pm.h" #include "xe_uc_fw.h" @@ -40,10 +39,14 @@ bool intel_hdcp_gsc_check_status(struct xe_device *xe) { struct xe_tile *tile = xe_device_get_root_tile(xe); struct xe_gt *gt = tile->media_gt; + struct xe_gsc *gsc = >->uc.gsc; bool ret = true; - if (!xe_uc_fw_is_enabled(>->uc.gsc.fw)) + if (!gsc && !xe_uc_fw_is_enabled(&gsc->fw)) { + drm_dbg_kms(&xe->drm, + "GSC Components not ready for HDCP2.x\n"); return false; + } xe_pm_runtime_get(xe); if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC)) { @@ -53,7 +56,7 @@ bool intel_hdcp_gsc_check_status(struct xe_device *xe) goto out; } - if (!xe_gsc_proxy_init_done(>->uc.gsc)) + if (!xe_gsc_proxy_init_done(gsc)) ret = false; xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index 5eccd6abb3ef..a50ab9eae40a 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -18,6 +18,9 @@ #include "intel_frontbuffer.h" #include "intel_plane_initial.h" #include "xe_bo.h" +#include "xe_wa.h" + +#include <generated/xe_wa_oob.h> static bool intel_reuse_initial_plane_obj(struct intel_crtc *this, @@ -104,6 +107,9 @@ initial_plane_bo(struct xe_device *xe, phys_base = base; flags |= XE_BO_FLAG_STOLEN; + if (XE_WA(xe_root_mmio_gt(xe), 22019338487_display)) + return NULL; + /* * If the FB is too big, just don't use it since fbdev is not very * important and we should probably use that space with FBC or other diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index c38db2a74614..81b71903675e 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -104,6 +104,7 @@ #define CSFE_CHICKEN1(base) XE_REG((base) + 0xd4, XE_REG_OPTION_MASKED) #define GHWSP_CSB_REPORT_DIS REG_BIT(15) #define PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS REG_BIT(14) +#define CS_PRIORITY_MEM_READ REG_BIT(7) #define FF_SLICE_CS_CHICKEN1(base) XE_REG((base) + 0xe0, XE_REG_OPTION_MASKED) #define FFSC_PERCTX_PREEMPT_CTRL REG_BIT(14) diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h index e2a925be137c..7702364b65f1 100644 --- a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h @@ -32,8 +32,12 @@ #define HECI1_FWSTS1_CURRENT_STATE_RESET 0 #define HECI1_FWSTS1_PROXY_STATE_NORMAL 5 #define HECI1_FWSTS1_INIT_COMPLETE REG_BIT(9) +#define HECI_FWSTS2(base) XE_REG((base) + 0xc48) +#define HECI_FWSTS3(base) XE_REG((base) + 0xc60) +#define HECI_FWSTS4(base) XE_REG((base) + 0xc64) #define HECI_FWSTS5(base) XE_REG((base) + 0xc68) #define HECI1_FWSTS5_HUC_AUTH_DONE REG_BIT(19) +#define HECI_FWSTS6(base) XE_REG((base) + 0xc6c) #define HECI_H_GS1(base) XE_REG((base) + 0xc4c) #define HECI_H_GS1_ER_PREP REG_BIT(0) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index d44564bad009..bd604b9f08e4 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -80,6 +80,12 @@ #define LE_CACHEABILITY_MASK REG_GENMASK(1, 0) #define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value) +#define STATELESS_COMPRESSION_CTRL XE_REG_MCR(0x4148) +#define UNIFIED_COMPRESSION_FORMAT REG_GENMASK(3, 0) + +#define XE2_GAMREQSTRM_CTRL XE_REG_MCR(0x4194) +#define CG_DIS_CNTLBUS REG_BIT(6) + #define CCS_AUX_INV XE_REG(0x4208) #define VD0_AUX_INV XE_REG(0x4218) @@ -88,6 +94,8 @@ #define VE1_AUX_INV XE_REG(0x42b8) #define AUX_INV REG_BIT(0) +#define XE2_LMEM_CFG XE_REG(0x48b0) + #define XEHP_TILE_ADDR_RANGE(_idx) XE_REG_MCR(0x4900 + (_idx) * 4) #define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910) #define XEHP_FLAT_CCS_PTR REG_GENMASK(31, 8) @@ -97,12 +105,14 @@ #define CHICKEN_RASTER_1 XE_REG_MCR(0x6204, XE_REG_OPTION_MASKED) #define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) +#define DIS_CLIP_NEGATIVE_BOUNDING_BOX REG_BIT(6) #define CHICKEN_RASTER_2 XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED) #define TBIMR_FAST_CLIP REG_BIT(5) #define FF_MODE XE_REG_MCR(0x6210) #define DIS_TE_AUTOSTRIP REG_BIT(31) +#define VS_HIT_MAX_VALUE_MASK REG_GENMASK(25, 20) #define DIS_MESH_PARTIAL_AUTOSTRIP REG_BIT(16) #define DIS_MESH_AUTOSTRIP REG_BIT(15) @@ -159,6 +169,8 @@ #define XEHP_SLICE_COMMON_ECO_CHICKEN1 XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED) #define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) +#define XE2LPM_CCCHKNREG1 XE_REG(0x82a8) + #define VF_PREEMPTION XE_REG(0x83a4, XE_REG_OPTION_MASKED) #define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0) @@ -187,6 +199,7 @@ #define GSCPSMI_BASE XE_REG(0x880c) #define CCCHKNREG1 XE_REG_MCR(0x8828) +#define L3CMPCTRL REG_BIT(23) #define ENCOMPPERFFIX REG_BIT(18) /* Fuse readout registers for GT */ @@ -361,9 +374,15 @@ #define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4) #define XEHP_LNESPARE REG_BIT(19) +#define L3SQCREG2 XE_REG_MCR(0xb104) +#define COMPMEMRD256BOVRFETCHEN REG_BIT(20) + #define L3SQCREG3 XE_REG_MCR(0xb108) #define COMPPWOVERFETCHEN REG_BIT(28) +#define SCRATCH3_LBCF XE_REG_MCR(0xb154) +#define RWFLUSHALLEN REG_BIT(17) + #define XEHP_L3SQCREG5 XE_REG_MCR(0xb158) #define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0) @@ -372,6 +391,14 @@ #define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8) +#define XE2_GLOBAL_INVAL XE_REG(0xb404) + +#define XE2LPM_L3SQCREG2 XE_REG_MCR(0xb604) + +#define XE2LPM_L3SQCREG3 XE_REG_MCR(0xb608) + +#define XE2LPM_SCRATCH3_LBCF XE_REG_MCR(0xb654) + #define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658) #define XE2_TDF_CTRL XE_REG(0xb418) @@ -395,6 +422,10 @@ #define INVALIDATION_BROADCAST_MODE_DIS REG_BIT(12) #define GLOBAL_INVALIDATION_MODE REG_BIT(2) +#define LMEM_CFG XE_REG(0xcf58) +#define LMEM_EN REG_BIT(31) +#define LMTT_DIR_PTR REG_GENMASK(30, 0) /* in multiples of 64KB */ + #define HALF_SLICE_CHICKEN5 XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED) #define DISABLE_SAMPLE_G_PERFORMANCE REG_BIT(0) @@ -429,6 +460,7 @@ #define DIS_FIX_EOT1_FLUSH REG_BIT(9) #define TDL_TSL_CHICKEN XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED) +#define STK_ID_RESTRICT REG_BIT(12) #define SLM_WMTP_RESTORE REG_BIT(11) #define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED) @@ -485,7 +517,7 @@ * [4-6] RSVD * [7] Disabled */ -#define CCS_MODE XE_REG(0x14804) +#define CCS_MODE XE_REG(0x14804, XE_REG_OPTION_MASKED) #define CCS_MODE_CSLICE_0_3_MASK REG_GENMASK(11, 0) /* 3 bits per cslice */ #define CCS_MODE_CSLICE_MASK 0x7 /* CCS0-3 + rsvd */ #define CCS_MODE_CSLICE_WIDTH ilog2(CCS_MODE_CSLICE_MASK + 1) diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h index 1189f5a540a8..a9b0091cb7ee 100644 --- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -52,6 +52,7 @@ #define OAG_OABUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */ #define OAG_OACONTROL XE_REG(0xdaf4) +#define OAG_OACONTROL_OA_PES_DISAG_EN REG_GENMASK(27, 22) #define OAG_OACONTROL_OA_CCS_SELECT_MASK REG_GENMASK(18, 16) #define OAG_OACONTROL_OA_COUNTER_SEL_MASK REG_GENMASK(4, 2) #define OAG_OACONTROL_OA_COUNTER_ENABLE REG_BIT(0) diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 23e33ec84902..dfa869f0dddd 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -15,8 +15,6 @@ #define GU_MISC_IRQ_OFFSET 0x444f0 #define GU_MISC_GSE REG_BIT(27) -#define SOFTWARE_FLAGS_SPR33 XE_REG(0x4f084) - #define GU_CNTL_PROTECTED XE_REG(0x10100C) #define DRIVERINT_FLR_DIS REG_BIT(31) @@ -24,11 +22,14 @@ #define LMEM_INIT REG_BIT(7) #define DRIVERFLR REG_BIT(31) +#define XEHP_CLOCK_GATE_DIS XE_REG(0x101014) +#define SGSI_SIDECLK_DIS REG_BIT(17) + #define GU_DEBUG XE_REG(0x101018) #define DRIVERFLR_STATUS REG_BIT(31) -#define XEHP_CLOCK_GATE_DIS XE_REG(0x101014) -#define SGSI_SIDECLK_DIS REG_BIT(17) +#define VIRTUAL_CTRL_REG XE_REG(0x10108c) +#define GUEST_GTT_UPDATE_EN REG_BIT(8) #define XEHP_MTCFG_ADDR XE_REG(0x101800) #define TILE_COUNT REG_GENMASK(15, 8) @@ -66,6 +67,9 @@ #define DISPLAY_IRQ REG_BIT(16) #define GT_DW_IRQ(x) REG_BIT(x) +#define VF_CAP_REG XE_REG(0x1901f8, XE_REG_OPTION_VF) +#define VF_CAP REG_BIT(0) + #define PVC_RP_STATE_CAP XE_REG(0x281014) #endif diff --git a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h deleted file mode 100644 index 017b4ddd1ecf..000000000000 --- a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _REGS_XE_SRIOV_REGS_H_ -#define _REGS_XE_SRIOV_REGS_H_ - -#include "regs/xe_reg_defs.h" - -#define XE2_LMEM_CFG XE_REG(0x48b0) - -#define LMEM_CFG XE_REG(0xcf58) -#define LMEM_EN REG_BIT(31) -#define LMTT_DIR_PTR REG_GENMASK(30, 0) /* in multiples of 64KB */ - -#define VIRTUAL_CTRL_REG XE_REG(0x10108c) -#define GUEST_GTT_UPDATE_EN REG_BIT(8) - -#define VF_CAP_REG XE_REG(0x1901f8, XE_REG_OPTION_VF) -#define VF_CAP REG_BIT(0) - -#endif diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile index 6e58931fddd4..0e3408f4952c 100644 --- a/drivers/gpu/drm/xe/tests/Makefile +++ b/drivers/gpu/drm/xe/tests/Makefile @@ -2,11 +2,7 @@ # "live" kunit tests obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_live_test.o -xe_live_test-y = xe_live_test_mod.o \ - xe_bo_test.o \ - xe_dma_buf_test.o \ - xe_migrate_test.o \ - xe_mocs_test.o +xe_live_test-y = xe_live_test_mod.o # Normal kunit tests obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_test.o diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 9f3c02826464..8dac069483e8 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -6,7 +6,7 @@ #include <kunit/test.h> #include <kunit/visibility.h> -#include "tests/xe_bo_test.h" +#include "tests/xe_kunit_helpers.h" #include "tests/xe_pci_test.h" #include "tests/xe_test.h" @@ -36,7 +36,8 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, /* Optionally clear bo *and* CCS data in VRAM. */ if (clear) { - fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource); + fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (IS_ERR(fence)) { KUNIT_FAIL(test, "Failed to submit bo clear.\n"); return PTR_ERR(fence); @@ -124,7 +125,7 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, kunit_info(test, "Testing system memory\n"); bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags); + bo_flags); if (IS_ERR(bo)) { KUNIT_FAIL(test, "Failed to create bo.\n"); return; @@ -154,12 +155,18 @@ out_unlock: static int ccs_test_run_device(struct xe_device *xe) { - struct kunit *test = xe_cur_kunit(); + struct kunit *test = kunit_get_current_test(); struct xe_tile *tile; int id; if (!xe_device_has_flat_ccs(xe)) { - kunit_info(test, "Skipping non-flat-ccs device.\n"); + kunit_skip(test, "non-flat-ccs device\n"); + return 0; + } + + /* For xe2+ dgfx, we don't handle ccs metadata */ + if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe)) { + kunit_skip(test, "xe2+ dgfx device\n"); return 0; } @@ -177,11 +184,12 @@ static int ccs_test_run_device(struct xe_device *xe) return 0; } -void xe_ccs_migrate_kunit(struct kunit *test) +static void xe_ccs_migrate_kunit(struct kunit *test) { - xe_call_for_each_device(ccs_test_run_device); + struct xe_device *xe = test->priv; + + ccs_test_run_device(xe); } -EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit); static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test) { @@ -198,7 +206,6 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc xe_vm_lock(vm, false); bo = xe_bo_create_user(xe, NULL, vm, 0x10000, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags); xe_vm_unlock(vm); if (IS_ERR(bo)) { @@ -208,7 +215,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc external = xe_bo_create_user(xe, NULL, NULL, 0x10000, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags); + bo_flags); if (IS_ERR(external)) { KUNIT_FAIL(test, "external bo create err=%pe\n", external); goto cleanup_bo; @@ -325,13 +332,12 @@ cleanup_bo: static int evict_test_run_device(struct xe_device *xe) { - struct kunit *test = xe_cur_kunit(); + struct kunit *test = kunit_get_current_test(); struct xe_tile *tile; int id; if (!IS_DGFX(xe)) { - kunit_info(test, "Skipping non-discrete device %s.\n", - dev_name(xe->drm.dev)); + kunit_skip(test, "non-discrete device\n"); return 0; } @@ -345,8 +351,23 @@ static int evict_test_run_device(struct xe_device *xe) return 0; } -void xe_bo_evict_kunit(struct kunit *test) +static void xe_bo_evict_kunit(struct kunit *test) { - xe_call_for_each_device(evict_test_run_device); + struct xe_device *xe = test->priv; + + evict_test_run_device(xe); } -EXPORT_SYMBOL_IF_KUNIT(xe_bo_evict_kunit); + +static struct kunit_case xe_bo_tests[] = { + KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param), + KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param), + {} +}; + +VISIBLE_IF_KUNIT +struct kunit_suite xe_bo_test_suite = { + .name = "xe_bo", + .test_cases = xe_bo_tests, + .init = xe_kunit_helper_xe_device_live_test_init, +}; +EXPORT_SYMBOL_IF_KUNIT(xe_bo_test_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c deleted file mode 100644 index a324cde77db8..000000000000 --- a/drivers/gpu/drm/xe/tests/xe_bo_test.c +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright © 2022 Intel Corporation - */ - -#include "xe_bo_test.h" - -#include <kunit/test.h> - -static struct kunit_case xe_bo_tests[] = { - KUNIT_CASE(xe_ccs_migrate_kunit), - KUNIT_CASE(xe_bo_evict_kunit), - {} -}; - -static struct kunit_suite xe_bo_test_suite = { - .name = "xe_bo", - .test_cases = xe_bo_tests, -}; - -kunit_test_suite(xe_bo_test_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.h b/drivers/gpu/drm/xe/tests/xe_bo_test.h deleted file mode 100644 index 0113ab45066a..000000000000 --- a/drivers/gpu/drm/xe/tests/xe_bo_test.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 AND MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _XE_BO_TEST_H_ -#define _XE_BO_TEST_H_ - -struct kunit; - -void xe_ccs_migrate_kunit(struct kunit *test); -void xe_bo_evict_kunit(struct kunit *test); - -#endif diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index e7f9b531c465..cedd3e88a6fb 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -3,12 +3,12 @@ * Copyright © 2022 Intel Corporation */ -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> #include <kunit/test.h> #include <kunit/visibility.h> -#include "tests/xe_dma_buf_test.h" +#include "tests/xe_kunit_helpers.h" #include "tests/xe_pci_test.h" #include "xe_pci.h" @@ -107,7 +107,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) { - struct kunit *test = xe_cur_kunit(); + struct kunit *test = kunit_get_current_test(); struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv); struct drm_gem_object *import; struct dma_buf *dmabuf; @@ -126,7 +126,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) kunit_info(test, "running %s\n", __func__); bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, params->mem_mask); + params->mem_mask); if (IS_ERR(bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(bo)); @@ -258,7 +258,7 @@ static const struct dma_buf_test_params test_params[] = { static int dma_buf_run_device(struct xe_device *xe) { const struct dma_buf_test_params *params; - struct kunit *test = xe_cur_kunit(); + struct kunit *test = kunit_get_current_test(); xe_pm_runtime_get(xe); for (params = test_params; params->mem_mask; ++params) { @@ -274,8 +274,22 @@ static int dma_buf_run_device(struct xe_device *xe) return 0; } -void xe_dma_buf_kunit(struct kunit *test) +static void xe_dma_buf_kunit(struct kunit *test) { - xe_call_for_each_device(dma_buf_run_device); + struct xe_device *xe = test->priv; + + dma_buf_run_device(xe); } -EXPORT_SYMBOL_IF_KUNIT(xe_dma_buf_kunit); + +static struct kunit_case xe_dma_buf_tests[] = { + KUNIT_CASE_PARAM(xe_dma_buf_kunit, xe_pci_live_device_gen_param), + {} +}; + +VISIBLE_IF_KUNIT +struct kunit_suite xe_dma_buf_test_suite = { + .name = "xe_dma_buf", + .test_cases = xe_dma_buf_tests, + .init = xe_kunit_helper_xe_device_live_test_init, +}; +EXPORT_SYMBOL_IF_KUNIT(xe_dma_buf_test_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c deleted file mode 100644 index 99cdb718b6c6..000000000000 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c +++ /dev/null @@ -1,20 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright © 2022 Intel Corporation - */ - -#include "xe_dma_buf_test.h" - -#include <kunit/test.h> - -static struct kunit_case xe_dma_buf_tests[] = { - KUNIT_CASE(xe_dma_buf_kunit), - {} -}; - -static struct kunit_suite xe_dma_buf_test_suite = { - .name = "xe_dma_buf", - .test_cases = xe_dma_buf_tests, -}; - -kunit_test_suite(xe_dma_buf_test_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h deleted file mode 100644 index e6b464ddd526..000000000000 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 AND MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _XE_DMA_BUF_TEST_H_ -#define _XE_DMA_BUF_TEST_H_ - -struct kunit; - -void xe_dma_buf_kunit(struct kunit *test); - -#endif diff --git a/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c index fefe79b3b75a..bc5156966ce9 100644 --- a/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c +++ b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c @@ -12,7 +12,9 @@ #include "tests/xe_kunit_helpers.h" #include "tests/xe_pci_test.h" +#include "xe_device.h" #include "xe_device_types.h" +#include "xe_pm.h" /** * xe_kunit_helper_alloc_xe_device - Allocate a &xe_device for a KUnit test. @@ -88,3 +90,40 @@ int xe_kunit_helper_xe_device_test_init(struct kunit *test) return 0; } EXPORT_SYMBOL_IF_KUNIT(xe_kunit_helper_xe_device_test_init); + +KUNIT_DEFINE_ACTION_WRAPPER(put_xe_pm_runtime, xe_pm_runtime_put, struct xe_device *); + +/** + * xe_kunit_helper_xe_device_live_test_init - Prepare a &xe_device for + * use in a live KUnit test. + * @test: the &kunit where live &xe_device will be used + * + * This function expects pointer to the &xe_device in the &test.param_value, + * like it is prepared by the &xe_pci_live_device_gen_param and stores that + * pointer as &kunit.priv to allow the test code to access it. + * + * This function makes sure that device is not wedged and then resumes it + * to avoid waking up the device inside the test. It uses deferred cleanup + * action to release a runtime_pm reference. + * + * This function can be used as custom implementation of &kunit_suite.init. + * + * This function uses KUNIT_ASSERT to detect any failures. + * + * Return: Always 0. + */ +int xe_kunit_helper_xe_device_live_test_init(struct kunit *test) +{ + struct xe_device *xe = xe_device_const_cast(test->param_value); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe); + kunit_info(test, "running on %s device\n", xe->info.platform_name); + + KUNIT_ASSERT_FALSE(test, xe_device_wedged(xe)); + xe_pm_runtime_get(xe); + KUNIT_ASSERT_EQ(test, 0, kunit_add_action_or_reset(test, put_xe_pm_runtime, xe)); + + test->priv = xe; + return 0; +} +EXPORT_SYMBOL_IF_KUNIT(xe_kunit_helper_xe_device_live_test_init); diff --git a/drivers/gpu/drm/xe/tests/xe_kunit_helpers.h b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.h index 067a1babf049..83665f7b1254 100644 --- a/drivers/gpu/drm/xe/tests/xe_kunit_helpers.h +++ b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.h @@ -14,4 +14,6 @@ struct xe_device *xe_kunit_helper_alloc_xe_device(struct kunit *test, struct device *dev); int xe_kunit_helper_xe_device_test_init(struct kunit *test); +int xe_kunit_helper_xe_device_live_test_init(struct kunit *test); + #endif diff --git a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c index eb1ea99a5a8b..5f14737c8210 100644 --- a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c +++ b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c @@ -3,6 +3,17 @@ * Copyright © 2023 Intel Corporation */ #include <linux/module.h> +#include <kunit/test.h> + +extern struct kunit_suite xe_bo_test_suite; +extern struct kunit_suite xe_dma_buf_test_suite; +extern struct kunit_suite xe_migrate_test_suite; +extern struct kunit_suite xe_mocs_test_suite; + +kunit_test_suite(xe_bo_test_suite); +kunit_test_suite(xe_dma_buf_test_suite); +kunit_test_suite(xe_migrate_test_suite); +kunit_test_suite(xe_mocs_test_suite); MODULE_AUTHOR("Intel Corporation"); MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 962f6438e219..1a192a2a941b 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -6,7 +6,7 @@ #include <kunit/test.h> #include <kunit/visibility.h> -#include "tests/xe_migrate_test.h" +#include "tests/xe_kunit_helpers.h" #include "tests/xe_pci_test.h" #include "xe_pci.h" @@ -105,7 +105,8 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, } xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); - fence = xe_migrate_clear(m, remote, remote->ttm.resource); + fence = xe_migrate_clear(m, remote, remote->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (!sanity_fence_failed(xe, fence, big ? "Clearing remote big bo" : "Clearing remote small bo", test)) { retval = xe_map_rd(xe, &remote->vmap, 0, u64); @@ -279,7 +280,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) kunit_info(test, "Clearing small buffer object\n"); xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size); expected = 0; - fence = xe_migrate_clear(m, tiny, tiny->ttm.resource); + fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (sanity_fence_failed(xe, fence, "Clearing small bo", test)) goto out; @@ -300,7 +302,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) kunit_info(test, "Clearing big buffer object\n"); xe_map_memset(xe, &big->vmap, 0, 0x11, big->size); expected = 0; - fence = xe_migrate_clear(m, big, big->ttm.resource); + fence = xe_migrate_clear(m, big, big->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (sanity_fence_failed(xe, fence, "Clearing big bo", test)) goto out; @@ -334,7 +337,7 @@ vunmap: static int migrate_test_run_device(struct xe_device *xe) { - struct kunit *test = xe_cur_kunit(); + struct kunit *test = kunit_get_current_test(); struct xe_tile *tile; int id; @@ -354,8 +357,425 @@ static int migrate_test_run_device(struct xe_device *xe) return 0; } -void xe_migrate_sanity_kunit(struct kunit *test) +static void xe_migrate_sanity_kunit(struct kunit *test) { - xe_call_for_each_device(migrate_test_run_device); + struct xe_device *xe = test->priv; + + migrate_test_run_device(xe); +} + +static struct dma_fence *blt_copy(struct xe_tile *tile, + struct xe_bo *src_bo, struct xe_bo *dst_bo, + bool copy_only_ccs, const char *str, struct kunit *test) +{ + struct xe_gt *gt = tile->primary_gt; + struct xe_migrate *m = tile->migrate; + struct xe_device *xe = gt_to_xe(gt); + struct dma_fence *fence = NULL; + u64 size = src_bo->size; + struct xe_res_cursor src_it, dst_it; + struct ttm_resource *src = src_bo->ttm.resource, *dst = dst_bo->ttm.resource; + u64 src_L0_ofs, dst_L0_ofs; + u32 src_L0_pt, dst_L0_pt; + u64 src_L0, dst_L0; + int err; + bool src_is_vram = mem_type_is_vram(src->mem_type); + bool dst_is_vram = mem_type_is_vram(dst->mem_type); + + if (!src_is_vram) + xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it); + else + xe_res_first(src, 0, size, &src_it); + + if (!dst_is_vram) + xe_res_first_sg(xe_bo_sg(dst_bo), 0, size, &dst_it); + else + xe_res_first(dst, 0, size, &dst_it); + + while (size) { + u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */ + struct xe_sched_job *job; + struct xe_bb *bb; + u32 flush_flags = 0; + u32 update_idx; + u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; + u32 pte_flags; + + src_L0 = xe_migrate_res_sizes(m, &src_it); + dst_L0 = xe_migrate_res_sizes(m, &dst_it); + + src_L0 = min(src_L0, dst_L0); + + pte_flags = src_is_vram ? (PTE_UPDATE_FLAG_IS_VRAM | + PTE_UPDATE_FLAG_IS_COMP_PTE) : 0; + batch_size += pte_update_size(m, pte_flags, src, &src_it, &src_L0, + &src_L0_ofs, &src_L0_pt, 0, 0, + avail_pts); + + pte_flags = dst_is_vram ? (PTE_UPDATE_FLAG_IS_VRAM | + PTE_UPDATE_FLAG_IS_COMP_PTE) : 0; + batch_size += pte_update_size(m, pte_flags, dst, &dst_it, &src_L0, + &dst_L0_ofs, &dst_L0_pt, 0, + avail_pts, avail_pts); + + /* Add copy commands size here */ + batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) + + ((xe_device_has_flat_ccs(xe) && copy_only_ccs) ? EMIT_COPY_CCS_DW : 0); + + bb = xe_bb_new(gt, batch_size, xe->info.has_usm); + if (IS_ERR(bb)) { + err = PTR_ERR(bb); + goto err_sync; + } + + if (src_is_vram) + xe_res_next(&src_it, src_L0); + else + emit_pte(m, bb, src_L0_pt, src_is_vram, false, + &src_it, src_L0, src); + + if (dst_is_vram) + xe_res_next(&dst_it, src_L0); + else + emit_pte(m, bb, dst_L0_pt, dst_is_vram, false, + &dst_it, src_L0, dst); + + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + update_idx = bb->len; + if (!copy_only_ccs) + emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE); + + if (copy_only_ccs) + flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, + src_is_vram, dst_L0_ofs, + dst_is_vram, src_L0, dst_L0_ofs, + copy_only_ccs); + + job = xe_bb_create_migration_job(m->q, bb, + xe_migrate_batch_base(m, xe->info.has_usm), + update_idx); + if (IS_ERR(job)) { + err = PTR_ERR(job); + goto err; + } + + xe_sched_job_add_migrate_flush(job, flush_flags); + + mutex_lock(&m->job_mutex); + xe_sched_job_arm(job); + dma_fence_put(fence); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + dma_fence_put(m->fence); + m->fence = dma_fence_get(fence); + + mutex_unlock(&m->job_mutex); + + xe_bb_free(bb, fence); + size -= src_L0; + continue; + +err: + xe_bb_free(bb, NULL); + +err_sync: + if (fence) { + dma_fence_wait(fence, false); + dma_fence_put(fence); + } + return ERR_PTR(err); + } + + return fence; +} + +static void test_migrate(struct xe_device *xe, struct xe_tile *tile, + struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct xe_bo *ccs_bo, + struct kunit *test) +{ + struct dma_fence *fence; + u64 expected, retval; + long timeout; + long ret; + + expected = 0xd0d0d0d0d0d0d0d0; + xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size); + + fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test); + if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) { + retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64); + if (retval == expected) + KUNIT_FAIL(test, "Sanity check failed: VRAM must have compressed value\n"); + } + dma_fence_put(fence); + + kunit_info(test, "Evict vram buffer object\n"); + ret = xe_bo_evict(vram_bo, true); + if (ret) { + KUNIT_FAIL(test, "Failed to evict bo.\n"); + return; + } + + ret = xe_bo_vmap(vram_bo); + if (ret) { + KUNIT_FAIL(test, "Failed to vmap vram bo: %li\n", ret); + return; + } + + retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64); + check(retval, expected, "Clear evicted vram data first value", test); + retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64); + check(retval, expected, "Clear evicted vram data last value", test); + + fence = blt_copy(tile, vram_bo, ccs_bo, + true, "Blit surf copy from vram to sysmem", test); + if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) { + retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64); + check(retval, 0, "Clear ccs data first value", test); + + retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64); + check(retval, 0, "Clear ccs data last value", test); + } + dma_fence_put(fence); + + kunit_info(test, "Restore vram buffer object\n"); + ret = xe_bo_validate(vram_bo, NULL, false); + if (ret) { + KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret); + return; + } + + /* Sync all migration blits */ + timeout = dma_resv_wait_timeout(vram_bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL, + true, + 5 * HZ); + if (timeout <= 0) { + KUNIT_FAIL(test, "Failed to sync bo eviction.\n"); + return; + } + + ret = xe_bo_vmap(vram_bo); + if (ret) { + KUNIT_FAIL(test, "Failed to vmap vram bo: %li\n", ret); + return; + } + + retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64); + check(retval, expected, "Restored value must be equal to initial value", test); + retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64); + check(retval, expected, "Restored value must be equal to initial value", test); + + fence = blt_copy(tile, vram_bo, ccs_bo, + true, "Blit surf copy from vram to sysmem", test); + if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) { + retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64); + check(retval, 0, "Clear ccs data first value", test); + retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64); + check(retval, 0, "Clear ccs data last value", test); + } + dma_fence_put(fence); } -EXPORT_SYMBOL_IF_KUNIT(xe_migrate_sanity_kunit); + +static void test_clear(struct xe_device *xe, struct xe_tile *tile, + struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct kunit *test) +{ + struct dma_fence *fence; + u64 expected, retval; + + expected = 0xd0d0d0d0d0d0d0d0; + xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size); + + fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test); + if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) { + retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64); + if (retval == expected) + KUNIT_FAIL(test, "Sanity check failed: VRAM must have compressed value\n"); + } + dma_fence_put(fence); + + fence = blt_copy(tile, vram_bo, sys_bo, false, "Blit copy from vram to sysmem", test); + if (!sanity_fence_failed(xe, fence, "Blit copy from vram to sysmem", test)) { + retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); + check(retval, expected, "Decompressed value must be equal to initial value", test); + retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + check(retval, expected, "Decompressed value must be equal to initial value", test); + } + dma_fence_put(fence); + + kunit_info(test, "Clear vram buffer object\n"); + expected = 0x0000000000000000; + fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); + if (sanity_fence_failed(xe, fence, "Clear vram_bo", test)) + return; + dma_fence_put(fence); + + fence = blt_copy(tile, vram_bo, sys_bo, + false, "Blit copy from vram to sysmem", test); + if (!sanity_fence_failed(xe, fence, "Clear main buffer data", test)) { + retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); + check(retval, expected, "Clear main buffer first value", test); + retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + check(retval, expected, "Clear main buffer last value", test); + } + dma_fence_put(fence); + + fence = blt_copy(tile, vram_bo, sys_bo, + true, "Blit surf copy from vram to sysmem", test); + if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) { + retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); + check(retval, expected, "Clear ccs data first value", test); + retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + check(retval, expected, "Clear ccs data last value", test); + } + dma_fence_put(fence); +} + +static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, + struct kunit *test) +{ + struct xe_bo *sys_bo, *vram_bo = NULL, *ccs_bo = NULL; + unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile); + long ret; + + sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, + DRM_XE_GEM_CPU_CACHING_WC, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS); + + if (IS_ERR(sys_bo)) { + KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", + PTR_ERR(sys_bo)); + return; + } + + xe_bo_lock(sys_bo, false); + ret = xe_bo_validate(sys_bo, NULL, false); + if (ret) { + KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret); + goto free_sysbo; + } + + ret = xe_bo_vmap(sys_bo); + if (ret) { + KUNIT_FAIL(test, "Failed to vmap system bo: %li\n", ret); + goto free_sysbo; + } + xe_bo_unlock(sys_bo); + + ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, + DRM_XE_GEM_CPU_CACHING_WC, + bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); + + if (IS_ERR(ccs_bo)) { + KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", + PTR_ERR(ccs_bo)); + return; + } + + xe_bo_lock(ccs_bo, false); + ret = xe_bo_validate(ccs_bo, NULL, false); + if (ret) { + KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret); + goto free_ccsbo; + } + + ret = xe_bo_vmap(ccs_bo); + if (ret) { + KUNIT_FAIL(test, "Failed to vmap system bo: %li\n", ret); + goto free_ccsbo; + } + xe_bo_unlock(ccs_bo); + + vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, + DRM_XE_GEM_CPU_CACHING_WC, + bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); + if (IS_ERR(vram_bo)) { + KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", + PTR_ERR(vram_bo)); + return; + } + + xe_bo_lock(vram_bo, false); + ret = xe_bo_validate(vram_bo, NULL, false); + if (ret) { + KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret); + goto free_vrambo; + } + + ret = xe_bo_vmap(vram_bo); + if (ret) { + KUNIT_FAIL(test, "Failed to vmap vram bo: %li\n", ret); + goto free_vrambo; + } + + test_clear(xe, tile, sys_bo, vram_bo, test); + test_migrate(xe, tile, sys_bo, vram_bo, ccs_bo, test); + xe_bo_unlock(vram_bo); + + xe_bo_lock(vram_bo, false); + xe_bo_vunmap(vram_bo); + xe_bo_unlock(vram_bo); + + xe_bo_lock(ccs_bo, false); + xe_bo_vunmap(ccs_bo); + xe_bo_unlock(ccs_bo); + + xe_bo_lock(sys_bo, false); + xe_bo_vunmap(sys_bo); + xe_bo_unlock(sys_bo); +free_vrambo: + xe_bo_put(vram_bo); +free_ccsbo: + xe_bo_put(ccs_bo); +free_sysbo: + xe_bo_put(sys_bo); +} + +static int validate_ccs_test_run_device(struct xe_device *xe) +{ + struct kunit *test = kunit_get_current_test(); + struct xe_tile *tile; + int id; + + if (!xe_device_has_flat_ccs(xe)) { + kunit_skip(test, "non-flat-ccs device\n"); + return 0; + } + + if (!(GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))) { + kunit_skip(test, "non-xe2 discrete device\n"); + return 0; + } + + xe_pm_runtime_get(xe); + + for_each_tile(tile, xe, id) + validate_ccs_test_run_tile(xe, tile, test); + + xe_pm_runtime_put(xe); + + return 0; +} + +static void xe_validate_ccs_kunit(struct kunit *test) +{ + struct xe_device *xe = test->priv; + + validate_ccs_test_run_device(xe); +} + +static struct kunit_case xe_migrate_tests[] = { + KUNIT_CASE_PARAM(xe_migrate_sanity_kunit, xe_pci_live_device_gen_param), + KUNIT_CASE_PARAM(xe_validate_ccs_kunit, xe_pci_live_device_gen_param), + {} +}; + +VISIBLE_IF_KUNIT +struct kunit_suite xe_migrate_test_suite = { + .name = "xe_migrate", + .test_cases = xe_migrate_tests, + .init = xe_kunit_helper_xe_device_live_test_init, +}; +EXPORT_SYMBOL_IF_KUNIT(xe_migrate_test_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c deleted file mode 100644 index eb0d8963419c..000000000000 --- a/drivers/gpu/drm/xe/tests/xe_migrate_test.c +++ /dev/null @@ -1,20 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright © 2022 Intel Corporation - */ - -#include "xe_migrate_test.h" - -#include <kunit/test.h> - -static struct kunit_case xe_migrate_tests[] = { - KUNIT_CASE(xe_migrate_sanity_kunit), - {} -}; - -static struct kunit_suite xe_migrate_test_suite = { - .name = "xe_migrate", - .test_cases = xe_migrate_tests, -}; - -kunit_test_suite(xe_migrate_test_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.h b/drivers/gpu/drm/xe/tests/xe_migrate_test.h deleted file mode 100644 index 7c645c66824f..000000000000 --- a/drivers/gpu/drm/xe/tests/xe_migrate_test.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 AND MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _XE_MIGRATE_TEST_H_ -#define _XE_MIGRATE_TEST_H_ - -struct kunit; - -void xe_migrate_sanity_kunit(struct kunit *test); - -#endif diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index 67c65e88c384..79be73b4a02b 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -6,7 +6,7 @@ #include <kunit/test.h> #include <kunit/visibility.h> -#include "tests/xe_mocs_test.h" +#include "tests/xe_kunit_helpers.h" #include "tests/xe_pci_test.h" #include "tests/xe_test.h" @@ -23,7 +23,7 @@ struct live_mocs { static int live_mocs_init(struct live_mocs *arg, struct xe_gt *gt) { unsigned int flags; - struct kunit *test = xe_cur_kunit(); + struct kunit *test = kunit_get_current_test(); memset(arg, 0, sizeof(*arg)); @@ -41,7 +41,7 @@ static int live_mocs_init(struct live_mocs *arg, struct xe_gt *gt) static void read_l3cc_table(struct xe_gt *gt, const struct xe_mocs_info *info) { - struct kunit *test = xe_cur_kunit(); + struct kunit *test = kunit_get_current_test(); u32 l3cc, l3cc_expected; unsigned int i; u32 reg_val; @@ -78,7 +78,7 @@ static void read_l3cc_table(struct xe_gt *gt, static void read_mocs_table(struct xe_gt *gt, const struct xe_mocs_info *info) { - struct kunit *test = xe_cur_kunit(); + struct kunit *test = kunit_get_current_test(); u32 mocs, mocs_expected; unsigned int i; u32 reg_val; @@ -134,11 +134,15 @@ static int mocs_kernel_test_run_device(struct xe_device *xe) return 0; } -void xe_live_mocs_kernel_kunit(struct kunit *test) +static void xe_live_mocs_kernel_kunit(struct kunit *test) { - xe_call_for_each_device(mocs_kernel_test_run_device); + struct xe_device *xe = test->priv; + + if (IS_SRIOV_VF(xe)) + kunit_skip(test, "this test is N/A for VF"); + + mocs_kernel_test_run_device(xe); } -EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_kernel_kunit); static int mocs_reset_test_run_device(struct xe_device *xe) { @@ -148,7 +152,7 @@ static int mocs_reset_test_run_device(struct xe_device *xe) struct xe_gt *gt; unsigned int flags; int id; - struct kunit *test = xe_cur_kunit(); + struct kunit *test = kunit_get_current_test(); xe_pm_runtime_get(xe); @@ -175,8 +179,26 @@ static int mocs_reset_test_run_device(struct xe_device *xe) return 0; } -void xe_live_mocs_reset_kunit(struct kunit *test) +static void xe_live_mocs_reset_kunit(struct kunit *test) { - xe_call_for_each_device(mocs_reset_test_run_device); + struct xe_device *xe = test->priv; + + if (IS_SRIOV_VF(xe)) + kunit_skip(test, "this test is N/A for VF"); + + mocs_reset_test_run_device(xe); } -EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_reset_kunit); + +static struct kunit_case xe_mocs_tests[] = { + KUNIT_CASE_PARAM(xe_live_mocs_kernel_kunit, xe_pci_live_device_gen_param), + KUNIT_CASE_PARAM(xe_live_mocs_reset_kunit, xe_pci_live_device_gen_param), + {} +}; + +VISIBLE_IF_KUNIT +struct kunit_suite xe_mocs_test_suite = { + .name = "xe_mocs", + .test_cases = xe_mocs_tests, + .init = xe_kunit_helper_xe_device_live_test_init, +}; +EXPORT_SYMBOL_IF_KUNIT(xe_mocs_test_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.c b/drivers/gpu/drm/xe/tests/xe_mocs_test.c deleted file mode 100644 index 6315886b659e..000000000000 --- a/drivers/gpu/drm/xe/tests/xe_mocs_test.c +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright © 2022 Intel Corporation - */ - -#include "xe_mocs_test.h" - -#include <kunit/test.h> - -static struct kunit_case xe_mocs_tests[] = { - KUNIT_CASE(xe_live_mocs_kernel_kunit), - KUNIT_CASE(xe_live_mocs_reset_kunit), - {} -}; - -static struct kunit_suite xe_mocs_test_suite = { - .name = "xe_mocs", - .test_cases = xe_mocs_tests, -}; - -kunit_test_suite(xe_mocs_test_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.h b/drivers/gpu/drm/xe/tests/xe_mocs_test.h deleted file mode 100644 index e7699d495411..000000000000 --- a/drivers/gpu/drm/xe/tests/xe_mocs_test.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 AND MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _XE_MOCS_TEST_H_ -#define _XE_MOCS_TEST_H_ - -struct kunit; - -void xe_live_mocs_kernel_kunit(struct kunit *test); -void xe_live_mocs_reset_kunit(struct kunit *test); - -#endif diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index f62809ca8b51..67404863087e 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -12,58 +12,6 @@ #include <kunit/test-bug.h> #include <kunit/visibility.h> -struct kunit_test_data { - int ndevs; - xe_device_fn xe_fn; -}; - -static int dev_to_xe_device_fn(struct device *dev, void *__data) - -{ - struct drm_device *drm = dev_get_drvdata(dev); - struct kunit_test_data *data = __data; - int ret = 0; - int idx; - - data->ndevs++; - - if (drm_dev_enter(drm, &idx)) - ret = data->xe_fn(to_xe_device(dev_get_drvdata(dev))); - drm_dev_exit(idx); - - return ret; -} - -/** - * xe_call_for_each_device - Iterate over all devices this driver binds to - * @xe_fn: Function to call for each device. - * - * This function iterated over all devices this driver binds to, and calls - * @xe_fn: for each one of them. If the called function returns anything else - * than 0, iteration is stopped and the return value is returned by this - * function. Across each function call, drm_dev_enter() / drm_dev_exit() is - * called for the corresponding drm device. - * - * Return: Number of devices iterated or - * the error code of a call to @xe_fn returning an error code. - */ -int xe_call_for_each_device(xe_device_fn xe_fn) -{ - int ret; - struct kunit_test_data data = { - .xe_fn = xe_fn, - .ndevs = 0, - }; - - ret = driver_for_each_device(&xe_pci_driver.driver, NULL, - &data, dev_to_xe_device_fn); - - if (!data.ndevs) - kunit_skip(current->kunit_test, "test runs only on hardware\n"); - - return ret ?: data.ndevs; -} - /** * xe_call_for_each_graphics_ip - Iterate over all recognized graphics IPs * @xe_fn: Function to call for each device. @@ -167,3 +115,33 @@ done: return 0; } EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init); + +/** + * xe_pci_live_device_gen_param - Helper to iterate Xe devices as KUnit parameters + * @prev: the previously returned value, or NULL for the first iteration + * @desc: the buffer for a parameter name + * + * Iterates over the available Xe devices on the system. Uses the device name + * as the parameter name. + * + * To be used only as a parameter generator function in &KUNIT_CASE_PARAM. + * + * Return: pointer to the next &struct xe_device ready to be used as a parameter + * or NULL if there are no more Xe devices on the system. + */ +const void *xe_pci_live_device_gen_param(const void *prev, char *desc) +{ + const struct xe_device *xe = prev; + struct device *dev = xe ? xe->drm.dev : NULL; + struct device *next; + + next = driver_find_next_device(&xe_pci_driver.driver, dev); + if (dev) + put_device(dev); + if (!next) + return NULL; + + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s", dev_name(next)); + return pdev_to_xe_device(to_pci_dev(next)); +} +EXPORT_SYMBOL_IF_KUNIT(xe_pci_live_device_gen_param); diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c index a6705a536391..744a37583d2d 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.c +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c @@ -16,7 +16,7 @@ static void check_graphics_ip(const struct xe_graphics_desc *graphics) { - struct kunit *test = xe_cur_kunit(); + struct kunit *test = kunit_get_current_test(); u64 mask = graphics->hw_engine_mask; /* RCS, CCS, and BCS engines are allowed on the graphics IP */ @@ -30,7 +30,7 @@ static void check_graphics_ip(const struct xe_graphics_desc *graphics) static void check_media_ip(const struct xe_media_desc *media) { - struct kunit *test = xe_cur_kunit(); + struct kunit *test = kunit_get_current_test(); u64 mask = media->hw_engine_mask; /* VCS, VECS and GSCCS engines are allowed on the media IP */ diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h index f40dcec83992..ede46800aff1 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.h +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h @@ -19,7 +19,6 @@ typedef int (*xe_device_fn)(struct xe_device *); typedef void (*xe_graphics_fn)(const struct xe_graphics_desc *); typedef void (*xe_media_fn)(const struct xe_media_desc *); -int xe_call_for_each_device(xe_device_fn xe_fn); void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn); void xe_call_for_each_media_ip(xe_media_fn xe_fn); @@ -35,4 +34,6 @@ struct xe_pci_fake_data { int xe_pci_fake_device_init(struct xe_device *xe); +const void *xe_pci_live_device_gen_param(const void *prev, char *desc); + #endif diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index f217445c246a..36a3b5420fef 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -31,16 +31,23 @@ #undef XE_REG_MCR #define XE_REG_MCR(...) XE_REG(__VA_ARGS__, .mcr = 1) -struct rtp_test_case { +struct rtp_to_sr_test_case { const char *name; struct xe_reg expected_reg; u32 expected_set_bits; u32 expected_clr_bits; - unsigned long expected_count; + unsigned long expected_count_sr_entries; unsigned int expected_sr_errors; + unsigned long expected_active; const struct xe_rtp_entry_sr *entries; }; +struct rtp_test_case { + const char *name; + unsigned long expected_active; + const struct xe_rtp_entry *entries; +}; + static bool match_yes(const struct xe_gt *gt, const struct xe_hw_engine *hwe) { return true; @@ -51,13 +58,14 @@ static bool match_no(const struct xe_gt *gt, const struct xe_hw_engine *hwe) return false; } -static const struct rtp_test_case cases[] = { +static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { { .name = "coalesce-same-reg", .expected_reg = REGULAR_REG1, .expected_set_bits = REG_BIT(0) | REG_BIT(1), .expected_clr_bits = REG_BIT(0) | REG_BIT(1), - .expected_count = 1, + .expected_active = BIT(0) | BIT(1), + .expected_count_sr_entries = 1, /* Different bits on the same register: create a single entry */ .entries = (const struct xe_rtp_entry_sr[]) { { XE_RTP_NAME("basic-1"), @@ -76,7 +84,8 @@ static const struct rtp_test_case cases[] = { .expected_reg = REGULAR_REG1, .expected_set_bits = REG_BIT(0), .expected_clr_bits = REG_BIT(0), - .expected_count = 1, + .expected_active = BIT(0), + .expected_count_sr_entries = 1, /* Don't coalesce second entry since rules don't match */ .entries = (const struct xe_rtp_entry_sr[]) { { XE_RTP_NAME("basic-1"), @@ -95,7 +104,8 @@ static const struct rtp_test_case cases[] = { .expected_reg = REGULAR_REG1, .expected_set_bits = REG_BIT(0) | REG_BIT(1) | REG_BIT(2), .expected_clr_bits = REG_BIT(0) | REG_BIT(1) | REG_BIT(2), - .expected_count = 1, + .expected_active = BIT(0) | BIT(1) | BIT(2), + .expected_count_sr_entries = 1, .entries = (const struct xe_rtp_entry_sr[]) { { XE_RTP_NAME("first"), XE_RTP_RULES(FUNC(match_yes), OR, FUNC(match_no)), @@ -121,7 +131,7 @@ static const struct rtp_test_case cases[] = { { .name = "match-or-xfail", .expected_reg = REGULAR_REG1, - .expected_count = 0, + .expected_count_sr_entries = 0, .entries = (const struct xe_rtp_entry_sr[]) { { XE_RTP_NAME("leading-or"), XE_RTP_RULES(OR, FUNC(match_yes)), @@ -148,7 +158,8 @@ static const struct rtp_test_case cases[] = { .expected_reg = REGULAR_REG1, .expected_set_bits = REG_BIT(0), .expected_clr_bits = REG_BIT(0), - .expected_count = 1, + .expected_active = BIT(0), + .expected_count_sr_entries = 1, /* Don't coalesce second entry due to one of the rules */ .entries = (const struct xe_rtp_entry_sr[]) { { XE_RTP_NAME("basic-1"), @@ -167,7 +178,8 @@ static const struct rtp_test_case cases[] = { .expected_reg = REGULAR_REG1, .expected_set_bits = REG_BIT(0), .expected_clr_bits = REG_BIT(0), - .expected_count = 2, + .expected_active = BIT(0) | BIT(1), + .expected_count_sr_entries = 2, /* Same bits on different registers are not coalesced */ .entries = (const struct xe_rtp_entry_sr[]) { { XE_RTP_NAME("basic-1"), @@ -186,7 +198,8 @@ static const struct rtp_test_case cases[] = { .expected_reg = REGULAR_REG1, .expected_set_bits = REG_BIT(0), .expected_clr_bits = REG_BIT(1) | REG_BIT(0), - .expected_count = 1, + .expected_active = BIT(0) | BIT(1), + .expected_count_sr_entries = 1, /* Check clr vs set actions on different bits */ .entries = (const struct xe_rtp_entry_sr[]) { { XE_RTP_NAME("basic-1"), @@ -207,7 +220,8 @@ static const struct rtp_test_case cases[] = { .expected_reg = REGULAR_REG1, .expected_set_bits = TEMP_FIELD, .expected_clr_bits = TEMP_MASK, - .expected_count = 1, + .expected_active = BIT(0), + .expected_count_sr_entries = 1, /* Check FIELD_SET works */ .entries = (const struct xe_rtp_entry_sr[]) { { XE_RTP_NAME("basic-1"), @@ -225,7 +239,8 @@ static const struct rtp_test_case cases[] = { .expected_reg = REGULAR_REG1, .expected_set_bits = REG_BIT(0), .expected_clr_bits = REG_BIT(0), - .expected_count = 1, + .expected_active = BIT(0) | BIT(1), + .expected_count_sr_entries = 1, .expected_sr_errors = 1, .entries = (const struct xe_rtp_entry_sr[]) { { XE_RTP_NAME("basic-1"), @@ -245,7 +260,8 @@ static const struct rtp_test_case cases[] = { .expected_reg = REGULAR_REG1, .expected_set_bits = REG_BIT(0), .expected_clr_bits = REG_BIT(0), - .expected_count = 1, + .expected_active = BIT(0) | BIT(1), + .expected_count_sr_entries = 1, .expected_sr_errors = 1, .entries = (const struct xe_rtp_entry_sr[]) { { XE_RTP_NAME("basic-1"), @@ -265,7 +281,8 @@ static const struct rtp_test_case cases[] = { .expected_reg = REGULAR_REG1, .expected_set_bits = REG_BIT(0), .expected_clr_bits = REG_BIT(0), - .expected_count = 1, + .expected_active = BIT(0) | BIT(1) | BIT(2), + .expected_count_sr_entries = 1, .expected_sr_errors = 2, .entries = (const struct xe_rtp_entry_sr[]) { { XE_RTP_NAME("basic-1"), @@ -287,28 +304,35 @@ static const struct rtp_test_case cases[] = { }, }; -static void xe_rtp_process_tests(struct kunit *test) +static void xe_rtp_process_to_sr_tests(struct kunit *test) { - const struct rtp_test_case *param = test->param_value; + const struct rtp_to_sr_test_case *param = test->param_value; struct xe_device *xe = test->priv; struct xe_gt *gt = xe_device_get_root_tile(xe)->primary_gt; struct xe_reg_sr *reg_sr = >->reg_sr; const struct xe_reg_sr_entry *sre, *sr_entry = NULL; struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); - unsigned long idx, count = 0; + unsigned long idx, count_sr_entries = 0, count_rtp_entries = 0, active = 0; + + xe_reg_sr_init(reg_sr, "xe_rtp_to_sr_tests", xe); + + while (param->entries[count_rtp_entries].rules) + count_rtp_entries++; - xe_reg_sr_init(reg_sr, "xe_rtp_tests", xe); + xe_rtp_process_ctx_enable_active_tracking(&ctx, &active, count_rtp_entries); xe_rtp_process_to_sr(&ctx, param->entries, reg_sr); xa_for_each(®_sr->xa, idx, sre) { if (idx == param->expected_reg.addr) sr_entry = sre; - count++; + count_sr_entries++; } - KUNIT_EXPECT_EQ(test, count, param->expected_count); - if (count) { + KUNIT_EXPECT_EQ(test, active, param->expected_active); + + KUNIT_EXPECT_EQ(test, count_sr_entries, param->expected_count_sr_entries); + if (count_sr_entries) { KUNIT_EXPECT_EQ(test, sr_entry->clr_bits, param->expected_clr_bits); KUNIT_EXPECT_EQ(test, sr_entry->set_bits, param->expected_set_bits); KUNIT_EXPECT_EQ(test, sr_entry->reg.raw, param->expected_reg.raw); @@ -319,12 +343,162 @@ static void xe_rtp_process_tests(struct kunit *test) KUNIT_EXPECT_EQ(test, reg_sr->errors, param->expected_sr_errors); } +/* + * Entries below follow the logic used with xe_wa_oob.rules: + * 1) Entries with empty name are OR'ed: all entries marked active since the + * last entry with a name + * 2) There are no action associated with rules + */ +static const struct rtp_test_case rtp_cases[] = { + { + .name = "active1", + .expected_active = BIT(0), + .entries = (const struct xe_rtp_entry[]) { + { XE_RTP_NAME("r1"), + XE_RTP_RULES(FUNC(match_yes)), + }, + {} + }, + }, + { + .name = "active2", + .expected_active = BIT(0) | BIT(1), + .entries = (const struct xe_rtp_entry[]) { + { XE_RTP_NAME("r1"), + XE_RTP_RULES(FUNC(match_yes)), + }, + { XE_RTP_NAME("r2"), + XE_RTP_RULES(FUNC(match_yes)), + }, + {} + }, + }, + { + .name = "active-inactive", + .expected_active = BIT(0), + .entries = (const struct xe_rtp_entry[]) { + { XE_RTP_NAME("r1"), + XE_RTP_RULES(FUNC(match_yes)), + }, + { XE_RTP_NAME("r2"), + XE_RTP_RULES(FUNC(match_no)), + }, + {} + }, + }, + { + .name = "inactive-active", + .expected_active = BIT(1), + .entries = (const struct xe_rtp_entry[]) { + { XE_RTP_NAME("r1"), + XE_RTP_RULES(FUNC(match_no)), + }, + { XE_RTP_NAME("r2"), + XE_RTP_RULES(FUNC(match_yes)), + }, + {} + }, + }, + { + .name = "inactive-1st_or_active-inactive", + .expected_active = BIT(1), + .entries = (const struct xe_rtp_entry[]) { + { XE_RTP_NAME("r1"), + XE_RTP_RULES(FUNC(match_no)), + }, + { XE_RTP_NAME("r2_or_conditions"), + XE_RTP_RULES(FUNC(match_yes), OR, + FUNC(match_no), OR, + FUNC(match_no)) }, + { XE_RTP_NAME("r3"), + XE_RTP_RULES(FUNC(match_no)), + }, + {} + }, + }, + { + .name = "inactive-2nd_or_active-inactive", + .expected_active = BIT(1), + .entries = (const struct xe_rtp_entry[]) { + { XE_RTP_NAME("r1"), + XE_RTP_RULES(FUNC(match_no)), + }, + { XE_RTP_NAME("r2_or_conditions"), + XE_RTP_RULES(FUNC(match_no), OR, + FUNC(match_yes), OR, + FUNC(match_no)) }, + { XE_RTP_NAME("r3"), + XE_RTP_RULES(FUNC(match_no)), + }, + {} + }, + }, + { + .name = "inactive-last_or_active-inactive", + .expected_active = BIT(1), + .entries = (const struct xe_rtp_entry[]) { + { XE_RTP_NAME("r1"), + XE_RTP_RULES(FUNC(match_no)), + }, + { XE_RTP_NAME("r2_or_conditions"), + XE_RTP_RULES(FUNC(match_no), OR, + FUNC(match_no), OR, + FUNC(match_yes)) }, + { XE_RTP_NAME("r3"), + XE_RTP_RULES(FUNC(match_no)), + }, + {} + }, + }, + { + .name = "inactive-no_or_active-inactive", + .expected_active = 0, + .entries = (const struct xe_rtp_entry[]) { + { XE_RTP_NAME("r1"), + XE_RTP_RULES(FUNC(match_no)), + }, + { XE_RTP_NAME("r2_or_conditions"), + XE_RTP_RULES(FUNC(match_no), OR, + FUNC(match_no), OR, + FUNC(match_no)) }, + { XE_RTP_NAME("r3"), + XE_RTP_RULES(FUNC(match_no)), + }, + {} + }, + }, +}; + +static void xe_rtp_process_tests(struct kunit *test) +{ + const struct rtp_test_case *param = test->param_value; + struct xe_device *xe = test->priv; + struct xe_gt *gt = xe_device_get_root_tile(xe)->primary_gt; + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); + unsigned long count_rtp_entries = 0, active = 0; + + while (param->entries[count_rtp_entries].rules) + count_rtp_entries++; + + xe_rtp_process_ctx_enable_active_tracking(&ctx, &active, count_rtp_entries); + xe_rtp_process(&ctx, param->entries); + + KUNIT_EXPECT_EQ(test, active, param->expected_active); +} + +static void rtp_to_sr_desc(const struct rtp_to_sr_test_case *t, char *desc) +{ + strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE); +} + +KUNIT_ARRAY_PARAM(rtp_to_sr, rtp_to_sr_cases, rtp_to_sr_desc); + static void rtp_desc(const struct rtp_test_case *t, char *desc) { strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE); } -KUNIT_ARRAY_PARAM(rtp, cases, rtp_desc); +KUNIT_ARRAY_PARAM(rtp, rtp_cases, rtp_desc); static int xe_rtp_test_init(struct kunit *test) { @@ -357,6 +531,7 @@ static void xe_rtp_test_exit(struct kunit *test) } static struct kunit_case xe_rtp_tests[] = { + KUNIT_CASE_PARAM(xe_rtp_process_to_sr_tests, rtp_to_sr_gen_params), KUNIT_CASE_PARAM(xe_rtp_process_tests, rtp_gen_params), {} }; diff --git a/drivers/gpu/drm/xe/tests/xe_test.h b/drivers/gpu/drm/xe/tests/xe_test.h index 7a1ae213e750..9c23ad9dba8d 100644 --- a/drivers/gpu/drm/xe/tests/xe_test.h +++ b/drivers/gpu/drm/xe/tests/xe_test.h @@ -9,8 +9,8 @@ #include <linux/types.h> #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) -#include <linux/sched.h> #include <kunit/test.h> +#include <kunit/test-bug.h> /* * Each test that provides a kunit private test structure, place a test id @@ -31,8 +31,6 @@ struct xe_test_priv { #define XE_TEST_DECLARE(x) x #define XE_TEST_ONLY(x) unlikely(x) -#define XE_TEST_EXPORT -#define xe_cur_kunit() current->kunit_test /** * xe_cur_kunit_priv - Obtain the struct xe_test_priv pointed to by @@ -48,10 +46,10 @@ xe_cur_kunit_priv(enum xe_test_priv_id id) { struct xe_test_priv *priv; - if (!xe_cur_kunit()) + if (!kunit_get_current_test()) return NULL; - priv = xe_cur_kunit()->priv; + priv = kunit_get_current_test()->priv; return priv->id == id ? priv : NULL; } @@ -59,8 +57,6 @@ xe_cur_kunit_priv(enum xe_test_priv_id id) #define XE_TEST_DECLARE(x) #define XE_TEST_ONLY(x) 0 -#define XE_TEST_EXPORT static -#define xe_cur_kunit() NULL #define xe_cur_kunit_priv(_id) NULL #endif diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c index 9d0c715142b9..c96d1fe34151 100644 --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -74,6 +74,7 @@ static const struct platform_test_case cases[] = { GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0), GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0), GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0), + GMDID_CASE(BATTLEMAGE, 2001, A0, 1301, A1), }; static void platform_desc(const struct platform_test_case *t, char *desc) diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h index 8b0cc1bc9327..e22bbf57fca7 100644 --- a/drivers/gpu/drm/xe/xe_assert.h +++ b/drivers/gpu/drm/xe/xe_assert.h @@ -81,7 +81,7 @@ #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) #define __xe_assert_msg(xe, condition, msg, arg...) ({ \ - (void)drm_WARN(&(xe)->drm, !(condition), "[" DRM_NAME "] Assertion `%s` failed!\n" msg, \ + (void)drm_WARN(&(xe)->drm, !(condition), "Assertion `%s` failed!\n" msg, \ __stringify(condition), ## arg); \ }) #else diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index a13e0b3a169e..ef777dbdf4ec 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -65,7 +65,8 @@ __xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr) { u32 size = drm_suballoc_size(bb->bo); - bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + if (bb->len == 0 || bb->cs[bb->len - 1] != MI_BATCH_BUFFER_END) + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 31192d983d9e..e5f51fd23c65 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -13,7 +13,7 @@ #include <drm/ttm/ttm_device.h> #include <drm/ttm/ttm_placement.h> #include <drm/ttm/ttm_tt.h> -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> #include "xe_device.h" #include "xe_dma_buf.h" @@ -680,8 +680,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, tt_has_data = ttm && (ttm_tt_is_populated(ttm) || (ttm->page_flags & TTM_TT_FLAG_SWAPPED)); - move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared) : - (!mem_type_is_vram(old_mem_type) && !tt_has_data); + move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) : + (!mem_type_is_vram(old_mem_type) && !tt_has_data)); needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) || (!ttm && ttm_bo->type == ttm_bo_type_device); @@ -758,7 +758,16 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, xe_assert(xe, migrate); trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source); - xe_pm_runtime_get_noresume(xe); + if (xe_rpm_reclaim_safe(xe)) { + /* + * We might be called through swapout in the validation path of + * another TTM device, so unconditionally acquire rpm here. + */ + xe_pm_runtime_get(xe); + } else { + drm_WARN_ON(&xe->drm, handle_system_ccs); + xe_pm_runtime_get_noresume(xe); + } if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) { /* @@ -793,8 +802,16 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, } } } else { - if (move_lacks_source) - fence = xe_migrate_clear(migrate, bo, new_mem); + if (move_lacks_source) { + u32 flags = 0; + + if (mem_type_is_vram(new_mem->mem_type)) + flags |= XE_MIGRATE_CLEAR_FLAG_FULL; + else if (handle_system_ccs) + flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA; + + fence = xe_migrate_clear(migrate, bo, new_mem, flags); + } else fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem, handle_system_ccs); @@ -1090,7 +1107,7 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list)); - if (bo->ggtt_node.size) + if (bo->ggtt_node && bo->ggtt_node->base.size) xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo); #ifdef CONFIG_PROC_FS @@ -1264,13 +1281,14 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) && !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) && ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) || - (flags & XE_BO_NEEDS_64K))) { - aligned_size = ALIGN(size, SZ_64K); + (flags & (XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_NEEDS_2M)))) { + size_t align = flags & XE_BO_FLAG_NEEDS_2M ? SZ_2M : SZ_64K; + + aligned_size = ALIGN(size, align); if (type != ttm_bo_type_device) - size = ALIGN(size, SZ_64K); + size = ALIGN(size, align); flags |= XE_BO_FLAG_INTERNAL_64K; - alignment = SZ_64K >> PAGE_SHIFT; - + alignment = align >> PAGE_SHIFT; } else { aligned_size = ALIGN(size, SZ_4K); flags &= ~XE_BO_FLAG_INTERNAL_64K; @@ -1490,11 +1508,10 @@ struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u16 cpu_caching, - enum ttm_bo_type type, u32 flags) { struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, - cpu_caching, type, + cpu_caching, ttm_bo_type_device, flags | XE_BO_FLAG_USER); if (!IS_ERR(bo)) xe_bo_unlock_vm_held(bo); @@ -1575,7 +1592,7 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, return bo; } -static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg) +static void __xe_bo_unpin_map_no_vm(void *arg) { xe_bo_unpin_map_no_vm(arg); } @@ -1590,7 +1607,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile if (IS_ERR(bo)) return bo; - ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo); + ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo); if (ret) return ERR_PTR(ret); @@ -1638,7 +1655,7 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str if (IS_ERR(bo)) return PTR_ERR(bo); - drmm_release_action(&xe->drm, __xe_bo_unpin_map_no_vm, *src); + devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src); *src = bo; return 0; @@ -1989,6 +2006,13 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1); + /* CCS formats need physical placement at a 64K alignment in VRAM. */ + if ((bo_flags & XE_BO_FLAG_VRAM_MASK) && + (bo_flags & XE_BO_FLAG_SCANOUT) && + !(xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) && + IS_ALIGNED(args->size, SZ_64K)) + bo_flags |= XE_BO_FLAG_NEEDS_64K; + if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) { if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK))) return -EINVAL; @@ -2018,7 +2042,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, } bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching, - ttm_bo_type_device, bo_flags); + bo_flags); if (vm) xe_vm_unlock(vm); @@ -2296,6 +2320,20 @@ void xe_bo_put_commit(struct llist_head *deferred) drm_gem_object_free(&bo->ttm.base.refcount); } +void xe_bo_put(struct xe_bo *bo) +{ + might_sleep(); + if (bo) { +#ifdef CONFIG_PROC_FS + if (bo->client) + might_lock(&bo->client->bos_lock); +#endif + if (bo->ggtt_node && bo->ggtt_node->ggtt) + might_lock(&bo->ggtt_node->ggtt->lock); + drm_gem_object_put(&bo->ttm.base); + } +} + /** * xe_bo_dumb_create - Create a dumb bo as backing for a fb * @file_priv: ... @@ -2324,7 +2362,6 @@ int xe_bo_dumb_create(struct drm_file *file_priv, bo = xe_bo_create_user(xe, NULL, NULL, args->size, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | XE_BO_FLAG_SCANOUT | XE_BO_FLAG_NEEDS_CPU_ACCESS); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 6de894c728f5..6e4be52306df 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -36,8 +36,9 @@ #define XE_BO_FLAG_PAGETABLE BIT(12) #define XE_BO_FLAG_NEEDS_CPU_ACCESS BIT(13) #define XE_BO_FLAG_NEEDS_UC BIT(14) -#define XE_BO_NEEDS_64K BIT(15) -#define XE_BO_FLAG_GGTT_INVALIDATE BIT(16) +#define XE_BO_FLAG_NEEDS_64K BIT(15) +#define XE_BO_FLAG_NEEDS_2M BIT(16) +#define XE_BO_FLAG_GGTT_INVALIDATE BIT(17) /* this one is trigger internally only */ #define XE_BO_FLAG_INTERNAL_TEST BIT(30) #define XE_BO_FLAG_INTERNAL_64K BIT(31) @@ -86,7 +87,6 @@ struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile, struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u16 cpu_caching, - enum ttm_bo_type type, u32 flags); struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, @@ -126,11 +126,7 @@ static inline struct xe_bo *xe_bo_get(struct xe_bo *bo) return bo; } -static inline void xe_bo_put(struct xe_bo *bo) -{ - if (bo) - drm_gem_object_put(&bo->ttm.base); -} +void xe_bo_put(struct xe_bo *bo); static inline void __xe_bo_unset_bulk_move(struct xe_bo *bo) { @@ -194,9 +190,12 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size) static inline u32 xe_bo_ggtt_addr(struct xe_bo *bo) { - XE_WARN_ON(bo->ggtt_node.size > bo->size); - XE_WARN_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32)); - return bo->ggtt_node.start; + if (XE_WARN_ON(!bo->ggtt_node)) + return 0; + + XE_WARN_ON(bo->ggtt_node->base.size > bo->size); + XE_WARN_ON(bo->ggtt_node->base.start + bo->ggtt_node->base.size > (1ull << 32)); + return bo->ggtt_node->base.start; } int xe_bo_vmap(struct xe_bo *bo); diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 10450f1fbbde..2ed558ac2264 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -8,12 +8,13 @@ #include <linux/iosys-map.h> -#include <drm/drm_mm.h> #include <drm/ttm/ttm_bo.h> #include <drm/ttm/ttm_device.h> #include <drm/ttm/ttm_execbuf_util.h> #include <drm/ttm/ttm_placement.h> +#include "xe_ggtt_types.h" + struct xe_device; struct xe_vm; @@ -39,7 +40,7 @@ struct xe_bo { /** @placement: current placement for this BO */ struct ttm_placement placement; /** @ggtt_node: GGTT node if this BO is mapped in the GGTT */ - struct drm_mm_node ggtt_node; + struct xe_ggtt_node *ggtt_node; /** @vmap: iosys map of this buffer */ struct iosys_map vmap; /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */ @@ -58,6 +59,8 @@ struct xe_bo { #endif /** @freed: List node for delayed put. */ struct llist_node freed; + /** @update_index: Update index if PT BO */ + int update_index; /** @created: Whether the bo has passed initial creation */ bool created; diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 1011e5d281fa..fe4319eb13fd 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -6,6 +6,7 @@ #include "xe_debugfs.h" #include <linux/debugfs.h> +#include <linux/fault-inject.h> #include <linux/string_helpers.h> #include <drm/drm_debugfs.h> @@ -26,10 +27,7 @@ #include "xe_vm.h" #endif -#ifdef CONFIG_FAULT_INJECTION -#include <linux/fault-inject.h> /* XXX: fault-inject.h is broken */ DECLARE_FAULT_ATTR(gt_reset_failure); -#endif static struct xe_device *node_to_xe(struct drm_info_node *node) { @@ -47,10 +45,9 @@ static int info(struct seq_file *m, void *data) drm_printf(&p, "graphics_verx100 %d\n", xe->info.graphics_verx100); drm_printf(&p, "media_verx100 %d\n", xe->info.media_verx100); - drm_printf(&p, "stepping G:%s M:%s D:%s B:%s\n", + drm_printf(&p, "stepping G:%s M:%s B:%s\n", xe_step_name(xe->info.step.graphics), xe_step_name(xe->info.step.media), - xe_step_name(xe->info.step.display), xe_step_name(xe->info.step.basedie)); drm_printf(&p, "is_dgfx %s\n", str_yes_no(xe->info.is_dgfx)); drm_printf(&p, "platform %d\n", xe->info.platform); @@ -190,7 +187,7 @@ void xe_debugfs_register(struct xe_device *xe) debugfs_create_file("forcewake_all", 0400, root, xe, &forcewake_all_fops); - debugfs_create_file("wedged_mode", 0400, root, xe, + debugfs_create_file("wedged_mode", 0600, root, xe, &wedged_mode_fops); for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { @@ -214,8 +211,5 @@ void xe_debugfs_register(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_debugfs_register(gt); -#ifdef CONFIG_FAULT_INJECTION fault_create_debugfs_attr("fail_gt_reset", root, >_reset_failure); -#endif - } diff --git a/drivers/gpu/drm/xe/xe_debugfs.h b/drivers/gpu/drm/xe/xe_debugfs.h index 715b8e2e0bd9..17f4c2f1b5e4 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.h +++ b/drivers/gpu/drm/xe/xe_debugfs.h @@ -8,6 +8,10 @@ struct xe_device; +#ifdef CONFIG_DEBUG_FS void xe_debugfs_register(struct xe_device *xe); +#else +static inline void xe_debugfs_register(struct xe_device *xe) { } +#endif #endif diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 62c2b10fbf1d..bdb76e834e4c 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -66,22 +66,9 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q) return &q->gt->uc.guc; } -static void xe_devcoredump_deferred_snap_work(struct work_struct *work) +static ssize_t __xe_devcoredump_read(char *buffer, size_t count, + struct xe_devcoredump *coredump) { - struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); - - /* keep going if fw fails as we still want to save the memory and SW data */ - if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL)) - xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); - xe_vm_snapshot_capture_delayed(ss->vm); - xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); - xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); -} - -static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, - size_t count, void *data, size_t datalen) -{ - struct xe_devcoredump *coredump = data; struct xe_device *xe; struct xe_devcoredump_snapshot *ss; struct drm_printer p; @@ -89,18 +76,11 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, struct timespec64 ts; int i; - if (!coredump) - return -ENODEV; - xe = coredump_to_xe(coredump); ss = &coredump->snapshot; - /* Ensure delayed work is captured before continuing */ - flush_work(&ss->work); - iter.data = buffer; - iter.offset = 0; - iter.start = offset; + iter.start = 0; iter.remain = count; p = drm_coredump_printer(&iter); @@ -134,10 +114,83 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, return count - iter.remain; } +static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) +{ + int i; + + xe_guc_ct_snapshot_free(ss->ct); + ss->ct = NULL; + + xe_guc_exec_queue_snapshot_free(ss->ge); + ss->ge = NULL; + + xe_sched_job_snapshot_free(ss->job); + ss->job = NULL; + + for (i = 0; i < XE_NUM_HW_ENGINES; i++) + if (ss->hwe[i]) { + xe_hw_engine_snapshot_free(ss->hwe[i]); + ss->hwe[i] = NULL; + } + + xe_vm_snapshot_free(ss->vm); + ss->vm = NULL; +} + +static void xe_devcoredump_deferred_snap_work(struct work_struct *work) +{ + struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); + struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot); + + /* keep going if fw fails as we still want to save the memory and SW data */ + if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL)) + xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); + xe_vm_snapshot_capture_delayed(ss->vm); + xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); + xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); + + /* Calculate devcoredump size */ + ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump); + + ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); + if (!ss->read.buffer) + return; + + __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump); + xe_devcoredump_snapshot_free(ss); +} + +static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, + size_t count, void *data, size_t datalen) +{ + struct xe_devcoredump *coredump = data; + struct xe_devcoredump_snapshot *ss; + ssize_t byte_copied; + + if (!coredump) + return -ENODEV; + + ss = &coredump->snapshot; + + /* Ensure delayed work is captured before continuing */ + flush_work(&ss->work); + + if (!ss->read.buffer) + return -ENODEV; + + if (offset >= ss->read.size) + return 0; + + byte_copied = count < ss->read.size - offset ? count : + ss->read.size - offset; + memcpy(buffer, ss->read.buffer + offset, byte_copied); + + return byte_copied; +} + static void xe_devcoredump_free(void *data) { struct xe_devcoredump *coredump = data; - int i; /* Our device is gone. Nothing to do... */ if (!data || !coredump_to_xe(coredump)) @@ -145,13 +198,8 @@ static void xe_devcoredump_free(void *data) cancel_work_sync(&coredump->snapshot.work); - xe_guc_ct_snapshot_free(coredump->snapshot.ct); - xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge); - xe_sched_job_snapshot_free(coredump->snapshot.job); - for (i = 0; i < XE_NUM_HW_ENGINES; i++) - if (coredump->snapshot.hwe[i]) - xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]); - xe_vm_snapshot_free(coredump->snapshot.vm); + xe_devcoredump_snapshot_free(&coredump->snapshot); + kvfree(coredump->snapshot.read.buffer); /* To prevent stale data on next snapshot, clear everything */ memset(&coredump->snapshot, 0, sizeof(coredump->snapshot)); @@ -171,7 +219,6 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, u32 adj_logical_mask = q->logical_mask; u32 width_mask = (0x1 << q->width) - 1; const char *process_name = "no process"; - struct task_struct *task = NULL; int i; bool cookie; @@ -179,14 +226,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, ss->snapshot_time = ktime_get_real(); ss->boot_time = ktime_get_boottime(); - if (q->vm && q->vm->xef) { - task = get_pid_task(q->vm->xef->drm->pid, PIDTYPE_PID); - if (task) - process_name = task->comm; - } + if (q->vm && q->vm->xef) + process_name = q->vm->xef->process_name; strscpy(ss->process_name, process_name); - if (task) - put_task_struct(task); ss->gt = q->gt; INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work); @@ -266,4 +308,5 @@ int xe_devcoredump_init(struct xe_device *xe) { return devm_add_action_or_reset(xe->drm.dev, xe_driver_devcoredump_fini, &xe->drm); } + #endif diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index 923cdf72a816..440d05d77a5a 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -46,6 +46,14 @@ struct xe_devcoredump_snapshot { struct xe_sched_job_snapshot *job; /** @vm: Snapshot of VM state */ struct xe_vm_snapshot *vm; + + /** @read: devcoredump in human readable format */ + struct { + /** @read.size: size of devcoredump in human readable format */ + ssize_t size; + /** @read.buffer: buffer of devcoredump in human readable format */ + char *buffer; + } read; }; /** diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index f2f1d8ddb221..a1987b554a8d 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -15,7 +15,7 @@ #include <drm/drm_ioctl.h> #include <drm/drm_managed.h> #include <drm/drm_print.h> -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> #include "display/xe_display.h" #include "instructions/xe_gpu_commands.h" @@ -37,6 +37,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_guc.h" +#include "xe_hw_engine_group.h" #include "xe_hwmon.h" #include "xe_irq.h" #include "xe_memirq.h" @@ -54,6 +55,9 @@ #include "xe_vm.h" #include "xe_vram.h" #include "xe_wait_user_fence.h" +#include "xe_wa.h" + +#include <generated/xe_wa_oob.h> static int xe_file_open(struct drm_device *dev, struct drm_file *file) { @@ -61,6 +65,7 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file) struct xe_drm_client *client; struct xe_file *xef; int ret = -ENOMEM; + struct task_struct *task = NULL; xef = kzalloc(sizeof(*xef), GFP_KERNEL); if (!xef) @@ -82,31 +87,30 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file) mutex_init(&xef->exec_queue.lock); xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1); - spin_lock(&xe->clients.lock); - xe->clients.count++; - spin_unlock(&xe->clients.lock); - file->driver_priv = xef; kref_init(&xef->refcount); + task = get_pid_task(rcu_access_pointer(file->pid), PIDTYPE_PID); + if (task) { + xef->process_name = kstrdup(task->comm, GFP_KERNEL); + xef->pid = task->pid; + put_task_struct(task); + } + return 0; } static void xe_file_destroy(struct kref *ref) { struct xe_file *xef = container_of(ref, struct xe_file, refcount); - struct xe_device *xe = xef->xe; xa_destroy(&xef->exec_queue.xa); mutex_destroy(&xef->exec_queue.lock); xa_destroy(&xef->vm.xa); mutex_destroy(&xef->vm.lock); - spin_lock(&xe->clients.lock); - xe->clients.count--; - spin_unlock(&xe->clients.lock); - xe_drm_client_put(xef->client); + kfree(xef->process_name); kfree(xef); } @@ -153,13 +157,13 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) * vm->lock taken during xe_exec_queue_kill(). */ xa_for_each(&xef->exec_queue.xa, idx, q) { + if (q->vm && q->hwe->hw_engine_group) + xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); xe_exec_queue_kill(q); xe_exec_queue_put(q); } - mutex_lock(&xef->vm.lock); xa_for_each(&xef->vm.xa, idx, vm) xe_vm_close_and_put(vm); - mutex_unlock(&xef->vm.lock); xe_file_put(xef); @@ -238,6 +242,7 @@ static const struct file_operations xe_driver_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = drm_show_fdinfo, #endif + .fop_flags = FOP_UNSIGNED_OFFSET, }; static struct drm_driver driver = { @@ -282,6 +287,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy) if (xe->unordered_wq) destroy_workqueue(xe->unordered_wq); + if (xe->destroy_wq) + destroy_workqueue(xe->destroy_wq); + ttm_device_fini(&xe->ttm); } @@ -316,13 +324,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xe->info.force_execlist = xe_modparam.force_execlist; spin_lock_init(&xe->irq.lock); - spin_lock_init(&xe->clients.lock); init_waitqueue_head(&xe->ufence_wq); - err = drmm_mutex_init(&xe->drm, &xe->usm.lock); - if (err) - goto err; + init_rwsem(&xe->usm.lock); xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC); @@ -347,8 +352,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0); xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0); + xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0); if (!xe->ordered_wq || !xe->unordered_wq || - !xe->preempt_fence_wq) { + !xe->preempt_fence_wq || !xe->destroy_wq) { /* * Cleanup done in xe_device_destroy via * drmm_add_action_or_reset register above @@ -531,7 +537,7 @@ static void update_device_info(struct xe_device *xe) { /* disable features that are not available/applicable to VFs */ if (IS_SRIOV_VF(xe)) { - xe->info.enable_display = 0; + xe->info.probe_display = 0; xe->info.has_heci_gscfi = 0; xe->info.skip_guc_pc = 1; xe->info.skip_pcode = 1; @@ -785,13 +791,22 @@ void xe_device_shutdown(struct xe_device *xe) { } +/** + * xe_device_wmb() - Device specific write memory barrier + * @xe: the &xe_device + * + * While wmb() is sufficient for a barrier if we use system memory, on discrete + * platforms with device memory we additionally need to issue a register write. + * Since it doesn't matter which register we write to, use the read-only VF_CAP + * register that is also marked as accessible by the VFs. + */ void xe_device_wmb(struct xe_device *xe) { struct xe_gt *gt = xe_root_mmio_gt(xe); wmb(); if (IS_DGFX(xe)) - xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33, 0); + xe_mmio_write32(gt, VF_CAP_REG, 0); } /** @@ -820,6 +835,11 @@ void xe_device_td_flush(struct xe_device *xe) if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) return; + if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) { + xe_device_l2_flush(xe); + return; + } + for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) continue; @@ -843,6 +863,30 @@ void xe_device_td_flush(struct xe_device *xe) } } +void xe_device_l2_flush(struct xe_device *xe) +{ + struct xe_gt *gt; + int err; + + gt = xe_root_mmio_gt(xe); + + if (!XE_WA(gt, 16023588340)) + return; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return; + + spin_lock(>->global_invl_lock); + xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1); + + if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true)) + xe_gt_err_once(gt, "Global invalidation timeout\n"); + spin_unlock(>->global_invl_lock); + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) { return xe_device_has_flat_ccs(xe) ? @@ -926,13 +970,13 @@ void xe_device_declare_wedged(struct xe_device *xe) return; } + xe_pm_runtime_get_noresume(xe); + if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) { drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Although device is wedged.\n"); return; } - xe_pm_runtime_get_noresume(xe); - if (!atomic_xchg(&xe->wedged.flag, 1)) { xe->needs_flr_on_fini = true; drm_err(&xe->drm, diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index b3952718b3c1..34620ef855c0 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -15,9 +15,23 @@ static inline struct xe_device *to_xe_device(const struct drm_device *dev) return container_of(dev, struct xe_device, drm); } +static inline struct xe_device *kdev_to_xe_device(struct device *kdev) +{ + struct drm_device *drm = dev_get_drvdata(kdev); + + return drm ? to_xe_device(drm) : NULL; +} + static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev) { - return pci_get_drvdata(pdev); + struct drm_device *drm = pci_get_drvdata(pdev); + + return drm ? to_xe_device(drm) : NULL; +} + +static inline struct xe_device *xe_device_const_cast(const struct xe_device *xe) +{ + return (struct xe_device *)xe; } static inline struct xe_device *ttm_to_xe_device(struct ttm_device *ttm) @@ -129,16 +143,6 @@ static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt) void xe_device_assert_mem_access(struct xe_device *xe); -static inline bool xe_device_in_fault_mode(struct xe_device *xe) -{ - return xe->usm.num_vm_in_fault_mode != 0; -} - -static inline bool xe_device_in_non_fault_mode(struct xe_device *xe) -{ - return xe->usm.num_vm_in_non_fault_mode != 0; -} - static inline bool xe_device_has_flat_ccs(struct xe_device *xe) { return xe->info.has_flat_ccs; @@ -162,6 +166,7 @@ u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address); u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address); void xe_device_td_flush(struct xe_device *xe); +void xe_device_l2_flush(struct xe_device *xe); static inline bool xe_device_wedged(struct xe_device *xe) { @@ -173,4 +178,18 @@ void xe_device_declare_wedged(struct xe_device *xe); struct xe_file *xe_file_get(struct xe_file *xef); void xe_file_put(struct xe_file *xef); +/* + * Occasionally it is seen that the G2H worker starts running after a delay of more than + * a second even after being queued and activated by the Linux workqueue subsystem. This + * leads to G2H timeout error. The root cause of issue lies with scheduling latency of + * Lunarlake Hybrid CPU. Issue disappears if we disable Lunarlake atom cores from BIOS + * and this is beyond xe kmd. + * + * TODO: Drop this change once workqueue scheduling delay issue is fixed on LNL Hybrid CPU. + */ +#define LNL_FLUSH_WORKQUEUE(wq__) \ + flush_workqueue(wq__) +#define LNL_FLUSH_WORK(wrk__) \ + flush_work(wrk__) + #endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index cbc582bcc90a..687f3a9039bb 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -23,6 +23,10 @@ #include "xe_sriov_types.h" #include "xe_step_types.h" +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +#define TEST_VM_OPS_ERROR +#endif + #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) #include "soc/intel_pch.h" #include "intel_display_core.h" @@ -40,6 +44,7 @@ struct xe_pat_ops; #define MEDIA_VERx100(xe) ((xe)->info.media_verx100) #define IS_DGFX(xe) ((xe)->info.is_dgfx) #define HAS_HECI_GSCFI(xe) ((xe)->info.has_heci_gscfi) +#define HAS_HECI_CSCFI(xe) ((xe)->info.has_heci_cscfi) #define XE_VRAM_FLAGS_NEED64K BIT(0) @@ -199,10 +204,16 @@ struct xe_tile { struct xe_memirq memirq; /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */ - struct drm_mm_node ggtt_balloon[2]; + struct xe_ggtt_node *ggtt_balloon[2]; } vf; } sriov; + /** @pcode: tile's PCODE */ + struct { + /** @pcode.lock: protecting tile's PCODE mailbox data */ + struct mutex lock; + } pcode; + /** @migrate: Migration helper for vram blits and clearing */ struct xe_migrate *migrate; @@ -277,26 +288,29 @@ struct xe_device { u8 has_sriov:1; /** @info.has_usm: Device has unified shared memory support */ u8 has_usm:1; - /** @info.enable_display: display enabled */ - u8 enable_display:1; + /** + * @info.probe_display: Probe display hardware. If set to + * false, the driver will behave as if there is no display + * hardware present and will not try to read/write to it in any + * way. The display hardware, if it exists, will not be + * exposed to userspace and will be left untouched in whatever + * state the firmware or bootloader left it in. + */ + u8 probe_display:1; /** @info.skip_mtcfg: skip Multi-Tile configuration from MTCFG register */ u8 skip_mtcfg:1; /** @info.skip_pcode: skip access to PCODE uC */ u8 skip_pcode:1; /** @info.has_heci_gscfi: device has heci gscfi */ u8 has_heci_gscfi:1; + /** @info.has_heci_cscfi: device has heci cscfi */ + u8 has_heci_cscfi:1; /** @info.skip_guc_pc: Skip GuC based PM feature init */ u8 skip_guc_pc:1; /** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */ u8 has_atomic_enable_pte_bit:1; /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */ u8 has_device_atomics_on_smem:1; - -#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) - struct { - u32 rawclk_freq; - } i915_runtime; -#endif } info; /** @irq: device interrupt state */ @@ -339,27 +353,14 @@ struct xe_device { struct workqueue_struct *wq; } sriov; - /** @clients: drm clients info */ - struct { - /** @clients.lock: Protects drm clients info */ - spinlock_t lock; - - /** @clients.count: number of drm clients */ - u64 count; - } clients; - /** @usm: unified memory state */ struct { /** @usm.asid: convert a ASID to VM */ struct xarray asid_to_vm; /** @usm.next_asid: next ASID, used to cyclical alloc asids */ u32 next_asid; - /** @usm.num_vm_in_fault_mode: number of VM in fault mode */ - u32 num_vm_in_fault_mode; - /** @usm.num_vm_in_non_fault_mode: number of VM in non-fault mode */ - u32 num_vm_in_non_fault_mode; /** @usm.lock: protects UM state */ - struct mutex lock; + struct rw_semaphore lock; } usm; /** @pinned: pinned BO state */ @@ -386,6 +387,9 @@ struct xe_device { /** @unordered_wq: used to serialize unordered work, mostly display */ struct workqueue_struct *unordered_wq; + /** @destroy_wq: used to serialize user destroy work, like queue */ + struct workqueue_struct *destroy_wq; + /** @tiles: device tiles */ struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE]; @@ -477,6 +481,14 @@ struct xe_device { int mode; } wedged; +#ifdef TEST_VM_OPS_ERROR + /** + * @vm_inject_error_position: inject errors at different places in VM + * bind IOCTL based on this value + */ + u8 vm_inject_error_position; +#endif + /* private: */ #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) @@ -549,15 +561,23 @@ struct xe_file { struct { /** @vm.xe: xarray to store VMs */ struct xarray xa; - /** @vm.lock: protects file VM state */ + /** + * @vm.lock: Protects VM lookup + reference and removal a from + * file xarray. Not an intended to be an outer lock which does + * thing while being held. + */ struct mutex lock; } vm; /** @exec_queue: Submission exec queue state for file */ struct { - /** @exec_queue.xe: xarray to store engines */ + /** @exec_queue.xa: xarray to store exece queues */ struct xarray xa; - /** @exec_queue.lock: protects file engine state */ + /** + * @exec_queue.lock: Protects exec queue lookup + reference and + * removal a frommfile xarray. Not an intended to be an outer + * lock which does thing while being held. + */ struct mutex lock; } exec_queue; @@ -567,6 +587,18 @@ struct xe_file { /** @client: drm client */ struct xe_drm_client *client; + /** + * @process_name: process name for file handle, used to safely output + * during error situations where xe file can outlive process + */ + char *process_name; + + /** + * @pid: pid for file handle, used to safely output uring error + * situations where xe file can outlive process + */ + pid_t pid; + /** @refcount: ref count of this xe file */ struct kref refcount; }; diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 7ddd59908334..fb52a23e28f8 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -5,11 +5,12 @@ #include "xe_drm_client.h" #include <drm/drm_print.h> -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/types.h> +#include "xe_assert.h" #include "xe_bo.h" #include "xe_bo_types.h" #include "xe_device_types.h" @@ -151,10 +152,13 @@ void xe_drm_client_add_bo(struct xe_drm_client *client, */ void xe_drm_client_remove_bo(struct xe_bo *bo) { + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); struct xe_drm_client *client = bo->client; + xe_assert(xe, !kref_read(&bo->ttm.base.refcount)); + spin_lock(&client->bos_lock); - list_del(&bo->client_link); + list_del_init(&bo->client_link); spin_unlock(&client->bos_lock); xe_drm_client_put(client); @@ -164,12 +168,9 @@ static void bo_meminfo(struct xe_bo *bo, struct drm_memory_stats stats[TTM_NUM_MEM_TYPES]) { u64 sz = bo->size; - u32 mem_type; + u32 mem_type = bo->ttm.resource->mem_type; - if (bo->placement.placement) - mem_type = bo->placement.placement->mem_type; - else - mem_type = XE_PL_TT; + xe_bo_assert_held(bo); if (drm_gem_object_is_shared_for_memory_stats(&bo->ttm.base)) stats[mem_type].shared += sz; @@ -196,6 +197,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) struct xe_drm_client *client; struct drm_gem_object *obj; struct xe_bo *bo; + LLIST_HEAD(deferred); unsigned int id; u32 mem_type; @@ -206,7 +208,20 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) idr_for_each_entry(&file->object_idr, obj, id) { struct xe_bo *bo = gem_to_xe_bo(obj); - bo_meminfo(bo, stats); + if (dma_resv_trylock(bo->ttm.base.resv)) { + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + } else { + xe_bo_get(bo); + spin_unlock(&file->table_lock); + + xe_bo_lock(bo, false); + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + + xe_bo_put(bo); + spin_lock(&file->table_lock); + } } spin_unlock(&file->table_lock); @@ -215,11 +230,28 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) list_for_each_entry(bo, &client->bos_list, client_link) { if (!kref_get_unless_zero(&bo->ttm.base.refcount)) continue; - bo_meminfo(bo, stats); - xe_bo_put(bo); + + if (dma_resv_trylock(bo->ttm.base.resv)) { + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + } else { + spin_unlock(&client->bos_lock); + + xe_bo_lock(bo, false); + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + + spin_lock(&client->bos_lock); + /* The bo ref will prevent this bo from being removed from the list */ + xe_assert(xef->xe, !list_empty(&bo->client_link)); + } + + xe_bo_put_deferred(bo, &deferred); } spin_unlock(&client->bos_lock); + xe_bo_put_commit(&deferred); + for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) { if (!xe_mem_type_to_name[mem_type]) continue; @@ -251,8 +283,15 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) /* Accumulate all the exec queues from this client */ mutex_lock(&xef->exec_queue.lock); - xa_for_each(&xef->exec_queue.xa, i, q) + xa_for_each(&xef->exec_queue.xa, i, q) { + xe_exec_queue_get(q); + mutex_unlock(&xef->exec_queue.lock); + xe_exec_queue_update_run_ticks(q); + + mutex_lock(&xef->exec_queue.lock); + xe_exec_queue_put(q); + } mutex_unlock(&xef->exec_queue.lock); /* Get the total GPU cycles */ diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index f36980aa26e6..756b492f13b0 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -8,12 +8,13 @@ #include <drm/drm_device.h> #include <drm/drm_exec.h> #include <drm/drm_file.h> -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> #include <linux/delay.h> #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" +#include "xe_hw_engine_group.h" #include "xe_macros.h" #include "xe_ring_ops_types.h" #include "xe_sched_job.h" @@ -40,11 +41,6 @@ * user knows an exec writes to a BO and reads from the BO in the next exec, it * is the user's responsibility to pass in / out fence between the two execs). * - * Implicit dependencies for external BOs are handled by using the dma-buf - * implicit dependency uAPI (TODO: add link). To make this works each exec must - * install the job's fence into the DMA_RESV_USAGE_WRITE slot of every external - * BO mapped in the VM. - * * We do not allow a user to trigger a bind at exec time rather we have a VM * bind IOCTL which uses the same in / out fence interface as exec. In that * sense, a VM bind is basically the same operation as an exec from the user @@ -58,8 +54,8 @@ * behind any pending kernel operations on any external BOs in VM or any BOs * private to the VM. This is accomplished by the rebinds waiting on BOs * DMA_RESV_USAGE_KERNEL slot (kernel ops) and kernel ops waiting on all BOs - * slots (inflight execs are in the DMA_RESV_USAGE_BOOKING for private BOs and - * in DMA_RESV_USAGE_WRITE for external BOs). + * slots (inflight execs are in the DMA_RESV_USAGE_BOOKKEEP for private BOs and + * for external BOs). * * Rebinds / dma-resv usage applies to non-compute mode VMs only as for compute * mode VMs we use preempt fences and a rebind worker (TODO: add link). @@ -124,6 +120,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) bool write_locked, skip_retry = false; ktime_t end = 0; int err = 0; + struct xe_hw_engine_group *group; + enum xe_hw_engine_group_execution_mode mode, previous_mode; if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) || @@ -134,12 +132,16 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (XE_IOCTL_DBG(xe, !q)) return -ENOENT; - if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM)) - return -EINVAL; + if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM)) { + err = -EINVAL; + goto err_exec_queue; + } if (XE_IOCTL_DBG(xe, args->num_batch_buffer && - q->width != args->num_batch_buffer)) - return -EINVAL; + q->width != args->num_batch_buffer)) { + err = -EINVAL; + goto err_exec_queue; + } if (XE_IOCTL_DBG(xe, q->ops->reset_status(q))) { err = -ECANCELED; @@ -182,6 +184,15 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } } + group = q->hwe->hw_engine_group; + mode = xe_hw_engine_group_find_exec_mode(q); + + if (mode == EXEC_MODE_DMA_FENCE) { + err = xe_hw_engine_group_get_mode(group, mode, &previous_mode); + if (err) + goto err_syncs; + } + retry: if (!xe_vm_in_lr_mode(vm) && xe_vm_userptr_check_repin(vm)) { err = down_write_killable(&vm->lock); @@ -199,7 +210,7 @@ retry: downgrade_write(&vm->lock); write_locked = false; if (err) - goto err_unlock_list; + goto err_hw_exec_mode; } if (!args->num_batch_buffer) { @@ -213,6 +224,7 @@ retry: fence = xe_sync_in_fence_get(syncs, num_syncs, q, vm); if (IS_ERR(fence)) { err = PTR_ERR(fence); + xe_vm_unlock(vm); goto err_unlock_list; } for (i = 0; i < num_syncs; i++) @@ -292,7 +304,8 @@ retry: xe_sched_job_arm(job); if (!xe_vm_in_lr_mode(vm)) drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, &job->drm.s_fence->finished, - DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE); + DMA_RESV_USAGE_BOOKKEEP, + DMA_RESV_USAGE_BOOKKEEP); for (i = 0; i < num_syncs; i++) { xe_sync_entry_signal(&syncs[i], &job->drm.s_fence->finished); @@ -312,6 +325,9 @@ retry: spin_unlock(&xe->ttm.lru_lock); } + if (mode == EXEC_MODE_LR) + xe_hw_engine_group_resume_faulting_lr_jobs(group); + err_repin: if (!xe_vm_in_lr_mode(vm)) up_read(&vm->userptr.notifier_lock); @@ -324,6 +340,9 @@ err_unlock_list: up_read(&vm->lock); if (err == -EAGAIN && !skip_retry) goto retry; +err_hw_exec_mode: + if (mode == EXEC_MODE_DMA_FENCE) + xe_hw_engine_group_put(group); err_syncs: while (num_syncs--) xe_sync_entry_cleanup(&syncs[num_syncs]); diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index a39384bb9553..fd0f3b3c9101 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -9,11 +9,12 @@ #include <drm/drm_device.h> #include <drm/drm_file.h> -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> #include "xe_device.h" #include "xe_gt.h" #include "xe_hw_engine_class_sysfs.h" +#include "xe_hw_engine_group.h" #include "xe_hw_fence.h" #include "xe_lrc.h" #include "xe_macros.h" @@ -73,6 +74,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, q->ops = gt->exec_queue_ops; INIT_LIST_HEAD(&q->lr.link); INIT_LIST_HEAD(&q->multi_gt_link); + INIT_LIST_HEAD(&q->hw_engine_group_link); q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; q->sched_props.preempt_timeout_us = @@ -105,22 +107,35 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, static int __xe_exec_queue_init(struct xe_exec_queue *q) { + struct xe_vm *vm = q->vm; int i, err; + if (vm) { + err = xe_vm_lock(vm, true); + if (err) + return err; + } + for (i = 0; i < q->width; ++i) { q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K); if (IS_ERR(q->lrc[i])) { err = PTR_ERR(q->lrc[i]); - goto err_lrc; + goto err_unlock; } } + if (vm) + xe_vm_unlock(vm); + err = q->ops->init(q); if (err) goto err_lrc; return 0; +err_unlock: + if (vm) + xe_vm_unlock(vm); err_lrc: for (i = i - 1; i >= 0; --i) xe_lrc_put(q->lrc[i]); @@ -140,15 +155,7 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v if (IS_ERR(q)) return q; - if (vm) { - err = xe_vm_lock(vm, true); - if (err) - goto err_post_alloc; - } - err = __xe_exec_queue_init(q); - if (vm) - xe_vm_unlock(vm); if (err) goto err_post_alloc; @@ -161,7 +168,8 @@ err_post_alloc: struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, struct xe_vm *vm, - enum xe_engine_class class, u32 flags) + enum xe_engine_class class, + u32 flags, u64 extensions) { struct xe_hw_engine *hwe, *hwe0 = NULL; enum xe_hw_engine_id id; @@ -181,7 +189,56 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe if (!logical_mask) return ERR_PTR(-ENODEV); - return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, 0); + return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions); +} + +/** + * xe_exec_queue_create_bind() - Create bind exec queue. + * @xe: Xe device. + * @tile: tile which bind exec queue belongs to. + * @flags: exec queue creation flags + * @extensions: exec queue creation extensions + * + * Normalize bind exec queue creation. Bind exec queue is tied to migration VM + * for access to physical memory required for page table programming. On a + * faulting devices the reserved copy engine instance must be used to avoid + * deadlocking (user binds cannot get stuck behind faults as kernel binds which + * resolve faults depend on user binds). On non-faulting devices any copy engine + * can be used. + * + * Returns exec queue on success, ERR_PTR on failure + */ +struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, + struct xe_tile *tile, + u32 flags, u64 extensions) +{ + struct xe_gt *gt = tile->primary_gt; + struct xe_exec_queue *q; + struct xe_vm *migrate_vm; + + migrate_vm = xe_migrate_get_vm(tile->migrate); + if (xe->info.has_usm) { + struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, + XE_ENGINE_CLASS_COPY, + gt->usm.reserved_bcs_instance, + false); + + if (!hwe) { + xe_vm_put(migrate_vm); + return ERR_PTR(-EINVAL); + } + + q = xe_exec_queue_create(xe, migrate_vm, + BIT(hwe->logical_instance), 1, hwe, + flags, extensions); + } else { + q = xe_exec_queue_create_class(xe, gt, migrate_vm, + XE_ENGINE_CLASS_COPY, flags, + extensions); + } + xe_vm_put(migrate_vm); + + return q; } void xe_exec_queue_destroy(struct kref *ref) @@ -203,8 +260,14 @@ void xe_exec_queue_fini(struct xe_exec_queue *q) { int i; + /* + * Before releasing our ref to lrc and xef, accumulate our run ticks + */ + xe_exec_queue_update_run_ticks(q); + for (i = 0; i < q->width; ++i) xe_lrc_put(q->lrc[i]); + __xe_exec_queue_free(q); } @@ -413,63 +476,6 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue return 0; } -static const enum xe_engine_class user_to_xe_engine_class[] = { - [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, - [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, - [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, - [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, - [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, -}; - -static struct xe_hw_engine * -find_hw_engine(struct xe_device *xe, - struct drm_xe_engine_class_instance eci) -{ - u32 idx; - - if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) - return NULL; - - if (eci.gt_id >= xe->info.gt_count) - return NULL; - - idx = array_index_nospec(eci.engine_class, - ARRAY_SIZE(user_to_xe_engine_class)); - - return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), - user_to_xe_engine_class[idx], - eci.engine_instance, true); -} - -static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt, - struct drm_xe_engine_class_instance *eci, - u16 width, u16 num_placements) -{ - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - u32 logical_mask = 0; - - if (XE_IOCTL_DBG(xe, width != 1)) - return 0; - if (XE_IOCTL_DBG(xe, num_placements != 1)) - return 0; - if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) - return 0; - - eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY; - - for_each_hw_engine(hwe, gt, id) { - if (xe_hw_engine_is_reserved(hwe)) - continue; - - if (hwe->class == - user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY]) - logical_mask |= BIT(hwe->logical_instance); - } - - return logical_mask; -} - static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, struct drm_xe_engine_class_instance *eci, u16 width, u16 num_placements) @@ -492,7 +498,7 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, n = j * width + i; - hwe = find_hw_engine(xe, eci[n]); + hwe = xe_hw_engine_lookup(xe, eci[n]); if (XE_IOCTL_DBG(xe, !hwe)) return 0; @@ -531,8 +537,9 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, struct drm_xe_engine_class_instance __user *user_eci = u64_to_user_ptr(args->instances); struct xe_hw_engine *hwe; - struct xe_vm *vm, *migrate_vm; + struct xe_vm *vm; struct xe_gt *gt; + struct xe_tile *tile; struct xe_exec_queue *q = NULL; u32 logical_mask; u32 id; @@ -557,37 +564,20 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { - for_each_gt(gt, xe, id) { - struct xe_exec_queue *new; - u32 flags; - - if (xe_gt_is_media_type(gt)) - continue; - - eci[0].gt_id = gt->info.id; - logical_mask = bind_exec_queue_logical_mask(xe, gt, eci, - args->width, - args->num_placements); - if (XE_IOCTL_DBG(xe, !logical_mask)) - return -EINVAL; - - hwe = find_hw_engine(xe, eci[0]); - if (XE_IOCTL_DBG(xe, !hwe)) - return -EINVAL; - - /* The migration vm doesn't hold rpm ref */ - xe_pm_runtime_get_noresume(xe); - - flags = EXEC_QUEUE_FLAG_VM | (id ? EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0); + if (XE_IOCTL_DBG(xe, args->width != 1) || + XE_IOCTL_DBG(xe, args->num_placements != 1) || + XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) + return -EINVAL; - migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate); - new = xe_exec_queue_create(xe, migrate_vm, logical_mask, - args->width, hwe, flags, - args->extensions); + for_each_tile(tile, xe, id) { + struct xe_exec_queue *new; + u32 flags = EXEC_QUEUE_FLAG_VM; - xe_pm_runtime_put(xe); /* now held by engine */ + if (id) + flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD; - xe_vm_put(migrate_vm); + new = xe_exec_queue_create_bind(xe, tile, flags, + args->extensions); if (IS_ERR(new)) { err = PTR_ERR(new); if (q) @@ -608,7 +598,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !logical_mask)) return -EINVAL; - hwe = find_hw_engine(xe, eci[0]); + hwe = xe_hw_engine_lookup(xe, eci[0]); if (XE_IOCTL_DBG(xe, !hwe)) return -EINVAL; @@ -638,22 +628,27 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (xe_vm_in_preempt_fence_mode(vm)) { q->lr.context = dma_fence_context_alloc(1); - spin_lock_init(&q->lr.lock); err = xe_vm_add_compute_exec_queue(vm, q); if (XE_IOCTL_DBG(xe, err)) goto put_exec_queue; } + + if (q->vm && q->hwe->hw_engine_group) { + err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q); + if (err) + goto put_exec_queue; + } } - mutex_lock(&xef->exec_queue.lock); + q->xef = xe_file_get(xef); + + /* user id alloc must always be last in ioctl to prevent UAF */ err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); - mutex_unlock(&xef->exec_queue.lock); if (err) goto kill_exec_queue; args->exec_queue_id = id; - q->xef = xe_file_get(xef); return 0; @@ -794,6 +789,15 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; } +/** + * xe_exec_queue_kill - permanently stop all execution from an exec queue + * @q: The exec queue + * + * This function permanently stops all activity on an exec queue. If the queue + * is actively executing on the HW, it will be kicked off the engine; any + * pending jobs are discarded and all future submissions are rejected. + * This function is safe to call multiple times. + */ void xe_exec_queue_kill(struct xe_exec_queue *q) { struct xe_exec_queue *eq = q, *next; @@ -826,6 +830,9 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !q)) return -ENOENT; + if (q->vm && q->hwe->hw_engine_group) + xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); + xe_exec_queue_kill(q); trace_xe_exec_queue_close(q); @@ -837,10 +844,12 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q, struct xe_vm *vm) { - if (q->flags & EXEC_QUEUE_FLAG_VM) + if (q->flags & EXEC_QUEUE_FLAG_VM) { lockdep_assert_held(&vm->lock); - else + } else { xe_vm_assert_held(vm); + lockdep_assert_held(&q->hwe->hw_engine_group->mode_sem); + } } /** @@ -852,10 +861,7 @@ void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm) { xe_exec_queue_last_fence_lockdep_assert(q, vm); - if (q->last_fence) { - dma_fence_put(q->last_fence); - q->last_fence = NULL; - } + xe_exec_queue_last_fence_put_unlocked(q); } /** @@ -898,6 +904,33 @@ struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q, } /** + * xe_exec_queue_last_fence_get_for_resume() - Get last fence + * @q: The exec queue + * @vm: The VM the engine does a bind or exec for + * + * Get last fence, takes a ref. Only safe to be called in the context of + * resuming the hw engine group's long-running exec queue, when the group + * semaphore is held. + * + * Returns: last fence if not signaled, dma fence stub if signaled + */ +struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *q, + struct xe_vm *vm) +{ + struct dma_fence *fence; + + lockdep_assert_held_write(&q->hwe->hw_engine_group->mode_sem); + + if (q->last_fence && + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags)) + xe_exec_queue_last_fence_put_unlocked(q); + + fence = q->last_fence ? q->last_fence : dma_fence_get_stub(); + dma_fence_get(fence); + return fence; +} + +/** * xe_exec_queue_last_fence_set() - Set last fence * @q: The exec queue * @vm: The VM the engine does a bind or exec for @@ -914,3 +947,26 @@ void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm, xe_exec_queue_last_fence_put(q, vm); q->last_fence = dma_fence_get(fence); } + +/** + * xe_exec_queue_last_fence_test_dep - Test last fence dependency of queue + * @q: The exec queue + * @vm: The VM the engine does a bind or exec for + * + * Returns: + * -ETIME if there exists an unsignalled last fence dependency, zero otherwise. + */ +int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm) +{ + struct dma_fence *fence; + int err = 0; + + fence = xe_exec_queue_last_fence_get(q, vm); + if (fence) { + err = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) ? + 0 : -ETIME; + dma_fence_put(fence); + } + + return err; +} diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index 289a3a51d2a2..90c7f73eab88 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -20,7 +20,11 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v u64 extensions); struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, struct xe_vm *vm, - enum xe_engine_class class, u32 flags); + enum xe_engine_class class, + u32 flags, u64 extensions); +struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, + struct xe_tile *tile, + u32 flags, u64 extensions); void xe_exec_queue_fini(struct xe_exec_queue *q); void xe_exec_queue_destroy(struct kref *ref); @@ -73,8 +77,12 @@ void xe_exec_queue_last_fence_put(struct xe_exec_queue *e, struct xe_vm *vm); void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *e); struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *e, struct xe_vm *vm); +struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *e, + struct xe_vm *vm); void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm, struct dma_fence *fence); +int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, + struct xe_vm *vm); void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q); #endif diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index a35ce24c9798..7deb480e26af 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -126,8 +126,6 @@ struct xe_exec_queue { u32 seqno; /** @lr.link: link into VM's list of exec queues */ struct list_head link; - /** @lr.lock: preemption fences lock */ - spinlock_t lock; } lr; /** @ops: submission backend exec queue operations */ @@ -142,6 +140,8 @@ struct xe_exec_queue { * Protected by @vm's resv. Unused if @vm == NULL. */ u64 tlb_flush_seqno; + /** @hw_engine_group_link: link into exec queues in the same hw engine group */ + struct list_head hw_engine_group_link; /** @lrc: logical ring context for this exec queue */ struct xe_lrc *lrc[]; }; @@ -171,9 +171,11 @@ struct xe_exec_queue_ops { int (*suspend)(struct xe_exec_queue *q); /** * @suspend_wait: Wait for an exec queue to suspend executing, should be - * call after suspend. + * call after suspend. In dma-fencing path thus must return within a + * reasonable amount of time. -ETIME return shall indicate an error + * waiting for suspend resulting in associated VM getting killed. */ - void (*suspend_wait)(struct xe_exec_queue *q); + int (*suspend_wait)(struct xe_exec_queue *q); /** * @resume: Resume exec queue execution, exec queue must be in a suspended * state and dma fence returned from most recent suspend call must be diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index db906117db6d..6a59165b9569 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -123,8 +123,8 @@ static void __xe_execlist_port_idle(struct xe_execlist_port *port) if (!port->running_exl) return; - xe_lrc_write_ring(port->hwe->kernel_lrc, noop, sizeof(noop)); - __start_lrc(port->hwe, port->hwe->kernel_lrc, 0); + xe_lrc_write_ring(port->lrc, noop, sizeof(noop)); + __start_lrc(port->hwe, port->lrc, 0); port->running_exl = NULL; } @@ -254,14 +254,22 @@ struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe, { struct drm_device *drm = &xe->drm; struct xe_execlist_port *port; - int i; + int i, err; port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL); - if (!port) - return ERR_PTR(-ENOMEM); + if (!port) { + err = -ENOMEM; + goto err; + } port->hwe = hwe; + port->lrc = xe_lrc_create(hwe, NULL, SZ_16K); + if (IS_ERR(port->lrc)) { + err = PTR_ERR(port->lrc); + goto err; + } + spin_lock_init(&port->lock); for (i = 0; i < ARRAY_SIZE(port->active); i++) INIT_LIST_HEAD(&port->active[i]); @@ -277,6 +285,9 @@ struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe, add_timer(&port->irq_fail); return port; + +err: + return ERR_PTR(err); } void xe_execlist_port_destroy(struct xe_execlist_port *port) @@ -287,6 +298,8 @@ void xe_execlist_port_destroy(struct xe_execlist_port *port) spin_lock_irq(>_to_xe(port->hwe->gt)->irq.lock); port->hwe->irq_handler = NULL; spin_unlock_irq(>_to_xe(port->hwe->gt)->irq.lock); + + xe_lrc_put(port->lrc); } static struct dma_fence * @@ -422,10 +435,11 @@ static int execlist_exec_queue_suspend(struct xe_exec_queue *q) return 0; } -static void execlist_exec_queue_suspend_wait(struct xe_exec_queue *q) +static int execlist_exec_queue_suspend_wait(struct xe_exec_queue *q) { /* NIY */ + return 0; } static void execlist_exec_queue_resume(struct xe_exec_queue *q) diff --git a/drivers/gpu/drm/xe/xe_execlist_types.h b/drivers/gpu/drm/xe/xe_execlist_types.h index f94bbf4c53e4..415140936f11 100644 --- a/drivers/gpu/drm/xe/xe_execlist_types.h +++ b/drivers/gpu/drm/xe/xe_execlist_types.h @@ -27,6 +27,8 @@ struct xe_execlist_port { struct xe_execlist_exec_queue *running_exl; struct timer_list irq_fail; + + struct xe_lrc *lrc; }; struct xe_execlist_exec_queue { diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index b263fff15273..7d9fc489dcb8 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -115,9 +115,15 @@ static int __domain_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain, XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, &value, true); if (ret) - xe_gt_notice(gt, "Force wake domain %d failed to ack %s (%pe) reg[%#x] = %#x\n", - domain->id, str_wake_sleep(wake), ERR_PTR(ret), - domain->reg_ack.addr, value); + xe_gt_err(gt, "Force wake domain %d failed to ack %s (%pe) reg[%#x] = %#x\n", + domain->id, str_wake_sleep(wake), ERR_PTR(ret), + domain->reg_ack.addr, value); + if (value == ~0) { + xe_gt_err(gt, + "Force wake domain %d: %s. MMIO unreliable (forcewake register returns 0xFFFFFFFF)!\n", + domain->id, str_wake_sleep(wake)); + ret = -EIO; + } return ret; } diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c index 106ee2b027f0..904cf47925aa 100644 --- a/drivers/gpu/drm/xe/xe_gen_wa_oob.c +++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c @@ -97,19 +97,27 @@ static int parse(FILE *input, FILE *csource, FILE *cheader) if (name) { fprintf(cheader, "\tXE_WA_OOB_%s = %u,\n", name, idx); - fprintf(csource, "{ XE_RTP_NAME(\"%s\"), XE_RTP_RULES(%s) },\n", + + /* Close previous entry before starting a new one */ + if (idx) + fprintf(csource, ") },\n"); + + fprintf(csource, "{ XE_RTP_NAME(\"%s\"),\n XE_RTP_RULES(%s", name, rules); + idx++; } else { - fprintf(csource, "{ XE_RTP_NAME(NULL), XE_RTP_RULES(%s) },\n", - rules); + fprintf(csource, ", OR,\n\t%s", rules); } - idx++; lineno++; if (!is_continuation) prev_name = name; } + /* Close last entry */ + if (idx) + fprintf(csource, ") },\n"); + fprintf(cheader, "\t_XE_WA_OOB_COUNT = %u\n", idx); return 0; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 0cdbc1296e88..ff19eca5d358 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -30,6 +30,39 @@ #include "xe_wa.h" #include "xe_wopcm.h" +/** + * DOC: Global Graphics Translation Table (GGTT) + * + * Xe GGTT implements the support for a Global Virtual Address space that is used + * for resources that are accessible to privileged (i.e. kernel-mode) processes, + * and not tied to a specific user-level process. For example, the Graphics + * micro-Controller (GuC) and Display Engine (if present) utilize this Global + * address space. + * + * The Global GTT (GGTT) translates from the Global virtual address to a physical + * address that can be accessed by HW. The GGTT is a flat, single-level table. + * + * Xe implements a simplified version of the GGTT specifically managing only a + * certain range of it that goes from the Write Once Protected Content Memory (WOPCM) + * Layout to a predefined GUC_GGTT_TOP. This approach avoids complications related to + * the GuC (Graphics Microcontroller) hardware limitations. The GuC address space + * is limited on both ends of the GGTT, because the GuC shim HW redirects + * accesses to those addresses to other HW areas instead of going through the + * GGTT. On the bottom end, the GuC can't access offsets below the WOPCM size, + * while on the top side the limit is fixed at GUC_GGTT_TOP. To keep things + * simple, instead of checking each object to see if they are accessed by GuC or + * not, we just exclude those areas from the allocator. Additionally, to simplify + * the driver load, we use the maximum WOPCM size in this logic instead of the + * programmed one, so we don't need to wait until the actual size to be + * programmed is determined (which requires FW fetch) before initializing the + * GGTT. These simplifications might waste space in the GGTT (about 20-25 MBs + * depending on the platform) but we can live with this. Another benefit of this + * is the GuC bootrom can't access anything below the WOPCM max size so anything + * the bootrom needs to access (e.g. a RSA key) needs to be placed in the GGTT + * above the WOPCM max size. Starting the GGTT allocations above the WOPCM max + * give us the correct placement for free. + */ + static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, u16 pat_index) { @@ -128,11 +161,12 @@ static void ggtt_fini_early(struct drm_device *drm, void *arg) { struct xe_ggtt *ggtt = arg; + destroy_workqueue(ggtt->wq); mutex_destroy(&ggtt->lock); drm_mm_takedown(&ggtt->mm); } -static void ggtt_fini(struct drm_device *drm, void *arg) +static void ggtt_fini(void *arg) { struct xe_ggtt *ggtt = arg; @@ -164,12 +198,16 @@ static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = { .ggtt_set_pte = xe_ggtt_set_pte_and_flush, }; -/* - * Early GGTT initialization, which allows to create new mappings usable by the - * GuC. - * Mappings are not usable by the HW engines, as it doesn't have scratch / +/** + * xe_ggtt_init_early - Early GGTT initialization + * @ggtt: the &xe_ggtt to be initialized + * + * It allows to create new mappings usable by the GuC. + * Mappings are not usable by the HW engines, as it doesn't have scratch nor * initial clear done to it yet. That will happen in the regular, non-early - * GGTT init. + * GGTT initialization. + * + * Return: 0 on success or a negative error code on failure. */ int xe_ggtt_init_early(struct xe_ggtt *ggtt) { @@ -194,29 +232,6 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ggtt->flags |= XE_GGTT_FLAGS_64K; - /* - * 8B per entry, each points to a 4KB page. - * - * The GuC address space is limited on both ends of the GGTT, because - * the GuC shim HW redirects accesses to those addresses to other HW - * areas instead of going through the GGTT. On the bottom end, the GuC - * can't access offsets below the WOPCM size, while on the top side the - * limit is fixed at GUC_GGTT_TOP. To keep things simple, instead of - * checking each object to see if they are accessed by GuC or not, we - * just exclude those areas from the allocator. Additionally, to - * simplify the driver load, we use the maximum WOPCM size in this logic - * instead of the programmed one, so we don't need to wait until the - * actual size to be programmed is determined (which requires FW fetch) - * before initializing the GGTT. These simplifications might waste space - * in the GGTT (about 20-25 MBs depending on the platform) but we can - * live with this. - * - * Another benifit of this is the GuC bootrom can't access anything - * below the WOPCM max size so anything the bootom needs to access (e.g. - * a RSA key) needs to be placed in the GGTT above the WOPCM max size. - * Starting the GGTT allocations above the WOPCM max give us the correct - * placement for free. - */ if (ggtt->size > GUC_GGTT_TOP) ggtt->size = GUC_GGTT_TOP; @@ -228,6 +243,8 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) else ggtt->pt_ops = &xelp_pt_ops; + ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, 0); + drm_mm_init(&ggtt->mm, xe_wopcm_size(xe), ggtt->size - xe_wopcm_size(xe)); mutex_init(&ggtt->lock); @@ -262,6 +279,77 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) mutex_unlock(&ggtt->lock); } +static void ggtt_node_remove(struct xe_ggtt_node *node) +{ + struct xe_ggtt *ggtt = node->ggtt; + struct xe_device *xe = tile_to_xe(ggtt->tile); + bool bound; + int idx; + + bound = drm_dev_enter(&xe->drm, &idx); + + mutex_lock(&ggtt->lock); + if (bound) + xe_ggtt_clear(ggtt, node->base.start, node->base.size); + drm_mm_remove_node(&node->base); + node->base.size = 0; + mutex_unlock(&ggtt->lock); + + if (!bound) + goto free_node; + + if (node->invalidate_on_remove) + xe_ggtt_invalidate(ggtt); + + drm_dev_exit(idx); + +free_node: + xe_ggtt_node_fini(node); +} + +static void ggtt_node_remove_work_func(struct work_struct *work) +{ + struct xe_ggtt_node *node = container_of(work, typeof(*node), + delayed_removal_work); + struct xe_device *xe = tile_to_xe(node->ggtt->tile); + + xe_pm_runtime_get(xe); + ggtt_node_remove(node); + xe_pm_runtime_put(xe); +} + +/** + * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT + * @node: the &xe_ggtt_node to be removed + * @invalidate: if node needs invalidation upon removal + */ +void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate) +{ + struct xe_ggtt *ggtt; + struct xe_device *xe; + + if (!node || !node->ggtt) + return; + + ggtt = node->ggtt; + xe = tile_to_xe(ggtt->tile); + + node->invalidate_on_remove = invalidate; + + if (xe_pm_runtime_get_if_active(xe)) { + ggtt_node_remove(node); + xe_pm_runtime_put(xe); + } else { + queue_work(ggtt->wq, &node->delayed_removal_work); + } +} + +/** + * xe_ggtt_init - Regular non-early GGTT initialization + * @ggtt: the &xe_ggtt to be initialized + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_init(struct xe_ggtt *ggtt) { struct xe_device *xe = tile_to_xe(ggtt->tile); @@ -289,7 +377,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) xe_ggtt_initial_clear(ggtt); - return drmm_add_action_or_reset(&xe->drm, ggtt_fini, ggtt); + return devm_add_action_or_reset(xe->drm.dev, ggtt_fini, ggtt); err: ggtt->scratch = NULL; return err; @@ -309,31 +397,21 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) static void xe_ggtt_invalidate(struct xe_ggtt *ggtt) { + struct xe_device *xe = tile_to_xe(ggtt->tile); + + /* + * XXX: Barrier for GGTT pages. Unsure exactly why this required but + * without this LNL is having issues with the GuC reading scratch page + * vs. correct GGTT page. Not particularly a hot code path so blindly + * do a mmio read here which results in GuC reading correct GGTT page. + */ + xe_mmio_read32(xe_root_mmio_gt(xe), VF_CAP_REG); + /* Each GT in a tile has its own TLB to cache GGTT lookups */ ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt); ggtt_invalidate_gt_tlb(ggtt->tile->media_gt); } -void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix) -{ - u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; - u64 addr, scratch_pte; - - scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, pat_index); - - printk("%sGlobal GTT:", prefix); - for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) { - unsigned int i = addr / XE_PAGE_SIZE; - - xe_tile_assert(ggtt->tile, addr <= U32_MAX); - if (ggtt->gsm[i] == scratch_pte) - continue; - - printk("%s ggtt[0x%08x] = 0x%016llx", - prefix, (u32)addr, ggtt->gsm[i]); - } -} - static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, const struct drm_mm_node *node, const char *description) { @@ -347,88 +425,180 @@ static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, } /** - * xe_ggtt_balloon - prevent allocation of specified GGTT addresses - * @ggtt: the &xe_ggtt where we want to make reservation + * xe_ggtt_node_insert_balloon - prevent allocation of specified GGTT addresses + * @node: the &xe_ggtt_node to hold reserved GGTT node * @start: the starting GGTT address of the reserved region * @end: then end GGTT address of the reserved region - * @node: the &drm_mm_node to hold reserved GGTT node * - * Use xe_ggtt_deballoon() to release a reserved GGTT node. + * Use xe_ggtt_node_remove_balloon() to release a reserved GGTT node. * * Return: 0 on success or a negative error code on failure. */ -int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 end, struct drm_mm_node *node) +int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 end) { + struct xe_ggtt *ggtt = node->ggtt; int err; xe_tile_assert(ggtt->tile, start < end); xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE)); xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE)); - xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(node)); + xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(&node->base)); - node->color = 0; - node->start = start; - node->size = end - start; + node->base.color = 0; + node->base.start = start; + node->base.size = end - start; mutex_lock(&ggtt->lock); - err = drm_mm_reserve_node(&ggtt->mm, node); + err = drm_mm_reserve_node(&ggtt->mm, &node->base); mutex_unlock(&ggtt->lock); if (xe_gt_WARN(ggtt->tile->primary_gt, err, "Failed to balloon GGTT %#llx-%#llx (%pe)\n", - node->start, node->start + node->size, ERR_PTR(err))) + node->base.start, node->base.start + node->base.size, ERR_PTR(err))) return err; - xe_ggtt_dump_node(ggtt, node, "balloon"); + xe_ggtt_dump_node(ggtt, &node->base, "balloon"); return 0; } /** - * xe_ggtt_deballoon - release a reserved GGTT region - * @ggtt: the &xe_ggtt where reserved node belongs - * @node: the &drm_mm_node with reserved GGTT region + * xe_ggtt_node_remove_balloon - release a reserved GGTT region + * @node: the &xe_ggtt_node with reserved GGTT region * - * See xe_ggtt_balloon() for details. + * See xe_ggtt_node_insert_balloon() for details. */ -void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node) +void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node) { - if (!drm_mm_node_allocated(node)) + if (!node || !node->ggtt) return; - xe_ggtt_dump_node(ggtt, node, "deballoon"); + if (!drm_mm_node_allocated(&node->base)) + goto free_node; - mutex_lock(&ggtt->lock); - drm_mm_remove_node(node); - mutex_unlock(&ggtt->lock); + xe_ggtt_dump_node(node->ggtt, &node->base, "remove-balloon"); + + mutex_lock(&node->ggtt->lock); + drm_mm_remove_node(&node->base); + mutex_unlock(&node->ggtt->lock); + +free_node: + xe_ggtt_node_fini(node); } -int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node, - u32 size, u32 align, u32 mm_flags) +/** + * xe_ggtt_node_insert_locked - Locked version to insert a &xe_ggtt_node into the GGTT + * @node: the &xe_ggtt_node to be inserted + * @size: size of the node + * @align: alignment constrain of the node + * @mm_flags: flags to control the node behavior + * + * It cannot be called without first having called xe_ggtt_init() once. + * To be used in cases where ggtt->lock is already taken. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node, + u32 size, u32 align, u32 mm_flags) { - return drm_mm_insert_node_generic(&ggtt->mm, node, size, align, 0, + return drm_mm_insert_node_generic(&node->ggtt->mm, &node->base, size, align, 0, mm_flags); } -int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, - u32 size, u32 align) +/** + * xe_ggtt_node_insert - Insert a &xe_ggtt_node into the GGTT + * @node: the &xe_ggtt_node to be inserted + * @size: size of the node + * @align: alignment constrain of the node + * + * It cannot be called without first having called xe_ggtt_init() once. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align) { int ret; - mutex_lock(&ggtt->lock); - ret = xe_ggtt_insert_special_node_locked(ggtt, node, size, - align, DRM_MM_INSERT_HIGH); - mutex_unlock(&ggtt->lock); + if (!node || !node->ggtt) + return -ENOENT; + + mutex_lock(&node->ggtt->lock); + ret = xe_ggtt_node_insert_locked(node, size, align, + DRM_MM_INSERT_HIGH); + mutex_unlock(&node->ggtt->lock); return ret; } +/** + * xe_ggtt_node_init - Initialize %xe_ggtt_node struct + * @ggtt: the &xe_ggtt where the new node will later be inserted/reserved. + * + * This function will allocated the struct %xe_ggtt_node and return it's pointer. + * This struct will then be freed after the node removal upon xe_ggtt_node_remove() + * or xe_ggtt_node_remove_balloon(). + * Having %xe_ggtt_node struct allocated doesn't mean that the node is already allocated + * in GGTT. Only the xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), + * xe_ggtt_node_insert_balloon() will ensure the node is inserted or reserved in GGTT. + * + * Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise. + **/ +struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt) +{ + struct xe_ggtt_node *node = kzalloc(sizeof(*node), GFP_NOFS); + + if (!node) + return ERR_PTR(-ENOMEM); + + INIT_WORK(&node->delayed_removal_work, ggtt_node_remove_work_func); + node->ggtt = ggtt; + + return node; +} + +/** + * xe_ggtt_node_fini - Forcebly finalize %xe_ggtt_node struct + * @node: the &xe_ggtt_node to be freed + * + * If anything went wrong with either xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), + * or xe_ggtt_node_insert_balloon(); and this @node is not going to be reused, then, + * this function needs to be called to free the %xe_ggtt_node struct + **/ +void xe_ggtt_node_fini(struct xe_ggtt_node *node) +{ + kfree(node); +} + +/** + * xe_ggtt_node_allocated - Check if node is allocated in GGTT + * @node: the &xe_ggtt_node to be inspected + * + * Return: True if allocated, False otherwise. + */ +bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node) +{ + if (!node || !node->ggtt) + return false; + + return drm_mm_node_allocated(&node->base); +} + +/** + * xe_ggtt_map_bo - Map the BO into GGTT + * @ggtt: the &xe_ggtt where node will be mapped + * @bo: the &xe_bo to be mapped + */ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; - u64 start = bo->ggtt_node.start; + u64 start; u64 offset, pte; + if (XE_WARN_ON(!bo->ggtt_node)) + return; + + start = bo->ggtt_node->base.start; + for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index); ggtt->pt_ops->ggtt_set_pte(ggtt, start + offset, pte); @@ -444,9 +614,9 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) alignment = SZ_64K; - if (XE_WARN_ON(bo->ggtt_node.size)) { + if (XE_WARN_ON(bo->ggtt_node)) { /* Someone's already inserted this BO in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size); return 0; } @@ -455,69 +625,111 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, return err; xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); + + bo->ggtt_node = xe_ggtt_node_init(ggtt); + if (IS_ERR(bo->ggtt_node)) { + err = PTR_ERR(bo->ggtt_node); + bo->ggtt_node = NULL; + goto out; + } + mutex_lock(&ggtt->lock); - err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size, + err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node->base, bo->size, alignment, 0, start, end, 0); - if (!err) + if (err) { + xe_ggtt_node_fini(bo->ggtt_node); + bo->ggtt_node = NULL; + } else { xe_ggtt_map_bo(ggtt, bo); + } mutex_unlock(&ggtt->lock); if (!err && bo->flags & XE_BO_FLAG_GGTT_INVALIDATE) xe_ggtt_invalidate(ggtt); + +out: xe_pm_runtime_put(tile_to_xe(ggtt->tile)); return err; } +/** + * xe_ggtt_insert_bo_at - Insert BO at a specific GGTT space + * @ggtt: the &xe_ggtt where bo will be inserted + * @bo: the &xe_bo to be inserted + * @start: address where it will be inserted + * @end: end of the range where it will be inserted + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end) { return __xe_ggtt_insert_bo_at(ggtt, bo, start, end); } +/** + * xe_ggtt_insert_bo - Insert BO into GGTT + * @ggtt: the &xe_ggtt where bo will be inserted + * @bo: the &xe_bo to be inserted + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX); } -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, - bool invalidate) +/** + * xe_ggtt_remove_bo - Remove a BO from the GGTT + * @ggtt: the &xe_ggtt where node will be removed + * @bo: the &xe_bo to be removed + */ +void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { - struct xe_device *xe = tile_to_xe(ggtt->tile); - bool bound; - int idx; - - bound = drm_dev_enter(&xe->drm, &idx); - if (bound) - xe_pm_runtime_get_noresume(xe); - - mutex_lock(&ggtt->lock); - if (bound) - xe_ggtt_clear(ggtt, node->start, node->size); - drm_mm_remove_node(node); - node->size = 0; - mutex_unlock(&ggtt->lock); - - if (!bound) + if (XE_WARN_ON(!bo->ggtt_node)) return; - if (invalidate) - xe_ggtt_invalidate(ggtt); + /* This BO is not currently in the GGTT */ + xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size); - xe_pm_runtime_put(xe); - drm_dev_exit(idx); + xe_ggtt_node_remove(bo->ggtt_node, + bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); } -void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) +/** + * xe_ggtt_largest_hole - Largest GGTT hole + * @ggtt: the &xe_ggtt that will be inspected + * @alignment: minimum alignment + * @spare: If not NULL: in: desired memory size to be spared / out: Adjusted possible spare + * + * Return: size of the largest continuous GGTT region + */ +u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare) { - if (XE_WARN_ON(!bo->ggtt_node.size)) - return; + const struct drm_mm *mm = &ggtt->mm; + const struct drm_mm_node *entry; + u64 hole_min_start = xe_wopcm_size(tile_to_xe(ggtt->tile)); + u64 hole_start, hole_end, hole_size; + u64 max_hole = 0; - /* This BO is not currently in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size); + mutex_lock(&ggtt->lock); - xe_ggtt_remove_node(ggtt, &bo->ggtt_node, - bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); + drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { + hole_start = max(hole_start, hole_min_start); + hole_start = ALIGN(hole_start, alignment); + hole_end = ALIGN_DOWN(hole_end, alignment); + if (hole_start >= hole_end) + continue; + hole_size = hole_end - hole_start; + if (spare) + *spare -= min3(*spare, hole_size, max_hole); + max_hole = max(max_hole, hole_size); + } + + mutex_unlock(&ggtt->lock); + + return max_hole; } #ifdef CONFIG_PCI_IOV @@ -548,22 +760,28 @@ static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node /** * xe_ggtt_assign - assign a GGTT region to the VF - * @ggtt: the &xe_ggtt where the node belongs - * @node: the &drm_mm_node to update + * @node: the &xe_ggtt_node to update * @vfid: the VF identifier * * This function is used by the PF driver to assign a GGTT region to the VF. * In addition to PTE's VFID bits 11:2 also PRESENT bit 0 is set as on some * platforms VFs can't modify that either. */ -void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid) +void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid) { - mutex_lock(&ggtt->lock); - xe_ggtt_assign_locked(ggtt, node, vfid); - mutex_unlock(&ggtt->lock); + mutex_lock(&node->ggtt->lock); + xe_ggtt_assign_locked(node->ggtt, &node->base, vfid); + mutex_unlock(&node->ggtt->lock); } #endif +/** + * xe_ggtt_dump - Dump GGTT for debug + * @ggtt: the &xe_ggtt to be dumped + * @p: the &drm_mm_printer helper handle to be used to dump the information + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p) { int err; @@ -576,3 +794,43 @@ int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p) mutex_unlock(&ggtt->lock); return err; } + +/** + * xe_ggtt_print_holes - Print holes + * @ggtt: the &xe_ggtt to be inspected + * @alignment: min alignment + * @p: the &drm_printer + * + * Print GGTT ranges that are available and return total size available. + * + * Return: Total available size. + */ +u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer *p) +{ + const struct drm_mm *mm = &ggtt->mm; + const struct drm_mm_node *entry; + u64 hole_min_start = xe_wopcm_size(tile_to_xe(ggtt->tile)); + u64 hole_start, hole_end, hole_size; + u64 total = 0; + char buf[10]; + + mutex_lock(&ggtt->lock); + + drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { + hole_start = max(hole_start, hole_min_start); + hole_start = ALIGN(hole_start, alignment); + hole_end = ALIGN_DOWN(hole_end, alignment); + if (hole_start >= hole_end) + continue; + hole_size = hole_end - hole_start; + total += hole_size; + + string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n", + hole_start, hole_end - 1, buf); + } + + mutex_unlock(&ggtt->lock); + + return total; +} diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 6a96fd54bf60..27e7d67de004 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -12,28 +12,30 @@ struct drm_printer; int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init(struct xe_ggtt *ggtt); -void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix); - -int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 size, struct drm_mm_node *node); -void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node); - -int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, - u32 size, u32 align); -int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, - struct drm_mm_node *node, - u32 size, u32 align, u32 mm_flags); -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, - bool invalidate); + +struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt); +void xe_ggtt_node_fini(struct xe_ggtt_node *node); +int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, + u64 start, u64 size); +void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node); + +int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align); +int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node, + u32 size, u32 align, u32 mm_flags); +void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate); +bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node); void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end); void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); +u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare); int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p); +u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer *p); #ifdef CONFIG_PCI_IOV -void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid); +void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid); #endif #endif diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index 2245d88d8f39..cb02b7994a9a 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -13,30 +13,70 @@ struct xe_bo; struct xe_gt; +/** + * struct xe_ggtt - Main GGTT struct + * + * In general, each tile can contains its own Global Graphics Translation Table + * (GGTT) instance. + */ struct xe_ggtt { + /** @tile: Back pointer to tile where this GGTT belongs */ struct xe_tile *tile; - + /** @size: Total size of this GGTT */ u64 size; #define XE_GGTT_FLAGS_64K BIT(0) + /** + * @flags: Flags for this GGTT + * Acceptable flags: + * - %XE_GGTT_FLAGS_64K - if PTE size is 64K. Otherwise, regular is 4K. + */ unsigned int flags; - + /** @scratch: Internal object allocation used as a scratch page */ struct xe_bo *scratch; - + /** @lock: Mutex lock to protect GGTT data */ struct mutex lock; - + /** + * @gsm: The iomem pointer to the actual location of the translation + * table located in the GSM for easy PTE manipulation + */ u64 __iomem *gsm; - + /** @pt_ops: Page Table operations per platform */ const struct xe_ggtt_pt_ops *pt_ops; - + /** @mm: The memory manager used to manage individual GGTT allocations */ struct drm_mm mm; - /** @access_count: counts GGTT writes */ unsigned int access_count; + /** @wq: Dedicated unordered work queue to process node removals */ + struct workqueue_struct *wq; +}; + +/** + * struct xe_ggtt_node - A node in GGTT. + * + * This struct needs to be initialized (only-once) with xe_ggtt_node_init() before any node + * insertion, reservation, or 'ballooning'. + * It will, then, be finalized by either xe_ggtt_node_remove() or xe_ggtt_node_deballoon(). + */ +struct xe_ggtt_node { + /** @ggtt: Back pointer to xe_ggtt where this region will be inserted at */ + struct xe_ggtt *ggtt; + /** @base: A drm_mm_node */ + struct drm_mm_node base; + /** @delayed_removal_work: The work struct for the delayed removal */ + struct work_struct delayed_removal_work; + /** @invalidate_on_remove: If it needs invalidation upon removal */ + bool invalidate_on_remove; }; +/** + * struct xe_ggtt_pt_ops - GGTT Page table operations + * Which can vary from platform to platform. + */ struct xe_ggtt_pt_ops { + /** @pte_encode_bo: Encode PTE address for a given BO */ u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index); + /** @ggtt_set_pte: Directly write into GGTT's PTE */ void (*ggtt_set_pte)(struct xe_ggtt *ggtt, u64 addr, u64 pte); }; diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c index e4ad1d6ce1d5..50361b4638f9 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -15,11 +15,11 @@ static void xe_sched_process_msg_queue_if_ready(struct xe_gpu_scheduler *sched) { struct xe_sched_msg *msg; - spin_lock(&sched->base.job_list_lock); + xe_sched_msg_lock(sched); msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link); if (msg) xe_sched_process_msg_queue(sched); - spin_unlock(&sched->base.job_list_lock); + xe_sched_msg_unlock(sched); } static struct xe_sched_msg * @@ -27,12 +27,12 @@ xe_sched_get_msg(struct xe_gpu_scheduler *sched) { struct xe_sched_msg *msg; - spin_lock(&sched->base.job_list_lock); + xe_sched_msg_lock(sched); msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link); if (msg) - list_del(&msg->link); - spin_unlock(&sched->base.job_list_lock); + list_del_init(&msg->link); + xe_sched_msg_unlock(sched); return msg; } @@ -90,12 +90,24 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched) cancel_work_sync(&sched->work_process_msg); } +void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched) +{ + drm_sched_resume_timeout(&sched->base, sched->base.timeout); +} + void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg) { - spin_lock(&sched->base.job_list_lock); - list_add_tail(&msg->link, &sched->msgs); - spin_unlock(&sched->base.job_list_lock); + xe_sched_msg_lock(sched); + xe_sched_add_msg_locked(sched, msg); + xe_sched_msg_unlock(sched); +} + +void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched, + struct xe_sched_msg *msg) +{ + lockdep_assert_held(&sched->base.job_list_lock); + list_add_tail(&msg->link, &sched->msgs); xe_sched_process_msg_queue(sched); } diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index 10c6bb9c9386..64b2ae6839db 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -22,8 +22,22 @@ void xe_sched_fini(struct xe_gpu_scheduler *sched); void xe_sched_submission_start(struct xe_gpu_scheduler *sched); void xe_sched_submission_stop(struct xe_gpu_scheduler *sched); +void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched); + void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg); +void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched, + struct xe_sched_msg *msg); + +static inline void xe_sched_msg_lock(struct xe_gpu_scheduler *sched) +{ + spin_lock(&sched->base.job_list_lock); +} + +static inline void xe_sched_msg_unlock(struct xe_gpu_scheduler *sched) +{ + spin_unlock(&sched->base.job_list_lock); +} static inline void xe_sched_stop(struct xe_gpu_scheduler *sched) { @@ -49,7 +63,9 @@ xe_sched_invalidate_job(struct xe_sched_job *job, int threshold) static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched, struct xe_sched_job *job) { + spin_lock(&sched->base.job_list_lock); list_add(&job->drm.list, &sched->base.pending_list); + spin_unlock(&sched->base.job_list_lock); } static inline diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index f8239a13fa2b..6fbea70d3d36 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -8,6 +8,7 @@ #include <linux/delay.h> #include <drm/drm_managed.h> +#include <drm/drm_print.h> #include <generated/xe_wa_oob.h> @@ -165,10 +166,11 @@ static int query_compatibility_version(struct xe_gsc *gsc) return err; } - compat->major = version_query_rd(xe, &bo->vmap, rd_offset, compat_major); - compat->minor = version_query_rd(xe, &bo->vmap, rd_offset, compat_minor); + compat->major = version_query_rd(xe, &bo->vmap, rd_offset, proj_major); + compat->minor = version_query_rd(xe, &bo->vmap, rd_offset, compat_major); + compat->patch = version_query_rd(xe, &bo->vmap, rd_offset, compat_minor); - xe_gt_info(gt, "found GSC cv%u.%u\n", compat->major, compat->minor); + xe_gt_info(gt, "found GSC cv%u.%u.%u\n", compat->major, compat->minor, compat->patch); out_bo: xe_bo_unpin_map_no_vm(bo); @@ -260,7 +262,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) struct xe_tile *tile = gt_to_tile(gt); int ret; - if (XE_WA(gt, 14018094691)) { + if (XE_WA(tile->primary_gt, 14018094691)) { ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); /* @@ -278,7 +280,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) ret = gsc_upload(gsc); - if (XE_WA(gt, 14018094691)) + if (XE_WA(tile->primary_gt, 14018094691)) xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); if (ret) @@ -333,9 +335,11 @@ static int gsc_er_complete(struct xe_gt *gt) if (er_status == GSCI_TIMER_STATUS_TIMER_EXPIRED) { /* * XXX: we should trigger an FLR here, but we don't have support - * for that yet. + * for that yet. Since we can't recover from the error, we + * declare the device as wedged. */ xe_gt_err(gt, "GSC ER timed out!\n"); + xe_device_declare_wedged(gt_to_xe(gt)); return -EIO; } @@ -437,7 +441,7 @@ out: return ret; } -static void free_resources(struct drm_device *drm, void *arg) +static void free_resources(void *arg) { struct xe_gsc *gsc = arg; @@ -450,11 +454,6 @@ static void free_resources(struct drm_device *drm, void *arg) xe_exec_queue_put(gsc->q); gsc->q = NULL; } - - if (gsc->private) { - xe_bo_unpin_map_no_vm(gsc->private); - gsc->private = NULL; - } } int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) @@ -474,10 +473,9 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) if (!hwe) return -ENODEV; - bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M, - ttm_bo_type_kernel, - XE_BO_FLAG_STOLEN | - XE_BO_FLAG_GGTT); + bo = xe_managed_bo_create_pin_map(xe, tile, SZ_4M, + XE_BO_FLAG_STOLEN | + XE_BO_FLAG_GGTT); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -501,7 +499,7 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) gsc->q = q; gsc->wq = wq; - err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc); + err = devm_add_action_or_reset(xe->drm.dev, free_resources, gsc); if (err) return err; @@ -519,13 +517,28 @@ out_bo: void xe_gsc_load_start(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_device *xe = gt_to_xe(gt); if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q) return; + /* + * The GSC HW is only reset by driver FLR or D3cold entry. We don't + * support the former at runtime, while the latter is only supported on + * DGFX, for which we don't support GSC. Therefore, if GSC failed to + * load previously there is no need to try again because the HW is + * stuck in the error state. + */ + xe_assert(xe, !IS_DGFX(xe)); + if (xe_uc_fw_is_in_error_state(&gsc->fw)) + return; + /* GSC FW survives GT reset and D3Hot */ if (gsc_fw_is_loaded(gt)) { - xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); + if (xe_gsc_proxy_init_done(gsc)) + xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_RUNNING); + else + xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); return; } @@ -577,3 +590,35 @@ void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep) msleep(200); } } + +/** + * xe_gsc_print_info - print info about GSC FW status + * @gsc: the GSC structure + * @p: the printer to be used to print the info + */ +void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + int err; + + xe_uc_fw_print(&gsc->fw, p); + + drm_printf(p, "\tfound security version %u\n", gsc->security_version); + + if (!xe_uc_fw_is_enabled(&gsc->fw)) + return; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); + if (err) + return; + + drm_printf(p, "\nHECI1 FWSTS: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", + xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)), + xe_mmio_read32(gt, HECI_FWSTS2(MTL_GSC_HECI1_BASE)), + xe_mmio_read32(gt, HECI_FWSTS3(MTL_GSC_HECI1_BASE)), + xe_mmio_read32(gt, HECI_FWSTS4(MTL_GSC_HECI1_BASE)), + xe_mmio_read32(gt, HECI_FWSTS5(MTL_GSC_HECI1_BASE)), + xe_mmio_read32(gt, HECI_FWSTS6(MTL_GSC_HECI1_BASE))); + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); +} diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h index 1c7a623faf11..e282b9ef6ec4 100644 --- a/drivers/gpu/drm/xe/xe_gsc.h +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -8,6 +8,7 @@ #include <linux/types.h> +struct drm_printer; struct xe_gsc; struct xe_gt; struct xe_hw_engine; @@ -21,4 +22,6 @@ void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec); void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep); +void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p); + #endif diff --git a/drivers/gpu/drm/xe/xe_gsc_debugfs.c b/drivers/gpu/drm/xe/xe_gsc_debugfs.c new file mode 100644 index 000000000000..461d7e99c2b3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gsc_debugfs.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_gsc_debugfs.h" + +#include <drm/drm_debugfs.h> +#include <drm/drm_managed.h> + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gsc.h" +#include "xe_macros.h" +#include "xe_pm.h" + +static struct xe_gt * +gsc_to_gt(struct xe_gsc *gsc) +{ + return container_of(gsc, struct xe_gt, uc.gsc); +} + +static struct xe_device * +gsc_to_xe(struct xe_gsc *gsc) +{ + return gt_to_xe(gsc_to_gt(gsc)); +} + +static struct xe_gsc *node_to_gsc(struct drm_info_node *node) +{ + return node->info_ent->data; +} + +static int gsc_info(struct seq_file *m, void *data) +{ + struct xe_gsc *gsc = node_to_gsc(m->private); + struct xe_device *xe = gsc_to_xe(gsc); + struct drm_printer p = drm_seq_file_printer(m); + + xe_pm_runtime_get(xe); + xe_gsc_print_info(gsc, &p); + xe_pm_runtime_put(xe); + + return 0; +} + +static const struct drm_info_list debugfs_list[] = { + {"gsc_info", gsc_info, 0}, +}; + +void xe_gsc_debugfs_register(struct xe_gsc *gsc, struct dentry *parent) +{ + struct drm_minor *minor = gsc_to_xe(gsc)->drm.primary; + struct drm_info_list *local; + int i; + +#define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)) + local = drmm_kmalloc(&gsc_to_xe(gsc)->drm, DEBUGFS_SIZE, GFP_KERNEL); + if (!local) + return; + + memcpy(local, debugfs_list, DEBUGFS_SIZE); +#undef DEBUGFS_SIZE + + for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i) + local[i].data = gsc; + + drm_debugfs_create_files(local, + ARRAY_SIZE(debugfs_list), + parent, minor); +} diff --git a/drivers/gpu/drm/xe/xe_gsc_debugfs.h b/drivers/gpu/drm/xe/xe_gsc_debugfs.h new file mode 100644 index 000000000000..c2e2645dc705 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gsc_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GSC_DEBUGFS_H_ +#define _XE_GSC_DEBUGFS_H_ + +struct dentry; +struct xe_gsc; + +void xe_gsc_debugfs_register(struct xe_gsc *gsc, struct dentry *parent); + +#endif diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index aa812a2bc3ed..2d6ea8c01445 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -62,11 +62,6 @@ gsc_to_gt(struct xe_gsc *gsc) return container_of(gsc, struct xe_gt, uc.gsc); } -static inline struct xe_device *kdev_to_xe(struct device *kdev) -{ - return dev_get_drvdata(kdev); -} - bool xe_gsc_proxy_init_done(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); @@ -345,7 +340,7 @@ void xe_gsc_proxy_irq_handler(struct xe_gsc *gsc, u32 iir) static int xe_gsc_proxy_component_bind(struct device *xe_kdev, struct device *mei_kdev, void *data) { - struct xe_device *xe = kdev_to_xe(xe_kdev); + struct xe_device *xe = kdev_to_xe_device(xe_kdev); struct xe_gt *gt = xe->tiles[0].media_gt; struct xe_gsc *gsc = >->uc.gsc; @@ -360,7 +355,7 @@ static int xe_gsc_proxy_component_bind(struct device *xe_kdev, static void xe_gsc_proxy_component_unbind(struct device *xe_kdev, struct device *mei_kdev, void *data) { - struct xe_device *xe = kdev_to_xe(xe_kdev); + struct xe_device *xe = kdev_to_xe_device(xe_kdev); struct xe_gt *gt = xe->tiles[0].media_gt; struct xe_gsc *gsc = >->uc.gsc; @@ -376,27 +371,6 @@ static const struct component_ops xe_gsc_proxy_component_ops = { .unbind = xe_gsc_proxy_component_unbind, }; -static void proxy_channel_free(struct drm_device *drm, void *arg) -{ - struct xe_gsc *gsc = arg; - - if (!gsc->proxy.bo) - return; - - if (gsc->proxy.to_csme) { - kfree(gsc->proxy.to_csme); - gsc->proxy.to_csme = NULL; - gsc->proxy.from_csme = NULL; - } - - if (gsc->proxy.bo) { - iosys_map_clear(&gsc->proxy.to_gsc); - iosys_map_clear(&gsc->proxy.from_gsc); - xe_bo_unpin_map_no_vm(gsc->proxy.bo); - gsc->proxy.bo = NULL; - } -} - static int proxy_channel_alloc(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); @@ -405,18 +379,15 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) struct xe_bo *bo; void *csme; - csme = kzalloc(GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL); + csme = drmm_kzalloc(&xe->drm, GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL); if (!csme) return -ENOMEM; - bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_PROXY_CHANNEL_SIZE, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT); - if (IS_ERR(bo)) { - kfree(csme); + bo = xe_managed_bo_create_pin_map(xe, tile, GSC_PROXY_CHANNEL_SIZE, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT); + if (IS_ERR(bo)) return PTR_ERR(bo); - } gsc->proxy.bo = bo; gsc->proxy.to_gsc = IOSYS_MAP_INIT_OFFSET(&bo->vmap, 0); @@ -424,7 +395,7 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) gsc->proxy.to_csme = csme; gsc->proxy.from_csme = csme + GSC_PROXY_BUFFER_SIZE; - return drmm_add_action_or_reset(&xe->drm, proxy_channel_free, gsc); + return 0; } /** diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 31b2e64c70c6..d5fd6a089b7c 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -8,7 +8,8 @@ #include <linux/minmax.h> #include <drm/drm_managed.h> -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> + #include <generated/xe_wa_oob.h> #include "instructions/xe_gfxpipe_commands.h" @@ -45,7 +46,6 @@ #include "xe_migrate.h" #include "xe_mmio.h" #include "xe_pat.h" -#include "xe_pcode.h" #include "xe_pm.h" #include "xe_mocs.h" #include "xe_reg_sr.h" @@ -95,6 +95,50 @@ void xe_gt_sanitize(struct xe_gt *gt) gt->uc.guc.submission_state.enabled = false; } +static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) +{ + u32 reg; + int err; + + if (!XE_WA(gt, 16023588340)) + return; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (WARN_ON(err)) + return; + + if (!xe_gt_is_media_type(gt)) { + reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); + reg |= CG_DIS_CNTLBUS; + xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); + } + + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} + +static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) +{ + u32 reg; + int err; + + if (!XE_WA(gt, 16023588340)) + return; + + if (xe_gt_is_media_type(gt)) + return; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (WARN_ON(err)) + return; + + reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); + reg &= ~CG_DIS_CNTLBUS; + xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} + /** * xe_gt_remove() - Clean up the GT structures before driver removal * @gt: the GT object @@ -111,6 +155,8 @@ void xe_gt_remove(struct xe_gt *gt) for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) xe_hw_fence_irq_finish(>->fence_irq[i]); + + xe_gt_disable_host_l2_vram(gt); } static void gt_reset_worker(struct work_struct *w); @@ -338,7 +384,7 @@ int xe_gt_init_early(struct xe_gt *gt) xe_tuning_process_gt(gt); xe_force_wake_init_gt(gt, gt_to_fw(gt)); - xe_pcode_init(gt); + spin_lock_init(>->global_invl_lock); return 0; } @@ -519,6 +565,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) xe_gt_topology_init(gt); xe_gt_mcr_init(gt); + xe_gt_enable_host_l2_vram(gt); out_fw: xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); @@ -643,6 +690,8 @@ static int do_gt_restart(struct xe_gt *gt) xe_pat_init(gt); + xe_gt_enable_host_l2_vram(gt); + xe_gt_mcr_set_implicit_defaults(gt); xe_reg_sr_apply_mmio(>->reg_sr, gt); @@ -702,12 +751,13 @@ static int gt_reset(struct xe_gt *gt) xe_gt_info(gt, "reset started\n"); + xe_pm_runtime_get(gt_to_xe(gt)); + if (xe_fault_inject_gt_reset()) { err = -ECANCELED; goto err_fail; } - xe_pm_runtime_get(gt_to_xe(gt)); xe_gt_sanitize(gt); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); @@ -742,11 +792,11 @@ err_out: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); err_msg: XE_WARN_ON(xe_uc_start(>->uc)); - xe_pm_runtime_put(gt_to_xe(gt)); err_fail: xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); xe_device_declare_wedged(gt_to_xe(gt)); + xe_pm_runtime_put(gt_to_xe(gt)); return err; } @@ -796,6 +846,8 @@ int xe_gt_suspend(struct xe_gt *gt) xe_gt_idle_disable_pg(gt); + xe_gt_disable_host_l2_vram(gt); + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); xe_gt_dbg(gt, "suspended\n"); @@ -821,7 +873,9 @@ int xe_gt_sanitize_freq(struct xe_gt *gt) int ret = 0; if ((!xe_uc_fw_is_available(>->uc.gsc.fw) || - xe_uc_fw_is_loaded(>->uc.gsc.fw)) && XE_WA(gt, 22019338487)) + xe_uc_fw_is_loaded(>->uc.gsc.fw) || + xe_uc_fw_is_in_error_state(>->uc.gsc.fw)) && + XE_WA(gt, 22019338487)) ret = xe_guc_pc_restore_stashed_freq(>->uc.guc.pc); return ret; diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 8b1a5027dcf2..ee138e9768a2 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -6,6 +6,8 @@ #ifndef _XE_GT_H_ #define _XE_GT_H_ +#include <linux/fault-inject.h> + #include <drm/drm_util.h> #include "xe_device.h" @@ -19,19 +21,11 @@ #define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0) -#ifdef CONFIG_FAULT_INJECTION -#include <linux/fault-inject.h> /* XXX: fault-inject.h is broken */ extern struct fault_attr gt_reset_failure; static inline bool xe_fault_inject_gt_reset(void) { return should_fail(>_reset_failure, 1); } -#else -static inline bool xe_fault_inject_gt_reset(void) -{ - return false; -} -#endif struct xe_gt *xe_gt_alloc(struct xe_tile *tile); int xe_gt_init_hwconfig(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index d2e4dc3aaf61..ffcbd05671fc 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -68,6 +68,12 @@ static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) } } + /* + * Mask bits need to be set for the register. Though only Xe2+ + * platforms require setting of mask bits, it won't harm for older + * platforms as these bits are unused there. + */ + mode |= CCS_MODE_CSLICE_0_3_MASK << 16; xe_mmio_write32(gt, CCS_MODE, mode); xe_gt_dbg(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n", @@ -133,9 +139,10 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, } /* CCS mode can only be updated when there are no drm clients */ - spin_lock(&xe->clients.lock); - if (xe->clients.count) { - spin_unlock(&xe->clients.lock); + mutex_lock(&xe->drm.filelist_mutex); + if (!list_empty(&xe->drm.filelist)) { + mutex_unlock(&xe->drm.filelist_mutex); + xe_gt_dbg(gt, "Rejecting compute mode change as there are active drm clients\n"); return -EBUSY; } @@ -146,7 +153,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, xe_gt_reset_async(gt); } - spin_unlock(&xe->clients.lock); + mutex_unlock(&xe->drm.filelist_mutex); return count; } diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 5e7fd937917a..8f95d3a5949b 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -17,7 +17,9 @@ #include "xe_gt_mcr.h" #include "xe_gt_sriov_pf_debugfs.h" #include "xe_gt_sriov_vf_debugfs.h" +#include "xe_gt_stats.h" #include "xe_gt_topology.h" +#include "xe_guc_hwconfig.h" #include "xe_hw_engine.h" #include "xe_lrc.h" #include "xe_macros.h" @@ -269,6 +271,15 @@ static int vecs_default_lrc(struct xe_gt *gt, struct drm_printer *p) return 0; } +static int hwconfig(struct xe_gt *gt, struct drm_printer *p) +{ + xe_pm_runtime_get(gt_to_xe(gt)); + xe_guc_hwconfig_dump(>->uc.guc, p); + xe_pm_runtime_put(gt_to_xe(gt)); + + return 0; +} + static const struct drm_info_list debugfs_list[] = { {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines}, {"force_reset", .show = xe_gt_debugfs_simple_show, .data = force_reset}, @@ -286,6 +297,8 @@ static const struct drm_info_list debugfs_list[] = { {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc}, {"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc}, {"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc}, + {"stats", .show = xe_gt_debugfs_simple_show, .data = xe_gt_stats_print_info}, + {"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig}, }; void xe_gt_debugfs_register(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 68a5778b4319..ab76973f3e1e 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -237,11 +237,11 @@ int xe_gt_freq_init(struct xe_gt *gt) if (!gt->freq) return -ENOMEM; - err = devm_add_action(xe->drm.dev, freq_fini, gt->freq); + err = sysfs_create_files(gt->freq, freq_attrs); if (err) return err; - err = sysfs_create_files(gt->freq, freq_attrs); + err = devm_add_action_or_reset(xe->drm.dev, freq_fini, gt->freq); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 6d948a469126..c834f64b0178 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -8,8 +8,10 @@ #include "regs/xe_gt_regs.h" #include "xe_assert.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_gt_topology.h" #include "xe_gt_types.h" +#include "xe_guc_hwconfig.h" #include "xe_mmio.h" #include "xe_sriov.h" @@ -297,6 +299,36 @@ static void init_steering_mslice(struct xe_gt *gt) static unsigned int dss_per_group(struct xe_gt *gt) { + struct xe_guc *guc = >->uc.guc; + u32 max_slices = 0, max_subslices = 0; + int ret; + + /* + * Try to query the GuC's hwconfig table for the maximum number of + * slices and subslices. These don't reflect the platform's actual + * slice/DSS counts, just the physical layout by which we should + * determine the steering targets. On older platforms with older GuC + * firmware releases it's possible that these attributes may not be + * included in the table, so we can always fall back to the old + * hardcoded layouts. + */ +#define HWCONFIG_ATTR_MAX_SLICES 1 +#define HWCONFIG_ATTR_MAX_SUBSLICES 70 + + ret = xe_guc_hwconfig_lookup_u32(guc, HWCONFIG_ATTR_MAX_SLICES, + &max_slices); + if (ret < 0 || max_slices == 0) + goto fallback; + + ret = xe_guc_hwconfig_lookup_u32(guc, HWCONFIG_ATTR_MAX_SUBSLICES, + &max_subslices); + if (ret < 0 || max_subslices == 0) + goto fallback; + + return DIV_ROUND_UP(max_subslices, max_slices); + +fallback: + xe_gt_dbg(gt, "GuC hwconfig cannot provide dss/slice; using typical fallback values\n"); if (gt_to_xe(gt)->info.platform == XE_PVC) return 8; else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) @@ -314,16 +346,16 @@ static unsigned int dss_per_group(struct xe_gt *gt) */ void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance) { - int dss_per_grp = dss_per_group(gt); - xe_gt_assert(gt, dss < XE_MAX_DSS_FUSE_BITS); - *group = dss / dss_per_grp; - *instance = dss % dss_per_grp; + *group = dss / gt->steering_dss_per_grp; + *instance = dss % gt->steering_dss_per_grp; } static void init_steering_dss(struct xe_gt *gt) { + gt->steering_dss_per_grp = dss_per_group(gt); + xe_gt_mcr_get_dss_steering(gt, min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0), xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0)), @@ -407,7 +439,7 @@ void xe_gt_mcr_init(struct xe_gt *gt) if (gt->info.type == XE_GT_TYPE_MEDIA) { drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13); - if (MEDIA_VER(xe) >= 20) { + if (MEDIA_VERx100(xe) >= 1301) { gt->steering[OADDRM].ranges = xe2lpm_gpmxmt_steering_table; gt->steering[INSTANCE0].ranges = xe2lpm_instance0_steering_table; } else { diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 9292d5468868..79c426dc2505 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -185,6 +185,21 @@ unlock_dma_resv: return err; } +static struct xe_vm *asid_to_vm(struct xe_device *xe, u32 asid) +{ + struct xe_vm *vm; + + down_read(&xe->usm.lock); + vm = xa_load(&xe->usm.asid_to_vm, asid); + if (vm && xe_vm_in_fault_mode(vm)) + xe_vm_get(vm); + else + vm = ERR_PTR(-EINVAL); + up_read(&xe->usm.lock); + + return vm; +} + static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) { struct xe_device *xe = gt_to_xe(gt); @@ -197,21 +212,20 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) if (pf->trva_fault) return -EFAULT; - /* ASID to VM */ - mutex_lock(&xe->usm.lock); - vm = xa_load(&xe->usm.asid_to_vm, pf->asid); - if (vm && xe_vm_in_fault_mode(vm)) - xe_vm_get(vm); - else - vm = NULL; - mutex_unlock(&xe->usm.lock); - if (!vm) - return -EINVAL; + vm = asid_to_vm(xe, pf->asid); + if (IS_ERR(vm)) + return PTR_ERR(vm); /* * TODO: Change to read lock? Using write lock for simplicity. */ down_write(&vm->lock); + + if (xe_vm_is_closed(vm)) { + err = -ENOENT; + goto unlock_vm; + } + vma = lookup_vma(vm, pf->page_addr); if (!vma) { err = -EINVAL; @@ -287,7 +301,7 @@ static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) PFD_VIRTUAL_ADDR_LO_SHIFT; pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) % - PF_QUEUE_NUM_DW; + pf_queue->num_dw; ret = true; } spin_unlock_irq(&pf_queue->lock); @@ -299,7 +313,8 @@ static bool pf_queue_full(struct pf_queue *pf_queue) { lockdep_assert_held(&pf_queue->lock); - return CIRC_SPACE(pf_queue->head, pf_queue->tail, PF_QUEUE_NUM_DW) <= + return CIRC_SPACE(pf_queue->head, pf_queue->tail, + pf_queue->num_dw) <= PF_MSG_LEN_DW; } @@ -312,22 +327,23 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) u32 asid; bool full; - /* - * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0 - */ - BUILD_BUG_ON(PF_QUEUE_NUM_DW % PF_MSG_LEN_DW); - if (unlikely(len != PF_MSG_LEN_DW)) return -EPROTO; asid = FIELD_GET(PFD_ASID, msg[1]); pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE); + /* + * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0 + */ + xe_gt_assert(gt, !(pf_queue->num_dw % PF_MSG_LEN_DW)); + spin_lock_irqsave(&pf_queue->lock, flags); full = pf_queue_full(pf_queue); if (!full) { memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32)); - pf_queue->head = (pf_queue->head + len) % PF_QUEUE_NUM_DW; + pf_queue->head = (pf_queue->head + len) % + pf_queue->num_dw; queue_work(gt->usm.pf_wq, &pf_queue->worker); } else { drm_warn(&xe->drm, "PF Queue full, shouldn't be possible"); @@ -382,18 +398,59 @@ static void pf_queue_work_func(struct work_struct *w) static void acc_queue_work_func(struct work_struct *w); +static void pagefault_fini(void *arg) +{ + struct xe_gt *gt = arg; + struct xe_device *xe = gt_to_xe(gt); + + if (!xe->info.has_usm) + return; + + destroy_workqueue(gt->usm.acc_wq); + destroy_workqueue(gt->usm.pf_wq); +} + +static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) +{ + struct xe_device *xe = gt_to_xe(gt); + xe_dss_mask_t all_dss; + int num_dss, num_eus; + + bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, + XE_MAX_DSS_FUSE_BITS); + + num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS); + num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, + XE_MAX_EU_FUSE_BITS) * num_dss; + + /* user can issue separate page faults per EU and per CS */ + pf_queue->num_dw = + (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW; + + pf_queue->gt = gt; + pf_queue->data = devm_kcalloc(xe->drm.dev, pf_queue->num_dw, + sizeof(u32), GFP_KERNEL); + if (!pf_queue->data) + return -ENOMEM; + + spin_lock_init(&pf_queue->lock); + INIT_WORK(&pf_queue->worker, pf_queue_work_func); + + return 0; +} + int xe_gt_pagefault_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - int i; + int i, ret = 0; if (!xe->info.has_usm) return 0; for (i = 0; i < NUM_PF_QUEUE; ++i) { - gt->usm.pf_queue[i].gt = gt; - spin_lock_init(>->usm.pf_queue[i].lock); - INIT_WORK(>->usm.pf_queue[i].worker, pf_queue_work_func); + ret = xe_alloc_pf_queue(gt, >->usm.pf_queue[i]); + if (ret) + return ret; } for (i = 0; i < NUM_ACC_QUEUE; ++i) { gt->usm.acc_queue[i].gt = gt; @@ -409,10 +466,12 @@ int xe_gt_pagefault_init(struct xe_gt *gt) gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue", WQ_UNBOUND | WQ_HIGHPRI, NUM_ACC_QUEUE); - if (!gt->usm.acc_wq) + if (!gt->usm.acc_wq) { + destroy_workqueue(gt->usm.pf_wq); return -ENOMEM; + } - return 0; + return devm_add_action_or_reset(xe->drm.dev, pagefault_fini, gt); } void xe_gt_pagefault_reset(struct xe_gt *gt) @@ -497,14 +556,9 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc) if (acc->access_type != ACC_TRIGGER) return -EINVAL; - /* ASID to VM */ - mutex_lock(&xe->usm.lock); - vm = xa_load(&xe->usm.asid_to_vm, acc->asid); - if (vm) - xe_vm_get(vm); - mutex_unlock(&xe->usm.lock); - if (!vm || !xe_vm_in_fault_mode(vm)) - return -EINVAL; + vm = asid_to_vm(xe, acc->asid); + if (IS_ERR(vm)) + return PTR_ERR(vm); down_read(&vm->lock); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index 9dbba9ab7a9a..905f409db74b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -5,10 +5,11 @@ #include <drm/drm_managed.h> -#include "regs/xe_sriov_regs.h" +#include "regs/xe_regs.h" #include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_pf_config.h" +#include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_helpers.h" #include "xe_gt_sriov_pf_service.h" #include "xe_mmio.h" @@ -57,6 +58,10 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt) if (err) return err; + err = xe_gt_sriov_pf_control_init(gt); + if (err) + return err; + return 0; } @@ -93,4 +98,5 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) void xe_gt_sriov_pf_restart(struct xe_gt *gt) { xe_gt_sriov_pf_config_restart(gt); + xe_gt_sriov_pf_control_restart(gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index b6f0a7299c03..afdb477ecf83 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -29,6 +29,7 @@ #include "xe_guc_submit.h" #include "xe_lmtt.h" #include "xe_map.h" +#include "xe_migrate.h" #include "xe_sriov.h" #include "xe_ttm_vram_mgr.h" #include "xe_wopcm.h" @@ -232,14 +233,14 @@ static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config) { u32 n = 0; - if (drm_mm_node_allocated(&config->ggtt_region)) { + if (xe_ggtt_node_allocated(config->ggtt_region)) { cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); - cfg[n++] = lower_32_bits(config->ggtt_region.start); - cfg[n++] = upper_32_bits(config->ggtt_region.start); + cfg[n++] = lower_32_bits(config->ggtt_region->base.start); + cfg[n++] = upper_32_bits(config->ggtt_region->base.start); cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); - cfg[n++] = lower_32_bits(config->ggtt_region.size); - cfg[n++] = upper_32_bits(config->ggtt_region.size); + cfg[n++] = lower_32_bits(config->ggtt_region->base.size); + cfg[n++] = upper_32_bits(config->ggtt_region->base.size); } return n; @@ -276,6 +277,14 @@ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config) cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_PREEMPT_TIMEOUT); cfg[n++] = config->preempt_timeout; +#define encode_threshold_config(TAG, ...) ({ \ + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_THRESHOLD_##TAG); \ + cfg[n++] = config->thresholds[MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG)]; \ +}); + + MAKE_XE_GUC_KLV_THRESHOLDS_SET(encode_threshold_config); +#undef encode_threshold_config + return n; } @@ -369,29 +378,30 @@ static int pf_distribute_config_ggtt(struct xe_tile *tile, unsigned int vfid, u6 return err ?: err2; } -static void pf_release_ggtt(struct xe_tile *tile, struct drm_mm_node *node) +static void pf_release_ggtt(struct xe_tile *tile, struct xe_ggtt_node *node) { - struct xe_ggtt *ggtt = tile->mem.ggtt; - - if (drm_mm_node_allocated(node)) { + if (xe_ggtt_node_allocated(node)) { /* * explicit GGTT PTE assignment to the PF using xe_ggtt_assign() * is redundant, as PTE will be implicitly re-assigned to PF by * the xe_ggtt_clear() called by below xe_ggtt_remove_node(). */ - xe_ggtt_remove_node(ggtt, node, false); + xe_ggtt_node_remove(node, false); + } else { + xe_ggtt_node_fini(node); } } static void pf_release_vf_config_ggtt(struct xe_gt *gt, struct xe_gt_sriov_config *config) { - pf_release_ggtt(gt_to_tile(gt), &config->ggtt_region); + pf_release_ggtt(gt_to_tile(gt), config->ggtt_region); + config->ggtt_region = NULL; } static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - struct drm_mm_node *node = &config->ggtt_region; + struct xe_ggtt_node *node; struct xe_tile *tile = gt_to_tile(gt); struct xe_ggtt *ggtt = tile->mem.ggtt; u64 alignment = pf_get_ggtt_alignment(gt); @@ -403,40 +413,48 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) size = round_up(size, alignment); - if (drm_mm_node_allocated(node)) { + if (xe_ggtt_node_allocated(config->ggtt_region)) { err = pf_distribute_config_ggtt(tile, vfid, 0, 0); if (unlikely(err)) return err; - pf_release_ggtt(tile, node); + pf_release_vf_config_ggtt(gt, config); } - xe_gt_assert(gt, !drm_mm_node_allocated(node)); + xe_gt_assert(gt, !xe_ggtt_node_allocated(config->ggtt_region)); if (!size) return 0; - err = xe_ggtt_insert_special_node(ggtt, node, size, alignment); + node = xe_ggtt_node_init(ggtt); + if (IS_ERR(node)) + return PTR_ERR(node); + + err = xe_ggtt_node_insert(node, size, alignment); if (unlikely(err)) - return err; + goto err; - xe_ggtt_assign(ggtt, node, vfid); + xe_ggtt_assign(node, vfid); xe_gt_sriov_dbg_verbose(gt, "VF%u assigned GGTT %llx-%llx\n", - vfid, node->start, node->start + node->size - 1); + vfid, node->base.start, node->base.start + node->base.size - 1); - err = pf_distribute_config_ggtt(gt->tile, vfid, node->start, node->size); + err = pf_distribute_config_ggtt(gt->tile, vfid, node->base.start, node->base.size); if (unlikely(err)) - return err; + goto err; + config->ggtt_region = node; return 0; +err: + pf_release_ggtt(tile, node); + return err; } static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - struct drm_mm_node *node = &config->ggtt_region; + struct xe_ggtt_node *node = config->ggtt_region; xe_gt_assert(gt, !xe_gt_is_media_type(gt)); - return drm_mm_node_allocated(node) ? node->size : 0; + return xe_ggtt_node_allocated(node) ? node->base.size : 0; } /** @@ -587,30 +605,11 @@ int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt, unsigned int vfid, static u64 pf_get_max_ggtt(struct xe_gt *gt) { struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt; - const struct drm_mm *mm = &ggtt->mm; - const struct drm_mm_node *entry; u64 alignment = pf_get_ggtt_alignment(gt); u64 spare = pf_get_spare_ggtt(gt); - u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt)); - u64 hole_start, hole_end, hole_size; - u64 max_hole = 0; + u64 max_hole; - mutex_lock(&ggtt->lock); - - drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { - hole_start = max(hole_start, hole_min_start); - hole_start = ALIGN(hole_start, alignment); - hole_end = ALIGN_DOWN(hole_end, alignment); - if (hole_start >= hole_end) - continue; - hole_size = hole_end - hole_start; - xe_gt_sriov_dbg_verbose(gt, "HOLE start %llx size %lluK\n", - hole_start, hole_size / SZ_1K); - spare -= min3(spare, hole_size, max_hole); - max_hole = max(max_hole, hole_size); - } - - mutex_unlock(&ggtt->lock); + max_hole = xe_ggtt_largest_hole(ggtt, alignment, &spare); xe_gt_sriov_dbg_verbose(gt, "HOLE max %lluK reserved %lluK\n", max_hole / SZ_1K, spare / SZ_1K); @@ -1401,6 +1400,7 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) ALIGN(size, PAGE_SIZE), ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_NEEDS_2M | XE_BO_FLAG_PINNED); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -1844,6 +1844,18 @@ u32 xe_gt_sriov_pf_config_get_threshold(struct xe_gt *gt, unsigned int vfid, return value; } +static void pf_reset_config_thresholds(struct xe_gt *gt, struct xe_gt_sriov_config *config) +{ + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + +#define reset_threshold_config(TAG, ...) ({ \ + config->thresholds[MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG)] = 0; \ +}); + + MAKE_XE_GUC_KLV_THRESHOLDS_SET(reset_threshold_config); +#undef reset_threshold_config +} + static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); @@ -1859,6 +1871,7 @@ static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid) pf_release_config_ctxs(gt, config); pf_release_config_dbs(gt, config); pf_reset_config_sched(gt, config); + pf_reset_config_thresholds(gt, config); } /** @@ -1892,6 +1905,87 @@ int xe_gt_sriov_pf_config_release(struct xe_gt *gt, unsigned int vfid, bool forc return force ? 0 : err; } +static void pf_sanitize_ggtt(struct xe_ggtt_node *ggtt_region, unsigned int vfid) +{ + if (xe_ggtt_node_allocated(ggtt_region)) + xe_ggtt_assign(ggtt_region, vfid); +} + +static int pf_sanitize_lmem(struct xe_tile *tile, struct xe_bo *bo, long timeout) +{ + struct xe_migrate *m = tile->migrate; + struct dma_fence *fence; + int err; + + if (!bo) + return 0; + + xe_bo_lock(bo, false); + fence = xe_migrate_clear(m, bo, bo->ttm.resource, XE_MIGRATE_CLEAR_FLAG_FULL); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + } else if (!fence) { + err = -ENOMEM; + } else { + long ret = dma_fence_wait_timeout(fence, false, timeout); + + err = ret > 0 ? 0 : ret < 0 ? ret : -ETIMEDOUT; + dma_fence_put(fence); + if (!err) + xe_gt_sriov_dbg_verbose(tile->primary_gt, "LMEM cleared in %dms\n", + jiffies_to_msecs(timeout - ret)); + } + xe_bo_unlock(bo); + + return err; +} + +static int pf_sanitize_vf_resources(struct xe_gt *gt, u32 vfid, long timeout) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = gt_to_xe(gt); + int err = 0; + + /* + * Only GGTT and LMEM requires to be cleared by the PF. + * GuC doorbell IDs and context IDs do not need any clearing. + */ + if (!xe_gt_is_media_type(gt)) { + pf_sanitize_ggtt(config->ggtt_region, vfid); + if (IS_DGFX(xe)) + err = pf_sanitize_lmem(tile, config->lmem_obj, timeout); + } + + return err; +} + +/** + * xe_gt_sriov_pf_config_sanitize() - Sanitize VF's resources. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be PF) + * @timeout: maximum timeout to wait for completion in jiffies + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_sanitize(struct xe_gt *gt, unsigned int vfid, long timeout) +{ + int err; + + xe_gt_assert(gt, vfid != PFID); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_sanitize_vf_resources(gt, vfid, timeout); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + if (unlikely(err)) + xe_gt_sriov_notice(gt, "VF%u resource sanitizing failed (%pe)\n", + vfid, ERR_PTR(err)); + return err; +} + /** * xe_gt_sriov_pf_config_push - Reprovision VF's configuration. * @gt: the &xe_gt @@ -2024,13 +2118,15 @@ int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p) for (n = 1; n <= total_vfs; n++) { config = >->sriov.pf.vfs[n].config; - if (!drm_mm_node_allocated(&config->ggtt_region)) + if (!xe_ggtt_node_allocated(config->ggtt_region)) continue; - string_get_size(config->ggtt_region.size, 1, STRING_UNITS_2, buf, sizeof(buf)); + string_get_size(config->ggtt_region->base.size, 1, STRING_UNITS_2, + buf, sizeof(buf)); drm_printf(p, "VF%u:\t%#0llx-%#llx\t(%s)\n", - n, config->ggtt_region.start, - config->ggtt_region.start + config->ggtt_region.size - 1, buf); + n, config->ggtt_region->base.start, + config->ggtt_region->base.start + config->ggtt_region->base.size - 1, + buf); } return 0; @@ -2118,12 +2214,8 @@ int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p) int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_printer *p) { struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt; - const struct drm_mm *mm = &ggtt->mm; - const struct drm_mm_node *entry; u64 alignment = pf_get_ggtt_alignment(gt); - u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt)); - u64 hole_start, hole_end, hole_size; - u64 spare, avail, total = 0; + u64 spare, avail, total; char buf[10]; xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); @@ -2131,24 +2223,8 @@ int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_prin mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); spare = pf_get_spare_ggtt(gt); + total = xe_ggtt_print_holes(ggtt, alignment, p); - mutex_lock(&ggtt->lock); - - drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { - hole_start = max(hole_start, hole_min_start); - hole_start = ALIGN(hole_start, alignment); - hole_end = ALIGN_DOWN(hole_end, alignment); - if (hole_start >= hole_end) - continue; - hole_size = hole_end - hole_start; - total += hole_size; - - string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf)); - drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n", - hole_start, hole_end - 1, buf); - } - - mutex_unlock(&ggtt->lock); mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); string_get_size(total, 1, STRING_UNITS_2, buf, sizeof(buf)); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h index c0e6e4743dc2..42e64769f666 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h @@ -50,6 +50,7 @@ int xe_gt_sriov_pf_config_set_threshold(struct xe_gt *gt, unsigned int vfid, enum xe_guc_klv_threshold_index index, u32 value); int xe_gt_sriov_pf_config_set_fair(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs); +int xe_gt_sriov_pf_config_sanitize(struct xe_gt *gt, unsigned int vfid, long timeout); int xe_gt_sriov_pf_config_release(struct xe_gt *gt, unsigned int vfid, bool force); int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h index 7bc66656fcc7..2d3b73d78f14 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h @@ -6,8 +6,7 @@ #ifndef _XE_GT_SRIOV_PF_CONFIG_TYPES_H_ #define _XE_GT_SRIOV_PF_CONFIG_TYPES_H_ -#include <drm/drm_mm.h> - +#include "xe_ggtt_types.h" #include "xe_guc_klv_thresholds_set_types.h" struct xe_bo; @@ -19,7 +18,7 @@ struct xe_bo; */ struct xe_gt_sriov_config { /** @ggtt_region: GGTT region assigned to the VF. */ - struct drm_mm_node ggtt_region; + struct xe_ggtt_node *ggtt_region; /** @lmem_obj: LMEM allocation for use by the VF. */ struct xe_bo *lmem_obj; /** @num_ctxs: number of GuC contexts IDs. */ diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index ebf06e037750..02f7328bd6ce 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -3,11 +3,17 @@ * Copyright © 2023-2024 Intel Corporation */ +#include <drm/drm_managed.h> + #include "abi/guc_actions_sriov_abi.h" #include "xe_device.h" #include "xe_gt.h" +#include "xe_gt_sriov_pf_config.h" #include "xe_gt_sriov_pf_control.h" +#include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_monitor.h" +#include "xe_gt_sriov_pf_service.h" #include "xe_gt_sriov_printk.h" #include "xe_guc_ct.h" #include "xe_sriov.h" @@ -41,10 +47,6 @@ static int guc_action_vf_control_cmd(struct xe_guc *guc, u32 vfid, u32 cmd) }; int ret; - /* XXX those two commands are now sent from the G2H handler */ - if (cmd == GUC_PF_TRIGGER_VF_FLR_START || cmd == GUC_PF_TRIGGER_VF_FLR_FINISH) - return xe_guc_ct_send_g2h_handler(&guc->ct, request, ARRAY_SIZE(request)); - ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request)); return ret > 0 ? -EPROTO : ret; } @@ -54,6 +56,8 @@ static int pf_send_vf_control_cmd(struct xe_gt *gt, unsigned int vfid, u32 cmd) int err; xe_gt_assert(gt, vfid != PFID); + xe_gt_sriov_dbg_verbose(gt, "sending VF%u control command %s\n", + vfid, control_cmd_to_string(cmd)); err = guc_action_vf_control_cmd(>->uc.guc, vfid, cmd); if (unlikely(err)) @@ -88,6 +92,456 @@ static int pf_send_vf_flr_finish(struct xe_gt *gt, unsigned int vfid) } /** + * DOC: The VF state machine + * + * The simplified VF state machine could be presented as:: + * + * pause--------------------------o + * / | + * / v + * (READY)<------------------resume-----(PAUSED) + * ^ \ / / + * | \ / / + * | stop---->(STOPPED)<----stop / + * | / / + * | / / + * o--------<-----flr / + * \ / + * o------<--------------------flr + * + * Where: + * + * * READY - represents a state in which VF is fully operable + * * PAUSED - represents a state in which VF activity is temporarily suspended + * * STOPPED - represents a state in which VF activity is definitely halted + * * pause - represents a request to temporarily suspend VF activity + * * resume - represents a request to resume VF activity + * * stop - represents a request to definitely halt VF activity + * * flr - represents a request to perform VF FLR to restore VF activity + * + * However, each state transition requires additional steps that involves + * communication with GuC that might fail or be interrupted by other requests:: + * + * .................................WIP.... + * : : + * pause--------------------->PAUSE_WIP----------------------------o + * / : / \ : | + * / : o----<---stop flr--o : | + * / : | \ / | : V + * (READY,RESUMED)<--------+------------RESUME_WIP<----+--<-----resume--(PAUSED) + * ^ \ \ : | | : / / + * | \ \ : | | : / / + * | \ \ : | | : / / + * | \ \ : o----<----------------------+--<-------stop / + * | \ \ : | | : / + * | \ \ : V | : / + * | \ stop----->STOP_WIP---------flr--->-----o : / + * | \ : | | : / + * | \ : | V : / + * | flr--------+----->----------------->FLR_WIP<-----flr + * | : | / ^ : + * | : | / | : + * o--------<-------:----+-----<----------------o | : + * : | | : + * :....|...........................|.....: + * | | + * V | + * (STOPPED)--------------------flr + * + * For details about each internal WIP state machine see: + * + * * `The VF PAUSE state machine`_ + * * `The VF RESUME state machine`_ + * * `The VF STOP state machine`_ + * * `The VF FLR state machine`_ + */ + +#ifdef CONFIG_DRM_XE_DEBUG_SRIOV +static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit) +{ + switch (bit) { +#define CASE2STR(_X) \ + case XE_GT_SRIOV_STATE_##_X: return #_X + CASE2STR(WIP); + CASE2STR(FLR_WIP); + CASE2STR(FLR_SEND_START); + CASE2STR(FLR_WAIT_GUC); + CASE2STR(FLR_GUC_DONE); + CASE2STR(FLR_RESET_CONFIG); + CASE2STR(FLR_RESET_DATA); + CASE2STR(FLR_RESET_MMIO); + CASE2STR(FLR_SEND_FINISH); + CASE2STR(FLR_FAILED); + CASE2STR(PAUSE_WIP); + CASE2STR(PAUSE_SEND_PAUSE); + CASE2STR(PAUSE_WAIT_GUC); + CASE2STR(PAUSE_GUC_DONE); + CASE2STR(PAUSE_FAILED); + CASE2STR(PAUSED); + CASE2STR(RESUME_WIP); + CASE2STR(RESUME_SEND_RESUME); + CASE2STR(RESUME_FAILED); + CASE2STR(RESUMED); + CASE2STR(STOP_WIP); + CASE2STR(STOP_SEND_STOP); + CASE2STR(STOP_FAILED); + CASE2STR(STOPPED); + CASE2STR(MISMATCH); +#undef CASE2STR + default: return "?"; + } +} +#endif + +static unsigned long pf_get_default_timeout(enum xe_gt_sriov_control_bits bit) +{ + switch (bit) { + case XE_GT_SRIOV_STATE_FLR_WAIT_GUC: + case XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC: + return HZ / 2; + case XE_GT_SRIOV_STATE_FLR_WIP: + case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG: + return 5 * HZ; + default: + return HZ; + } +} + +static struct xe_gt_sriov_control_state *pf_pick_vf_control(struct xe_gt *gt, unsigned int vfid) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt)); + + return >->sriov.pf.vfs[vfid].control; +} + +static unsigned long *pf_peek_vf_state(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid); + + return &cs->state; +} + +static bool pf_check_vf_state(struct xe_gt *gt, unsigned int vfid, + enum xe_gt_sriov_control_bits bit) +{ + return test_bit(bit, pf_peek_vf_state(gt, vfid)); +} + +static void pf_dump_vf_state(struct xe_gt *gt, unsigned int vfid) +{ + unsigned long state = *pf_peek_vf_state(gt, vfid); + enum xe_gt_sriov_control_bits bit; + + if (state) { + xe_gt_sriov_dbg_verbose(gt, "VF%u state %#lx%s%*pbl\n", + vfid, state, state ? " bits " : "", + (int)BITS_PER_LONG, &state); + for_each_set_bit(bit, &state, BITS_PER_LONG) + xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d)\n", + vfid, control_bit_to_string(bit), bit); + } else { + xe_gt_sriov_dbg_verbose(gt, "VF%u state READY\n", vfid); + } +} + +static bool pf_expect_vf_state(struct xe_gt *gt, unsigned int vfid, + enum xe_gt_sriov_control_bits bit) +{ + bool result = pf_check_vf_state(gt, vfid, bit); + + if (unlikely(!result)) + pf_dump_vf_state(gt, vfid); + + return result; +} + +static bool pf_expect_vf_not_state(struct xe_gt *gt, unsigned int vfid, + enum xe_gt_sriov_control_bits bit) +{ + bool result = !pf_check_vf_state(gt, vfid, bit); + + if (unlikely(!result)) + pf_dump_vf_state(gt, vfid); + + return result; +} + +static bool pf_enter_vf_state(struct xe_gt *gt, unsigned int vfid, + enum xe_gt_sriov_control_bits bit) +{ + if (!test_and_set_bit(bit, pf_peek_vf_state(gt, vfid))) { + xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) enter\n", + vfid, control_bit_to_string(bit), bit); + return true; + } + return false; +} + +static bool pf_exit_vf_state(struct xe_gt *gt, unsigned int vfid, + enum xe_gt_sriov_control_bits bit) +{ + if (test_and_clear_bit(bit, pf_peek_vf_state(gt, vfid))) { + xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) exit\n", + vfid, control_bit_to_string(bit), bit); + return true; + } + return false; +} + +static void pf_escape_vf_state(struct xe_gt *gt, unsigned int vfid, + enum xe_gt_sriov_control_bits bit) +{ + if (pf_exit_vf_state(gt, vfid, bit)) + xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) escaped by %ps\n", + vfid, control_bit_to_string(bit), bit, + __builtin_return_address(0)); +} + +static void pf_enter_vf_mismatch(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH)) { + xe_gt_sriov_dbg(gt, "VF%u state mismatch detected by %ps\n", + vfid, __builtin_return_address(0)); + pf_dump_vf_state(gt, vfid); + } +} + +static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH)) + xe_gt_sriov_dbg(gt, "VF%u state mismatch cleared by %ps\n", + vfid, __builtin_return_address(0)); + + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED); +} + +#define pf_enter_vf_state_machine_bug(gt, vfid) ({ \ + pf_enter_vf_mismatch((gt), (vfid)); \ +}) + +static void pf_queue_control_worker(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + + queue_work(xe->sriov.wq, >->sriov.pf.control.worker); +} + +static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_pf_control *pfc = >->sriov.pf.control; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + spin_lock(&pfc->lock); + list_move_tail(>->sriov.pf.vfs[vfid].control.link, &pfc->list); + spin_unlock(&pfc->lock); + + pf_queue_control_worker(gt); +} + +static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid); +static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid); +static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid); +static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid); + +static bool pf_enter_vf_wip(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) { + struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid); + + reinit_completion(&cs->done); + return true; + } + return false; +} + +static void pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) { + struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid); + + pf_exit_vf_flr_wip(gt, vfid); + pf_exit_vf_stop_wip(gt, vfid); + pf_exit_vf_pause_wip(gt, vfid); + pf_exit_vf_resume_wip(gt, vfid); + + complete_all(&cs->done); + } +} + +static int pf_wait_vf_wip_done(struct xe_gt *gt, unsigned int vfid, unsigned long timeout) +{ + struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid); + + return wait_for_completion_timeout(&cs->done, timeout) ? 0 : -ETIMEDOUT; +} + +static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid) +{ + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); + pf_exit_vf_mismatch(gt, vfid); + pf_exit_vf_wip(gt, vfid); +} + +/** + * DOC: The VF PAUSE state machine + * + * The VF PAUSE state machine looks like:: + * + * (READY,RESUMED)<-------------<---------------------o---------o + * | \ \ + * pause \ \ + * | \ \ + * ....V...........................PAUSE_WIP........ \ \ + * : \ : o \ + * : \ o------<-----busy : | \ + * : \ / / : | | + * : PAUSE_SEND_PAUSE ---failed--->----------o--->(PAUSE_FAILED) | + * : | \ : | | + * : acked rejected---->----------o--->(MISMATCH) / + * : | : / + * : v : / + * : PAUSE_WAIT_GUC : / + * : | : / + * : done : / + * : | : / + * : v : / + * : PAUSE_GUC_DONE o-----restart + * : / : + * : / : + * :....o..............o...............o...........: + * | | | + * completed flr stop + * | | | + * V .....V..... ......V..... + * (PAUSED) : FLR_WIP : : STOP_WIP : + * :.........: :..........: + * + * For the full state machine view, see `The VF state machine`_. + */ + +static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) { + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE); + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC); + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE); + } +} + +static void pf_enter_vf_paused(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) + pf_enter_vf_state_machine_bug(gt, vfid); + + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); + pf_exit_vf_mismatch(gt, vfid); + pf_exit_vf_wip(gt, vfid); +} + +static void pf_enter_vf_pause_completed(struct xe_gt *gt, unsigned int vfid) +{ + pf_enter_vf_paused(gt, vfid); +} + +static void pf_enter_vf_pause_failed(struct xe_gt *gt, unsigned int vfid) +{ + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED); + pf_exit_vf_wip(gt, vfid); +} + +static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid) +{ + pf_enter_vf_mismatch(gt, vfid); + pf_enter_vf_pause_failed(gt, vfid); +} + +static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE)) + return false; + + pf_enter_vf_pause_completed(gt, vfid); + return true; +} + +static void pf_enter_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE)) + pf_queue_vf(gt, vfid); +} + +static void pf_enter_pause_wait_guc(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)) + pf_enter_vf_state_machine_bug(gt, vfid); +} + +static bool pf_exit_pause_wait_guc(struct xe_gt *gt, unsigned int vfid) +{ + return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC); +} + +static void pf_enter_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE)) + pf_enter_vf_state_machine_bug(gt, vfid); + + pf_queue_vf(gt, vfid); +} + +static bool pf_exit_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid) +{ + int err; + + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE)) + return false; + + /* GuC may actually send a PAUSE_DONE before we get a RESPONSE */ + pf_enter_pause_wait_guc(gt, vfid); + + err = pf_send_vf_pause(gt, vfid); + if (err) { + /* send failed, so we shouldn't expect PAUSE_DONE from GuC */ + pf_exit_pause_wait_guc(gt, vfid); + + if (err == -EBUSY) + pf_enter_vf_pause_send_pause(gt, vfid); + else if (err == -EIO) + pf_enter_vf_pause_rejected(gt, vfid); + else + pf_enter_vf_pause_failed(gt, vfid); + } else { + /* + * we have already moved to WAIT_GUC, maybe even to GUC_DONE + * but since GuC didn't complain, we may clear MISMATCH + */ + pf_exit_vf_mismatch(gt, vfid); + } + + return true; +} + +static bool pf_enter_vf_pause_wip(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) { + pf_enter_vf_wip(gt, vfid); + pf_enter_vf_pause_send_pause(gt, vfid); + return true; + } + + return false; +} + +/** * xe_gt_sriov_pf_control_pause_vf - Pause a VF. * @gt: the &xe_gt * @vfid: the VF identifier @@ -98,7 +552,140 @@ static int pf_send_vf_flr_finish(struct xe_gt *gt, unsigned int vfid) */ int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid) { - return pf_send_vf_pause(gt, vfid); + unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_PAUSE_WIP); + int err; + + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) { + xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid); + return -EPERM; + } + + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) { + xe_gt_sriov_dbg(gt, "VF%u was already paused!\n", vfid); + return -ESTALE; + } + + if (!pf_enter_vf_pause_wip(gt, vfid)) { + xe_gt_sriov_dbg(gt, "VF%u pause already in progress!\n", vfid); + return -EALREADY; + } + + err = pf_wait_vf_wip_done(gt, vfid, timeout); + if (err) { + xe_gt_sriov_dbg(gt, "VF%u pause didn't finish in %u ms (%pe)\n", + vfid, jiffies_to_msecs(timeout), ERR_PTR(err)); + return err; + } + + if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) { + xe_gt_sriov_info(gt, "VF%u paused!\n", vfid); + return 0; + } + + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED)) { + xe_gt_sriov_dbg(gt, "VF%u pause failed!\n", vfid); + return -EIO; + } + + xe_gt_sriov_dbg(gt, "VF%u pause was canceled!\n", vfid); + return -ECANCELED; +} + +/** + * DOC: The VF RESUME state machine + * + * The VF RESUME state machine looks like:: + * + * (PAUSED)<-----------------<------------------------o + * | \ + * resume \ + * | \ + * ....V............................RESUME_WIP...... \ + * : \ : o + * : \ o-------<-----busy : | + * : \ / / : | + * : RESUME_SEND_RESUME ---failed--->--------o--->(RESUME_FAILED) + * : / \ : | + * : acked rejected---->---------o--->(MISMATCH) + * : / : + * :....o..............o...............o.....o.....: + * | | | \ + * completed flr stop restart-->(READY) + * | | | + * V .....V..... ......V..... + * (RESUMED) : FLR_WIP : : STOP_WIP : + * :.........: :..........: + * + * For the full state machine view, see `The VF state machine`_. + */ + +static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP)) + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME); +} + +static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid) +{ + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); + pf_exit_vf_mismatch(gt, vfid); + pf_exit_vf_wip(gt, vfid); +} + +static void pf_enter_vf_resume_completed(struct xe_gt *gt, unsigned int vfid) +{ + pf_enter_vf_resumed(gt, vfid); +} + +static void pf_enter_vf_resume_failed(struct xe_gt *gt, unsigned int vfid) +{ + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED); + pf_exit_vf_wip(gt, vfid); +} + +static void pf_enter_vf_resume_rejected(struct xe_gt *gt, unsigned int vfid) +{ + pf_enter_vf_mismatch(gt, vfid); + pf_enter_vf_resume_failed(gt, vfid); +} + +static void pf_enter_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME)) + pf_enter_vf_state_machine_bug(gt, vfid); + + pf_queue_vf(gt, vfid); +} + +static bool pf_exit_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid) +{ + int err; + + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME)) + return false; + + err = pf_send_vf_resume(gt, vfid); + if (err == -EBUSY) + pf_enter_vf_resume_send_resume(gt, vfid); + else if (err == -EIO) + pf_enter_vf_resume_rejected(gt, vfid); + else if (err) + pf_enter_vf_resume_failed(gt, vfid); + else + pf_enter_vf_resume_completed(gt, vfid); + return true; +} + +static bool pf_enter_vf_resume_wip(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP)) { + pf_enter_vf_wip(gt, vfid); + pf_enter_vf_resume_send_resume(gt, vfid); + return true; + } + + return false; } /** @@ -112,7 +699,134 @@ int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid) */ int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid) { - return pf_send_vf_resume(gt, vfid); + unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESUME_WIP); + int err; + + if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) { + xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid); + return -EPERM; + } + + if (!pf_enter_vf_resume_wip(gt, vfid)) { + xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid); + return -EALREADY; + } + + err = pf_wait_vf_wip_done(gt, vfid, timeout); + if (err) + return err; + + if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED)) { + xe_gt_sriov_info(gt, "VF%u resumed!\n", vfid); + return 0; + } + + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED)) { + xe_gt_sriov_dbg(gt, "VF%u resume failed!\n", vfid); + return -EIO; + } + + xe_gt_sriov_dbg(gt, "VF%u resume was canceled!\n", vfid); + return -ECANCELED; +} + +/** + * DOC: The VF STOP state machine + * + * The VF STOP state machine looks like:: + * + * (READY,PAUSED,RESUMED)<-------<--------------------o + * | \ + * stop \ + * | \ + * ....V..............................STOP_WIP...... \ + * : \ : o + * : \ o----<----busy : | + * : \ / / : | + * : STOP_SEND_STOP--------failed--->--------o--->(STOP_FAILED) + * : / \ : | + * : acked rejected-------->--------o--->(MISMATCH) + * : / : + * :....o..............o...............o...........: + * | | | + * completed flr restart + * | | | + * V .....V..... V + * (STOPPED) : FLR_WIP : (READY) + * :.........: + * + * For the full state machine view, see `The VF state machine`_. + */ + +static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP)) + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP); +} + +static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) + pf_enter_vf_state_machine_bug(gt, vfid); + + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); + pf_exit_vf_mismatch(gt, vfid); + pf_exit_vf_wip(gt, vfid); +} + +static void pf_enter_vf_stop_completed(struct xe_gt *gt, unsigned int vfid) +{ + pf_enter_vf_stopped(gt, vfid); +} + +static void pf_enter_vf_stop_failed(struct xe_gt *gt, unsigned int vfid) +{ + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED); + pf_exit_vf_wip(gt, vfid); +} + +static void pf_enter_vf_stop_rejected(struct xe_gt *gt, unsigned int vfid) +{ + pf_enter_vf_mismatch(gt, vfid); + pf_enter_vf_stop_failed(gt, vfid); +} + +static void pf_enter_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP)) + pf_enter_vf_state_machine_bug(gt, vfid); + + pf_queue_vf(gt, vfid); +} + +static bool pf_exit_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid) +{ + int err; + + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP)) + return false; + + err = pf_send_vf_stop(gt, vfid); + if (err == -EBUSY) + pf_enter_vf_stop_send_stop(gt, vfid); + else if (err == -EIO) + pf_enter_vf_stop_rejected(gt, vfid); + else if (err) + pf_enter_vf_stop_failed(gt, vfid); + else + pf_enter_vf_stop_completed(gt, vfid); + return true; +} + +static bool pf_enter_vf_stop_wip(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP)) { + pf_enter_vf_wip(gt, vfid); + pf_enter_vf_stop_send_stop(gt, vfid); + return true; + } + return false; } /** @@ -126,7 +840,280 @@ int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid) */ int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid) { - return pf_send_vf_stop(gt, vfid); + unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_STOP_WIP); + int err; + + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) { + xe_gt_sriov_dbg(gt, "VF%u was already stopped!\n", vfid); + return -ESTALE; + } + + if (!pf_enter_vf_stop_wip(gt, vfid)) { + xe_gt_sriov_dbg(gt, "VF%u stop already in progress!\n", vfid); + return -EALREADY; + } + + err = pf_wait_vf_wip_done(gt, vfid, timeout); + if (err) + return err; + + if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) { + xe_gt_sriov_info(gt, "VF%u stopped!\n", vfid); + return 0; + } + + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED)) { + xe_gt_sriov_dbg(gt, "VF%u stop failed!\n", vfid); + return -EIO; + } + + xe_gt_sriov_dbg(gt, "VF%u stop was canceled!\n", vfid); + return -ECANCELED; +} + +/** + * DOC: The VF FLR state machine + * + * The VF FLR state machine looks like:: + * + * (READY,PAUSED,STOPPED)<------------<--------------o + * | \ + * flr \ + * | \ + * ....V..........................FLR_WIP........... \ + * : \ : \ + * : \ o----<----busy : | + * : \ / / : | + * : FLR_SEND_START---failed----->-----------o--->(FLR_FAILED)<---o + * : | \ : | | + * : acked rejected----->-----------o--->(MISMATCH) | + * : | : ^ | + * : v : | | + * : FLR_WAIT_GUC : | | + * : | : | | + * : done : | | + * : | : | | + * : v : | | + * : FLR_GUC_DONE : | | + * : | : | | + * : FLR_RESET_CONFIG---failed--->-----------o--------+-----------o + * : | : | | + * : FLR_RESET_DATA : | | + * : | : | | + * : FLR_RESET_MMIO : | | + * : | : | | + * : | o----<----busy : | | + * : |/ / : | | + * : FLR_SEND_FINISH----failed--->-----------o--------+-----------o + * : / \ : | + * : acked rejected----->-----------o--------o + * : / : + * :....o..............................o...........: + * | | + * completed restart + * | / + * V / + * (READY)<----------<------------o + * + * For the full state machine view, see `The VF state machine`_. + */ + +static void pf_enter_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START)) + pf_enter_vf_state_machine_bug(gt, vfid); + + pf_queue_vf(gt, vfid); +} + +static void pf_enter_vf_flr_wip(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) { + xe_gt_sriov_dbg(gt, "VF%u FLR is already in progress\n", vfid); + return; + } + + pf_enter_vf_wip(gt, vfid); + pf_enter_vf_flr_send_start(gt, vfid); +} + +static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) { + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH); + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO); + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA); + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG); + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE); + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC); + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START); + } +} + +static void pf_enter_vf_flr_completed(struct xe_gt *gt, unsigned int vfid) +{ + pf_enter_vf_ready(gt, vfid); +} + +static void pf_enter_vf_flr_failed(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED)) + xe_gt_sriov_notice(gt, "VF%u FLR failed!\n", vfid); + pf_exit_vf_wip(gt, vfid); +} + +static void pf_enter_vf_flr_rejected(struct xe_gt *gt, unsigned int vfid) +{ + pf_enter_vf_mismatch(gt, vfid); + pf_enter_vf_flr_failed(gt, vfid); +} + +static void pf_enter_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH)) + pf_enter_vf_state_machine_bug(gt, vfid); + + pf_queue_vf(gt, vfid); +} + +static bool pf_exit_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid) +{ + int err; + + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH)) + return false; + + err = pf_send_vf_flr_finish(gt, vfid); + if (err == -EBUSY) + pf_enter_vf_flr_send_finish(gt, vfid); + else if (err == -EIO) + pf_enter_vf_flr_rejected(gt, vfid); + else if (err) + pf_enter_vf_flr_failed(gt, vfid); + else + pf_enter_vf_flr_completed(gt, vfid); + return true; +} + +static void pf_enter_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO)) + pf_enter_vf_state_machine_bug(gt, vfid); + + pf_queue_vf(gt, vfid); +} + +static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO)) + return false; + + /* XXX: placeholder */ + + pf_enter_vf_flr_send_finish(gt, vfid); + return true; +} + +static void pf_enter_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA)) + pf_enter_vf_state_machine_bug(gt, vfid); + + pf_queue_vf(gt, vfid); +} + +static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA)) + return false; + + xe_gt_sriov_pf_service_reset(gt, vfid); + xe_gt_sriov_pf_monitor_flr(gt, vfid); + + pf_enter_vf_flr_reset_mmio(gt, vfid); + return true; +} + +static void pf_enter_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG)) + pf_enter_vf_state_machine_bug(gt, vfid); + + pf_queue_vf(gt, vfid); +} + +static bool pf_exit_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid) +{ + unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_RESET_CONFIG); + int err; + + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG)) + return false; + + err = xe_gt_sriov_pf_config_sanitize(gt, vfid, timeout); + if (err) + pf_enter_vf_flr_failed(gt, vfid); + else + pf_enter_vf_flr_reset_data(gt, vfid); + return true; +} + +static void pf_enter_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC)) + pf_enter_vf_state_machine_bug(gt, vfid); +} + +static bool pf_exit_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid) +{ + return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC); +} + +static bool pf_exit_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid) +{ + int err; + + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START)) + return false; + + /* GuC may actually send a FLR_DONE before we get a RESPONSE */ + pf_enter_vf_flr_wait_guc(gt, vfid); + + err = pf_send_vf_flr_start(gt, vfid); + if (err) { + /* send failed, so we shouldn't expect FLR_DONE from GuC */ + pf_exit_vf_flr_wait_guc(gt, vfid); + + if (err == -EBUSY) + pf_enter_vf_flr_send_start(gt, vfid); + else if (err == -EIO) + pf_enter_vf_flr_rejected(gt, vfid); + else + pf_enter_vf_flr_failed(gt, vfid); + } else { + /* + * we have already moved to WAIT_GUC, maybe even to GUC_DONE + * but since GuC didn't complain, we may clear MISMATCH + */ + pf_exit_vf_mismatch(gt, vfid); + } + + return true; +} + +static bool pf_exit_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE)) + return false; + + pf_enter_vf_flr_reset_config(gt, vfid); + return true; +} + +static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE)) + pf_queue_vf(gt, vfid); } /** @@ -140,46 +1127,56 @@ int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid) */ int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid) { + unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_WIP); int err; - /* XXX pf_send_vf_flr_start() expects ct->lock */ - mutex_lock(>->uc.guc.ct.lock); - err = pf_send_vf_flr_start(gt, vfid); - mutex_unlock(>->uc.guc.ct.lock); + pf_enter_vf_flr_wip(gt, vfid); - return err; + err = pf_wait_vf_wip_done(gt, vfid, timeout); + if (err) { + xe_gt_sriov_notice(gt, "VF%u FLR didn't finish in %u ms (%pe)\n", + vfid, jiffies_to_msecs(timeout), ERR_PTR(err)); + return err; + } + + if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED)) + return -EIO; + + return 0; } /** * DOC: The VF FLR Flow with GuC * - * PF GUC PCI - * ======================================================== - * | | | - * (1) | [ ] <----- FLR --| - * | [ ] : - * (2) [ ] <-------- NOTIFY FLR --[ ] - * [ ] | - * (3) [ ] | - * [ ] | - * [ ]-- START FLR ---------> [ ] - * | [ ] - * (4) | [ ] - * | [ ] - * [ ] <--------- FLR DONE -- [ ] - * [ ] | - * (5) [ ] | - * [ ] | - * [ ]-- FINISH FLR --------> [ ] - * | | - * - * Step 1: PCI HW generates interrupt to the GuC about VF FLR - * Step 2: GuC FW sends G2H notification to the PF about VF FLR - * Step 2a: on some platforms G2H is only received from root GuC - * Step 3: PF sends H2G request to the GuC to start VF FLR sequence - * Step 3a: on some platforms PF must send H2G to all other GuCs - * Step 4: GuC FW performs VF FLR cleanups and notifies the PF when done - * Step 5: PF performs VF FLR cleanups and notifies the GuC FW when finished + * The VF FLR flow includes several steps:: + * + * PF GUC PCI + * ======================================================== + * | | | + * (1) | [ ] <----- FLR --| + * | [ ] : + * (2) [ ] <-------- NOTIFY FLR --[ ] + * [ ] | + * (3) [ ] | + * [ ] | + * [ ]-- START FLR ---------> [ ] + * | [ ] + * (4) | [ ] + * | [ ] + * [ ] <--------- FLR DONE -- [ ] + * [ ] | + * (5) [ ] | + * [ ] | + * [ ]-- FINISH FLR --------> [ ] + * | | + * + * * Step 1: PCI HW generates interrupt to the GuC about VF FLR + * * Step 2: GuC FW sends G2H notification to the PF about VF FLR + * * Step 2a: on some platforms G2H is only received from root GuC + * * Step 3: PF sends H2G request to the GuC to start VF FLR sequence + * * Step 3a: on some platforms PF must send H2G to all other GuCs + * * Step 4: GuC FW performs VF FLR cleanups and notifies the PF when done + * * Step 5: PF performs VF FLR cleanups and notifies the GuC FW when finished */ static bool needs_dispatch_flr(struct xe_device *xe) @@ -197,19 +1194,41 @@ static void pf_handle_vf_flr(struct xe_gt *gt, u32 vfid) if (needs_dispatch_flr(xe)) { for_each_gt(gtit, xe, gtid) - pf_send_vf_flr_start(gtit, vfid); + pf_enter_vf_flr_wip(gtit, vfid); } else { - pf_send_vf_flr_start(gt, vfid); + pf_enter_vf_flr_wip(gt, vfid); } } static void pf_handle_vf_flr_done(struct xe_gt *gt, u32 vfid) { - pf_send_vf_flr_finish(gt, vfid); + if (!pf_exit_vf_flr_wait_guc(gt, vfid)) { + xe_gt_sriov_dbg(gt, "Received out of order 'VF%u FLR done'\n", vfid); + pf_enter_vf_mismatch(gt, vfid); + return; + } + + pf_enter_vf_flr_guc_done(gt, vfid); +} + +static void pf_handle_vf_pause_done(struct xe_gt *gt, u32 vfid) +{ + if (!pf_exit_pause_wait_guc(gt, vfid)) { + xe_gt_sriov_dbg(gt, "Received out of order 'VF%u PAUSE done'\n", vfid); + pf_enter_vf_mismatch(gt, vfid); + return; + } + + pf_enter_vf_pause_guc_done(gt, vfid); } static int pf_handle_vf_event(struct xe_gt *gt, u32 vfid, u32 eventid) { + xe_gt_sriov_dbg_verbose(gt, "received VF%u event %#x\n", vfid, eventid); + + if (vfid > xe_gt_sriov_pf_get_totalvfs(gt)) + return -EPROTO; + switch (eventid) { case GUC_PF_NOTIFY_VF_FLR: pf_handle_vf_flr(gt, vfid); @@ -218,6 +1237,7 @@ static int pf_handle_vf_event(struct xe_gt *gt, u32 vfid, u32 eventid) pf_handle_vf_flr_done(gt, vfid); break; case GUC_PF_NOTIFY_VF_PAUSE_DONE: + pf_handle_vf_pause_done(gt, vfid); break; case GUC_PF_NOTIFY_VF_FIXUP_DONE: break; @@ -276,3 +1296,159 @@ int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 return vfid ? pf_handle_vf_event(gt, vfid, eventid) : pf_handle_pf_event(gt, eventid); } + +static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_exit_vf_flr_send_start(gt, vfid)) + return true; + + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC)) { + xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid, + control_bit_to_string(XE_GT_SRIOV_STATE_FLR_WAIT_GUC)); + return false; + } + + if (pf_exit_vf_flr_guc_done(gt, vfid)) + return true; + + if (pf_exit_vf_flr_reset_config(gt, vfid)) + return true; + + if (pf_exit_vf_flr_reset_data(gt, vfid)) + return true; + + if (pf_exit_vf_flr_reset_mmio(gt, vfid)) + return true; + + if (pf_exit_vf_flr_send_finish(gt, vfid)) + return true; + + if (pf_exit_vf_stop_send_stop(gt, vfid)) + return true; + + if (pf_exit_vf_pause_send_pause(gt, vfid)) + return true; + + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)) { + xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid, + control_bit_to_string(XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)); + return true; + } + + if (pf_exit_vf_pause_guc_done(gt, vfid)) + return true; + + if (pf_exit_vf_resume_send_resume(gt, vfid)) + return true; + + return false; +} + +static unsigned int pf_control_state_index(struct xe_gt *gt, + struct xe_gt_sriov_control_state *cs) +{ + return container_of(cs, struct xe_gt_sriov_metadata, control) - gt->sriov.pf.vfs; +} + +static void pf_worker_find_work(struct xe_gt *gt) +{ + struct xe_gt_sriov_pf_control *pfc = >->sriov.pf.control; + struct xe_gt_sriov_control_state *cs; + unsigned int vfid; + bool empty; + bool more; + + spin_lock(&pfc->lock); + cs = list_first_entry_or_null(&pfc->list, struct xe_gt_sriov_control_state, link); + if (cs) + list_del_init(&cs->link); + empty = list_empty(&pfc->list); + spin_unlock(&pfc->lock); + + if (!cs) + return; + + /* VF metadata structures are indexed by the VFID */ + vfid = pf_control_state_index(gt, cs); + xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt)); + + more = pf_process_vf_state_machine(gt, vfid); + if (more) + pf_queue_vf(gt, vfid); + else if (!empty) + pf_queue_control_worker(gt); +} + +static void control_worker_func(struct work_struct *w) +{ + struct xe_gt *gt = container_of(w, struct xe_gt, sriov.pf.control.worker); + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + pf_worker_find_work(gt); +} + +static void pf_stop_worker(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + cancel_work_sync(>->sriov.pf.control.worker); +} + +static void control_fini_action(struct drm_device *dev, void *data) +{ + struct xe_gt *gt = data; + + pf_stop_worker(gt); +} + +/** + * xe_gt_sriov_pf_control_init() - Initialize PF's control data. + * @gt: the &xe_gt + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_control_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + unsigned int n, totalvfs; + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + + totalvfs = xe_sriov_pf_get_totalvfs(xe); + for (n = 0; n <= totalvfs; n++) { + struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, n); + + init_completion(&cs->done); + INIT_LIST_HEAD(&cs->link); + } + + spin_lock_init(>->sriov.pf.control.lock); + INIT_LIST_HEAD(>->sriov.pf.control.list); + INIT_WORK(>->sriov.pf.control.worker, control_worker_func); + + return drmm_add_action_or_reset(&xe->drm, control_fini_action, gt); +} + +/** + * xe_gt_sriov_pf_control_restart() - Restart SR-IOV control data after a GT reset. + * @gt: the &xe_gt + * + * Any per-VF status maintained by the PF or any ongoing VF control activity + * performed by the PF must be reset or cancelled when the GT is reset. + * + * This function is for PF only. + */ +void xe_gt_sriov_pf_control_restart(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + unsigned int n, totalvfs; + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + + pf_stop_worker(gt); + + totalvfs = xe_sriov_pf_get_totalvfs(xe); + for (n = 1; n <= totalvfs; n++) + pf_enter_vf_ready(gt, n); +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h index 405d1586f991..c85e64f099cc 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h @@ -11,6 +11,9 @@ struct xe_gt; +int xe_gt_sriov_pf_control_init(struct xe_gt *gt); +void xe_gt_sriov_pf_control_restart(struct xe_gt *gt); + int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h new file mode 100644 index 000000000000..11830aafea45 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_CONTROL_TYPES_H_ +#define _XE_GT_SRIOV_PF_CONTROL_TYPES_H_ + +#include <linux/completion.h> +#include <linux/spinlock.h> +#include <linux/workqueue_types.h> + +/** + * enum xe_gt_sriov_control_bits - Various bits used by the PF to represent a VF state + * + * @XE_GT_SRIOV_STATE_WIP: indicates that some operations are in progress. + * @XE_GT_SRIOV_STATE_FLR_WIP: indicates that a VF FLR is in progress. + * @XE_GT_SRIOV_STATE_FLR_SEND_START: indicates that the PF wants to send a FLR START command. + * @XE_GT_SRIOV_STATE_FLR_WAIT_GUC: indicates that the PF awaits for a response from the GuC. + * @XE_GT_SRIOV_STATE_FLR_GUC_DONE: indicates that the PF has received a response from the GuC. + * @XE_GT_SRIOV_STATE_FLR_RESET_CONFIG: indicates that the PF needs to clear VF's resources. + * @XE_GT_SRIOV_STATE_FLR_RESET_DATA: indicates that the PF needs to clear VF's data. + * @XE_GT_SRIOV_STATE_FLR_RESET_MMIO: indicates that the PF needs to reset VF's registers. + * @XE_GT_SRIOV_STATE_FLR_SEND_FINISH: indicates that the PF wants to send a FLR FINISH message. + * @XE_GT_SRIOV_STATE_FLR_FAILED: indicates that VF FLR sequence failed. + * @XE_GT_SRIOV_STATE_PAUSE_WIP: indicates that a VF pause operation is in progress. + * @XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE: indicates that the PF is about to send a PAUSE command. + * @XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC: indicates that the PF awaits for a response from the GuC. + * @XE_GT_SRIOV_STATE_PAUSE_GUC_DONE: indicates that the PF has received a response from the GuC. + * @XE_GT_SRIOV_STATE_PAUSE_FAILED: indicates that a VF pause operation has failed. + * @XE_GT_SRIOV_STATE_PAUSED: indicates that the VF is paused. + * @XE_GT_SRIOV_STATE_RESUME_WIP: indicates the a VF resume operation is in progress. + * @XE_GT_SRIOV_STATE_RESUME_SEND_RESUME: indicates that the PF is about to send RESUME command. + * @XE_GT_SRIOV_STATE_RESUME_FAILED: indicates that a VF resume operation has failed. + * @XE_GT_SRIOV_STATE_RESUMED: indicates that the VF was resumed. + * @XE_GT_SRIOV_STATE_STOP_WIP: indicates that a VF stop operation is in progress. + * @XE_GT_SRIOV_STATE_STOP_SEND_STOP: indicates that the PF wants to send a STOP command. + * @XE_GT_SRIOV_STATE_STOP_FAILED: indicates that the VF stop operation has failed + * @XE_GT_SRIOV_STATE_STOPPED: indicates that the VF was stopped. + * @XE_GT_SRIOV_STATE_MISMATCH: indicates that the PF has detected a VF state mismatch. + */ +enum xe_gt_sriov_control_bits { + XE_GT_SRIOV_STATE_WIP = 1, + + XE_GT_SRIOV_STATE_FLR_WIP, + XE_GT_SRIOV_STATE_FLR_SEND_START, + XE_GT_SRIOV_STATE_FLR_WAIT_GUC, + XE_GT_SRIOV_STATE_FLR_GUC_DONE, + XE_GT_SRIOV_STATE_FLR_RESET_CONFIG, + XE_GT_SRIOV_STATE_FLR_RESET_DATA, + XE_GT_SRIOV_STATE_FLR_RESET_MMIO, + XE_GT_SRIOV_STATE_FLR_SEND_FINISH, + XE_GT_SRIOV_STATE_FLR_FAILED, + + XE_GT_SRIOV_STATE_PAUSE_WIP, + XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE, + XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC, + XE_GT_SRIOV_STATE_PAUSE_GUC_DONE, + XE_GT_SRIOV_STATE_PAUSE_FAILED, + XE_GT_SRIOV_STATE_PAUSED, + + XE_GT_SRIOV_STATE_RESUME_WIP, + XE_GT_SRIOV_STATE_RESUME_SEND_RESUME, + XE_GT_SRIOV_STATE_RESUME_FAILED, + XE_GT_SRIOV_STATE_RESUMED, + + XE_GT_SRIOV_STATE_STOP_WIP, + XE_GT_SRIOV_STATE_STOP_SEND_STOP, + XE_GT_SRIOV_STATE_STOP_FAILED, + XE_GT_SRIOV_STATE_STOPPED, + + XE_GT_SRIOV_STATE_MISMATCH = BITS_PER_LONG - 1, +}; + +/** + * struct xe_gt_sriov_control_state - GT-level per-VF control state. + * + * Used by the PF driver to maintain per-VF control data. + */ +struct xe_gt_sriov_control_state { + /** @state: VF state bits */ + unsigned long state; + + /** @done: completion of async operations */ + struct completion done; + + /** @link: link into worker list */ + struct list_head link; +}; + +/** + * struct xe_gt_sriov_pf_control - GT-level control data. + * + * Used by the PF driver to maintain its data. + */ +struct xe_gt_sriov_pf_control { + /** @worker: worker that executes a VF operations */ + struct work_struct worker; + + /** @list: list of VF entries that have a pending work */ + struct list_head list; + + /** @lock: protects VF pending list */ + spinlock_t lock; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h index 40cbaea3ef44..28e1b130bf87 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h @@ -9,6 +9,7 @@ #include <linux/types.h> #include "xe_gt_sriov_pf_config_types.h" +#include "xe_gt_sriov_pf_control_types.h" #include "xe_gt_sriov_pf_monitor_types.h" #include "xe_gt_sriov_pf_policy_types.h" #include "xe_gt_sriov_pf_service_types.h" @@ -23,6 +24,9 @@ struct xe_gt_sriov_metadata { /** @monitor: per-VF monitoring data. */ struct xe_gt_sriov_monitor monitor; + /** @control: per-VF control data. */ + struct xe_gt_sriov_control_state control; + /** @version: negotiated VF/PF ABI version */ struct xe_gt_sriov_pf_service_version version; }; @@ -30,12 +34,14 @@ struct xe_gt_sriov_metadata { /** * struct xe_gt_sriov_pf - GT level PF virtualization data. * @service: service data. + * @control: control data. * @policy: policy data. * @spare: PF-only provisioning configuration. * @vfs: metadata for all VFs. */ struct xe_gt_sriov_pf { struct xe_gt_sriov_pf_service service; + struct xe_gt_sriov_pf_control control; struct xe_gt_sriov_pf_policy policy; struct xe_gt_sriov_spare_config spare; struct xe_gt_sriov_metadata *vfs; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 8892d6c2291e..4ebc82e607af 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -495,6 +495,25 @@ u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt) return gt->sriov.vf.self_config.lmem_size; } +static struct xe_ggtt_node * +vf_balloon_ggtt_node(struct xe_ggtt *ggtt, u64 start, u64 end) +{ + struct xe_ggtt_node *node; + int err; + + node = xe_ggtt_node_init(ggtt); + if (IS_ERR(node)) + return node; + + err = xe_ggtt_node_insert_balloon(node, start, end); + if (err) { + xe_ggtt_node_fini(node); + return ERR_PTR(err); + } + + return node; +} + static int vf_balloon_ggtt(struct xe_gt *gt) { struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; @@ -502,7 +521,6 @@ static int vf_balloon_ggtt(struct xe_gt *gt) struct xe_ggtt *ggtt = tile->mem.ggtt; struct xe_device *xe = gt_to_xe(gt); u64 start, end; - int err; xe_gt_assert(gt, IS_SRIOV_VF(xe)); xe_gt_assert(gt, !xe_gt_is_media_type(gt)); @@ -528,35 +546,31 @@ static int vf_balloon_ggtt(struct xe_gt *gt) start = xe_wopcm_size(xe); end = config->ggtt_base; if (end != start) { - err = xe_ggtt_balloon(ggtt, start, end, &tile->sriov.vf.ggtt_balloon[0]); - if (err) - goto failed; + tile->sriov.vf.ggtt_balloon[0] = vf_balloon_ggtt_node(ggtt, start, end); + if (IS_ERR(tile->sriov.vf.ggtt_balloon[0])) + return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]); } start = config->ggtt_base + config->ggtt_size; end = GUC_GGTT_TOP; if (end != start) { - err = xe_ggtt_balloon(ggtt, start, end, &tile->sriov.vf.ggtt_balloon[1]); - if (err) - goto deballoon; + tile->sriov.vf.ggtt_balloon[1] = vf_balloon_ggtt_node(ggtt, start, end); + if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) { + xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]); + return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]); + } } return 0; - -deballoon: - xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[0]); -failed: - return err; } static void deballoon_ggtt(struct drm_device *drm, void *arg) { struct xe_tile *tile = arg; - struct xe_ggtt *ggtt = tile->mem.ggtt; xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); - xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[1]); - xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[0]); + xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[1]); + xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]); } /** @@ -893,6 +907,32 @@ u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg) } /** + * xe_gt_sriov_vf_write32 - Handle a write to an inaccessible register. + * @gt: the &xe_gt + * @reg: the register to write + * @val: value to write + * + * This function is for VF use only. + * Currently it will trigger a WARN if running on debug build. + */ +void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) +{ + u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + xe_gt_assert(gt, !reg.vf); + + /* + * In the future, we may want to handle selected writes to inaccessible + * registers in some custom way, but for now let's just log a warning + * about such attempt, as likely we might be doing something wrong. + */ + xe_gt_WARN(gt, IS_ENABLED(CONFIG_DRM_XE_DEBUG), + "VF is trying to write %#x to an inaccessible register %#x+%#x\n", + val, reg.addr, addr - reg.addr); +} + +/** * xe_gt_sriov_vf_print_config - Print VF self config. * @gt: the &xe_gt * @p: the &drm_printer diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index 0de7f8cbcfa6..e541ce57bec2 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -22,6 +22,7 @@ u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt); u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt); u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt); u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg); +void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val); void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p); void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c new file mode 100644 index 000000000000..c7364a5aef8f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include <linux/atomic.h> + +#include <drm/drm_print.h> + +#include "xe_gt.h" +#include "xe_gt_stats.h" + +/** + * xe_gt_stats_incr - Increments the specified stats counter + * @gt: graphics tile + * @id: xe_gt_stats_id type id that needs to be incremented + * @incr: value to be incremented with + * + * Increments the specified stats counter. + */ +void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) +{ + if (id >= __XE_GT_STATS_NUM_IDS) + return; + + atomic_add(incr, >->stats.counters[id]); +} + +static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { + "tlb_inval_count", +}; + +/** + * xe_gt_stats_print_info - Print the GT stats + * @gt: graphics tile + * @p: drm_printer where it will be printed out. + * + * This prints out all the available GT stats. + */ +int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p) +{ + enum xe_gt_stats_id id; + + for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id) + drm_printf(p, "%s: %d\n", stat_description[id], + atomic_read(>->stats.counters[id])); + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h new file mode 100644 index 000000000000..91d944f6c4e4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_stats.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GT_STATS_H_ +#define _XE_GT_STATS_H_ + +struct xe_gt; +struct drm_printer; + +enum xe_gt_stats_id { + XE_GT_STATS_ID_TLB_INVAL, + /* must be the last entry */ + __XE_GT_STATS_NUM_IDS, +}; + +#ifdef CONFIG_DEBUG_FS +int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p); +void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr); +#else +static inline void +xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, + int incr) +{ +} + +#endif +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c index a05c3699e8b9..ec2b8246204b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sysfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c @@ -51,5 +51,5 @@ int xe_gt_sysfs_init(struct xe_gt *gt) gt->sysfs = &kg->base; - return devm_add_action(xe->drm.dev, gt_sysfs_fini, gt); + return devm_add_action_or_reset(xe->drm.dev, gt_sysfs_fini, gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 481d83d07367..9d82ea30f4df 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -12,6 +12,7 @@ #include "xe_gt_printk.h" #include "xe_guc.h" #include "xe_guc_ct.h" +#include "xe_gt_stats.h" #include "xe_mmio.h" #include "xe_pm.h" #include "xe_sriov.h" @@ -36,6 +37,15 @@ static long tlb_timeout_jiffies(struct xe_gt *gt) return hw_tlb_timeout + 2 * delay; } +static void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence) +{ + if (WARN_ON_ONCE(!fence->gt)) + return; + + xe_pm_runtime_put(gt_to_xe(fence->gt)); + fence->gt = NULL; /* fini() should be called once */ +} + static void __invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) { @@ -62,6 +72,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) struct xe_device *xe = gt_to_xe(gt); struct xe_gt_tlb_invalidation_fence *fence, *next; + LNL_FLUSH_WORK(>->uc.guc.ct.g2h_worker); + spin_lock_irq(>->tlb_invalidation.pending_lock); list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) { @@ -182,7 +194,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, action[1] = seqno; ret = xe_guc_ct_send_locked(&guc->ct, action, len, G2H_LEN_DW_TLB_INVALIDATE, 1); - if (!ret && fence) { + if (!ret) { spin_lock_irq(>->tlb_invalidation.pending_lock); /* * We haven't actually published the TLB fence as per @@ -203,7 +215,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, tlb_timeout_jiffies(gt)); } spin_unlock_irq(>->tlb_invalidation.pending_lock); - } else if (ret < 0 && fence) { + } else { __invalidation_fence_signal(xe, fence); } if (!ret) { @@ -213,6 +225,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, gt->tlb_invalidation.seqno = 1; } mutex_unlock(&guc->ct.lock); + xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1); return ret; } @@ -265,10 +278,8 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) xe_gt_tlb_invalidation_fence_init(gt, &fence, true); ret = xe_gt_tlb_invalidation_guc(gt, &fence); - if (ret < 0) { - xe_gt_tlb_invalidation_fence_fini(&fence); + if (ret) return ret; - } xe_gt_tlb_invalidation_fence_wait(&fence); } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { @@ -494,7 +505,8 @@ static const struct dma_fence_ops invalidation_fence_ops = { * @stack: fence is stack variable * * Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini - * must be called if fence is not signaled. + * will be automatically called when fence is signalled (all fences must signal), + * even on error. */ void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, @@ -514,14 +526,3 @@ void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, dma_fence_get(&fence->base); fence->gt = gt; } - -/** - * xe_gt_tlb_invalidation_fence_fini - Finalize TLB invalidation fence - * @fence: TLB invalidation fence to finalize - * - * Drop PM ref which fence took durinig init. - */ -void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence) -{ - xe_pm_runtime_put(gt_to_xe(fence->gt)); -} diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index a84065fa324c..f430d5797af7 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -28,7 +28,6 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, bool stack); -void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence); static inline void xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence) diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 25ff03ab8448..0662f71c6ede 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -6,6 +6,7 @@ #include "xe_gt_topology.h" #include <linux/bitmap.h> +#include <linux/compiler.h> #include "regs/xe_gt_regs.h" #include "xe_assert.h" @@ -31,7 +32,7 @@ load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...) } static void -load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask) +load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type) { struct xe_device *xe = gt_to_xe(gt); u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE); @@ -47,11 +48,13 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask) if (GRAPHICS_VERx100(xe) < 1250) reg_val = ~reg_val & XELP_EU_MASK; - /* On PVC, one bit = one EU */ - if (GRAPHICS_VERx100(xe) == 1260) { + if (GRAPHICS_VERx100(xe) == 1260 || GRAPHICS_VER(xe) >= 20) { + /* SIMD16 EUs, one bit == one EU */ + *eu_type = XE_GT_EU_TYPE_SIMD16; val = reg_val; } else { - /* All other platforms, one bit = 2 EU */ + /* SIMD8 EUs, one bit == 2 EU */ + *eu_type = XE_GT_EU_TYPE_SIMD8; for (i = 0; i < fls(reg_val); i++) if (reg_val & BIT(i)) val |= 0x3 << 2 * i; @@ -213,7 +216,7 @@ xe_gt_topology_init(struct xe_gt *gt) XEHP_GT_COMPUTE_DSS_ENABLE, XEHPC_GT_COMPUTE_DSS_ENABLE_EXT, XE2_GT_COMPUTE_DSS_2); - load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss); + load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, >->fuse_topo.eu_type); load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask); p = drm_dbg_printer(>_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology"); @@ -221,6 +224,18 @@ xe_gt_topology_init(struct xe_gt *gt) xe_gt_topology_dump(gt, &p); } +static const char *eu_type_to_str(enum xe_gt_eu_type eu_type) +{ + switch (eu_type) { + case XE_GT_EU_TYPE_SIMD16: + return "simd16"; + case XE_GT_EU_TYPE_SIMD8: + return "simd8"; + } + + return NULL; +} + void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) { @@ -231,6 +246,8 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS, gt->fuse_topo.eu_mask_per_dss); + drm_printf(p, "EU type: %s\n", + eu_type_to_str(gt->fuse_topo.eu_type)); drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS, gt->fuse_topo.l3_bank_mask); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 6b5e0b45efb0..3d1c51de0268 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -10,6 +10,7 @@ #include "xe_gt_idle_types.h" #include "xe_gt_sriov_pf_types.h" #include "xe_gt_sriov_vf_types.h" +#include "xe_gt_stats.h" #include "xe_hw_engine_types.h" #include "xe_hw_fence_types.h" #include "xe_oa.h" @@ -27,6 +28,11 @@ enum xe_gt_type { XE_GT_TYPE_MEDIA, }; +enum xe_gt_eu_type { + XE_GT_EU_TYPE_SIMD8, + XE_GT_EU_TYPE_SIMD16, +}; + #define XE_MAX_DSS_FUSE_REGS 3 #define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS) #define XE_MAX_EU_FUSE_REGS 1 @@ -128,6 +134,14 @@ struct xe_gt { u8 has_indirect_ring_state:1; } info; +#if IS_ENABLED(CONFIG_DEBUG_FS) + /** @stats: GT stats */ + struct { + /** @stats.counters: counters for various GT stats */ + atomic_t counters[__XE_GT_STATS_NUM_IDS]; + } stats; +#endif + /** * @mmio: mmio info for GT. All GTs within a tile share the same * register space, but have their own copy of GSI registers at a @@ -233,9 +247,14 @@ struct xe_gt { struct pf_queue { /** @usm.pf_queue.gt: back pointer to GT */ struct xe_gt *gt; -#define PF_QUEUE_NUM_DW 128 /** @usm.pf_queue.data: data in the page fault queue */ - u32 data[PF_QUEUE_NUM_DW]; + u32 *data; + /** + * @usm.pf_queue.num_dw: number of DWORDS in the page + * fault queue. Dynamically calculated based on the number + * of compute resources available. + */ + u32 num_dw; /** * @usm.pf_queue.tail: tail pointer in DWs for page fault queue, * moved by worker which processes faults (consumer). @@ -310,12 +329,6 @@ struct xe_gt { /** @eclass: per hardware engine class interface on the GT */ struct xe_hw_engine_class_intf eclass[XE_ENGINE_CLASS_MAX]; - /** @pcode: GT's PCODE */ - struct { - /** @pcode.lock: protecting GT's PCODE mailbox data */ - struct mutex lock; - } pcode; - /** @sysfs: sysfs' kobj used by xe_gt_sysfs */ struct kobject *sysfs; @@ -343,6 +356,12 @@ struct xe_gt { /** @fuse_topo.l3_bank_mask: L3 bank mask */ xe_l3_bank_mask_t l3_bank_mask; + + /** + * @fuse_topo.eu_type: type/width of EU stored in + * fuse_topo.eu_mask_per_dss + */ + enum xe_gt_eu_type eu_type; } fuse_topo; /** @steering: register steering for individual HW units */ @@ -357,11 +376,23 @@ struct xe_gt { } steering[NUM_STEERING_TYPES]; /** + * @steering_dss_per_grp: number of DSS per steering group (gslice, + * cslice, etc.). + */ + unsigned int steering_dss_per_grp; + + /** * @mcr_lock: protects the MCR_SELECTOR register for the duration * of a steered operation */ spinlock_t mcr_lock; + /** + * @global_invl_lock: protects the register for the duration + * of a global invalidation of l2 cache + */ + spinlock_t global_invl_lock; + /** @wa_active: keep track of active workarounds */ struct { /** @wa_active.gt: bitmap with active GT workarounds */ @@ -370,8 +401,14 @@ struct xe_gt { unsigned long *engine; /** @wa_active.lrc: bitmap with active LRC workarounds */ unsigned long *lrc; - /** @wa_active.oob: bitmap with active OOB workaroudns */ + /** @wa_active.oob: bitmap with active OOB workarounds */ unsigned long *oob; + /** + * @wa_active.oob_initialized: mark oob as initialized to help + * detecting misuse of XE_WA() - it can only be called on + * initialization after OOB WAs have being processed + */ + bool oob_initialized; } wa_active; /** @user_engines: engines present in GT and available to userspace */ diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index de0fe9e65746..52df28032a6f 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -350,6 +350,8 @@ int xe_guc_init(struct xe_guc *guc) if (ret) goto out; + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); + ret = devm_add_action_or_reset(xe->drm.dev, guc_fini_hw, guc); if (ret) goto out; @@ -358,8 +360,6 @@ int xe_guc_init(struct xe_guc *guc) xe_guc_comm_init_early(guc); - xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); - return 0; out: diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index e0bbf98f849d..42116b167c98 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -11,6 +11,18 @@ #include "xe_hw_engine_types.h" #include "xe_macros.h" +/* + * GuC version number components are defined to be only 8-bit size, + * so converting to a 32bit 8.8.8 integer allows simple (and safe) + * numerical comparisons. + */ +#define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) +#define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch) +#define GUC_SUBMIT_VER(guc) \ + MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]) +#define GUC_FIRMWARE_VER(guc) \ + MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_RELEASE]) + struct drm_printer; void xe_guc_comm_init_early(struct xe_guc *guc); diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 1c60b685dbc6..d1902a8581ca 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -24,6 +24,7 @@ #include "xe_map.h" #include "xe_mmio.h" #include "xe_platform_types.h" +#include "xe_uc_fw.h" #include "xe_wa.h" /* Slack of a few additional entries per engine */ @@ -367,6 +368,11 @@ static void guc_waklv_init(struct xe_guc_ads *ads) 0xC40, &offset, &remain); + if (XE_WA(gt, 14022293748) || XE_WA(gt, 22019794406)) + guc_waklv_enable_simple(ads, + GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET, + &offset, &remain); + size = guc_ads_waklv_size(ads) - remain; if (!size) return; diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 64afc90ad2c5..9c505d3517cd 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -105,12 +105,20 @@ ct_to_xe(struct xe_guc_ct *ct) * enough space to avoid backpressure on the driver. We increase the size * of the receive buffer (relative to the send) to ensure a G2H response * CTB has a landing spot. + * + * In addition to submissions, the G2H buffer needs to be able to hold + * enough space for recoverable page fault notifications. The number of + * page faults is interrupt driven and can be as much as the number of + * compute resources available. However, most of the actual work for these + * is in a separate page fault worker thread. Therefore we only need to + * make sure the queue has enough space to handle all of the submissions + * and responses and an extra buffer for incoming page faults. */ #define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) #define CTB_H2G_BUFFER_SIZE (SZ_4K) -#define CTB_G2H_BUFFER_SIZE (4 * CTB_H2G_BUFFER_SIZE) -#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 4) +#define CTB_G2H_BUFFER_SIZE (SZ_128K) +#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 2) /** * xe_guc_ct_queue_proc_time_jiffies - Return maximum time to process a full @@ -516,6 +524,7 @@ static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) lockdep_assert_held(&ct->fast_lock); xe_gt_assert(ct_to_gt(ct), ct->ctbs.g2h.info.space + g2h_len <= ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space); + xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding); ct->ctbs.g2h.info.space += g2h_len; if (!--ct->g2h_outstanding) @@ -658,16 +667,12 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, num_g2h = 1; if (g2h_fence_needs_alloc(g2h_fence)) { - void *ptr; - g2h_fence->seqno = next_ct_seqno(ct, true); - ptr = xa_store(&ct->fence_lookup, - g2h_fence->seqno, - g2h_fence, GFP_ATOMIC); - if (IS_ERR(ptr)) { - ret = PTR_ERR(ptr); + ret = xa_err(xa_store(&ct->fence_lookup, + g2h_fence->seqno, g2h_fence, + GFP_ATOMIC)); + if (ret) goto out; - } } seqno = g2h_fence->seqno; @@ -870,14 +875,11 @@ retry: retry_same_fence: ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence); if (unlikely(ret == -ENOMEM)) { - void *ptr; - /* Retry allocation /w GFP_KERNEL */ - ptr = xa_store(&ct->fence_lookup, - g2h_fence.seqno, - &g2h_fence, GFP_KERNEL); - if (IS_ERR(ptr)) - return PTR_ERR(ptr); + ret = xa_err(xa_store(&ct->fence_lookup, g2h_fence.seqno, + &g2h_fence, GFP_KERNEL)); + if (ret) + return ret; goto retry_same_fence; } else if (unlikely(ret)) { @@ -894,16 +896,35 @@ retry_same_fence: } ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ); + + if (!ret) { + LNL_FLUSH_WORK(&ct->g2h_worker); + if (g2h_fence.done) { + xe_gt_warn(gt, "G2H fence %u, action %04x, done\n", + g2h_fence.seqno, action[0]); + ret = 1; + } + } + + /* + * Ensure we serialize with completion side to prevent UAF with fence going out of scope on + * the stack, since we have no clue if it will fire after the timeout before we can erase + * from the xa. Also we have some dependent loads and stores below for which we need the + * correct ordering, and we lack the needed barriers. + */ + mutex_lock(&ct->lock); if (!ret) { - xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x", - g2h_fence.seqno, action[0]); + xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s", + g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done)); xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno); + mutex_unlock(&ct->lock); return -ETIME; } if (g2h_fence.retry) { xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n", action[0], g2h_fence.reason); + mutex_unlock(&ct->lock); goto retry; } if (g2h_fence.fail) { @@ -912,7 +933,12 @@ retry_same_fence: ret = -EIO; } - return ret > 0 ? response_buffer ? g2h_fence.response_len : g2h_fence.response_data : ret; + if (ret > 0) + ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data; + + mutex_unlock(&ct->lock); + + return ret; } /** diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c index d9b570a154a2..af2c817d552c 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -6,6 +6,7 @@ #include "xe_guc_hwconfig.h" #include <drm/drm_managed.h> +#include <drm/drm_print.h> #include "abi/guc_actions_abi.h" #include "xe_bo.h" @@ -103,3 +104,99 @@ void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst) xe_map_memcpy_from(xe, dst, &guc->hwconfig.bo->vmap, 0, guc->hwconfig.size); } + +void xe_guc_hwconfig_dump(struct xe_guc *guc, struct drm_printer *p) +{ + size_t size = xe_guc_hwconfig_size(guc); + u32 *hwconfig; + u64 num_dw; + u32 extra_bytes; + int i = 0; + + if (size == 0) { + drm_printf(p, "No hwconfig available\n"); + return; + } + + num_dw = div_u64_rem(size, sizeof(u32), &extra_bytes); + + hwconfig = kzalloc(size, GFP_KERNEL); + if (!hwconfig) { + drm_printf(p, "Error: could not allocate hwconfig memory\n"); + return; + } + + xe_guc_hwconfig_copy(guc, hwconfig); + + /* An entry requires at least three dwords for key, length, value */ + while (i + 3 <= num_dw) { + u32 attribute = hwconfig[i++]; + u32 len_dw = hwconfig[i++]; + + if (i + len_dw > num_dw) { + drm_printf(p, "Error: Attribute %u is %u dwords, but only %llu remain\n", + attribute, len_dw, num_dw - i); + len_dw = num_dw - i; + } + + /* + * If it's a single dword (as most hwconfig attributes are), + * then it's probably a number that makes sense to display + * in decimal form. In the rare cases where it's more than + * one dword, just print it in hex form and let the user + * figure out how to interpret it. + */ + if (len_dw == 1) + drm_printf(p, "[%2u] = %u\n", attribute, hwconfig[i]); + else + drm_printf(p, "[%2u] = { %*ph }\n", attribute, + (int)(len_dw * sizeof(u32)), &hwconfig[i]); + i += len_dw; + } + + if (i < num_dw || extra_bytes) + drm_printf(p, "Error: %llu extra bytes at end of hwconfig\n", + (num_dw - i) * sizeof(u32) + extra_bytes); + + kfree(hwconfig); +} + +/* + * Lookup a specific 32-bit attribute value in the GuC's hwconfig table. + */ +int xe_guc_hwconfig_lookup_u32(struct xe_guc *guc, u32 attribute, u32 *val) +{ + size_t size = xe_guc_hwconfig_size(guc); + u64 num_dw = div_u64(size, sizeof(u32)); + u32 *hwconfig; + bool found = false; + int i = 0; + + if (num_dw == 0) + return -EINVAL; + + hwconfig = kzalloc(size, GFP_KERNEL); + if (!hwconfig) + return -ENOMEM; + + xe_guc_hwconfig_copy(guc, hwconfig); + + /* An entry requires at least three dwords for key, length, value */ + while (i + 3 <= num_dw) { + u32 key = hwconfig[i++]; + u32 len_dw = hwconfig[i++]; + + if (key != attribute) { + i += len_dw; + continue; + } + + *val = hwconfig[i]; + found = true; + break; + } + + kfree(hwconfig); + + return found ? 0 : -ENOENT; +} diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.h b/drivers/gpu/drm/xe/xe_guc_hwconfig.h index b5794d641900..ab4e5038236e 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.h +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.h @@ -8,10 +8,13 @@ #include <linux/types.h> +struct drm_printer; struct xe_guc; int xe_guc_hwconfig_init(struct xe_guc *guc); u32 xe_guc_hwconfig_size(struct xe_guc *guc); void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst); +void xe_guc_hwconfig_dump(struct xe_guc *guc, struct drm_printer *p); +int xe_guc_hwconfig_lookup_u32(struct xe_guc *guc, u32 attribute, u32 *val); #endif diff --git a/drivers/gpu/drm/xe/xe_guc_id_mgr.c b/drivers/gpu/drm/xe/xe_guc_id_mgr.c index cd0549d0ef89..e845425d670b 100644 --- a/drivers/gpu/drm/xe/xe_guc_id_mgr.c +++ b/drivers/gpu/drm/xe/xe_guc_id_mgr.c @@ -97,8 +97,8 @@ int xe_guc_id_mgr_init(struct xe_guc_id_mgr *idm, unsigned int limit) if (ret) return ret; - xe_gt_info(idm_to_gt(idm), "using %u GUC ID%s\n", - idm->total, str_plural(idm->total)); + xe_gt_dbg(idm_to_gt(idm), "using %u GuC ID%s\n", + idm->total, str_plural(idm->total)); return 0; } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 32e93a8127d4..034b29984d5e 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -915,7 +915,7 @@ static void pc_init_pcode_freq(struct xe_guc_pc *pc) u32 min = DIV_ROUND_CLOSEST(pc->rpn_freq, GT_FREQUENCY_MULTIPLIER); u32 max = DIV_ROUND_CLOSEST(pc->rp0_freq, GT_FREQUENCY_MULTIPLIER); - XE_WARN_ON(xe_pcode_init_min_freq_table(pc_to_gt(pc), min, max)); + XE_WARN_ON(xe_pcode_init_min_freq_table(gt_to_tile(pc_to_gt(pc)), min, max)); } static int pc_init_freqs(struct xe_guc_pc *pc) @@ -1042,7 +1042,7 @@ static void xe_guc_pc_fini_hw(void *arg) return; XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL)); - XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); + xe_guc_pc_gucrc_disable(pc); XE_WARN_ON(xe_guc_pc_stop(pc)); /* Bind requested freq to mert_freq_cap before unload */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 6398629e6b4e..4f5d00aea716 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -224,75 +224,28 @@ static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) EXEC_QUEUE_STATE_BANNED)); } -#ifdef CONFIG_PROVE_LOCKING -static int alloc_submit_wq(struct xe_guc *guc) -{ - int i; - - for (i = 0; i < NUM_SUBMIT_WQ; ++i) { - guc->submission_state.submit_wq_pool[i] = - alloc_ordered_workqueue("submit_wq", 0); - if (!guc->submission_state.submit_wq_pool[i]) - goto err_free; - } - - return 0; - -err_free: - while (i) - destroy_workqueue(guc->submission_state.submit_wq_pool[--i]); - - return -ENOMEM; -} - -static void free_submit_wq(struct xe_guc *guc) -{ - int i; - - for (i = 0; i < NUM_SUBMIT_WQ; ++i) - destroy_workqueue(guc->submission_state.submit_wq_pool[i]); -} - -static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) -{ - int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ; - - return guc->submission_state.submit_wq_pool[idx]; -} -#else -static int alloc_submit_wq(struct xe_guc *guc) -{ - return 0; -} - -static void free_submit_wq(struct xe_guc *guc) -{ - -} - -static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) -{ - return NULL; -} -#endif - static void guc_submit_fini(struct drm_device *drm, void *arg) { struct xe_guc *guc = arg; xa_destroy(&guc->submission_state.exec_queue_lookup); - free_submit_wq(guc); } -static void guc_submit_wedged_fini(struct drm_device *drm, void *arg) +static void guc_submit_wedged_fini(void *arg) { struct xe_guc *guc = arg; struct xe_exec_queue *q; unsigned long index; - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - if (exec_queue_wedged(q)) + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + if (exec_queue_wedged(q)) { + mutex_unlock(&guc->submission_state.lock); xe_exec_queue_put(q); + mutex_lock(&guc->submission_state.lock); + } + } + mutex_unlock(&guc->submission_state.lock); } static const struct xe_exec_queue_ops guc_exec_queue_ops; @@ -337,14 +290,12 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) if (err) return err; - err = alloc_submit_wq(guc); - if (err) - return err; - gt->exec_queue_ops = &guc_exec_queue_ops; xa_init(&guc->submission_state.exec_queue_lookup); + init_waitqueue_head(&guc->submission_state.fini_wq); + primelockdep(guc); return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); @@ -361,12 +312,14 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa xe_guc_id_mgr_release_locked(&guc->submission_state.idm, q->guc->id, q->width); + + if (xa_empty(&guc->submission_state.exec_queue_lookup)) + wake_up(&guc->submission_state.fini_wq); } static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) { int ret; - void *ptr; int i; /* @@ -386,12 +339,10 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) q->guc->id = ret; for (i = 0; i < q->width; ++i) { - ptr = xa_store(&guc->submission_state.exec_queue_lookup, - q->guc->id + i, q, GFP_NOWAIT); - if (IS_ERR(ptr)) { - ret = PTR_ERR(ptr); + ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup, + q->guc->id + i, q, GFP_NOWAIT)); + if (ret) goto err_release; - } } return 0; @@ -794,8 +745,6 @@ static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) { struct xe_sched_job *job = to_xe_sched_job(drm_job); - xe_exec_queue_update_run_ticks(job->q); - trace_xe_sched_job_free(job); xe_sched_job_put(job); } @@ -877,7 +826,7 @@ void xe_guc_submit_wedge(struct xe_guc *guc) xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); - err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm, + err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, guc_submit_wedged_fini, guc); if (err) { drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n"); @@ -965,12 +914,22 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) { struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q)); - u32 ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]); - u32 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]); + u32 ctx_timestamp, ctx_job_timestamp; u32 timeout_ms = q->sched_props.job_timeout_ms; u32 diff; u64 running_time_ms; + if (!xe_sched_job_started(job)) { + xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started", + xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), + q->guc->id); + + return xe_sched_invalidate_job(job, 2); + } + + ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]); + ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]); + /* * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch * possible overflows with a high timeout. @@ -1071,16 +1030,21 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) struct xe_exec_queue *q = job->q; struct xe_gpu_scheduler *sched = &q->guc->sched; struct xe_guc *guc = exec_queue_to_guc(q); + const char *process_name = "no process"; int err = -ETIME; + pid_t pid = -1; int i = 0; bool wedged, skip_timeout_check; /* * TDR has fired before free job worker. Common if exec queue - * immediately closed after last fence signaled. + * immediately closed after last fence signaled. Add back to pending + * list so job can be freed and kick scheduler ensuring free job is not + * lost. */ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { - guc_exec_queue_free_job(drm_job); + xe_sched_add_pending_job(sched, job); + xe_sched_submission_start(sched); return DRM_GPU_SCHED_STAT_NOMINAL; } @@ -1093,10 +1057,6 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) exec_queue_killed_or_banned_or_wedged(q) || exec_queue_destroyed(q); - /* Job hasn't started, can't be timed out */ - if (!skip_timeout_check && !xe_sched_job_started(job)) - goto rearm; - /* * XXX: Sampling timeout doesn't work in wedged mode as we have to * modify scheduling state to read timestamp. We could read the @@ -1168,9 +1128,14 @@ trigger_reset: goto sched_enable; } - xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx", + if (q->vm && q->vm->xef) { + process_name = q->vm->xef->process_name; + pid = q->vm->xef->pid; + } + xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]", xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), - q->guc->id, q->flags); + q->guc->id, q->flags, process_name, pid); + trace_xe_sched_job_timedout(job); if (!exec_queue_killed(q)) @@ -1261,13 +1226,16 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) static void guc_exec_queue_fini_async(struct xe_exec_queue *q) { + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); /* We must block on kernel engines so slabs are empty on driver unload */ if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) __guc_exec_queue_fini_async(&q->guc->fini_async); else - queue_work(system_wq, &q->guc->fini_async); + queue_work(xe->destroy_wq, &q->guc->fini_async); } static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) @@ -1312,6 +1280,15 @@ static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *ms kfree(msg); } +static void __suspend_fence_signal(struct xe_exec_queue *q) +{ + if (!q->guc->suspend_pending) + return; + + WRITE_ONCE(q->guc->suspend_pending, false); + wake_up(&q->guc->suspend_wait); +} + static void suspend_fence_signal(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); @@ -1321,9 +1298,7 @@ static void suspend_fence_signal(struct xe_exec_queue *q) guc_read_stopped(guc)); xe_assert(xe, q->guc->suspend_pending); - q->guc->suspend_pending = false; - smp_wmb(); - wake_up(&q->guc->suspend_wait); + __suspend_fence_signal(q); } static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) @@ -1360,9 +1335,11 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) struct xe_exec_queue *q = msg->private_data; if (guc_exec_queue_allowed_to_change_state(q)) { - q->guc->resume_time = RESUME_PENDING; clear_exec_queue_suspended(q); - enable_scheduling(q); + if (!exec_queue_enabled(q)) { + q->guc->resume_time = RESUME_PENDING; + enable_scheduling(q); + } } else { clear_exec_queue_suspended(q); } @@ -1372,9 +1349,13 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) #define SET_SCHED_PROPS 2 #define SUSPEND 3 #define RESUME 4 +#define OPCODE_MASK 0xf +#define MSG_LOCKED BIT(8) static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) { + struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data)); + trace_xe_sched_msg_recv(msg); switch (msg->opcode) { @@ -1394,7 +1375,7 @@ static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) XE_WARN_ON("Unknown message type"); } - xe_pm_runtime_put(guc_to_xe(exec_queue_to_guc(msg->private_data))); + xe_pm_runtime_put(xe); } static const struct drm_sched_backend_ops drm_sched_ops = { @@ -1414,7 +1395,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) struct xe_device *xe = guc_to_xe(guc); struct xe_guc_exec_queue *ge; long timeout; - int err; + int err, i; xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc))); @@ -1426,11 +1407,13 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) ge->q = q; init_waitqueue_head(&ge->suspend_wait); + for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) + INIT_LIST_HEAD(&ge->static_msgs[i].link); + timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies(q->sched_props.job_timeout_ms); err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, - get_submit_wq(guc), - q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64, + NULL, q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64, timeout, guc_to_gt(guc)->ordered_wq, NULL, q->name, gt_to_xe(q->gt)->drm.dev); if (err) @@ -1478,6 +1461,7 @@ static void guc_exec_queue_kill(struct xe_exec_queue *q) { trace_xe_exec_queue_kill(q); set_exec_queue_killed(q); + __suspend_fence_signal(q); xe_guc_exec_queue_trigger_cleanup(q); } @@ -1487,11 +1471,26 @@ static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); INIT_LIST_HEAD(&msg->link); - msg->opcode = opcode; + msg->opcode = opcode & OPCODE_MASK; msg->private_data = q; trace_xe_sched_msg_add(msg); - xe_sched_add_msg(&q->guc->sched, msg); + if (opcode & MSG_LOCKED) + xe_sched_add_msg_locked(&q->guc->sched, msg); + else + xe_sched_add_msg(&q->guc->sched, msg); +} + +static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, + struct xe_sched_msg *msg, + u32 opcode) +{ + if (!list_empty(&msg->link)) + return false; + + guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED); + + return true; } #define STATIC_MSG_CLEANUP 0 @@ -1565,34 +1564,59 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, static int guc_exec_queue_suspend(struct xe_exec_queue *q) { + struct xe_gpu_scheduler *sched = &q->guc->sched; struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; - if (exec_queue_killed_or_banned_or_wedged(q) || q->guc->suspend_pending) + if (exec_queue_killed_or_banned_or_wedged(q)) return -EINVAL; - q->guc->suspend_pending = true; - guc_exec_queue_add_msg(q, msg, SUSPEND); + xe_sched_msg_lock(sched); + if (guc_exec_queue_try_add_msg(q, msg, SUSPEND)) + q->guc->suspend_pending = true; + xe_sched_msg_unlock(sched); return 0; } -static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q) +static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); + int ret; + + /* + * Likely don't need to check exec_queue_killed() as we clear + * suspend_pending upon kill but to be paranoid but races in which + * suspend_pending is set after kill also check kill here. + */ + ret = wait_event_interruptible_timeout(q->guc->suspend_wait, + !READ_ONCE(q->guc->suspend_pending) || + exec_queue_killed(q) || + guc_read_stopped(guc), + HZ * 5); + + if (!ret) { + xe_gt_warn(guc_to_gt(guc), + "Suspend fence, guc_id=%d, failed to respond", + q->guc->id); + /* XXX: Trigger GT reset? */ + return -ETIME; + } - wait_event(q->guc->suspend_wait, !q->guc->suspend_pending || - guc_read_stopped(guc)); + return ret < 0 ? ret : 0; } static void guc_exec_queue_resume(struct xe_exec_queue *q) { + struct xe_gpu_scheduler *sched = &q->guc->sched; struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); xe_assert(xe, !q->guc->suspend_pending); - guc_exec_queue_add_msg(q, msg, RESUME); + xe_sched_msg_lock(sched); + guc_exec_queue_try_add_msg(q, msg, RESUME); + xe_sched_msg_unlock(sched); } static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) @@ -1706,8 +1730,13 @@ void xe_guc_submit_stop(struct xe_guc *guc) mutex_lock(&guc->submission_state.lock); - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + /* Prevent redundant attempts to stop parallel queues */ + if (q->guc->id != index) + continue; + guc_exec_queue_stop(guc, q); + } mutex_unlock(&guc->submission_state.lock); @@ -1732,6 +1761,7 @@ static void guc_exec_queue_start(struct xe_exec_queue *q) } xe_sched_submission_start(sched); + xe_sched_submission_resume_tdr(sched); } int xe_guc_submit_start(struct xe_guc *guc) @@ -1744,8 +1774,13 @@ int xe_guc_submit_start(struct xe_guc *guc) mutex_lock(&guc->submission_state.lock); atomic_dec(&guc->submission_state.stopped); - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + /* Prevent redundant attempts to start parallel queues */ + if (q->guc->id != index) + continue; + guc_exec_queue_start(q); + } mutex_unlock(&guc->submission_state.lock); wake_up_all(&guc->ct.wq); diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index 546ac6350a31..ed150fc09ad0 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -72,15 +72,10 @@ struct xe_guc { atomic_t stopped; /** @submission_state.lock: protects submission state */ struct mutex lock; -#ifdef CONFIG_PROVE_LOCKING -#define NUM_SUBMIT_WQ 256 - /** @submission_state.submit_wq_pool: submission ordered workqueues pool */ - struct workqueue_struct *submit_wq_pool[NUM_SUBMIT_WQ]; - /** @submission_state.submit_wq_idx: submission ordered workqueue index */ - int submit_wq_idx; -#endif /** @submission_state.enabled: submission is enabled */ bool enabled; + /** @submission_state.fini_wq: submit fini wait queue */ + wait_queue_head_t fini_wq; } submission_state; /** @hwconfig: Hardware config state */ struct { diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c index 1c9d38b6f5f1..65b2e147c4b9 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.c +++ b/drivers/gpu/drm/xe/xe_heci_gsc.c @@ -92,7 +92,7 @@ void xe_heci_gsc_fini(struct xe_device *xe) { struct xe_heci_gsc *heci_gsc = &xe->heci_gsc; - if (!HAS_HECI_GSCFI(xe)) + if (!HAS_HECI_GSCFI(xe) && !HAS_HECI_CSCFI(xe)) return; if (heci_gsc->adev) { @@ -177,12 +177,14 @@ void xe_heci_gsc_init(struct xe_device *xe) const struct heci_gsc_def *def; int ret; - if (!HAS_HECI_GSCFI(xe)) + if (!HAS_HECI_GSCFI(xe) && !HAS_HECI_CSCFI(xe)) return; heci_gsc->irq = -1; - if (xe->info.platform == XE_PVC) { + if (xe->info.platform == XE_BATTLEMAGE) { + def = &heci_gsc_def_dg2; + } else if (xe->info.platform == XE_PVC) { def = &heci_gsc_def_pvc; } else if (xe->info.platform == XE_DG2) { def = &heci_gsc_def_dg2; @@ -232,3 +234,23 @@ void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir) if (ret) drm_err_ratelimited(&xe->drm, "error handling GSC irq: %d\n", ret); } + +void xe_heci_csc_irq_handler(struct xe_device *xe, u32 iir) +{ + int ret; + + if ((iir & CSC_IRQ_INTF(1)) == 0) + return; + + if (!HAS_HECI_CSCFI(xe)) { + drm_warn_once(&xe->drm, "CSC irq: not supported"); + return; + } + + if (xe->heci_gsc.irq < 0) + return; + + ret = generic_handle_irq(xe->heci_gsc.irq); + if (ret) + drm_err_ratelimited(&xe->drm, "error handling GSC irq: %d\n", ret); +} diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.h b/drivers/gpu/drm/xe/xe_heci_gsc.h index 9db454478fae..48b3b1838045 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.h +++ b/drivers/gpu/drm/xe/xe_heci_gsc.h @@ -11,10 +11,15 @@ struct xe_device; struct mei_aux_device; /* - * The HECI1 bit corresponds to bit15 and HECI2 to bit14. + * GSC HECI1 bit corresponds to bit15 and HECI2 to bit14. * The reason for this is to allow growth for more interfaces in the future. */ -#define GSC_IRQ_INTF(_x) BIT(15 - (_x)) +#define GSC_IRQ_INTF(_x) BIT(15 - (_x)) + +/* + * CSC HECI1 bit corresponds to bit9 and HECI2 to bit10. + */ +#define CSC_IRQ_INTF(_x) BIT(9 + (_x)) /** * struct xe_heci_gsc - graphics security controller for xe, HECI interface @@ -31,5 +36,6 @@ struct xe_heci_gsc { void xe_heci_gsc_init(struct xe_device *xe); void xe_heci_gsc_fini(struct xe_device *xe); void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir); +void xe_heci_csc_irq_handler(struct xe_device *xe, u32 iir); #endif /* __XE_HECI_GSC_DEV_H__ */ diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index bec4366e5513..f5459f97af23 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -43,14 +43,6 @@ huc_to_guc(struct xe_huc *huc) return &container_of(huc, struct xe_uc, huc)->guc; } -static void free_gsc_pkt(struct drm_device *drm, void *arg) -{ - struct xe_huc *huc = arg; - - xe_bo_unpin_map_no_vm(huc->gsc_pkt); - huc->gsc_pkt = NULL; -} - #define PXP43_HUC_AUTH_INOUT_SIZE SZ_4K static int huc_alloc_gsc_pkt(struct xe_huc *huc) { @@ -59,17 +51,16 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc) struct xe_bo *bo; /* we use a single object for both input and output */ - bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL, - PXP43_HUC_AUTH_INOUT_SIZE * 2, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT); + bo = xe_managed_bo_create_pin_map(xe, gt_to_tile(gt), + PXP43_HUC_AUTH_INOUT_SIZE * 2, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT); if (IS_ERR(bo)) return PTR_ERR(bo); huc->gsc_pkt = bo; - return drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc); + return 0; } int xe_huc_init(struct xe_huc *huc) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 07ed9fd28f19..c9c3beb3ce8d 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -5,7 +5,10 @@ #include "xe_hw_engine.h" +#include <linux/nospec.h> + #include <drm/drm_managed.h> +#include <uapi/drm/xe_drm.h> #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" @@ -20,6 +23,7 @@ #include "xe_gt_printk.h" #include "xe_gt_mcr.h" #include "xe_gt_topology.h" +#include "xe_hw_engine_group.h" #include "xe_hw_fence.h" #include "xe_irq.h" #include "xe_lrc.h" @@ -263,19 +267,28 @@ static const struct engine_info engine_infos[] = { }, }; -static void hw_engine_fini(struct drm_device *drm, void *arg) +static void hw_engine_fini(void *arg) { struct xe_hw_engine *hwe = arg; if (hwe->exl_port) xe_execlist_port_destroy(hwe->exl_port); - xe_lrc_put(hwe->kernel_lrc); hwe->gt = NULL; } -static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, - u32 val) +/** + * xe_hw_engine_mmio_write32() - Write engine register + * @hwe: engine + * @reg: register to write into + * @val: desired 32-bit value to write + * + * This function will write val into an engine specific register. + * Forcewake must be held by the caller. + * + */ +void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe, + struct xe_reg reg, u32 val) { xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base)); xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); @@ -285,7 +298,17 @@ static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, xe_mmio_write32(hwe->gt, reg, val); } -static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg) +/** + * xe_hw_engine_mmio_read32() - Read engine register + * @hwe: engine + * @reg: register to read from + * + * This function will read from an engine specific register. + * Forcewake must be held by the caller. + * + * Return: value of the 32-bit register. + */ +u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg) { xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base)); xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); @@ -304,14 +327,14 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) xe_mmio_write32(hwe->gt, RCU_MODE, _MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE)); - hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); - hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), - xe_bo_ggtt_addr(hwe->hwsp)); - hw_engine_mmio_write32(hwe, RING_MODE(0), - _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); - hw_engine_mmio_write32(hwe, RING_MI_MODE(0), - _MASKED_BIT_DISABLE(STOP_RING)); - hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); + xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); + xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), + xe_bo_ggtt_addr(hwe->hwsp)); + xe_hw_engine_mmio_write32(hwe, RING_MODE(0), + _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); + xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0), + _MASKED_BIT_DISABLE(STOP_RING)); + xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); } static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt, @@ -425,6 +448,12 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) 0xA, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + /* Enable Priority Mem Read */ + { XE_RTP_NAME("Priority_Mem_Read"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, {} }; @@ -528,21 +557,13 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, goto err_name; } - hwe->kernel_lrc = xe_lrc_create(hwe, NULL, SZ_16K); - if (IS_ERR(hwe->kernel_lrc)) { - err = PTR_ERR(hwe->kernel_lrc); - goto err_hwsp; - } - if (!xe_device_uc_enabled(xe)) { hwe->exl_port = xe_execlist_port_create(xe, hwe); if (IS_ERR(hwe->exl_port)) { err = PTR_ERR(hwe->exl_port); - goto err_kernel_lrc; + goto err_hwsp; } - } - - if (xe_device_uc_enabled(xe)) { + } else { /* GSCCS has a special interrupt for reset */ if (hwe->class == XE_ENGINE_CLASS_OTHER) hwe->irq_handler = xe_gsc_hwe_irq_handler; @@ -555,10 +576,8 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY) gt->usm.reserved_bcs_instance = hwe->instance; - return drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe); + return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe); -err_kernel_lrc: - xe_lrc_put(hwe->kernel_lrc); err_hwsp: xe_bo_unpin_map_no_vm(hwe->hwsp); err_name: @@ -761,6 +780,9 @@ int xe_hw_engines_init(struct xe_gt *gt) } hw_engine_setup_logical_mapping(gt); + err = xe_hw_engine_setup_groups(gt); + if (err) + return err; return 0; } @@ -791,7 +813,7 @@ xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe, unsigned int dss; u16 group, instance; - snapshot->reg.instdone.ring = hw_engine_mmio_read32(hwe, RING_INSTDONE(0)); + snapshot->reg.instdone.ring = xe_hw_engine_mmio_read32(hwe, RING_INSTDONE(0)); if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER) return; @@ -887,53 +909,53 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) return snapshot; snapshot->reg.ring_execlist_status = - hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)); - val = hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); + xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); snapshot->reg.ring_execlist_status |= val << 32; snapshot->reg.ring_execlist_sq_contents = - hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0)); - val = hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0)); + xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0)); snapshot->reg.ring_execlist_sq_contents |= val << 32; - snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0)); - val = hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); + snapshot->reg.ring_acthd = xe_hw_engine_mmio_read32(hwe, RING_ACTHD(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); snapshot->reg.ring_acthd |= val << 32; - snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0)); - val = hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); + snapshot->reg.ring_bbaddr = xe_hw_engine_mmio_read32(hwe, RING_BBADDR(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); snapshot->reg.ring_bbaddr |= val << 32; snapshot->reg.ring_dma_fadd = - hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); - val = hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); + xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); snapshot->reg.ring_dma_fadd |= val << 32; - snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); - snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); - snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0)); + snapshot->reg.ring_hwstam = xe_hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); + snapshot->reg.ring_hws_pga = xe_hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); + snapshot->reg.ring_start = xe_hw_engine_mmio_read32(hwe, RING_START(0)); if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) { - val = hw_engine_mmio_read32(hwe, RING_START_UDW(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_START_UDW(0)); snapshot->reg.ring_start |= val << 32; } if (xe_gt_has_indirect_ring_state(hwe->gt)) { snapshot->reg.indirect_ring_state = - hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0)); + xe_hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0)); } snapshot->reg.ring_head = - hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; + xe_hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; snapshot->reg.ring_tail = - hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR; - snapshot->reg.ring_ctl = hw_engine_mmio_read32(hwe, RING_CTL(0)); + xe_hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR; + snapshot->reg.ring_ctl = xe_hw_engine_mmio_read32(hwe, RING_CTL(0)); snapshot->reg.ring_mi_mode = - hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); - snapshot->reg.ring_mode = hw_engine_mmio_read32(hwe, RING_MODE(0)); - snapshot->reg.ring_imr = hw_engine_mmio_read32(hwe, RING_IMR(0)); - snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0)); - snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0)); - snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0)); - snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0)); + xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); + snapshot->reg.ring_mode = xe_hw_engine_mmio_read32(hwe, RING_MODE(0)); + snapshot->reg.ring_imr = xe_hw_engine_mmio_read32(hwe, RING_IMR(0)); + snapshot->reg.ring_esr = xe_hw_engine_mmio_read32(hwe, RING_ESR(0)); + snapshot->reg.ring_emr = xe_hw_engine_mmio_read32(hwe, RING_EMR(0)); + snapshot->reg.ring_eir = xe_hw_engine_mmio_read32(hwe, RING_EIR(0)); + snapshot->reg.ipehr = xe_hw_engine_mmio_read32(hwe, RING_IPEHR(0)); xe_hw_engine_snapshot_instdone_capture(hwe, snapshot); if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE) @@ -1135,3 +1157,41 @@ enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe) { return engine_infos[hwe->engine_id].domain; } + +static const enum xe_engine_class user_to_xe_engine_class[] = { + [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, + [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, + [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, + [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, + [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, +}; + +/** + * xe_hw_engine_lookup() - Lookup hardware engine for class:instance + * @xe: xe device + * @eci: engine class and instance + * + * This function will find a hardware engine for given engine + * class and instance. + * + * Return: If found xe_hw_engine pointer, NULL otherwise. + */ +struct xe_hw_engine * +xe_hw_engine_lookup(struct xe_device *xe, + struct drm_xe_engine_class_instance eci) +{ + unsigned int idx; + + if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) + return NULL; + + if (eci.gt_id >= xe->info.gt_count) + return NULL; + + idx = array_index_nospec(eci.engine_class, + ARRAY_SIZE(user_to_xe_engine_class)); + + return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), + user_to_xe_engine_class[idx], + eci.engine_instance, true); +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h index 900c8c991430..022819a4a8eb 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.h +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -9,6 +9,8 @@ #include "xe_hw_engine_types.h" struct drm_printer; +struct drm_xe_engine_class_instance; +struct xe_device; #ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN #define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN @@ -62,6 +64,11 @@ void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p); void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe); bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe); + +struct xe_hw_engine * +xe_hw_engine_lookup(struct xe_device *xe, + struct drm_xe_engine_class_instance eci); + static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe) { return hwe->name; @@ -71,4 +78,7 @@ const char *xe_hw_engine_class_to_str(enum xe_engine_class class); u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe); enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe); +void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, u32 val); +u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg); + #endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c new file mode 100644 index 000000000000..82750520a90a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -0,0 +1,372 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "xe_assert.h" +#include "xe_device.h" +#include "xe_exec_queue.h" +#include "xe_gt.h" +#include "xe_hw_engine_group.h" +#include "xe_vm.h" + +static void +hw_engine_group_free(struct drm_device *drm, void *arg) +{ + struct xe_hw_engine_group *group = arg; + + destroy_workqueue(group->resume_wq); + kfree(group); +} + +static void +hw_engine_group_resume_lr_jobs_func(struct work_struct *w) +{ + struct xe_exec_queue *q; + struct xe_hw_engine_group *group = container_of(w, struct xe_hw_engine_group, resume_work); + int err; + enum xe_hw_engine_group_execution_mode previous_mode; + + err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode); + if (err) + return; + + if (previous_mode == EXEC_MODE_LR) + goto put; + + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + if (!xe_vm_in_fault_mode(q->vm)) + continue; + + q->ops->resume(q); + } + +put: + xe_hw_engine_group_put(group); +} + +static struct xe_hw_engine_group * +hw_engine_group_alloc(struct xe_device *xe) +{ + struct xe_hw_engine_group *group; + int err; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + + group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0); + if (!group->resume_wq) + return ERR_PTR(-ENOMEM); + + init_rwsem(&group->mode_sem); + INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func); + INIT_LIST_HEAD(&group->exec_queue_list); + + err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group); + if (err) + return ERR_PTR(err); + + return group; +} + +/** + * xe_hw_engine_setup_groups() - Setup the hw engine groups for the gt + * @gt: The gt for which groups are setup + * + * Return: 0 on success, negative error code on error. + */ +int xe_hw_engine_setup_groups(struct xe_gt *gt) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs; + struct xe_device *xe = gt_to_xe(gt); + int err; + + group_rcs_ccs = hw_engine_group_alloc(xe); + if (IS_ERR(group_rcs_ccs)) { + err = PTR_ERR(group_rcs_ccs); + goto err_group_rcs_ccs; + } + + group_bcs = hw_engine_group_alloc(xe); + if (IS_ERR(group_bcs)) { + err = PTR_ERR(group_bcs); + goto err_group_bcs; + } + + group_vcs_vecs = hw_engine_group_alloc(xe); + if (IS_ERR(group_vcs_vecs)) { + err = PTR_ERR(group_vcs_vecs); + goto err_group_vcs_vecs; + } + + for_each_hw_engine(hwe, gt, id) { + switch (hwe->class) { + case XE_ENGINE_CLASS_COPY: + hwe->hw_engine_group = group_bcs; + break; + case XE_ENGINE_CLASS_RENDER: + case XE_ENGINE_CLASS_COMPUTE: + hwe->hw_engine_group = group_rcs_ccs; + break; + case XE_ENGINE_CLASS_VIDEO_DECODE: + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + hwe->hw_engine_group = group_vcs_vecs; + break; + case XE_ENGINE_CLASS_OTHER: + break; + default: + drm_warn(&xe->drm, "NOT POSSIBLE"); + } + } + + return 0; + +err_group_vcs_vecs: + kfree(group_vcs_vecs); +err_group_bcs: + kfree(group_bcs); +err_group_rcs_ccs: + kfree(group_rcs_ccs); + + return err; +} + +/** + * xe_hw_engine_group_add_exec_queue() - Add an exec queue to a hw engine group + * @group: The hw engine group + * @q: The exec_queue + * + * Return: 0 on success, + * -EINTR if the lock could not be acquired + */ +int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q) +{ + int err; + struct xe_device *xe = gt_to_xe(q->gt); + + xe_assert(xe, group); + xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM)); + xe_assert(xe, q->vm); + + if (xe_vm_in_preempt_fence_mode(q->vm)) + return 0; + + err = down_write_killable(&group->mode_sem); + if (err) + return err; + + if (xe_vm_in_fault_mode(q->vm) && group->cur_mode == EXEC_MODE_DMA_FENCE) { + q->ops->suspend(q); + err = q->ops->suspend_wait(q); + if (err) + goto err_suspend; + + xe_hw_engine_group_resume_faulting_lr_jobs(group); + } + + list_add(&q->hw_engine_group_link, &group->exec_queue_list); + up_write(&group->mode_sem); + + return 0; + +err_suspend: + up_write(&group->mode_sem); + return err; +} + +/** + * xe_hw_engine_group_del_exec_queue() - Delete an exec queue from a hw engine group + * @group: The hw engine group + * @q: The exec_queue + */ +void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q) +{ + struct xe_device *xe = gt_to_xe(q->gt); + + xe_assert(xe, group); + xe_assert(xe, q->vm); + + down_write(&group->mode_sem); + + if (!list_empty(&q->hw_engine_group_link)) + list_del(&q->hw_engine_group_link); + + up_write(&group->mode_sem); +} + +/** + * xe_hw_engine_group_resume_faulting_lr_jobs() - Asynchronously resume the hw engine group's + * faulting LR jobs + * @group: The hw engine group + */ +void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group) +{ + queue_work(group->resume_wq, &group->resume_work); +} + +/** + * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group + * @group: The hw engine group + * + * Return: 0 on success, negative error code on error. + */ +static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group) +{ + int err; + struct xe_exec_queue *q; + bool need_resume = false; + + lockdep_assert_held_write(&group->mode_sem); + + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + if (!xe_vm_in_fault_mode(q->vm)) + continue; + + need_resume = true; + q->ops->suspend(q); + } + + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + if (!xe_vm_in_fault_mode(q->vm)) + continue; + + err = q->ops->suspend_wait(q); + if (err) + goto err_suspend; + } + + if (need_resume) + xe_hw_engine_group_resume_faulting_lr_jobs(group); + + return 0; + +err_suspend: + up_write(&group->mode_sem); + return err; +} + +/** + * xe_hw_engine_group_wait_for_dma_fence_jobs() - Wait for dma fence jobs to complete + * @group: The hw engine group + * + * This function is not meant to be called directly from a user IOCTL as dma_fence_wait() + * is not interruptible. + * + * Return: 0 on success, + * -ETIME if waiting for one job failed + */ +static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group *group) +{ + long timeout; + struct xe_exec_queue *q; + struct dma_fence *fence; + + lockdep_assert_held_write(&group->mode_sem); + + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + if (xe_vm_in_lr_mode(q->vm)) + continue; + + fence = xe_exec_queue_last_fence_get_for_resume(q, q->vm); + timeout = dma_fence_wait(fence, false); + dma_fence_put(fence); + + if (timeout < 0) + return -ETIME; + } + + return 0; +} + +static int switch_mode(struct xe_hw_engine_group *group) +{ + int err = 0; + enum xe_hw_engine_group_execution_mode new_mode; + + lockdep_assert_held_write(&group->mode_sem); + + switch (group->cur_mode) { + case EXEC_MODE_LR: + new_mode = EXEC_MODE_DMA_FENCE; + err = xe_hw_engine_group_suspend_faulting_lr_jobs(group); + break; + case EXEC_MODE_DMA_FENCE: + new_mode = EXEC_MODE_LR; + err = xe_hw_engine_group_wait_for_dma_fence_jobs(group); + break; + } + + if (err) + return err; + + group->cur_mode = new_mode; + + return 0; +} + +/** + * xe_hw_engine_group_get_mode() - Get the group to execute in the new mode + * @group: The hw engine group + * @new_mode: The new execution mode + * @previous_mode: Pointer to the previous mode provided for use by caller + * + * Return: 0 if successful, -EINTR if locking failed. + */ +int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group, + enum xe_hw_engine_group_execution_mode new_mode, + enum xe_hw_engine_group_execution_mode *previous_mode) +__acquires(&group->mode_sem) +{ + int err = down_read_interruptible(&group->mode_sem); + + if (err) + return err; + + *previous_mode = group->cur_mode; + + if (new_mode != group->cur_mode) { + up_read(&group->mode_sem); + err = down_write_killable(&group->mode_sem); + if (err) + return err; + + if (new_mode != group->cur_mode) { + err = switch_mode(group); + if (err) { + up_write(&group->mode_sem); + return err; + } + } + downgrade_write(&group->mode_sem); + } + + return err; +} + +/** + * xe_hw_engine_group_put() - Put the group + * @group: The hw engine group + */ +void xe_hw_engine_group_put(struct xe_hw_engine_group *group) +__releases(&group->mode_sem) +{ + up_read(&group->mode_sem); +} + +/** + * xe_hw_engine_group_find_exec_mode() - Find the execution mode for this exec queue + * @q: The exec_queue + */ +enum xe_hw_engine_group_execution_mode +xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q) +{ + if (xe_vm_in_fault_mode(q->vm)) + return EXEC_MODE_LR; + else + return EXEC_MODE_DMA_FENCE; +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h b/drivers/gpu/drm/xe/xe_hw_engine_group.h new file mode 100644 index 000000000000..797ee81acbf2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_HW_ENGINE_GROUP_H_ +#define _XE_HW_ENGINE_GROUP_H_ + +#include "xe_hw_engine_group_types.h" + +struct drm_device; +struct xe_exec_queue; +struct xe_gt; + +int xe_hw_engine_setup_groups(struct xe_gt *gt); + +int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q); +void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q); + +int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group, + enum xe_hw_engine_group_execution_mode new_mode, + enum xe_hw_engine_group_execution_mode *previous_mode); +void xe_hw_engine_group_put(struct xe_hw_engine_group *group); + +enum xe_hw_engine_group_execution_mode +xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q); +void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group); + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group_types.h b/drivers/gpu/drm/xe/xe_hw_engine_group_types.h new file mode 100644 index 000000000000..92b6e0712c03 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_group_types.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_HW_ENGINE_GROUP_TYPES_H_ +#define _XE_HW_ENGINE_GROUP_TYPES_H_ + +#include "xe_force_wake_types.h" +#include "xe_lrc_types.h" +#include "xe_reg_sr_types.h" + +/** + * enum xe_hw_engine_group_execution_mode - possible execution modes of a hw + * engine group + * + * @EXEC_MODE_LR: execution in long-running mode + * @EXEC_MODE_DMA_FENCE: execution in dma fence mode + */ +enum xe_hw_engine_group_execution_mode { + EXEC_MODE_LR, + EXEC_MODE_DMA_FENCE, +}; + +/** + * struct xe_hw_engine_group - Hardware engine group + * + * hw engines belong to the same group if they share hardware resources in a way + * that prevents them from making progress when one is stuck on a page fault. + */ +struct xe_hw_engine_group { + /** + * @exec_queue_list: list of exec queues attached to this + * xe_hw_engine_group + */ + struct list_head exec_queue_list; + /** @resume_work: worker to resume faulting LR exec queues */ + struct work_struct resume_work; + /** @resume_wq: workqueue to resume faulting LR exec queues */ + struct workqueue_struct *resume_wq; + /** + * @mode_sem: used to protect this group's hardware resources and ensure + * mutual exclusion between execution only in faulting LR mode and + * execution only in DMA_FENCE mode + */ + struct rw_semaphore mode_sem; + /** @cur_mode: current execution mode of this hw engine group */ + enum xe_hw_engine_group_execution_mode cur_mode; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 70e6434f150d..8be6d420ece4 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -136,8 +136,6 @@ struct xe_hw_engine { enum xe_force_wake_domains domain; /** @hwsp: hardware status page buffer object */ struct xe_bo *hwsp; - /** @kernel_lrc: Kernel LRC (should be replaced /w an xe_engine) */ - struct xe_lrc *kernel_lrc; /** @exl_port: execlists port */ struct xe_execlist_port *exl_port; /** @fence_irq: fence IRQ to run when a hw engine IRQ is received */ @@ -150,6 +148,8 @@ struct xe_hw_engine { struct xe_hw_engine_class_intf *eclass; /** @oa_unit: oa unit for this hw engine */ struct xe_oa_unit *oa_unit; + /** @hw_engine_group: the group of hw engines this one belongs to */ + struct xe_hw_engine_group *hw_engine_group; }; /** diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index 45a9789cf501..0b4f12be3692 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -148,20 +148,20 @@ static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - return dev_name(gt_to_xe(fence->ctx->gt)->drm.dev); + return dev_name(fence->xe->drm.dev); } static const char *xe_hw_fence_get_timeline_name(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - return fence->ctx->name; + return fence->name; } static bool xe_hw_fence_signaled(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - struct xe_device *xe = gt_to_xe(fence->ctx->gt); + struct xe_device *xe = fence->xe; u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32); return dma_fence->error || @@ -253,7 +253,8 @@ void xe_hw_fence_init(struct dma_fence *fence, struct xe_hw_fence_ctx *ctx, struct xe_hw_fence *hw_fence = container_of(fence, typeof(*hw_fence), dma); - hw_fence->ctx = ctx; + hw_fence->xe = gt_to_xe(ctx->gt); + snprintf(hw_fence->name, sizeof(hw_fence->name), "%s", ctx->name); hw_fence->seqno_map = seqno_map; INIT_LIST_HEAD(&hw_fence->irq_link); diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h index b33c4956e8ea..364a61f4bfda 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence_types.h +++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h @@ -12,6 +12,7 @@ #include <linux/list.h> #include <linux/spinlock.h> +struct xe_device; struct xe_gt; /** @@ -61,8 +62,10 @@ struct xe_hw_fence_ctx { struct xe_hw_fence { /** @dma: base dma fence for hardware fence context */ struct dma_fence dma; - /** @ctx: hardware fence context */ - struct xe_hw_fence_ctx *ctx; + /** @xe: Xe device for hw fence driver name */ + struct xe_device *xe; + /** @name: name of hardware fence context */ + char name[MAX_FENCE_NAME_LEN]; /** @seqno_map: I/O map for seqno */ struct iosys_map seqno_map; /** @irq_link: Link in struct xe_hw_fence_irq.pending */ diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 832ea81faeee..aa11728e7e79 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -12,7 +12,6 @@ #include "regs/xe_mchbar_regs.h" #include "regs/xe_pcode_regs.h" #include "xe_device.h" -#include "xe_gt.h" #include "xe_hwmon.h" #include "xe_mmio.h" #include "xe_pcode.h" @@ -65,8 +64,8 @@ struct xe_hwmon_energy_info { struct xe_hwmon { /** @hwmon_dev: hwmon device for xe */ struct device *hwmon_dev; - /** @gt: primary gt */ - struct xe_gt *gt; + /** @xe: Xe device */ + struct xe_device *xe; /** @hwmon_lock: lock for rw attributes*/ struct mutex hwmon_lock; /** @scl_shift_power: pkg power unit */ @@ -82,7 +81,7 @@ struct xe_hwmon { static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, int channel) { - struct xe_device *xe = gt_to_xe(hwmon->gt); + struct xe_device *xe = hwmon->xe; switch (hwmon_reg) { case REG_PKG_RAPL_LIMIT: @@ -148,8 +147,9 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *value) { u64 reg_val, min, max; - struct xe_device *xe = gt_to_xe(hwmon->gt); + struct xe_device *xe = hwmon->xe; struct xe_reg rapl_limit, pkg_power_sku; + struct xe_gt *mmio = xe_root_mmio_gt(xe); rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); @@ -166,7 +166,7 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *v mutex_lock(&hwmon->hwmon_lock); - reg_val = xe_mmio_read32(hwmon->gt, rapl_limit); + reg_val = xe_mmio_read32(mmio, rapl_limit); /* Check if PL1 limit is disabled */ if (!(reg_val & PKG_PWR_LIM_1_EN)) { *value = PL1_DISABLE; @@ -176,7 +176,7 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *v reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val); *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); - reg_val = xe_mmio_read64_2x32(hwmon->gt, pkg_power_sku); + reg_val = xe_mmio_read64_2x32(mmio, pkg_power_sku); min = REG_FIELD_GET(PKG_MIN_PWR, reg_val); min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); max = REG_FIELD_GET(PKG_MAX_PWR, reg_val); @@ -190,6 +190,7 @@ unlock: static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long value) { + struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); int ret = 0; u64 reg_val; struct xe_reg rapl_limit; @@ -200,10 +201,10 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long va /* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */ if (value == PL1_DISABLE) { - reg_val = xe_mmio_rmw32(hwmon->gt, rapl_limit, PKG_PWR_LIM_1_EN, 0); - reg_val = xe_mmio_read32(hwmon->gt, rapl_limit); + reg_val = xe_mmio_rmw32(mmio, rapl_limit, PKG_PWR_LIM_1_EN, 0); + reg_val = xe_mmio_read32(mmio, rapl_limit); if (reg_val & PKG_PWR_LIM_1_EN) { - drm_warn(>_to_xe(hwmon->gt)->drm, "PL1 disable is not supported!\n"); + drm_warn(&hwmon->xe->drm, "PL1 disable is not supported!\n"); ret = -EOPNOTSUPP; } goto unlock; @@ -212,7 +213,7 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long va /* Computation in 64-bits to avoid overflow. Round to nearest. */ reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val); - reg_val = xe_mmio_rmw32(hwmon->gt, rapl_limit, PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val); + reg_val = xe_mmio_rmw32(mmio, rapl_limit, PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val); unlock: mutex_unlock(&hwmon->hwmon_lock); @@ -221,6 +222,7 @@ unlock: static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, long *value) { + struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); struct xe_reg reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); u64 reg_val; @@ -229,7 +231,7 @@ static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, l * for this register can be skipped. * See xe_hwmon_power_is_visible. */ - reg_val = xe_mmio_read32(hwmon->gt, reg); + reg_val = xe_mmio_read32(mmio, reg); reg_val = REG_FIELD_GET(PKG_TDP, reg_val); *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); } @@ -257,11 +259,12 @@ static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, l static void xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy) { + struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); struct xe_hwmon_energy_info *ei = &hwmon->ei[channel]; u64 reg_val; - reg_val = xe_mmio_read32(hwmon->gt, xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, - channel)); + reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, + channel)); if (reg_val >= ei->reg_val_prev) ei->accum_energy += reg_val - ei->reg_val_prev; @@ -279,19 +282,20 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at char *buf) { struct xe_hwmon *hwmon = dev_get_drvdata(dev); + struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); u32 x, y, x_w = 2; /* 2 bits */ u64 r, tau4, out; int sensor_index = to_sensor_dev_attr(attr)->index; - xe_pm_runtime_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(hwmon->xe); mutex_lock(&hwmon->hwmon_lock); - r = xe_mmio_read32(hwmon->gt, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index)); + r = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index)); mutex_unlock(&hwmon->hwmon_lock); - xe_pm_runtime_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(hwmon->xe); x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r); y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r); @@ -319,6 +323,7 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a const char *buf, size_t count) { struct xe_hwmon *hwmon = dev_get_drvdata(dev); + struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); u32 x, y, rxy, x_w = 2; /* 2 bits */ u64 tau4, r, max_win; unsigned long val; @@ -371,16 +376,16 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y); - xe_pm_runtime_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(hwmon->xe); mutex_lock(&hwmon->hwmon_lock); - r = xe_mmio_rmw32(hwmon->gt, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index), + r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index), PKG_PWR_LIM_1_TIME, rxy); mutex_unlock(&hwmon->hwmon_lock); - xe_pm_runtime_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(hwmon->xe); return count; } @@ -406,11 +411,11 @@ static umode_t xe_hwmon_attributes_visible(struct kobject *kobj, struct xe_hwmon *hwmon = dev_get_drvdata(dev); int ret = 0; - xe_pm_runtime_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(hwmon->xe); ret = xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, index)) ? attr->mode : 0; - xe_pm_runtime_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(hwmon->xe); return ret; } @@ -435,22 +440,26 @@ static const struct hwmon_channel_info * const hwmon_info[] = { }; /* I1 is exposed as power_crit or as curr_crit depending on bit 31 */ -static int xe_hwmon_pcode_read_i1(struct xe_gt *gt, u32 *uval) +static int xe_hwmon_pcode_read_i1(const struct xe_hwmon *hwmon, u32 *uval) { + struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); + /* Avoid Illegal Subcommand error */ - if (gt_to_xe(gt)->info.platform == XE_DG2) + if (hwmon->xe->info.platform == XE_DG2) return -ENXIO; - return xe_pcode_read(gt, PCODE_MBOX(PCODE_POWER_SETUP, + return xe_pcode_read(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, POWER_SETUP_SUBCOMMAND_READ_I1, 0), uval, NULL); } -static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval) +static int xe_hwmon_pcode_write_i1(const struct xe_hwmon *hwmon, u32 uval) { - return xe_pcode_write(gt, PCODE_MBOX(PCODE_POWER_SETUP, + struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); + + return xe_pcode_write(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, POWER_SETUP_SUBCOMMAND_WRITE_I1, 0), - uval); + (uval & POWER_SETUP_I1_DATA_MASK)); } static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel, @@ -461,7 +470,7 @@ static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel, mutex_lock(&hwmon->hwmon_lock); - ret = xe_hwmon_pcode_read_i1(hwmon->gt, &uval); + ret = xe_hwmon_pcode_read_i1(hwmon, &uval); if (ret) goto unlock; @@ -481,7 +490,7 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel, mutex_lock(&hwmon->hwmon_lock); uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor); - ret = xe_hwmon_pcode_write_i1(hwmon->gt, uval); + ret = xe_hwmon_pcode_write_i1(hwmon, uval); mutex_unlock(&hwmon->hwmon_lock); return ret; @@ -489,9 +498,10 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel, static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, int channel, long *value) { + struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); u64 reg_val; - reg_val = xe_mmio_read32(hwmon->gt, xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, channel)); + reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, channel)); /* HW register value in units of 2.5 millivolt */ *value = DIV_ROUND_CLOSEST(REG_FIELD_GET(VOLTAGE_MASK, reg_val) * 2500, SF_VOLTAGE); } @@ -510,7 +520,7 @@ xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) channel)) ? 0444 : 0; case hwmon_power_crit: if (channel == CHANNEL_PKG) - return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || + return (xe_hwmon_pcode_read_i1(hwmon, &uval) || !(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; break; case hwmon_power_label: @@ -563,10 +573,10 @@ xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr, int channel) switch (attr) { case hwmon_curr_crit: - return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || + return (xe_hwmon_pcode_read_i1(hwmon, &uval) || (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; case hwmon_curr_label: - return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || + return (xe_hwmon_pcode_read_i1(hwmon, &uval) || (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0444; break; default: @@ -654,7 +664,7 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, struct xe_hwmon *hwmon = (struct xe_hwmon *)drvdata; int ret; - xe_pm_runtime_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(hwmon->xe); switch (type) { case hwmon_power: @@ -674,7 +684,7 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, break; } - xe_pm_runtime_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(hwmon->xe); return ret; } @@ -686,7 +696,7 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, struct xe_hwmon *hwmon = dev_get_drvdata(dev); int ret; - xe_pm_runtime_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(hwmon->xe); switch (type) { case hwmon_power: @@ -706,7 +716,7 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, break; } - xe_pm_runtime_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(hwmon->xe); return ret; } @@ -718,7 +728,7 @@ xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, struct xe_hwmon *hwmon = dev_get_drvdata(dev); int ret; - xe_pm_runtime_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(hwmon->xe); switch (type) { case hwmon_power: @@ -732,7 +742,7 @@ xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, break; } - xe_pm_runtime_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(hwmon->xe); return ret; } @@ -771,6 +781,7 @@ static const struct hwmon_chip_info hwmon_chip_info = { static void xe_hwmon_get_preregistration_info(struct xe_device *xe) { + struct xe_gt *mmio = xe_root_mmio_gt(xe); struct xe_hwmon *hwmon = xe->hwmon; long energy; u64 val_sku_unit = 0; @@ -783,7 +794,7 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe) */ pkg_power_sku_unit = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0); if (xe_reg_is_valid(pkg_power_sku_unit)) { - val_sku_unit = xe_mmio_read32(hwmon->gt, pkg_power_sku_unit); + val_sku_unit = xe_mmio_read32(mmio, pkg_power_sku_unit); hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit); hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit); @@ -828,8 +839,8 @@ void xe_hwmon_register(struct xe_device *xe) if (devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon)) return; - /* primary GT to access device level properties */ - hwmon->gt = xe->tiles[0].primary_gt; + /* There's only one instance of hwmon per device */ + hwmon->xe = xe; xe_hwmon_get_preregistration_info(xe); diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 85733f993d09..5f2c368c35ad 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -459,6 +459,8 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) * the primary tile. */ if (id == 0) { + if (HAS_HECI_CSCFI(xe)) + xe_heci_csc_irq_handler(xe, master_ctl); xe_display_irq_handler(xe, master_ctl); gu_misc_iir = gu_misc_irq_ack(xe, master_ctl); } diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 418661a88918..8999ac511555 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -7,7 +7,7 @@ #include <drm/drm_managed.h> -#include "regs/xe_sriov_regs.h" +#include "regs/xe_gt_regs.h" #include "xe_assert.h" #include "xe_bo.h" @@ -71,7 +71,7 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level lmtt->ops->lmtt_pte_num(level)), ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) | - XE_BO_NEEDS_64K | XE_BO_FLAG_PINNED); + XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_PINNED); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto out_free_pt; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 58121821f081..aec7db39c061 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -5,6 +5,8 @@ #include "xe_lrc.h" +#include <generated/xe_wa_oob.h> + #include <linux/ascii85.h> #include "instructions/xe_mi_commands.h" @@ -24,6 +26,7 @@ #include "xe_memirq.h" #include "xe_sriov.h" #include "xe_vm.h" +#include "xe_wa.h" #define LRC_VALID BIT_ULL(0) #define LRC_PRIVILEGE BIT_ULL(8) @@ -1581,19 +1584,31 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b int state_table_size = 0; /* - * At the moment we only need to emit non-register state for the RCS - * engine. + * Wa_14019789679 + * + * If the driver doesn't explicitly emit the SVG instructions while + * setting up the default LRC, the context switch will write 0's + * (noops) into the LRC memory rather than the expected instruction + * headers. Application contexts start out as a copy of the default + * LRC, and if they also do not emit specific settings for some SVG + * state, then on context restore they'll unintentionally inherit + * whatever state setting the previous context had programmed into the + * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will + * prevent the hardware from resetting that state back to any specific + * value). + * + * The official workaround only requires emitting 3DSTATE_MESH_CONTROL + * since that's a specific state setting that can easily cause GPU + * hangs if unintentionally inherited. However to be safe we'll + * continue to emit all of the SVG state since it's best not to leak + * any of the state between contexts, even if that leakage is harmless. */ - if (q->hwe->class != XE_ENGINE_CLASS_RENDER) - return; - - switch (GRAPHICS_VERx100(xe)) { - case 1255: - case 1270 ... 2004: + if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) { state_table = xe_hpg_svg_state; state_table_size = ARRAY_SIZE(xe_hpg_svg_state); - break; - default: + } + + if (!state_table) { xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); return; @@ -1634,7 +1649,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) if (!snapshot) return NULL; - if (lrc->bo && lrc->bo->vm) + if (lrc->bo->vm) xe_vm_get(lrc->bo->vm); snapshot->context_desc = xe_lrc_ggtt_addr(lrc); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index c9f5673353ee..cfd31ae49cc1 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -10,7 +10,7 @@ #include <drm/drm_managed.h> #include <drm/ttm/ttm_tt.h> -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> #include <generated/xe_wa_oob.h> @@ -73,6 +73,7 @@ struct xe_migrate { #define NUM_PT_SLOTS 32 #define LEVEL0_PAGE_TABLE_ENCODE_SIZE SZ_2M #define MAX_NUM_PTE 512 +#define IDENTITY_OFFSET 256ULL /* * Although MI_STORE_DATA_IMM's "length" field is 10-bits, 0x3FE is the largest @@ -84,15 +85,14 @@ struct xe_migrate { #define MAX_PTE_PER_SDI 0x1FE /** - * xe_tile_migrate_engine() - Get this tile's migrate engine. + * xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue. * @tile: The tile. * - * Returns the default migrate engine of this tile. - * TODO: Perhaps this function is slightly misplaced, and even unneeded? + * Returns the default migrate exec queue of this tile. * - * Return: The default migrate engine + * Return: The default migrate exec queue */ -struct xe_exec_queue *xe_tile_migrate_engine(struct xe_tile *tile) +struct xe_exec_queue *xe_tile_migrate_exec_queue(struct xe_tile *tile) { return tile->migrate->q; } @@ -121,14 +121,64 @@ static u64 xe_migrate_vm_addr(u64 slot, u32 level) return (slot + 1ULL) << xe_pt_shift(level + 1); } -static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr) +static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr, bool is_comp_pte) { /* * Remove the DPA to get a correct offset into identity table for the * migrate offset */ + u64 identity_offset = IDENTITY_OFFSET; + + if (GRAPHICS_VER(xe) >= 20 && is_comp_pte) + identity_offset += DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G); + addr -= xe->mem.vram.dpa_base; - return addr + (256ULL << xe_pt_shift(2)); + return addr + (identity_offset << xe_pt_shift(2)); +} + +static void xe_migrate_program_identity(struct xe_device *xe, struct xe_vm *vm, struct xe_bo *bo, + u64 map_ofs, u64 vram_offset, u16 pat_index, u64 pt_2m_ofs) +{ + u64 pos, ofs, flags; + u64 entry; + /* XXX: Unclear if this should be usable_size? */ + u64 vram_limit = xe->mem.vram.actual_physical_size + + xe->mem.vram.dpa_base; + u32 level = 2; + + ofs = map_ofs + XE_PAGE_SIZE * level + vram_offset * 8; + flags = vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, + true, 0); + + xe_assert(xe, IS_ALIGNED(xe->mem.vram.usable_size, SZ_2M)); + + /* + * Use 1GB pages when possible, last chunk always use 2M + * pages as mixing reserved memory (stolen, WOCPM) with a single + * mapping is not allowed on certain platforms. + */ + for (pos = xe->mem.vram.dpa_base; pos < vram_limit; + pos += SZ_1G, ofs += 8) { + if (pos + SZ_1G >= vram_limit) { + entry = vm->pt_ops->pde_encode_bo(bo, pt_2m_ofs, + pat_index); + xe_map_wr(xe, &bo->vmap, ofs, u64, entry); + + flags = vm->pt_ops->pte_encode_addr(xe, 0, + pat_index, + level - 1, + true, 0); + + for (ofs = pt_2m_ofs; pos < vram_limit; + pos += SZ_2M, ofs += 8) + xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags); + break; /* Ensure pos == vram_limit assert correct */ + } + + xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags); + } + + xe_assert(xe, pos == vram_limit); } static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, @@ -137,11 +187,13 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, struct xe_device *xe = tile_to_xe(tile); u16 pat_index = xe->pat.idx[XE_CACHE_WB]; u8 id = tile->id; - u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level, - num_setup = num_level + 1; + u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level; +#define VRAM_IDENTITY_MAP_COUNT 2 + u32 num_setup = num_level + VRAM_IDENTITY_MAP_COUNT; +#undef VRAM_IDENTITY_MAP_COUNT u32 map_ofs, level, i; struct xe_bo *bo, *batch = tile->mem.kernel_bb_pool->bo; - u64 entry, pt30_ofs; + u64 entry, pt29_ofs; /* Can't bump NUM_PT_SLOTS too high */ BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/XE_PAGE_SIZE); @@ -161,9 +213,9 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, if (IS_ERR(bo)) return PTR_ERR(bo); - /* PT31 reserved for 2M identity map */ - pt30_ofs = bo->size - 2 * XE_PAGE_SIZE; - entry = vm->pt_ops->pde_encode_bo(bo, pt30_ofs, pat_index); + /* PT30 & PT31 reserved for 2M identity map */ + pt29_ofs = bo->size - 3 * XE_PAGE_SIZE; + entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs, pat_index); xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry); map_ofs = (num_entries - num_setup) * XE_PAGE_SIZE; @@ -215,12 +267,12 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, } else { u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE); - m->batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr); + m->batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr, false); if (xe->info.has_usm) { batch = tile->primary_gt->usm.bb_pool->bo; batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE); - m->usm_batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr); + m->usm_batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr, false); } } @@ -254,55 +306,36 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, /* Identity map the entire vram at 256GiB offset */ if (IS_DGFX(xe)) { - u64 pos, ofs, flags; - /* XXX: Unclear if this should be usable_size? */ - u64 vram_limit = xe->mem.vram.actual_physical_size + - xe->mem.vram.dpa_base; - - level = 2; - ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8; - flags = vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, - true, 0); + u64 pt30_ofs = bo->size - 2 * XE_PAGE_SIZE; - xe_assert(xe, IS_ALIGNED(xe->mem.vram.usable_size, SZ_2M)); + xe_migrate_program_identity(xe, vm, bo, map_ofs, IDENTITY_OFFSET, + pat_index, pt30_ofs); + xe_assert(xe, xe->mem.vram.actual_physical_size <= + (MAX_NUM_PTE - IDENTITY_OFFSET) * SZ_1G); /* - * Use 1GB pages when possible, last chunk always use 2M - * pages as mixing reserved memory (stolen, WOCPM) with a single - * mapping is not allowed on certain platforms. + * Identity map the entire vram for compressed pat_index for xe2+ + * if flat ccs is enabled. */ - for (pos = xe->mem.vram.dpa_base; pos < vram_limit; - pos += SZ_1G, ofs += 8) { - if (pos + SZ_1G >= vram_limit) { - u64 pt31_ofs = bo->size - XE_PAGE_SIZE; - - entry = vm->pt_ops->pde_encode_bo(bo, pt31_ofs, - pat_index); - xe_map_wr(xe, &bo->vmap, ofs, u64, entry); - - flags = vm->pt_ops->pte_encode_addr(xe, 0, - pat_index, - level - 1, - true, 0); - - for (ofs = pt31_ofs; pos < vram_limit; - pos += SZ_2M, ofs += 8) - xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags); - break; /* Ensure pos == vram_limit assert correct */ - } - - xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags); + if (GRAPHICS_VER(xe) >= 20 && xe_device_has_flat_ccs(xe)) { + u16 comp_pat_index = xe->pat.idx[XE_CACHE_NONE_COMPRESSION]; + u64 vram_offset = IDENTITY_OFFSET + + DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G); + u64 pt31_ofs = bo->size - XE_PAGE_SIZE; + + xe_assert(xe, xe->mem.vram.actual_physical_size <= (MAX_NUM_PTE - + IDENTITY_OFFSET - IDENTITY_OFFSET / 2) * SZ_1G); + xe_migrate_program_identity(xe, vm, bo, map_ofs, vram_offset, + comp_pat_index, pt31_ofs); } - - xe_assert(xe, pos == vram_limit); } /* * Example layout created above, with root level = 3: * [PT0...PT7]: kernel PT's for copy/clear; 64 or 4KiB PTE's * [PT8]: Kernel PT for VM_BIND, 4 KiB PTE's - * [PT9...PT27]: Userspace PT's for VM_BIND, 4 KiB PTE's - * [PT28 = PDE 0] [PT29 = PDE 1] [PT30 = PDE 2] [PT31 = 2M vram identity map] + * [PT9...PT26]: Userspace PT's for VM_BIND, 4 KiB PTE's + * [PT27 = PDE 0] [PT28 = PDE 1] [PT29 = PDE 2] [PT30 & PT31 = 2M vram identity map] * * This makes the lowest part of the VM point to the pagetables. * Hence the lowest 2M in the vm should point to itself, with a few writes @@ -348,6 +381,11 @@ static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt) return logical_mask; } +static bool xe_migrate_needs_ccs_emit(struct xe_device *xe) +{ + return xe_device_has_flat_ccs(xe) && !(GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe)); +} + /** * xe_migrate_init() - Initialize a migrate context * @tile: Back-pointer to the tile we're initializing for. @@ -404,7 +442,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) m->q = xe_exec_queue_create_class(xe, primary_gt, vm, XE_ENGINE_CLASS_COPY, EXEC_QUEUE_FLAG_KERNEL | - EXEC_QUEUE_FLAG_PERMANENT); + EXEC_QUEUE_FLAG_PERMANENT, 0); } if (IS_ERR(m->q)) { xe_vm_close_and_put(vm); @@ -421,7 +459,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) return ERR_PTR(err); if (IS_DGFX(xe)) { - if (xe_device_has_flat_ccs(xe)) + if (xe_migrate_needs_ccs_emit(xe)) /* min chunk size corresponds to 4K of CCS Metadata */ m->min_chunk_size = SZ_4K * SZ_64K / xe_device_ccs_bytes(xe, SZ_64K); @@ -475,20 +513,26 @@ static bool xe_migrate_allow_identity(u64 size, const struct xe_res_cursor *cur) return cur->size >= size; } +#define PTE_UPDATE_FLAG_IS_VRAM BIT(0) +#define PTE_UPDATE_FLAG_IS_COMP_PTE BIT(1) + static u32 pte_update_size(struct xe_migrate *m, - bool is_vram, + u32 flags, struct ttm_resource *res, struct xe_res_cursor *cur, u64 *L0, u64 *L0_ofs, u32 *L0_pt, u32 cmd_size, u32 pt_ofs, u32 avail_pts) { u32 cmds = 0; + bool is_vram = PTE_UPDATE_FLAG_IS_VRAM & flags; + bool is_comp_pte = PTE_UPDATE_FLAG_IS_COMP_PTE & flags; *L0_pt = pt_ofs; if (is_vram && xe_migrate_allow_identity(*L0, cur)) { /* Offset into identity map. */ *L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile), - cur->start + vram_region_gpu_offset(res)); + cur->start + vram_region_gpu_offset(res), + is_comp_pte); cmds += cmd_size; } else { /* Clip L0 to available size */ @@ -661,7 +705,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m, struct xe_gt *gt = m->tile->primary_gt; u32 flush_flags = 0; - if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) { + if (!copy_ccs && dst_is_indirect) { /* * If the src is already in vram, then it should already * have been cleared by us, or has been populated by the @@ -737,6 +781,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, bool copy_ccs = xe_device_has_flat_ccs(xe) && xe_bo_needs_ccs_pages(src_bo) && xe_bo_needs_ccs_pages(dst_bo); bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram); + bool use_comp_pat = xe_device_has_flat_ccs(xe) && + GRAPHICS_VER(xe) >= 20 && src_is_vram && !dst_is_vram; /* Copying CCS between two different BOs is not supported yet. */ if (XE_WARN_ON(copy_ccs && src_bo != dst_bo)) @@ -763,10 +809,11 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */ struct xe_sched_job *job; struct xe_bb *bb; - u32 flush_flags; + u32 flush_flags = 0; u32 update_idx; u64 ccs_ofs, ccs_size; u32 ccs_pt; + u32 pte_flags; bool usm = xe->info.has_usm; u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; @@ -779,17 +826,20 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, src_L0 = min(src_L0, dst_L0); - batch_size += pte_update_size(m, src_is_vram, src, &src_it, &src_L0, + pte_flags = src_is_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0; + pte_flags |= use_comp_pat ? PTE_UPDATE_FLAG_IS_COMP_PTE : 0; + batch_size += pte_update_size(m, pte_flags, src, &src_it, &src_L0, &src_L0_ofs, &src_L0_pt, 0, 0, avail_pts); - batch_size += pte_update_size(m, dst_is_vram, dst, &dst_it, &src_L0, + pte_flags = dst_is_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0; + batch_size += pte_update_size(m, pte_flags, dst, &dst_it, &src_L0, &dst_L0_ofs, &dst_L0_pt, 0, avail_pts, avail_pts); if (copy_system_ccs) { ccs_size = xe_device_ccs_bytes(xe, src_L0); - batch_size += pte_update_size(m, false, NULL, &ccs_it, &ccs_size, + batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size, &ccs_ofs, &ccs_pt, 0, 2 * avail_pts, avail_pts); @@ -798,7 +848,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, /* Add copy commands size here */ batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) + - ((xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0)); + ((xe_migrate_needs_ccs_emit(xe) ? EMIT_COPY_CCS_DW : 0)); bb = xe_bb_new(gt, batch_size, usm); if (IS_ERR(bb)) { @@ -827,11 +877,12 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (!copy_only_ccs) emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE); - flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, - IS_DGFX(xe) ? src_is_vram : src_is_pltt, - dst_L0_ofs, - IS_DGFX(xe) ? dst_is_vram : dst_is_pltt, - src_L0, ccs_ofs, copy_ccs); + if (xe_migrate_needs_ccs_emit(xe)) + flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, + IS_DGFX(xe) ? src_is_vram : src_is_pltt, + dst_L0_ofs, + IS_DGFX(xe) ? dst_is_vram : dst_is_pltt, + src_L0, ccs_ofs, copy_ccs); job = xe_bb_create_migration_job(m->q, bb, xe_migrate_batch_base(m, usm), @@ -986,9 +1037,11 @@ static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, * @m: The migration context. * @bo: The buffer object @dst is currently bound to. * @dst: The dst TTM resource to be cleared. + * @clear_flags: flags to specify which data to clear: CCS, BO, or both. * - * Clear the contents of @dst to zero. On flat CCS devices, - * the CCS metadata is cleared to zero as well on VRAM destinations. + * Clear the contents of @dst to zero when XE_MIGRATE_CLEAR_FLAG_BO_DATA is set. + * On flat CCS devices, the CCS metadata is cleared to zero with XE_MIGRATE_CLEAR_FLAG_CCS_DATA. + * Set XE_MIGRATE_CLEAR_FLAG_FULL to clear bo as well as CCS metadata. * TODO: Eliminate the @bo argument. * * Return: Pointer to a dma_fence representing the last clear batch, or @@ -997,18 +1050,27 @@ static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, */ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_bo *bo, - struct ttm_resource *dst) + struct ttm_resource *dst, + u32 clear_flags) { bool clear_vram = mem_type_is_vram(dst->mem_type); + bool clear_bo_data = XE_MIGRATE_CLEAR_FLAG_BO_DATA & clear_flags; + bool clear_ccs = XE_MIGRATE_CLEAR_FLAG_CCS_DATA & clear_flags; struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); - bool clear_system_ccs = (xe_bo_needs_ccs_pages(bo) && !IS_DGFX(xe)) ? true : false; + bool clear_only_system_ccs = false; struct dma_fence *fence = NULL; u64 size = bo->size; struct xe_res_cursor src_it; struct ttm_resource *src = dst; int err; + if (WARN_ON(!clear_bo_data && !clear_ccs)) + return NULL; + + if (!clear_bo_data && clear_ccs && !IS_DGFX(xe)) + clear_only_system_ccs = true; + if (!clear_vram) xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it); else @@ -1022,6 +1084,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_sched_job *job; struct xe_bb *bb; u32 batch_size, update_idx; + u32 pte_flags; bool usm = xe->info.has_usm; u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; @@ -1029,13 +1092,14 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, clear_L0 = xe_migrate_res_sizes(m, &src_it); /* Calculate final sizes and batch size.. */ + pte_flags = clear_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0; batch_size = 2 + - pte_update_size(m, clear_vram, src, &src_it, + pte_update_size(m, pte_flags, src, &src_it, &clear_L0, &clear_L0_ofs, &clear_L0_pt, - clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0, + clear_bo_data ? emit_clear_cmd_len(gt) : 0, 0, avail_pts); - if (xe_device_has_flat_ccs(xe)) + if (xe_migrate_needs_ccs_emit(xe)) batch_size += EMIT_COPY_CCS_DW; /* Clear commands */ @@ -1054,16 +1118,16 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) xe_res_next(&src_it, clear_L0); else - emit_pte(m, bb, clear_L0_pt, clear_vram, clear_system_ccs, + emit_pte(m, bb, clear_L0_pt, clear_vram, clear_only_system_ccs, &src_it, clear_L0, dst); bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; - if (!clear_system_ccs) + if (clear_bo_data) emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram); - if (xe_device_has_flat_ccs(xe)) { + if (xe_migrate_needs_ccs_emit(xe)) { emit_copy_ccs(gt, bb, clear_L0_ofs, true, m->cleared_mem_ofs, false, clear_L0); flush_flags = MI_FLUSH_DW_CCS; @@ -1119,13 +1183,14 @@ err_sync: return ERR_PTR(err); } - if (clear_system_ccs) + if (clear_ccs) bo->ccs_cleared = true; return fence; } static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs, + const struct xe_vm_pgtable_update_op *pt_op, const struct xe_vm_pgtable_update *update, struct xe_migrate_pt_update *pt_update) { @@ -1146,7 +1211,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs, if (!ppgtt_ofs) ppgtt_ofs = xe_migrate_vram_ofs(tile_to_xe(tile), xe_bo_addr(update->pt_bo, 0, - XE_PAGE_SIZE)); + XE_PAGE_SIZE), false); do { u64 addr = ppgtt_ofs + ofs * 8; @@ -1160,8 +1225,12 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs, bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = lower_32_bits(addr); bb->cs[bb->len++] = upper_32_bits(addr); - ops->populate(pt_update, tile, NULL, bb->cs + bb->len, ofs, chunk, - update); + if (pt_op->bind) + ops->populate(pt_update, tile, NULL, bb->cs + bb->len, + ofs, chunk, update); + else + ops->clear(pt_update, tile, NULL, bb->cs + bb->len, + ofs, chunk, update); bb->len += chunk * 2; ofs += chunk; @@ -1186,114 +1255,58 @@ struct migrate_test_params { static struct dma_fence * xe_migrate_update_pgtables_cpu(struct xe_migrate *m, - struct xe_vm *vm, struct xe_bo *bo, - const struct xe_vm_pgtable_update *updates, - u32 num_updates, bool wait_vm, struct xe_migrate_pt_update *pt_update) { XE_TEST_DECLARE(struct migrate_test_params *test = to_migrate_test_params (xe_cur_kunit_priv(XE_TEST_LIVE_MIGRATE));) const struct xe_migrate_pt_update_ops *ops = pt_update->ops; - struct dma_fence *fence; + struct xe_vm *vm = pt_update->vops->vm; + struct xe_vm_pgtable_update_ops *pt_update_ops = + &pt_update->vops->pt_update_ops[pt_update->tile_id]; int err; - u32 i; + u32 i, j; if (XE_TEST_ONLY(test && test->force_gpu)) return ERR_PTR(-ETIME); - if (bo && !dma_resv_test_signaled(bo->ttm.base.resv, - DMA_RESV_USAGE_KERNEL)) - return ERR_PTR(-ETIME); - - if (wait_vm && !dma_resv_test_signaled(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP)) - return ERR_PTR(-ETIME); - if (ops->pre_commit) { pt_update->job = NULL; err = ops->pre_commit(pt_update); if (err) return ERR_PTR(err); } - for (i = 0; i < num_updates; i++) { - const struct xe_vm_pgtable_update *update = &updates[i]; - - ops->populate(pt_update, m->tile, &update->pt_bo->vmap, NULL, - update->ofs, update->qwords, update); - } - if (vm) { - trace_xe_vm_cpu_bind(vm); - xe_device_wmb(vm->xe); - } - - fence = dma_fence_get_stub(); - - return fence; -} - -static bool no_in_syncs(struct xe_vm *vm, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs) -{ - struct dma_fence *fence; - int i; - - for (i = 0; i < num_syncs; i++) { - fence = syncs[i].fence; - - if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &fence->flags)) - return false; - } - if (q) { - fence = xe_exec_queue_last_fence_get(q, vm); - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - dma_fence_put(fence); - return false; + for (i = 0; i < pt_update_ops->num_ops; ++i) { + const struct xe_vm_pgtable_update_op *pt_op = + &pt_update_ops->ops[i]; + + for (j = 0; j < pt_op->num_entries; j++) { + const struct xe_vm_pgtable_update *update = + &pt_op->entries[j]; + + if (pt_op->bind) + ops->populate(pt_update, m->tile, + &update->pt_bo->vmap, NULL, + update->ofs, update->qwords, + update); + else + ops->clear(pt_update, m->tile, + &update->pt_bo->vmap, NULL, + update->ofs, update->qwords, update); } - dma_fence_put(fence); } - return true; + trace_xe_vm_cpu_bind(vm); + xe_device_wmb(vm->xe); + + return dma_fence_get_stub(); } -/** - * xe_migrate_update_pgtables() - Pipelined page-table update - * @m: The migrate context. - * @vm: The vm we'll be updating. - * @bo: The bo whose dma-resv we will await before updating, or NULL if userptr. - * @q: The exec queue to be used for the update or NULL if the default - * migration engine is to be used. - * @updates: An array of update descriptors. - * @num_updates: Number of descriptors in @updates. - * @syncs: Array of xe_sync_entry to await before updating. Note that waits - * will block the engine timeline. - * @num_syncs: Number of entries in @syncs. - * @pt_update: Pointer to a struct xe_migrate_pt_update, which contains - * pointers to callback functions and, if subclassed, private arguments to - * those. - * - * Perform a pipelined page-table update. The update descriptors are typically - * built under the same lock critical section as a call to this function. If - * using the default engine for the updates, they will be performed in the - * order they grab the job_mutex. If different engines are used, external - * synchronization is needed for overlapping updates to maintain page-table - * consistency. Note that the meaing of "overlapping" is that the updates - * touch the same page-table, which might be a higher-level page-directory. - * If no pipelining is needed, then updates may be performed by the cpu. - * - * Return: A dma_fence that, when signaled, indicates the update completion. - */ -struct dma_fence * -xe_migrate_update_pgtables(struct xe_migrate *m, - struct xe_vm *vm, - struct xe_bo *bo, - struct xe_exec_queue *q, - const struct xe_vm_pgtable_update *updates, - u32 num_updates, - struct xe_sync_entry *syncs, u32 num_syncs, - struct xe_migrate_pt_update *pt_update) +static struct dma_fence * +__xe_migrate_update_pgtables(struct xe_migrate *m, + struct xe_migrate_pt_update *pt_update, + struct xe_vm_pgtable_update_ops *pt_update_ops) { const struct xe_migrate_pt_update_ops *ops = pt_update->ops; struct xe_tile *tile = m->tile; @@ -1302,59 +1315,53 @@ xe_migrate_update_pgtables(struct xe_migrate *m, struct xe_sched_job *job; struct dma_fence *fence; struct drm_suballoc *sa_bo = NULL; - struct xe_vma *vma = pt_update->vma; struct xe_bb *bb; - u32 i, batch_size, ppgtt_ofs, update_idx, page_ofs = 0; + u32 i, j, batch_size = 0, ppgtt_ofs, update_idx, page_ofs = 0; + u32 num_updates = 0, current_update = 0; u64 addr; int err = 0; - bool usm = !q && xe->info.has_usm; - bool first_munmap_rebind = vma && - vma->gpuva.flags & XE_VMA_FIRST_REBIND; - struct xe_exec_queue *q_override = !q ? m->q : q; - u16 pat_index = xe->pat.idx[XE_CACHE_WB]; + bool is_migrate = pt_update_ops->q == m->q; + bool usm = is_migrate && xe->info.has_usm; + + for (i = 0; i < pt_update_ops->num_ops; ++i) { + struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i]; + struct xe_vm_pgtable_update *updates = pt_op->entries; - /* Use the CPU if no in syncs and engine is idle */ - if (no_in_syncs(vm, q, syncs, num_syncs) && xe_exec_queue_is_idle(q_override)) { - fence = xe_migrate_update_pgtables_cpu(m, vm, bo, updates, - num_updates, - first_munmap_rebind, - pt_update); - if (!IS_ERR(fence) || fence == ERR_PTR(-EAGAIN)) - return fence; + num_updates += pt_op->num_entries; + for (j = 0; j < pt_op->num_entries; ++j) { + u32 num_cmds = DIV_ROUND_UP(updates[j].qwords, + MAX_PTE_PER_SDI); + + /* align noop + MI_STORE_DATA_IMM cmd prefix */ + batch_size += 4 * num_cmds + updates[j].qwords * 2; + } } /* fixed + PTE entries */ if (IS_DGFX(xe)) - batch_size = 2; + batch_size += 2; else - batch_size = 6 + num_updates * 2; + batch_size += 6 * (num_updates / MAX_PTE_PER_SDI + 1) + + num_updates * 2; - for (i = 0; i < num_updates; i++) { - u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, MAX_PTE_PER_SDI); - - /* align noop + MI_STORE_DATA_IMM cmd prefix */ - batch_size += 4 * num_cmds + updates[i].qwords * 2; - } - - /* - * XXX: Create temp bo to copy from, if batch_size becomes too big? - * - * Worst case: Sum(2 * (each lower level page size) + (top level page size)) - * Should be reasonably bound.. - */ - xe_tile_assert(tile, batch_size < SZ_128K); - - bb = xe_bb_new(gt, batch_size, !q && xe->info.has_usm); + bb = xe_bb_new(gt, batch_size, usm); if (IS_ERR(bb)) return ERR_CAST(bb); /* For sysmem PTE's, need to map them in our hole.. */ if (!IS_DGFX(xe)) { + u32 ptes, ofs; + ppgtt_ofs = NUM_KERNEL_PDE - 1; - if (q) { - xe_tile_assert(tile, num_updates <= NUM_VMUSA_WRITES_PER_UNIT); + if (!is_migrate) { + u32 num_units = DIV_ROUND_UP(num_updates, + NUM_VMUSA_WRITES_PER_UNIT); - sa_bo = drm_suballoc_new(&m->vm_update_sa, 1, + if (num_units > m->vm_update_sa.size) { + err = -ENOBUFS; + goto err_bb; + } + sa_bo = drm_suballoc_new(&m->vm_update_sa, num_units, GFP_KERNEL, true, 0); if (IS_ERR(sa_bo)) { err = PTR_ERR(sa_bo); @@ -1370,18 +1377,49 @@ xe_migrate_update_pgtables(struct xe_migrate *m, } /* Map our PT's to gtt */ - bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(num_updates); - bb->cs[bb->len++] = ppgtt_ofs * XE_PAGE_SIZE + page_ofs; - bb->cs[bb->len++] = 0; /* upper_32_bits */ - - for (i = 0; i < num_updates; i++) { - struct xe_bo *pt_bo = updates[i].pt_bo; + i = 0; + j = 0; + ptes = num_updates; + ofs = ppgtt_ofs * XE_PAGE_SIZE + page_ofs; + while (ptes) { + u32 chunk = min(MAX_PTE_PER_SDI, ptes); + u32 idx = 0; + + bb->cs[bb->len++] = MI_STORE_DATA_IMM | + MI_SDI_NUM_QW(chunk); + bb->cs[bb->len++] = ofs; + bb->cs[bb->len++] = 0; /* upper_32_bits */ + + for (; i < pt_update_ops->num_ops; ++i) { + struct xe_vm_pgtable_update_op *pt_op = + &pt_update_ops->ops[i]; + struct xe_vm_pgtable_update *updates = pt_op->entries; + + for (; j < pt_op->num_entries; ++j, ++current_update, ++idx) { + struct xe_vm *vm = pt_update->vops->vm; + struct xe_bo *pt_bo = updates[j].pt_bo; + + if (idx == chunk) + goto next_cmd; + + xe_tile_assert(tile, pt_bo->size == SZ_4K); + + /* Map a PT at most once */ + if (pt_bo->update_index < 0) + pt_bo->update_index = current_update; + + addr = vm->pt_ops->pte_encode_bo(pt_bo, 0, + XE_CACHE_WB, 0); + bb->cs[bb->len++] = lower_32_bits(addr); + bb->cs[bb->len++] = upper_32_bits(addr); + } - xe_tile_assert(tile, pt_bo->size == SZ_4K); + j = 0; + } - addr = vm->pt_ops->pte_encode_bo(pt_bo, 0, pat_index, 0); - bb->cs[bb->len++] = lower_32_bits(addr); - bb->cs[bb->len++] = upper_32_bits(addr); +next_cmd: + ptes -= chunk; + ofs += chunk * sizeof(u64); } bb->cs[bb->len++] = MI_BATCH_BUFFER_END; @@ -1389,19 +1427,36 @@ xe_migrate_update_pgtables(struct xe_migrate *m, addr = xe_migrate_vm_addr(ppgtt_ofs, 0) + (page_ofs / sizeof(u64)) * XE_PAGE_SIZE; - for (i = 0; i < num_updates; i++) - write_pgtable(tile, bb, addr + i * XE_PAGE_SIZE, - &updates[i], pt_update); + for (i = 0; i < pt_update_ops->num_ops; ++i) { + struct xe_vm_pgtable_update_op *pt_op = + &pt_update_ops->ops[i]; + struct xe_vm_pgtable_update *updates = pt_op->entries; + + for (j = 0; j < pt_op->num_entries; ++j) { + struct xe_bo *pt_bo = updates[j].pt_bo; + + write_pgtable(tile, bb, addr + + pt_bo->update_index * XE_PAGE_SIZE, + pt_op, &updates[j], pt_update); + } + } } else { /* phys pages, no preamble required */ bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; - for (i = 0; i < num_updates; i++) - write_pgtable(tile, bb, 0, &updates[i], pt_update); + for (i = 0; i < pt_update_ops->num_ops; ++i) { + struct xe_vm_pgtable_update_op *pt_op = + &pt_update_ops->ops[i]; + struct xe_vm_pgtable_update *updates = pt_op->entries; + + for (j = 0; j < pt_op->num_entries; ++j) + write_pgtable(tile, bb, 0, pt_op, &updates[j], + pt_update); + } } - job = xe_bb_create_migration_job(q ?: m->q, bb, + job = xe_bb_create_migration_job(pt_update_ops->q, bb, xe_migrate_batch_base(m, usm), update_idx); if (IS_ERR(job)) { @@ -1409,46 +1464,20 @@ xe_migrate_update_pgtables(struct xe_migrate *m, goto err_sa; } - /* Wait on BO move */ - if (bo) { - err = xe_sched_job_add_deps(job, bo->ttm.base.resv, - DMA_RESV_USAGE_KERNEL); - if (err) - goto err_job; - } - - /* - * Munmap style VM unbind, need to wait for all jobs to be complete / - * trigger preempts before moving forward - */ - if (first_munmap_rebind) { - err = xe_sched_job_add_deps(job, xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP); - if (err) - goto err_job; - } - - err = xe_sched_job_last_fence_add_dep(job, vm); - for (i = 0; !err && i < num_syncs; i++) - err = xe_sync_entry_add_deps(&syncs[i], job); - - if (err) - goto err_job; - if (ops->pre_commit) { pt_update->job = job; err = ops->pre_commit(pt_update); if (err) goto err_job; } - if (!q) + if (is_migrate) mutex_lock(&m->job_mutex); xe_sched_job_arm(job); fence = dma_fence_get(&job->drm.s_fence->finished); xe_sched_job_push(job); - if (!q) + if (is_migrate) mutex_unlock(&m->job_mutex); xe_bb_free(bb, fence); @@ -1466,6 +1495,40 @@ err_bb: } /** + * xe_migrate_update_pgtables() - Pipelined page-table update + * @m: The migrate context. + * @pt_update: PT update arguments + * + * Perform a pipelined page-table update. The update descriptors are typically + * built under the same lock critical section as a call to this function. If + * using the default engine for the updates, they will be performed in the + * order they grab the job_mutex. If different engines are used, external + * synchronization is needed for overlapping updates to maintain page-table + * consistency. Note that the meaing of "overlapping" is that the updates + * touch the same page-table, which might be a higher-level page-directory. + * If no pipelining is needed, then updates may be performed by the cpu. + * + * Return: A dma_fence that, when signaled, indicates the update completion. + */ +struct dma_fence * +xe_migrate_update_pgtables(struct xe_migrate *m, + struct xe_migrate_pt_update *pt_update) + +{ + struct xe_vm_pgtable_update_ops *pt_update_ops = + &pt_update->vops->pt_update_ops[pt_update->tile_id]; + struct dma_fence *fence; + + fence = xe_migrate_update_pgtables_cpu(m, pt_update); + + /* -ETIME indicates a job is needed, anything else is legit error */ + if (!IS_ERR(fence) || PTR_ERR(fence) != -ETIME) + return fence; + + return __xe_migrate_update_pgtables(m, pt_update, pt_update_ops); +} + +/** * xe_migrate_wait() - Complete all operations using the xe_migrate context * @m: Migrate context to wait for. * diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index 951f19318ea4..0109866e398a 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -6,7 +6,7 @@ #ifndef _XE_MIGRATE_ #define _XE_MIGRATE_ -#include <drm/drm_mm.h> +#include <linux/types.h> struct dma_fence; struct iosys_map; @@ -47,6 +47,24 @@ struct xe_migrate_pt_update_ops { struct xe_tile *tile, struct iosys_map *map, void *pos, u32 ofs, u32 num_qwords, const struct xe_vm_pgtable_update *update); + /** + * @clear: Clear a command buffer or page-table with ptes. + * @pt_update: Embeddable callback argument. + * @tile: The tile for the current operation. + * @map: struct iosys_map into the memory to be populated. + * @pos: If @map is NULL, map into the memory to be populated. + * @ofs: qword offset into @map, unused if @map is NULL. + * @num_qwords: Number of qwords to write. + * @update: Information about the PTEs to be inserted. + * + * This interface is intended to be used as a callback into the + * page-table system to populate command buffers or shared + * page-tables with PTEs. + */ + void (*clear)(struct xe_migrate_pt_update *pt_update, + struct xe_tile *tile, struct iosys_map *map, + void *pos, u32 ofs, u32 num_qwords, + const struct xe_vm_pgtable_update *update); /** * @pre_commit: Callback to be called just before arming the @@ -67,14 +85,10 @@ struct xe_migrate_pt_update_ops { struct xe_migrate_pt_update { /** @ops: Pointer to the struct xe_migrate_pt_update_ops callbacks */ const struct xe_migrate_pt_update_ops *ops; - /** @vma: The vma we're updating the pagetable for. */ - struct xe_vma *vma; + /** @vops: VMA operations */ + struct xe_vma_ops *vops; /** @job: The job if a GPU page-table update. NULL otherwise */ struct xe_sched_job *job; - /** @start: Start of update for the range fence */ - u64 start; - /** @last: Last of update for the range fence */ - u64 last; /** @tile_id: Tile ID of the update */ u8 tile_id; }; @@ -88,23 +102,22 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct ttm_resource *dst, bool copy_only_ccs); +#define XE_MIGRATE_CLEAR_FLAG_BO_DATA BIT(0) +#define XE_MIGRATE_CLEAR_FLAG_CCS_DATA BIT(1) +#define XE_MIGRATE_CLEAR_FLAG_FULL (XE_MIGRATE_CLEAR_FLAG_BO_DATA | \ + XE_MIGRATE_CLEAR_FLAG_CCS_DATA) struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_bo *bo, - struct ttm_resource *dst); + struct ttm_resource *dst, + u32 clear_flags); struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m); struct dma_fence * xe_migrate_update_pgtables(struct xe_migrate *m, - struct xe_vm *vm, - struct xe_bo *bo, - struct xe_exec_queue *q, - const struct xe_vm_pgtable_update *updates, - u32 num_updates, - struct xe_sync_entry *syncs, u32 num_syncs, struct xe_migrate_pt_update *pt_update); void xe_migrate_wait(struct xe_migrate *m); -struct xe_exec_queue *xe_tile_migrate_engine(struct xe_tile *tile); +struct xe_exec_queue *xe_tile_migrate_exec_queue(struct xe_tile *tile); #endif diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index f92faad4b96d..3fd462fda625 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -29,33 +29,60 @@ static void tiles_fini(void *arg) struct xe_tile *tile; int id; - for_each_tile(tile, xe, id) + for_each_remote_tile(tile, xe, id) tile->mmio.regs = NULL; } -int xe_mmio_probe_tiles(struct xe_device *xe) +/* + * On multi-tile devices, partition the BAR space for MMIO on each tile, + * possibly accounting for register override on the number of tiles available. + * Resulting memory layout is like below: + * + * .----------------------. <- tile_count * tile_mmio_size + * | .... | + * |----------------------| <- 2 * tile_mmio_size + * | tile1->mmio.regs | + * |----------------------| <- 1 * tile_mmio_size + * | tile0->mmio.regs | + * '----------------------' <- 0MB + */ +static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) { - size_t tile_mmio_size = SZ_16M, tile_mmio_ext_size = xe->info.tile_mmio_ext_size; - u8 id, tile_count = xe->info.tile_count; - struct xe_gt *gt = xe_root_mmio_gt(xe); struct xe_tile *tile; void __iomem *regs; - u32 mtcfg; + u8 id; - if (tile_count == 1) - goto add_mmio_ext; + /* + * Nothing to be done as tile 0 has already been setup earlier with the + * entire BAR mapped - see xe_mmio_init() + */ + if (xe->info.tile_count == 1) + return; + /* Possibly override number of tile based on configuration register */ if (!xe->info.skip_mtcfg) { + struct xe_gt *gt = xe_root_mmio_gt(xe); + u8 tile_count; + u32 mtcfg; + + /* + * Although the per-tile mmio regs are not yet initialized, this + * is fine as it's going to the root gt, that's guaranteed to be + * initialized earlier in xe_mmio_init() + */ mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR); tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; + if (tile_count < xe->info.tile_count) { drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n", xe->info.tile_count, tile_count); xe->info.tile_count = tile_count; /* - * FIXME: Needs some work for standalone media, but should be impossible - * with multi-tile for now. + * FIXME: Needs some work for standalone media, but + * should be impossible with multi-tile for now: + * multi-tile platform with standalone media doesn't + * exist */ xe->info.gt_count = xe->info.tile_count; } @@ -67,23 +94,51 @@ int xe_mmio_probe_tiles(struct xe_device *xe) tile->mmio.regs = regs; regs += tile_mmio_size; } +} -add_mmio_ext: - /* - * By design, there's a contiguous multi-tile MMIO space (16MB hard coded per tile). - * When supported, there could be an additional contiguous multi-tile MMIO extension - * space ON TOP of it, and hence the necessity for distinguished MMIO spaces. - */ - if (xe->info.has_mmio_ext) { - regs = xe->mmio.regs + tile_mmio_size * tile_count; +/* + * On top of all the multi-tile MMIO space there can be a platform-dependent + * extension for each tile, resulting in a layout like below: + * + * .----------------------. <- ext_base + tile_count * tile_mmio_ext_size + * | .... | + * |----------------------| <- ext_base + 2 * tile_mmio_ext_size + * | tile1->mmio_ext.regs | + * |----------------------| <- ext_base + 1 * tile_mmio_ext_size + * | tile0->mmio_ext.regs | + * |======================| <- ext_base = tile_count * tile_mmio_size + * | | + * | mmio.regs | + * | | + * '----------------------' <- 0MB + * + * Set up the tile[]->mmio_ext pointers/sizes. + */ +static void mmio_extension_setup(struct xe_device *xe, size_t tile_mmio_size, + size_t tile_mmio_ext_size) +{ + struct xe_tile *tile; + void __iomem *regs; + u8 id; - for_each_tile(tile, xe, id) { - tile->mmio_ext.size = tile_mmio_ext_size; - tile->mmio_ext.regs = regs; + if (!xe->info.has_mmio_ext) + return; - regs += tile_mmio_ext_size; - } + regs = xe->mmio.regs + tile_mmio_size * xe->info.tile_count; + for_each_tile(tile, xe, id) { + tile->mmio_ext.size = tile_mmio_ext_size; + tile->mmio_ext.regs = regs; + regs += tile_mmio_ext_size; } +} + +int xe_mmio_probe_tiles(struct xe_device *xe) +{ + size_t tile_mmio_size = SZ_16M; + size_t tile_mmio_ext_size = xe->info.tile_mmio_ext_size; + + mmio_multi_tile_setup(xe, tile_mmio_size); + mmio_extension_setup(xe, tile_mmio_size, tile_mmio_ext_size); return devm_add_action_or_reset(xe->drm.dev, tiles_fini, xe); } @@ -91,9 +146,11 @@ add_mmio_ext: static void mmio_fini(void *arg) { struct xe_device *xe = arg; + struct xe_tile *root_tile = xe_device_get_root_tile(xe); pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs); xe->mmio.regs = NULL; + root_tile->mmio.regs = NULL; } int xe_mmio_init(struct xe_device *xe) @@ -121,12 +178,29 @@ int xe_mmio_init(struct xe_device *xe) return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe); } +static void mmio_flush_pending_writes(struct xe_gt *gt) +{ +#define DUMMY_REG_OFFSET 0x130030 + struct xe_tile *tile = gt_to_tile(gt); + int i; + + if (tile->xe->info.platform != XE_LUNARLAKE) + return; + + /* 4 dummy writes */ + for (i = 0; i < 4; i++) + writel(0, tile->mmio.regs + DUMMY_REG_OFFSET); +} + u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); u8 val; + /* Wa_15015404425 */ + mmio_flush_pending_writes(gt); + val = readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); @@ -139,6 +213,9 @@ u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); u16 val; + /* Wa_15015404425 */ + mmio_flush_pending_writes(gt); + val = readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); @@ -151,7 +228,11 @@ void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); trace_xe_reg_rw(gt, true, addr, val, sizeof(val)); - writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); + + if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt))) + xe_gt_sriov_vf_write32(gt, reg, val); + else + writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); } u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) @@ -160,6 +241,9 @@ u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); u32 val; + /* Wa_15015404425 */ + mmio_flush_pending_writes(gt); + if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt))) val = xe_gt_sriov_vf_read32(gt, reg); else @@ -251,37 +335,24 @@ u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg) return (u64)udw << 32 | ldw; } -/** - * xe_mmio_wait32() - Wait for a register to match the desired masked value - * @gt: MMIO target GT - * @reg: register to read value from - * @mask: mask to be applied to the value read from the register - * @val: desired value after applying the mask - * @timeout_us: time out after this period of time. Wait logic tries to be - * smart, applying an exponential backoff until @timeout_us is reached. - * @out_val: if not NULL, points where to store the last unmasked value - * @atomic: needs to be true if calling from an atomic context - * - * This function polls for the desired masked value and returns zero on success - * or -ETIMEDOUT if timed out. - * - * Note that @timeout_us represents the minimum amount of time to wait before - * giving up. The actual time taken by this function can be a little more than - * @timeout_us for different reasons, specially in non-atomic contexts. Thus, - * it is possible that this function succeeds even after @timeout_us has passed. - */ -int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, - u32 *out_val, bool atomic) +static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, + u32 *out_val, bool atomic, bool expect_match) { ktime_t cur = ktime_get_raw(); const ktime_t end = ktime_add_us(cur, timeout_us); int ret = -ETIMEDOUT; s64 wait = 10; u32 read; + bool check; for (;;) { read = xe_mmio_read32(gt, reg); - if ((read & mask) == val) { + + check = (read & mask) == val; + if (!expect_match) + check = !check; + + if (check) { ret = 0; break; } @@ -302,7 +373,12 @@ int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 t if (ret != 0) { read = xe_mmio_read32(gt, reg); - if ((read & mask) == val) + + check = (read & mask) == val; + if (!expect_match) + check = !check; + + if (check) ret = 0; } @@ -313,62 +389,45 @@ int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 t } /** - * xe_mmio_wait32_not() - Wait for a register to return anything other than the given masked value + * xe_mmio_wait32() - Wait for a register to match the desired masked value * @gt: MMIO target GT * @reg: register to read value from * @mask: mask to be applied to the value read from the register - * @val: value to match after applying the mask + * @val: desired value after applying the mask * @timeout_us: time out after this period of time. Wait logic tries to be * smart, applying an exponential backoff until @timeout_us is reached. * @out_val: if not NULL, points where to store the last unmasked value * @atomic: needs to be true if calling from an atomic context * - * This function polls for a masked value to change from a given value and - * returns zero on success or -ETIMEDOUT if timed out. + * This function polls for the desired masked value and returns zero on success + * or -ETIMEDOUT if timed out. * * Note that @timeout_us represents the minimum amount of time to wait before * giving up. The actual time taken by this function can be a little more than * @timeout_us for different reasons, specially in non-atomic contexts. Thus, * it is possible that this function succeeds even after @timeout_us has passed. */ +int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, + u32 *out_val, bool atomic) +{ + return __xe_mmio_wait32(gt, reg, mask, val, timeout_us, out_val, atomic, true); +} + +/** + * xe_mmio_wait32_not() - Wait for a register to return anything other than the given masked value + * @gt: MMIO target GT + * @reg: register to read value from + * @mask: mask to be applied to the value read from the register + * @val: value not to be matched after applying the mask + * @timeout_us: time out after this period of time + * @out_val: if not NULL, points where to store the last unmasked value + * @atomic: needs to be true if calling from an atomic context + * + * This function works exactly like xe_mmio_wait32() with the exception that + * @val is expected not to be matched. + */ int xe_mmio_wait32_not(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, u32 *out_val, bool atomic) { - ktime_t cur = ktime_get_raw(); - const ktime_t end = ktime_add_us(cur, timeout_us); - int ret = -ETIMEDOUT; - s64 wait = 10; - u32 read; - - for (;;) { - read = xe_mmio_read32(gt, reg); - if ((read & mask) != val) { - ret = 0; - break; - } - - cur = ktime_get_raw(); - if (!ktime_before(cur, end)) - break; - - if (ktime_after(ktime_add_us(cur, wait), end)) - wait = ktime_us_delta(end, cur); - - if (atomic) - udelay(wait); - else - usleep_range(wait, wait << 1); - wait <<= 1; - } - - if (ret != 0) { - read = xe_mmio_read32(gt, reg); - if ((read & mask) != val) - ret = 0; - } - - if (out_val) - *out_val = read; - - return ret; + return __xe_mmio_wait32(gt, reg, mask, val, timeout_us, out_val, atomic, false); } diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index 6ae0cc32c651..26551410ecc8 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -22,7 +22,6 @@ u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set); int xe_mmio_write32_and_verify(struct xe_gt *gt, struct xe_reg reg, u32 val, u32 mask, u32 eval); bool xe_mmio_in_range(const struct xe_gt *gt, const struct xe_mmio_range *range, struct xe_reg reg); -int xe_mmio_probe_vram(struct xe_device *xe); u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg); int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, u32 *out_val, bool atomic); diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 499540add465..bfc3deebdaa2 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -8,14 +8,17 @@ #include <linux/init.h> #include <linux/module.h> +#include <drm/drm_module.h> + #include "xe_drv.h" #include "xe_hw_fence.h" #include "xe_pci.h" +#include "xe_pm.h" #include "xe_observation.h" #include "xe_sched_job.h" struct xe_modparam xe_modparam = { - .enable_display = true, + .probe_display = true, .guc_log_level = 5, .force_probe = CONFIG_DRM_XE_FORCE_PROBE, .wedged_mode = 1, @@ -25,8 +28,8 @@ struct xe_modparam xe_modparam = { module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); -module_param_named(enable_display, xe_modparam.enable_display, bool, 0444); -MODULE_PARM_DESC(enable_display, "Enable display"); +module_param_named(probe_display, xe_modparam.probe_display, bool, 0444); +MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched (default: true)"); module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, uint, 0600); MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size(in MiB)"); @@ -61,13 +64,28 @@ module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, int, 0600); MODULE_PARM_DESC(wedged_mode, "Module's default policy for the wedged mode - 0=never, 1=upon-critical-errors[default], 2=upon-any-hang"); +static int xe_check_nomodeset(void) +{ + if (drm_firmware_drivers_only()) + return -ENODEV; + + return 0; +} + struct init_funcs { int (*init)(void); void (*exit)(void); }; +static void xe_dummy_exit(void) +{ +} + static const struct init_funcs init_funcs[] = { { + .init = xe_check_nomodeset, + }, + { .init = xe_hw_fence_module_init, .exit = xe_hw_fence_module_exit, }, @@ -83,17 +101,41 @@ static const struct init_funcs init_funcs[] = { .init = xe_observation_sysctl_register, .exit = xe_observation_sysctl_unregister, }, + { + .init = xe_pm_module_init, + .exit = xe_dummy_exit, + }, }; +static int __init xe_call_init_func(unsigned int i) +{ + if (WARN_ON(i >= ARRAY_SIZE(init_funcs))) + return 0; + if (!init_funcs[i].init) + return 0; + + return init_funcs[i].init(); +} + +static void xe_call_exit_func(unsigned int i) +{ + if (WARN_ON(i >= ARRAY_SIZE(init_funcs))) + return; + if (!init_funcs[i].exit) + return; + + init_funcs[i].exit(); +} + static int __init xe_init(void) { int err, i; for (i = 0; i < ARRAY_SIZE(init_funcs); i++) { - err = init_funcs[i].init(); + err = xe_call_init_func(i); if (err) { while (i--) - init_funcs[i].exit(); + xe_call_exit_func(i); return err; } } @@ -106,7 +148,7 @@ static void __exit xe_exit(void) int i; for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--) - init_funcs[i].exit(); + xe_call_exit_func(i); } module_init(xe_init); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index 61a0d28a28c8..161a5e6f717f 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -11,7 +11,7 @@ /* Module modprobe variables */ struct xe_modparam { bool force_execlist; - bool enable_display; + bool probe_display; u32 force_vram_bar_size; int guc_log_level; char *guc_firmware_path; diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 6d69f751bf78..2804f14f8f29 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -10,7 +10,7 @@ #include <drm/drm_drv.h> #include <drm/drm_managed.h> -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> #include "abi/guc_actions_slpc_abi.h" #include "instructions/xe_mi_commands.h" @@ -440,6 +440,10 @@ static void xe_oa_enable(struct xe_oa_stream *stream) val = __format_to_oactrl(format, regs->oa_ctrl_counter_select_mask) | __oa_ccs_select(stream) | OAG_OACONTROL_OA_COUNTER_ENABLE; + if (GRAPHICS_VER(stream->oa->xe) >= 20 && + stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) + val |= OAG_OACONTROL_OA_PES_DISAG_EN; + xe_mmio_write32(stream->gt, regs->oa_ctrl, val); } @@ -641,7 +645,7 @@ static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc, u32 offset = xe_bo_ggtt_addr(lrc->bo); do { - bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(22) /* GGTT */ | 2; + bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); bb->cs[bb->len++] = offset + flex->offset * sizeof(u32); bb->cs[bb->len++] = 0; bb->cs[bb->len++] = flex->value; @@ -705,8 +709,7 @@ static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable) { RING_CONTEXT_CONTROL(stream->hwe->mmio_base), regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, - enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) + _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE), }, }; struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol }; @@ -738,10 +741,8 @@ static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable) { RING_CONTEXT_CONTROL(stream->hwe->mmio_base), regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, - enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) | - _MASKED_FIELD(CTX_CTRL_RUN_ALONE, - enable ? CTX_CTRL_RUN_ALONE : 0), + _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE) | + _MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? CTX_CTRL_RUN_ALONE : 0), }, }; struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol }; @@ -1244,8 +1245,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) vm_flags_mod(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY, VM_MAYWRITE | VM_MAYEXEC); - xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == - (vma->vm_end - vma->vm_start) >> PAGE_SHIFT); + xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == vma_pages(vma)); for (i = 0; i < bo->ttm.ttm->num_pages; i++) { ret = remap_pfn_range(vma, start, page_to_pfn(bo->ttm.ttm->pages[i]), PAGE_SIZE, vma->vm_page_prot); @@ -1260,7 +1260,6 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) static const struct file_operations xe_oa_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .release = xe_oa_release, .poll = xe_oa_poll, .read = xe_oa_read, diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 540c3ec53a6d..8862eca73fbe 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -11,7 +11,7 @@ #include <linux/mutex.h> #include <linux/types.h> -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> #include "regs/xe_reg_defs.h" #include "xe_hw_engine_types.h" diff --git a/drivers/gpu/drm/xe/xe_observation.c b/drivers/gpu/drm/xe/xe_observation.c index fcb584b42a7d..8ec1b84cbb9e 100644 --- a/drivers/gpu/drm/xe/xe_observation.c +++ b/drivers/gpu/drm/xe/xe_observation.c @@ -6,7 +6,7 @@ #include <linux/errno.h> #include <linux/sysctl.h> -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> #include "xe_oa.h" #include "xe_observation.h" @@ -66,7 +66,6 @@ static struct ctl_table observation_ctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, - {} }; /** diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 4ee32ee1cc88..f291a1730024 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -5,7 +5,9 @@ #include "xe_pat.h" -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> + +#include <generated/xe_wa_oob.h> #include "regs/xe_reg_defs.h" #include "xe_assert.h" @@ -15,6 +17,7 @@ #include "xe_gt_mcr.h" #include "xe_mmio.h" #include "xe_sriov.h" +#include "xe_wa.h" #define _PAT_ATS 0x47fc #define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \ @@ -382,7 +385,13 @@ void xe_pat_init_early(struct xe_device *xe) if (GRAPHICS_VER(xe) == 20) { xe->pat.ops = &xe2_pat_ops; xe->pat.table = xe2_pat_table; - xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table); + + /* Wa_16023588340. XXX: Should use XE_WA */ + if (GRAPHICS_VERx100(xe) == 2001) + xe->pat.n_entries = 28; /* Disable CLOS3 */ + else + xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table); + xe->pat.idx[XE_CACHE_NONE] = 3; xe->pat.idx[XE_CACHE_WT] = 15; xe->pat.idx[XE_CACHE_WB] = 2; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 732ee0d02124..5e962e72c97e 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -59,6 +59,7 @@ struct xe_device_desc { u8 has_display:1; u8 has_heci_gscfi:1; + u8 has_heci_cscfi:1; u8 has_llc:1; u8 has_mmio_ext:1; u8 has_sriov:1; @@ -337,14 +338,13 @@ static const struct xe_device_desc mtl_desc = { static const struct xe_device_desc lnl_desc = { PLATFORM(LUNARLAKE), .has_display = true, - .require_force_probe = true, }; static const struct xe_device_desc bmg_desc = { DGFX_FEATURES, PLATFORM(BATTLEMAGE), .has_display = true, - .require_force_probe = true, + .has_heci_cscfi = 1, }; #undef PLATFORM @@ -606,6 +606,7 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.is_dgfx = desc->is_dgfx; xe->info.has_heci_gscfi = desc->has_heci_gscfi; + xe->info.has_heci_cscfi = desc->has_heci_cscfi; xe->info.has_llc = desc->has_llc; xe->info.has_mmio_ext = desc->has_mmio_ext; xe->info.has_sriov = desc->has_sriov; @@ -613,9 +614,9 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.skip_mtcfg = desc->skip_mtcfg; xe->info.skip_pcode = desc->skip_pcode; - xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && - xe_modparam.enable_display && - desc->has_display; + xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && + xe_modparam.probe_display && + desc->has_display; err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0); if (err) @@ -744,7 +745,7 @@ static void xe_pci_remove(struct pci_dev *pdev) { struct xe_device *xe; - xe = pci_get_drvdata(pdev); + xe = pdev_to_xe_device(pdev); if (!xe) /* driver load aborted, nothing to cleanup */ return; @@ -792,7 +793,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (IS_ERR(xe)) return PTR_ERR(xe); - pci_set_drvdata(pdev, xe); + pci_set_drvdata(pdev, &xe->drm); xe_pm_assert_unbounded_bridge(xe); subplatform_desc = find_subplatform(xe, desc); @@ -815,7 +816,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) display:%s dma_m_s:%d tc:%d gscfi:%d", + drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) display:%s dma_m_s:%d tc:%d gscfi:%d cscfi:%d", desc->platform_name, subplatform_desc ? subplatform_desc->name : "", xe->info.devid, xe->info.revid, @@ -826,14 +827,13 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) xe->info.media_name, xe->info.media_verx100 / 100, xe->info.media_verx100 % 100, - str_yes_no(xe->info.enable_display), + str_yes_no(xe->info.probe_display), xe->info.dma_mask_size, xe->info.tile_count, - xe->info.has_heci_gscfi); + xe->info.has_heci_gscfi, xe->info.has_heci_cscfi); - drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, D:%s, B:%s)\n", + drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, B:%s)\n", xe_step_name(xe->info.step.graphics), xe_step_name(xe->info.step.media), - xe_step_name(xe->info.step.display), xe_step_name(xe->info.step.basedie)); drm_dbg(&xe->drm, "SR-IOV support: %s (mode: %s)\n", @@ -924,6 +924,8 @@ static int xe_pci_resume(struct device *dev) if (err) return err; + pci_restore_state(pdev); + err = pci_enable_device(pdev); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 9c4eefdf6642..7397d556996a 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -12,7 +12,6 @@ #include "xe_assert.h" #include "xe_device.h" -#include "xe_gt.h" #include "xe_mmio.h" #include "xe_pcode_api.h" @@ -30,7 +29,7 @@ * - PCODE for display operations */ -static int pcode_mailbox_status(struct xe_gt *gt) +static int pcode_mailbox_status(struct xe_tile *tile) { u32 err; static const struct pcode_err_decode err_decode[] = { @@ -45,9 +44,9 @@ static int pcode_mailbox_status(struct xe_gt *gt) [PCODE_ERROR_MASK] = {-EPROTO, "Unknown"}, }; - err = xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_ERROR_MASK; + err = xe_mmio_read32(tile->primary_gt, PCODE_MAILBOX) & PCODE_ERROR_MASK; if (err) { - drm_err(>_to_xe(gt)->drm, "PCODE Mailbox failed: %d %s", err, + drm_err(&tile_to_xe(tile)->drm, "PCODE Mailbox failed: %d %s", err, err_decode[err].str ?: "Unknown"); return err_decode[err].errno ?: -EPROTO; } @@ -55,84 +54,85 @@ static int pcode_mailbox_status(struct xe_gt *gt) return 0; } -static int __pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, +static int __pcode_mailbox_rw(struct xe_tile *tile, u32 mbox, u32 *data0, u32 *data1, unsigned int timeout_ms, bool return_data, bool atomic) { + struct xe_gt *mmio = tile->primary_gt; int err; - if (gt_to_xe(gt)->info.skip_pcode) + if (tile_to_xe(tile)->info.skip_pcode) return 0; - if ((xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_READY) != 0) + if ((xe_mmio_read32(mmio, PCODE_MAILBOX) & PCODE_READY) != 0) return -EAGAIN; - xe_mmio_write32(gt, PCODE_DATA0, *data0); - xe_mmio_write32(gt, PCODE_DATA1, data1 ? *data1 : 0); - xe_mmio_write32(gt, PCODE_MAILBOX, PCODE_READY | mbox); + xe_mmio_write32(mmio, PCODE_DATA0, *data0); + xe_mmio_write32(mmio, PCODE_DATA1, data1 ? *data1 : 0); + xe_mmio_write32(mmio, PCODE_MAILBOX, PCODE_READY | mbox); - err = xe_mmio_wait32(gt, PCODE_MAILBOX, PCODE_READY, 0, + err = xe_mmio_wait32(mmio, PCODE_MAILBOX, PCODE_READY, 0, timeout_ms * USEC_PER_MSEC, NULL, atomic); if (err) return err; if (return_data) { - *data0 = xe_mmio_read32(gt, PCODE_DATA0); + *data0 = xe_mmio_read32(mmio, PCODE_DATA0); if (data1) - *data1 = xe_mmio_read32(gt, PCODE_DATA1); + *data1 = xe_mmio_read32(mmio, PCODE_DATA1); } - return pcode_mailbox_status(gt); + return pcode_mailbox_status(tile); } -static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, +static int pcode_mailbox_rw(struct xe_tile *tile, u32 mbox, u32 *data0, u32 *data1, unsigned int timeout_ms, bool return_data, bool atomic) { - if (gt_to_xe(gt)->info.skip_pcode) + if (tile_to_xe(tile)->info.skip_pcode) return 0; - lockdep_assert_held(>->pcode.lock); + lockdep_assert_held(&tile->pcode.lock); - return __pcode_mailbox_rw(gt, mbox, data0, data1, timeout_ms, return_data, atomic); + return __pcode_mailbox_rw(tile, mbox, data0, data1, timeout_ms, return_data, atomic); } -int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 data, int timeout) +int xe_pcode_write_timeout(struct xe_tile *tile, u32 mbox, u32 data, int timeout) { int err; - mutex_lock(>->pcode.lock); - err = pcode_mailbox_rw(gt, mbox, &data, NULL, timeout, false, false); - mutex_unlock(>->pcode.lock); + mutex_lock(&tile->pcode.lock); + err = pcode_mailbox_rw(tile, mbox, &data, NULL, timeout, false, false); + mutex_unlock(&tile->pcode.lock); return err; } -int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1) +int xe_pcode_read(struct xe_tile *tile, u32 mbox, u32 *val, u32 *val1) { int err; - mutex_lock(>->pcode.lock); - err = pcode_mailbox_rw(gt, mbox, val, val1, 1, true, false); - mutex_unlock(>->pcode.lock); + mutex_lock(&tile->pcode.lock); + err = pcode_mailbox_rw(tile, mbox, val, val1, 1, true, false); + mutex_unlock(&tile->pcode.lock); return err; } -static int pcode_try_request(struct xe_gt *gt, u32 mbox, +static int pcode_try_request(struct xe_tile *tile, u32 mbox, u32 request, u32 reply_mask, u32 reply, u32 *status, bool atomic, int timeout_us, bool locked) { int slept, wait = 10; - xe_gt_assert(gt, timeout_us > 0); + xe_tile_assert(tile, timeout_us > 0); for (slept = 0; slept < timeout_us; slept += wait) { if (locked) - *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, + *status = pcode_mailbox_rw(tile, mbox, &request, NULL, 1, true, atomic); else - *status = __pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, + *status = __pcode_mailbox_rw(tile, mbox, &request, NULL, 1, true, atomic); if ((*status == 0) && ((request & reply_mask) == reply)) return 0; @@ -149,7 +149,7 @@ static int pcode_try_request(struct xe_gt *gt, u32 mbox, /** * xe_pcode_request - send PCODE request until acknowledgment - * @gt: gt + * @tile: tile * @mbox: PCODE mailbox ID the request is targeted for * @request: request ID * @reply_mask: mask used to check for request acknowledgment @@ -166,17 +166,17 @@ static int pcode_try_request(struct xe_gt *gt, u32 mbox, * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some * other error as reported by PCODE. */ -int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, - u32 reply_mask, u32 reply, int timeout_base_ms) +int xe_pcode_request(struct xe_tile *tile, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms) { u32 status; int ret; - xe_gt_assert(gt, timeout_base_ms <= 3); + xe_tile_assert(tile, timeout_base_ms <= 3); - mutex_lock(>->pcode.lock); + mutex_lock(&tile->pcode.lock); - ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status, + ret = pcode_try_request(tile, mbox, request, reply_mask, reply, &status, false, timeout_base_ms * 1000, true); if (!ret) goto out; @@ -191,20 +191,20 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, * requests, and for any quirks of the PCODE firmware that delays * the request completion. */ - drm_err(>_to_xe(gt)->drm, + drm_err(&tile_to_xe(tile)->drm, "PCODE timeout, retrying with preemption disabled\n"); preempt_disable(); - ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status, + ret = pcode_try_request(tile, mbox, request, reply_mask, reply, &status, true, 50 * 1000, true); preempt_enable(); out: - mutex_unlock(>->pcode.lock); + mutex_unlock(&tile->pcode.lock); return status ? status : ret; } /** * xe_pcode_init_min_freq_table - Initialize PCODE's QOS frequency table - * @gt: gt instance + * @tile: tile instance * @min_gt_freq: Minimal (RPn) GT frequency in units of 50MHz. * @max_gt_freq: Maximal (RP0) GT frequency in units of 50MHz. * @@ -227,30 +227,30 @@ out: * - -EACCES, "PCODE Rejected" * - -EPROTO, "Unknown" */ -int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq, +int xe_pcode_init_min_freq_table(struct xe_tile *tile, u32 min_gt_freq, u32 max_gt_freq) { int ret; u32 freq; - if (!gt_to_xe(gt)->info.has_llc) + if (!tile_to_xe(tile)->info.has_llc) return 0; if (max_gt_freq <= min_gt_freq) return -EINVAL; - mutex_lock(>->pcode.lock); + mutex_lock(&tile->pcode.lock); for (freq = min_gt_freq; freq <= max_gt_freq; freq++) { u32 data = freq << PCODE_FREQ_RING_RATIO_SHIFT | freq; - ret = pcode_mailbox_rw(gt, PCODE_WRITE_MIN_FREQ_TABLE, + ret = pcode_mailbox_rw(tile, PCODE_WRITE_MIN_FREQ_TABLE, &data, NULL, 1, false, false); if (ret) goto unlock; } unlock: - mutex_unlock(>->pcode.lock); + mutex_unlock(&tile->pcode.lock); return ret; } @@ -270,7 +270,7 @@ unlock: int xe_pcode_ready(struct xe_device *xe, bool locked) { u32 status, request = DGFX_GET_INIT_STATUS; - struct xe_gt *gt = xe_root_mmio_gt(xe); + struct xe_tile *tile = xe_device_get_root_tile(xe); int timeout_us = 180000000; /* 3 min */ int ret; @@ -281,15 +281,15 @@ int xe_pcode_ready(struct xe_device *xe, bool locked) return 0; if (locked) - mutex_lock(>->pcode.lock); + mutex_lock(&tile->pcode.lock); - ret = pcode_try_request(gt, DGFX_PCODE_STATUS, request, + ret = pcode_try_request(tile, DGFX_PCODE_STATUS, request, DGFX_INIT_STATUS_COMPLETE, DGFX_INIT_STATUS_COMPLETE, &status, false, timeout_us, locked); if (locked) - mutex_unlock(>->pcode.lock); + mutex_unlock(&tile->pcode.lock); if (ret) drm_err(&xe->drm, @@ -300,14 +300,14 @@ int xe_pcode_ready(struct xe_device *xe, bool locked) /** * xe_pcode_init: initialize components of PCODE - * @gt: gt instance + * @tile: tile instance * * This function initializes the xe_pcode component. * To be called once only during probe. */ -void xe_pcode_init(struct xe_gt *gt) +void xe_pcode_init(struct xe_tile *tile) { - drmm_mutex_init(>_to_xe(gt)->drm, >->pcode.lock); + drmm_mutex_init(&tile_to_xe(tile)->drm, &tile->pcode.lock); } /** diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h index 3f54c6d2a57d..ba33991d72a7 100644 --- a/drivers/gpu/drm/xe/xe_pcode.h +++ b/drivers/gpu/drm/xe/xe_pcode.h @@ -7,21 +7,21 @@ #define _XE_PCODE_H_ #include <linux/types.h> -struct xe_gt; +struct xe_tile; struct xe_device; -void xe_pcode_init(struct xe_gt *gt); +void xe_pcode_init(struct xe_tile *tile); int xe_pcode_probe_early(struct xe_device *xe); int xe_pcode_ready(struct xe_device *xe, bool locked); -int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq, +int xe_pcode_init_min_freq_table(struct xe_tile *tile, u32 min_gt_freq, u32 max_gt_freq); -int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1); -int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 val, +int xe_pcode_read(struct xe_tile *tile, u32 mbox, u32 *val, u32 *val1); +int xe_pcode_write_timeout(struct xe_tile *tile, u32 mbox, u32 val, int timeout_ms); -#define xe_pcode_write(gt, mbox, val) \ - xe_pcode_write_timeout(gt, mbox, val, 1) +#define xe_pcode_write(tile, mbox, val) \ + xe_pcode_write_timeout(tile, mbox, val, 1) -int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, +int xe_pcode_request(struct xe_tile *tile, u32 mbox, u32 request, u32 reply_mask, u32 reply, int timeout_ms); #define PCODE_MBOX(mbcmd, param1, param2)\ diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index de3b5df65e48..33eb039053e4 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -20,6 +20,7 @@ #include "xe_guc.h" #include "xe_irq.h" #include "xe_pcode.h" +#include "xe_trace.h" #include "xe_wa.h" /** @@ -69,12 +70,42 @@ */ #ifdef CONFIG_LOCKDEP -static struct lockdep_map xe_pm_runtime_lockdep_map = { - .name = "xe_pm_runtime_lockdep_map" +static struct lockdep_map xe_pm_runtime_d3cold_map = { + .name = "xe_rpm_d3cold_map" +}; + +static struct lockdep_map xe_pm_runtime_nod3cold_map = { + .name = "xe_rpm_nod3cold_map" }; #endif /** + * xe_rpm_reclaim_safe() - Whether runtime resume can be done from reclaim context + * @xe: The xe device. + * + * Return: true if it is safe to runtime resume from reclaim context. + * false otherwise. + */ +bool xe_rpm_reclaim_safe(const struct xe_device *xe) +{ + return !xe->d3cold.capable && !xe->info.has_sriov; +} + +static void xe_rpm_lockmap_acquire(const struct xe_device *xe) +{ + lock_map_acquire(xe_rpm_reclaim_safe(xe) ? + &xe_pm_runtime_nod3cold_map : + &xe_pm_runtime_d3cold_map); +} + +static void xe_rpm_lockmap_release(const struct xe_device *xe) +{ + lock_map_release(xe_rpm_reclaim_safe(xe) ? + &xe_pm_runtime_nod3cold_map : + &xe_pm_runtime_d3cold_map); +} + +/** * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle * @xe: xe device instance * @@ -87,21 +118,22 @@ int xe_pm_suspend(struct xe_device *xe) int err; drm_dbg(&xe->drm, "Suspending device\n"); + trace_xe_pm_suspend(xe, __builtin_return_address(0)); for_each_gt(gt, xe, id) xe_gt_suspend_prepare(gt); + xe_display_pm_suspend(xe); + /* FIXME: Super racey... */ err = xe_bo_evict_all(xe); if (err) goto err; - xe_display_pm_suspend(xe, false); - for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); if (err) { - xe_display_pm_resume(xe, false); + xe_display_pm_resume(xe); goto err; } } @@ -131,6 +163,7 @@ int xe_pm_resume(struct xe_device *xe) int err; drm_dbg(&xe->drm, "Resuming device\n"); + trace_xe_pm_resume(xe, __builtin_return_address(0)); for_each_tile(tile, xe, id) xe_wa_apply_tile_workarounds(tile); @@ -151,11 +184,11 @@ int xe_pm_resume(struct xe_device *xe) xe_irq_resume(xe); - xe_display_pm_resume(xe, false); - for_each_gt(gt, xe, id) xe_gt_resume(gt); + xe_display_pm_resume(xe); + err = xe_bo_restore_user(xe); if (err) goto err; @@ -326,6 +359,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe) u8 id; int err = 0; + trace_xe_pm_runtime_suspend(xe, __builtin_return_address(0)); /* Disable access_ongoing asserts and prevent recursive pm calls */ xe_pm_write_callback_task(xe, current); @@ -350,7 +384,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe) * annotation here and in xe_pm_runtime_get() lockdep will see * the potential lock inversion and give us a nice splat. */ - lock_map_acquire(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_acquire(xe); /* * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify @@ -362,11 +396,12 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_bo_runtime_pm_release_mmap_offset(bo); mutex_unlock(&xe->mem_access.vram_userfault.lock); + xe_display_pm_runtime_suspend(xe); + if (xe->d3cold.allowed) { err = xe_bo_evict_all(xe); if (err) goto out; - xe_display_pm_suspend(xe, true); } for_each_gt(gt, xe, id) { @@ -381,8 +416,8 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_display_pm_suspend_late(xe); out: if (err) - xe_display_pm_resume(xe, true); - lock_map_release(&xe_pm_runtime_lockdep_map); + xe_display_pm_runtime_resume(xe); + xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); return err; } @@ -399,10 +434,11 @@ int xe_pm_runtime_resume(struct xe_device *xe) u8 id; int err = 0; + trace_xe_pm_runtime_resume(xe, __builtin_return_address(0)); /* Disable access_ongoing asserts and prevent recursive pm calls */ xe_pm_write_callback_task(xe, current); - lock_map_acquire(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_acquire(xe); if (xe->d3cold.allowed) { err = xe_pcode_ready(xe, true); @@ -425,14 +461,16 @@ int xe_pm_runtime_resume(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_resume(gt); + xe_display_pm_runtime_resume(xe); + if (xe->d3cold.allowed) { - xe_display_pm_resume(xe, true); err = xe_bo_restore_user(xe); if (err) goto out; } + out: - lock_map_release(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); return err; } @@ -446,15 +484,37 @@ out: * stuff that can happen inside the runtime_resume callback by acquiring * a dummy lock (it doesn't protect anything and gets compiled out on * non-debug builds). Lockdep then only needs to see the - * xe_pm_runtime_lockdep_map -> runtime_resume callback once, and then can - * hopefully validate all the (callers_locks) -> xe_pm_runtime_lockdep_map. + * xe_pm_runtime_xxx_map -> runtime_resume callback once, and then can + * hopefully validate all the (callers_locks) -> xe_pm_runtime_xxx_map. * For example if the (callers_locks) are ever grabbed in the * runtime_resume callback, lockdep should give us a nice splat. */ -static void pm_runtime_lockdep_prime(void) +static void xe_rpm_might_enter_cb(const struct xe_device *xe) { - lock_map_acquire(&xe_pm_runtime_lockdep_map); - lock_map_release(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_acquire(xe); + xe_rpm_lockmap_release(xe); +} + +/* + * Prime the lockdep maps for known locking orders that need to + * be supported but that may not always occur on all systems. + */ +static void xe_pm_runtime_lockdep_prime(void) +{ + struct dma_resv lockdep_resv; + + dma_resv_init(&lockdep_resv); + lock_map_acquire(&xe_pm_runtime_d3cold_map); + /* D3Cold takes the dma_resv locks to evict bos */ + dma_resv_lock(&lockdep_resv, NULL); + dma_resv_unlock(&lockdep_resv); + lock_map_release(&xe_pm_runtime_d3cold_map); + + /* Shrinkers might like to wake up the device under reclaim. */ + fs_reclaim_acquire(GFP_KERNEL); + lock_map_acquire(&xe_pm_runtime_nod3cold_map); + lock_map_release(&xe_pm_runtime_nod3cold_map); + fs_reclaim_release(GFP_KERNEL); } /** @@ -463,12 +523,13 @@ static void pm_runtime_lockdep_prime(void) */ void xe_pm_runtime_get(struct xe_device *xe) { + trace_xe_pm_runtime_get(xe, __builtin_return_address(0)); pm_runtime_get_noresume(xe->drm.dev); if (xe_pm_read_callback_task(xe) == current) return; - pm_runtime_lockdep_prime(); + xe_rpm_might_enter_cb(xe); pm_runtime_resume(xe->drm.dev); } @@ -478,6 +539,7 @@ void xe_pm_runtime_get(struct xe_device *xe) */ void xe_pm_runtime_put(struct xe_device *xe) { + trace_xe_pm_runtime_put(xe, __builtin_return_address(0)); if (xe_pm_read_callback_task(xe) == current) { pm_runtime_put_noidle(xe->drm.dev); } else { @@ -495,10 +557,11 @@ void xe_pm_runtime_put(struct xe_device *xe) */ int xe_pm_runtime_get_ioctl(struct xe_device *xe) { + trace_xe_pm_runtime_get_ioctl(xe, __builtin_return_address(0)); if (WARN_ON(xe_pm_read_callback_task(xe) == current)) return -ELOOP; - pm_runtime_lockdep_prime(); + xe_rpm_might_enter_cb(xe); return pm_runtime_get_sync(xe->drm.dev); } @@ -532,6 +595,22 @@ bool xe_pm_runtime_get_if_in_use(struct xe_device *xe) return pm_runtime_get_if_in_use(xe->drm.dev) > 0; } +/* + * Very unreliable! Should only be used to suppress the false positive case + * in the missing outer rpm protection warning. + */ +static bool xe_pm_suspending_or_resuming(struct xe_device *xe) +{ +#ifdef CONFIG_PM + struct device *dev = xe->drm.dev; + + return dev->power.runtime_status == RPM_SUSPENDING || + dev->power.runtime_status == RPM_RESUMING; +#else + return false; +#endif +} + /** * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming * @xe: xe device instance @@ -548,8 +627,11 @@ void xe_pm_runtime_get_noresume(struct xe_device *xe) ref = xe_pm_runtime_get_if_in_use(xe); - if (drm_WARN(&xe->drm, !ref, "Missing outer runtime PM protection\n")) + if (!ref) { pm_runtime_get_noresume(xe->drm.dev); + drm_WARN(&xe->drm, !xe_pm_suspending_or_resuming(xe), + "Missing outer runtime PM protection\n"); + } } /** @@ -566,7 +648,7 @@ bool xe_pm_runtime_resume_and_get(struct xe_device *xe) return true; } - pm_runtime_lockdep_prime(); + xe_rpm_might_enter_cb(xe); return pm_runtime_resume_and_get(xe->drm.dev) >= 0; } @@ -658,3 +740,14 @@ void xe_pm_d3cold_allowed_toggle(struct xe_device *xe) drm_dbg(&xe->drm, "d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed)); } + +/** + * xe_pm_module_init() - Perform xe_pm specific module initialization. + * + * Return: 0 on success. Currently doesn't fail. + */ +int __init xe_pm_module_init(void) +{ + xe_pm_runtime_lockdep_prime(); + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 104a21ae6dfd..998d1ed64556 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -31,6 +31,8 @@ bool xe_pm_runtime_resume_and_get(struct xe_device *xe); void xe_pm_assert_unbounded_bridge(struct xe_device *xe); int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold); void xe_pm_d3cold_allowed_toggle(struct xe_device *xe); +bool xe_rpm_reclaim_safe(const struct xe_device *xe); struct task_struct *xe_pm_read_callback_task(struct xe_device *xe); +int xe_pm_module_init(void); #endif diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index e8b8ae5c6485..83fbeea5aa20 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -17,10 +17,16 @@ static void preempt_fence_work_func(struct work_struct *w) container_of(w, typeof(*pfence), preempt_work); struct xe_exec_queue *q = pfence->q; - if (pfence->error) + if (pfence->error) { dma_fence_set_error(&pfence->base, pfence->error); - else - q->ops->suspend_wait(q); + } else if (!q->ops->reset_status(q)) { + int err = q->ops->suspend_wait(q); + + if (err) + dma_fence_set_error(&pfence->base, err); + } else { + dma_fence_set_error(&pfence->base, -ENOENT); + } dma_fence_signal(&pfence->base); /* @@ -128,8 +134,9 @@ xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q, { list_del_init(&pfence->link); pfence->q = xe_exec_queue_get(q); + spin_lock_init(&pfence->lock); dma_fence_init(&pfence->base, &preempt_fence_ops, - &q->lr.lock, context, seqno); + &pfence->lock, context, seqno); return &pfence->base; } diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h index b54b5c29b533..312c3372a49f 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence_types.h +++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h @@ -25,6 +25,8 @@ struct xe_preempt_fence { struct xe_exec_queue *q; /** @preempt_work: work struct which issues preemption */ struct work_struct preempt_work; + /** @lock: dma-fence fence lock */ + spinlock_t lock; /** @error: preempt fence is in error state */ int error; }; diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 31a751a5de3f..f27f579f4d85 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -3,18 +3,23 @@ * Copyright © 2022 Intel Corporation */ +#include <linux/dma-fence-array.h> + #include "xe_pt.h" #include "regs/xe_gtt_defs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_drm_client.h" +#include "xe_exec_queue.h" #include "xe_gt.h" #include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" #include "xe_pt_types.h" #include "xe_pt_walk.h" #include "xe_res_cursor.h" +#include "xe_sched_job.h" +#include "xe_sync.h" #include "xe_trace.h" #include "xe_ttm_stolen_mgr.h" #include "xe_vm.h" @@ -325,6 +330,7 @@ xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent, entry->pt = parent; entry->flags = 0; entry->qwords = 0; + entry->pt_bo->update_index = -1; if (alloc_entries) { entry->pt_entries = kmalloc_array(XE_PDES, @@ -842,19 +848,27 @@ xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *t } } -static void xe_pt_abort_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries) +static void xe_pt_cancel_bind(struct xe_vma *vma, + struct xe_vm_pgtable_update *entries, + u32 num_entries) { u32 i, j; for (i = 0; i < num_entries; i++) { - if (!entries[i].pt_entries) + struct xe_pt *pt = entries[i].pt; + + if (!pt) continue; - for (j = 0; j < entries[i].qwords; j++) - xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL); + if (pt->level) { + for (j = 0; j < entries[i].qwords; j++) + xe_pt_destroy(entries[i].pt_entries[j].pt, + xe_vma_vm(vma)->flags, NULL); + } + kfree(entries[i].pt_entries); + entries[i].pt_entries = NULL; + entries[i].qwords = 0; } } @@ -864,18 +878,15 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma) lockdep_assert_held(&vm->lock); - if (xe_vma_is_userptr(vma)) - lockdep_assert_held_read(&vm->userptr.notifier_lock); - else if (!xe_vma_is_null(vma)) + if (!xe_vma_is_userptr(vma) && !xe_vma_is_null(vma)) dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv); xe_vm_assert_held(vm); } -static void xe_pt_commit_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries, bool rebind, - struct llist_head *deferred) +static void xe_pt_commit(struct xe_vma *vma, + struct xe_vm_pgtable_update *entries, + u32 num_entries, struct llist_head *deferred) { u32 i, j; @@ -883,31 +894,90 @@ static void xe_pt_commit_bind(struct xe_vma *vma, for (i = 0; i < num_entries; i++) { struct xe_pt *pt = entries[i].pt; + + if (!pt->level) + continue; + + for (j = 0; j < entries[i].qwords; j++) { + struct xe_pt *oldpte = entries[i].pt_entries[j].pt; + + xe_pt_destroy(oldpte, xe_vma_vm(vma)->flags, deferred); + } + } +} + +static void xe_pt_abort_bind(struct xe_vma *vma, + struct xe_vm_pgtable_update *entries, + u32 num_entries, bool rebind) +{ + int i, j; + + xe_pt_commit_locks_assert(vma); + + for (i = num_entries - 1; i >= 0; --i) { + struct xe_pt *pt = entries[i].pt; struct xe_pt_dir *pt_dir; if (!rebind) - pt->num_live += entries[i].qwords; + pt->num_live -= entries[i].qwords; - if (!pt->level) { - kfree(entries[i].pt_entries); + if (!pt->level) continue; + + pt_dir = as_xe_pt_dir(pt); + for (j = 0; j < entries[i].qwords; j++) { + u32 j_ = j + entries[i].ofs; + struct xe_pt *newpte = xe_pt_entry(pt_dir, j_); + struct xe_pt *oldpte = entries[i].pt_entries[j].pt; + + pt_dir->children[j_] = oldpte ? &oldpte->base : 0; + xe_pt_destroy(newpte, xe_vma_vm(vma)->flags, NULL); } + } +} + +static void xe_pt_commit_prepare_bind(struct xe_vma *vma, + struct xe_vm_pgtable_update *entries, + u32 num_entries, bool rebind) +{ + u32 i, j; + + xe_pt_commit_locks_assert(vma); + + for (i = 0; i < num_entries; i++) { + struct xe_pt *pt = entries[i].pt; + struct xe_pt_dir *pt_dir; + + if (!rebind) + pt->num_live += entries[i].qwords; + + if (!pt->level) + continue; pt_dir = as_xe_pt_dir(pt); for (j = 0; j < entries[i].qwords; j++) { u32 j_ = j + entries[i].ofs; struct xe_pt *newpte = entries[i].pt_entries[j].pt; + struct xe_pt *oldpte = NULL; if (xe_pt_entry(pt_dir, j_)) - xe_pt_destroy(xe_pt_entry(pt_dir, j_), - xe_vma_vm(vma)->flags, deferred); + oldpte = xe_pt_entry(pt_dir, j_); pt_dir->children[j_] = &newpte->base; + entries[i].pt_entries[j].pt = oldpte; } - kfree(entries[i].pt_entries); } } +static void xe_pt_free_bind(struct xe_vm_pgtable_update *entries, + u32 num_entries) +{ + u32 i; + + for (i = 0; i < num_entries; i++) + kfree(entries[i].pt_entries); +} + static int xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, struct xe_vm_pgtable_update *entries, u32 *num_entries) @@ -918,20 +988,19 @@ xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, err = xe_pt_stage_bind(tile, vma, entries, num_entries); if (!err) xe_tile_assert(tile, *num_entries); - else /* abort! */ - xe_pt_abort_bind(vma, entries, *num_entries); return err; } static void xe_vm_dbg_print_entries(struct xe_device *xe, const struct xe_vm_pgtable_update *entries, - unsigned int num_entries) + unsigned int num_entries, bool bind) #if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) { unsigned int i; - vm_dbg(&xe->drm, "%u entries to update\n", num_entries); + vm_dbg(&xe->drm, "%s: %u entries to update\n", bind ? "bind" : "unbind", + num_entries); for (i = 0; i < num_entries; i++) { const struct xe_vm_pgtable_update *entry = &entries[i]; struct xe_pt *xe_pt = entry->pt; @@ -952,66 +1021,108 @@ static void xe_vm_dbg_print_entries(struct xe_device *xe, {} #endif -#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT - -static int xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) +static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs) { - u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2; - static u32 count; + int i; - if (count++ % divisor == divisor - 1) { - struct xe_vm *vm = xe_vma_vm(&uvma->vma); + for (i = 0; i < num_syncs; i++) { + struct dma_fence *fence = syncs[i].fence; - uvma->userptr.divisor = divisor << 1; - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - return true; + if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &fence->flags)) + return false; } - return false; + return true; } -#else +static int job_test_add_deps(struct xe_sched_job *job, + struct dma_resv *resv, + enum dma_resv_usage usage) +{ + if (!job) { + if (!dma_resv_test_signaled(resv, usage)) + return -ETIME; -static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) + return 0; + } + + return xe_sched_job_add_deps(job, resv, usage); +} + +static int vma_add_deps(struct xe_vma *vma, struct xe_sched_job *job) { - return false; + struct xe_bo *bo = xe_vma_bo(vma); + + xe_bo_assert_held(bo); + + if (bo && !bo->vm) + return job_test_add_deps(job, bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL); + + return 0; } -#endif +static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op, + struct xe_sched_job *job) +{ + int err = 0; -/** - * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks - * @base: Base we derive from. - * @bind: Whether this is a bind or an unbind operation. A bind operation - * makes the pre-commit callback error with -EAGAIN if it detects a - * pending invalidation. - * @locked: Whether the pre-commit callback locked the userptr notifier lock - * and it needs unlocking. - */ -struct xe_pt_migrate_pt_update { - struct xe_migrate_pt_update base; - bool bind; - bool locked; -}; + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + if (!op->map.immediate && xe_vm_in_fault_mode(vm)) + break; + + err = vma_add_deps(op->map.vma, job); + break; + case DRM_GPUVA_OP_REMAP: + if (op->remap.prev) + err = vma_add_deps(op->remap.prev, job); + if (!err && op->remap.next) + err = vma_add_deps(op->remap.next, job); + break; + case DRM_GPUVA_OP_UNMAP: + break; + case DRM_GPUVA_OP_PREFETCH: + err = vma_add_deps(gpuva_to_vma(op->base.prefetch.va), job); + break; + default: + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); + } + + return err; +} -/* - * This function adds the needed dependencies to a page-table update job - * to make sure racing jobs for separate bind engines don't race writing - * to the same page-table range, wreaking havoc. Initially use a single - * fence for the entire VM. An optimization would use smaller granularity. - */ static int xe_pt_vm_dependencies(struct xe_sched_job *job, - struct xe_range_fence_tree *rftree, - u64 start, u64 last) + struct xe_vm *vm, + struct xe_vma_ops *vops, + struct xe_vm_pgtable_update_ops *pt_update_ops, + struct xe_range_fence_tree *rftree) { struct xe_range_fence *rtfence; struct dma_fence *fence; - int err; + struct xe_vma_op *op; + int err = 0, i; + + xe_vm_assert_held(vm); + + if (!job && !no_in_syncs(vops->syncs, vops->num_syncs)) + return -ETIME; - rtfence = xe_range_fence_tree_first(rftree, start, last); + if (!job && !xe_exec_queue_is_idle(pt_update_ops->q)) + return -ETIME; + + if (pt_update_ops->wait_vm_bookkeep || pt_update_ops->wait_vm_kernel) { + err = job_test_add_deps(job, xe_vm_resv(vm), + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_BOOKKEEP : + DMA_RESV_USAGE_KERNEL); + if (err) + return err; + } + + rtfence = xe_range_fence_tree_first(rftree, pt_update_ops->start, + pt_update_ops->last); while (rtfence) { fence = rtfence->fence; @@ -1029,80 +1140,175 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job, return err; } - rtfence = xe_range_fence_tree_next(rtfence, start, last); + rtfence = xe_range_fence_tree_next(rtfence, + pt_update_ops->start, + pt_update_ops->last); } - return 0; + list_for_each_entry(op, &vops->list, link) { + err = op_add_deps(vm, op, job); + if (err) + return err; + } + + if (!(pt_update_ops->q->flags & EXEC_QUEUE_FLAG_KERNEL)) { + if (job) + err = xe_sched_job_last_fence_add_dep(job, vm); + else + err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); + } + + for (i = 0; job && !err && i < vops->num_syncs; i++) + err = xe_sync_entry_add_deps(&vops->syncs[i], job); + + return err; } static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) { - struct xe_range_fence_tree *rftree = - &xe_vma_vm(pt_update->vma)->rftree[pt_update->tile_id]; + struct xe_vma_ops *vops = pt_update->vops; + struct xe_vm *vm = vops->vm; + struct xe_range_fence_tree *rftree = &vm->rftree[pt_update->tile_id]; + struct xe_vm_pgtable_update_ops *pt_update_ops = + &vops->pt_update_ops[pt_update->tile_id]; + + return xe_pt_vm_dependencies(pt_update->job, vm, pt_update->vops, + pt_update_ops, rftree); +} + +#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT - return xe_pt_vm_dependencies(pt_update->job, rftree, - pt_update->start, pt_update->last); +static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) +{ + u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2; + static u32 count; + + if (count++ % divisor == divisor - 1) { + uvma->userptr.divisor = divisor << 1; + return true; + } + + return false; } -static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) +#else + +static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) { - struct xe_pt_migrate_pt_update *userptr_update = - container_of(pt_update, typeof(*userptr_update), base); - struct xe_userptr_vma *uvma = to_userptr_vma(pt_update->vma); - unsigned long notifier_seq = uvma->userptr.notifier_seq; - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - int err = xe_pt_vm_dependencies(pt_update->job, - &vm->rftree[pt_update->tile_id], - pt_update->start, - pt_update->last); + return false; +} - if (err) - return err; +#endif - userptr_update->locked = false; +static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, + struct xe_vm_pgtable_update_ops *pt_update) +{ + struct xe_userptr_vma *uvma; + unsigned long notifier_seq; - /* - * Wait until nobody is running the invalidation notifier, and - * since we're exiting the loop holding the notifier lock, - * nobody can proceed invalidating either. - * - * Note that we don't update the vma->userptr.notifier_seq since - * we don't update the userptr pages. - */ - do { - down_read(&vm->userptr.notifier_lock); - if (!mmu_interval_read_retry(&uvma->userptr.notifier, - notifier_seq)) - break; + lockdep_assert_held_read(&vm->userptr.notifier_lock); - up_read(&vm->userptr.notifier_lock); + if (!xe_vma_is_userptr(vma)) + return 0; - if (userptr_update->bind) - return -EAGAIN; + uvma = to_userptr_vma(vma); + notifier_seq = uvma->userptr.notifier_seq; - notifier_seq = mmu_interval_read_begin(&uvma->userptr.notifier); - } while (true); + if (uvma->userptr.initial_bind && !xe_vm_in_fault_mode(vm)) + return 0; - /* Inject errors to test_whether they are handled correctly */ - if (userptr_update->bind && xe_pt_userptr_inject_eagain(uvma)) { - up_read(&vm->userptr.notifier_lock); + if (!mmu_interval_read_retry(&uvma->userptr.notifier, + notifier_seq) && + !xe_pt_userptr_inject_eagain(uvma)) + return 0; + + if (xe_vm_in_fault_mode(vm)) { return -EAGAIN; - } + } else { + spin_lock(&vm->userptr.invalidated_lock); + list_move_tail(&uvma->userptr.invalidate_link, + &vm->userptr.invalidated); + spin_unlock(&vm->userptr.invalidated_lock); - userptr_update->locked = true; + if (xe_vm_in_preempt_fence_mode(vm)) { + struct dma_resv_iter cursor; + struct dma_fence *fence; + long err; + + dma_resv_iter_begin(&cursor, xe_vm_resv(vm), + DMA_RESV_USAGE_BOOKKEEP); + dma_resv_for_each_fence_unlocked(&cursor, fence) + dma_fence_enable_sw_signaling(fence); + dma_resv_iter_end(&cursor); + + err = dma_resv_wait_timeout(xe_vm_resv(vm), + DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); + XE_WARN_ON(err <= 0); + } + } return 0; } -static const struct xe_migrate_pt_update_ops bind_ops = { - .populate = xe_vm_populate_pgtable, - .pre_commit = xe_pt_pre_commit, -}; +static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op, + struct xe_vm_pgtable_update_ops *pt_update) +{ + int err = 0; -static const struct xe_migrate_pt_update_ops userptr_bind_ops = { - .populate = xe_vm_populate_pgtable, - .pre_commit = xe_pt_userptr_pre_commit, -}; + lockdep_assert_held_read(&vm->userptr.notifier_lock); + + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + if (!op->map.immediate && xe_vm_in_fault_mode(vm)) + break; + + err = vma_check_userptr(vm, op->map.vma, pt_update); + break; + case DRM_GPUVA_OP_REMAP: + if (op->remap.prev) + err = vma_check_userptr(vm, op->remap.prev, pt_update); + if (!err && op->remap.next) + err = vma_check_userptr(vm, op->remap.next, pt_update); + break; + case DRM_GPUVA_OP_UNMAP: + break; + case DRM_GPUVA_OP_PREFETCH: + err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va), + pt_update); + break; + default: + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); + } + + return err; +} + +static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) +{ + struct xe_vm *vm = pt_update->vops->vm; + struct xe_vma_ops *vops = pt_update->vops; + struct xe_vm_pgtable_update_ops *pt_update_ops = + &vops->pt_update_ops[pt_update->tile_id]; + struct xe_vma_op *op; + int err; + + err = xe_pt_pre_commit(pt_update); + if (err) + return err; + + down_read(&vm->userptr.notifier_lock); + + list_for_each_entry(op, &vops->list, link) { + err = op_check_userptr(vm, op, pt_update_ops); + if (err) { + up_read(&vm->userptr.notifier_lock); + break; + } + } + + return err; +} struct invalidation_fence { struct xe_gt_tlb_invalidation_fence base; @@ -1144,10 +1350,10 @@ static void invalidation_fence_work_func(struct work_struct *w) ifence->end, ifence->asid); } -static int invalidation_fence_init(struct xe_gt *gt, - struct invalidation_fence *ifence, - struct dma_fence *fence, - u64 start, u64 end, u32 asid) +static void invalidation_fence_init(struct xe_gt *gt, + struct invalidation_fence *ifence, + struct dma_fence *fence, + u64 start, u64 end, u32 asid) { int ret; @@ -1172,192 +1378,6 @@ static int invalidation_fence_init(struct xe_gt *gt, } xe_gt_assert(gt, !ret || ret == -ENOENT); - - return ret && ret != -ENOENT ? ret : 0; -} - -static void xe_pt_calc_rfence_interval(struct xe_vma *vma, - struct xe_pt_migrate_pt_update *update, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - int i, level = 0; - - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - update->base.start = ALIGN_DOWN(xe_vma_start(vma), - 0x1ull << xe_pt_shift(level)); - update->base.last = ALIGN(xe_vma_end(vma), - 0x1ull << xe_pt_shift(level)) - 1; -} - -/** - * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma - * address range. - * @tile: The tile to bind for. - * @vma: The vma to bind. - * @q: The exec_queue with which to do pipelined page-table updates. - * @syncs: Entries to sync on before binding the built tree to the live vm tree. - * @num_syncs: Number of @sync entries. - * @rebind: Whether we're rebinding this vma to the same address range without - * an unbind in-between. - * - * This function builds a page-table tree (see xe_pt_stage_bind() for more - * information on page-table building), and the xe_vm_pgtable_update entries - * abstracting the operations needed to attach it to the main vm tree. It - * then takes the relevant locks and updates the metadata side of the main - * vm tree and submits the operations for pipelined attachment of the - * gpu page-table to the vm main tree, (which can be done either by the - * cpu and the GPU). - * - * Return: A valid dma-fence representing the pipelined attachment operation - * on success, an error pointer on error. - */ -struct dma_fence * -__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs, - bool rebind) -{ - struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; - struct xe_pt_migrate_pt_update bind_pt_update = { - .base = { - .ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops, - .vma = vma, - .tile_id = tile->id, - }, - .bind = true, - }; - struct xe_vm *vm = xe_vma_vm(vma); - u32 num_entries; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL; - struct xe_range_fence *rfence; - int err; - - bind_pt_update.locked = false; - xe_bo_assert_held(xe_vma_bo(vma)); - xe_vm_assert_held(vm); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx) engine %p.\n", - xe_vma_start(vma), xe_vma_end(vma), q); - - err = xe_pt_prepare_bind(tile, vma, entries, &num_entries); - if (err) - goto err; - - err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); - if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); - if (err) - goto err; - - xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); - - xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); - xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries, - num_entries); - - /* - * If rebind, we have to invalidate TLB on !LR vms to invalidate - * cached PTEs point to freed memory. on LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * on !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) - return ERR_PTR(-ENOMEM); - } else if (rebind && !xe_vm_in_lr_mode(vm)) { - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - } - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - kfree(ifence); - return ERR_PTR(-ENOMEM); - } - - fence = xe_migrate_update_pgtables(tile->migrate, - vm, xe_vma_bo(vma), q, - entries, num_entries, - syncs, num_syncs, - &bind_pt_update.base); - if (!IS_ERR(fence)) { - bool last_munmap_rebind = vma->gpuva.flags & XE_VMA_LAST_REBIND; - LLIST_HEAD(deferred); - int err; - - err = xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - bind_pt_update.base.start, - bind_pt_update.base.last, fence); - if (err) - dma_fence_wait(fence, false); - - /* TLB invalidation must be done before signaling rebind */ - if (ifence) { - int err = invalidation_fence_init(tile->primary_gt, - ifence, fence, - xe_vma_start(vma), - xe_vma_end(vma), - xe_vma_vm(vma)->usm.asid); - if (err) { - dma_fence_put(fence); - kfree(ifence); - return ERR_PTR(err); - } - fence = &ifence->base.base; - } - - /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(xe_vm_resv(vm), fence, rebind || - last_munmap_rebind ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - DMA_RESV_USAGE_BOOKKEEP); - xe_pt_commit_bind(vma, entries, num_entries, rebind, - bind_pt_update.locked ? &deferred : NULL); - - /* This vma is live (again?) now */ - vma->tile_present |= BIT(tile->id); - - if (bind_pt_update.locked) { - to_userptr_vma(vma)->userptr.initial_bind = true; - up_read(&vm->userptr.notifier_lock); - xe_bo_put_commit(&deferred); - } - if (!rebind && last_munmap_rebind && - xe_vm_in_preempt_fence_mode(vm)) - xe_vm_queue_rebind_worker(vm); - } else { - kfree(rfence); - kfree(ifence); - if (bind_pt_update.locked) - up_read(&vm->userptr.notifier_lock); - xe_pt_abort_bind(vma, entries, num_entries); - } - - return fence; - -err: - return ERR_PTR(err); } struct xe_pt_stage_unbind_walk { @@ -1442,6 +1462,7 @@ xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset, struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); pgoff_t end_offset; u64 size = 1ull << walk->shifts[--level]; + int err; if (!IS_ALIGNED(addr, size)) addr = xe_walk->modified_start; @@ -1457,7 +1478,10 @@ xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset, &end_offset)) return 0; - (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false); + err = xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, true); + if (err) + return err; + xe_walk->wupd.updates[level].update->qwords = end_offset - offset; return 0; @@ -1510,8 +1534,8 @@ xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, void *ptr, u32 qword_ofs, u32 num_qwords, const struct xe_vm_pgtable_update *update) { - struct xe_vma *vma = pt_update->vma; - u64 empty = __xe_pt_empty_pte(tile, xe_vma_vm(vma), update->pt->level); + struct xe_vm *vm = pt_update->vops->vm; + u64 empty = __xe_pt_empty_pte(tile, vm, update->pt->level); int i; if (map && map->is_iomem) @@ -1525,181 +1549,644 @@ xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, memset64(ptr, empty, num_qwords); } +static void xe_pt_abort_unbind(struct xe_vma *vma, + struct xe_vm_pgtable_update *entries, + u32 num_entries) +{ + int i, j; + + xe_pt_commit_locks_assert(vma); + + for (i = num_entries - 1; i >= 0; --i) { + struct xe_vm_pgtable_update *entry = &entries[i]; + struct xe_pt *pt = entry->pt; + struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); + + pt->num_live += entry->qwords; + + if (!pt->level) + continue; + + for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) + pt_dir->children[j] = + entries[i].pt_entries[j - entry->ofs].pt ? + &entries[i].pt_entries[j - entry->ofs].pt->base : NULL; + } +} + static void -xe_pt_commit_unbind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 num_entries, - struct llist_head *deferred) +xe_pt_commit_prepare_unbind(struct xe_vma *vma, + struct xe_vm_pgtable_update *entries, + u32 num_entries) { - u32 j; + int i, j; xe_pt_commit_locks_assert(vma); - for (j = 0; j < num_entries; ++j) { - struct xe_vm_pgtable_update *entry = &entries[j]; + for (i = 0; i < num_entries; ++i) { + struct xe_vm_pgtable_update *entry = &entries[i]; struct xe_pt *pt = entry->pt; + struct xe_pt_dir *pt_dir; pt->num_live -= entry->qwords; - if (pt->level) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - u32 i; + if (!pt->level) + continue; - for (i = entry->ofs; i < entry->ofs + entry->qwords; - i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), - xe_vma_vm(vma)->flags, deferred); + pt_dir = as_xe_pt_dir(pt); + for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) { + entry->pt_entries[j - entry->ofs].pt = + xe_pt_entry(pt_dir, j); + pt_dir->children[j] = NULL; + } + } +} - pt_dir->children[i] = NULL; - } +static void +xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops, + struct xe_vma *vma) +{ + u32 current_op = pt_update_ops->current_op; + struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; + int i, level = 0; + u64 start, last; + + for (i = 0; i < pt_op->num_entries; i++) { + const struct xe_vm_pgtable_update *entry = &pt_op->entries[i]; + + if (entry->pt->level > level) + level = entry->pt->level; + } + + /* Greedy (non-optimal) calculation but simple */ + start = ALIGN_DOWN(xe_vma_start(vma), 0x1ull << xe_pt_shift(level)); + last = ALIGN(xe_vma_end(vma), 0x1ull << xe_pt_shift(level)) - 1; + + if (start < pt_update_ops->start) + pt_update_ops->start = start; + if (last > pt_update_ops->last) + pt_update_ops->last = last; +} + +static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma) +{ + int shift = xe_device_get_root_tile(xe)->media_gt ? 1 : 0; + + if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, + xe->info.tile_count << shift); + + return 0; +} + +static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, + struct xe_vm_pgtable_update_ops *pt_update_ops, + struct xe_vma *vma) +{ + u32 current_op = pt_update_ops->current_op; + struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; + int err; + + xe_bo_assert_held(xe_vma_bo(vma)); + + vm_dbg(&xe_vma_vm(vma)->xe->drm, + "Preparing bind, with range [%llx...%llx)\n", + xe_vma_start(vma), xe_vma_end(vma) - 1); + + pt_op->vma = NULL; + pt_op->bind = true; + pt_op->rebind = BIT(tile->id) & vma->tile_present; + + err = vma_reserve_fences(tile_to_xe(tile), vma); + if (err) + return err; + + err = xe_pt_prepare_bind(tile, vma, pt_op->entries, + &pt_op->num_entries); + if (!err) { + xe_tile_assert(tile, pt_op->num_entries <= + ARRAY_SIZE(pt_op->entries)); + xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, + pt_op->num_entries, true); + + xe_pt_update_ops_rfence_interval(pt_update_ops, vma); + ++pt_update_ops->current_op; + pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); + + /* + * If rebind, we have to invalidate TLB on !LR vms to invalidate + * cached PTEs point to freed memory. On LR vms this is done + * automatically when the context is re-enabled by the rebind worker, + * or in fault mode it was invalidated on PTE zapping. + * + * If !rebind, and scratch enabled VMs, there is a chance the scratch + * PTE is already cached in the TLB so it needs to be invalidated. + * On !LR VMs this is done in the ring ops preceding a batch, but on + * non-faulting LR, in particular on user-space batch buffer chaining, + * it needs to be done here. + */ + if ((!pt_op->rebind && xe_vm_has_scratch(vm) && + xe_vm_in_preempt_fence_mode(vm))) + pt_update_ops->needs_invalidation = true; + else if (pt_op->rebind && !xe_vm_in_lr_mode(vm)) + /* We bump also if batch_invalidate_tlb is true */ + vm->tlb_flush_seqno++; + + vma->tile_staged |= BIT(tile->id); + pt_op->vma = vma; + xe_pt_commit_prepare_bind(vma, pt_op->entries, + pt_op->num_entries, pt_op->rebind); + } else { + xe_pt_cancel_bind(vma, pt_op->entries, pt_op->num_entries); + } + + return err; +} + +static int unbind_op_prepare(struct xe_tile *tile, + struct xe_vm_pgtable_update_ops *pt_update_ops, + struct xe_vma *vma) +{ + u32 current_op = pt_update_ops->current_op; + struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; + int err; + + if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id))) + return 0; + + xe_bo_assert_held(xe_vma_bo(vma)); + + vm_dbg(&xe_vma_vm(vma)->xe->drm, + "Preparing unbind, with range [%llx...%llx)\n", + xe_vma_start(vma), xe_vma_end(vma) - 1); + + /* + * Wait for invalidation to complete. Can corrupt internal page table + * state if an invalidation is running while preparing an unbind. + */ + if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma))) + mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier); + + pt_op->vma = vma; + pt_op->bind = false; + pt_op->rebind = false; + + err = vma_reserve_fences(tile_to_xe(tile), vma); + if (err) + return err; + + pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries); + + xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, + pt_op->num_entries, false); + xe_pt_update_ops_rfence_interval(pt_update_ops, vma); + ++pt_update_ops->current_op; + pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); + pt_update_ops->needs_invalidation = true; + + xe_pt_commit_prepare_unbind(vma, pt_op->entries, pt_op->num_entries); + + return 0; +} + +static int op_prepare(struct xe_vm *vm, + struct xe_tile *tile, + struct xe_vm_pgtable_update_ops *pt_update_ops, + struct xe_vma_op *op) +{ + int err = 0; + + xe_vm_assert_held(vm); + + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + if (!op->map.immediate && xe_vm_in_fault_mode(vm)) + break; + + err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma); + pt_update_ops->wait_vm_kernel = true; + break; + case DRM_GPUVA_OP_REMAP: + err = unbind_op_prepare(tile, pt_update_ops, + gpuva_to_vma(op->base.remap.unmap->va)); + + if (!err && op->remap.prev) { + err = bind_op_prepare(vm, tile, pt_update_ops, + op->remap.prev); + pt_update_ops->wait_vm_bookkeep = true; } + if (!err && op->remap.next) { + err = bind_op_prepare(vm, tile, pt_update_ops, + op->remap.next); + pt_update_ops->wait_vm_bookkeep = true; + } + break; + case DRM_GPUVA_OP_UNMAP: + err = unbind_op_prepare(tile, pt_update_ops, + gpuva_to_vma(op->base.unmap.va)); + break; + case DRM_GPUVA_OP_PREFETCH: + err = bind_op_prepare(vm, tile, pt_update_ops, + gpuva_to_vma(op->base.prefetch.va)); + pt_update_ops->wait_vm_kernel = true; + break; + default: + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); } + + return err; } -static const struct xe_migrate_pt_update_ops unbind_ops = { - .populate = xe_migrate_clear_pgtable_callback, +static void +xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops) +{ + init_llist_head(&pt_update_ops->deferred); + pt_update_ops->start = ~0x0ull; + pt_update_ops->last = 0x0ull; +} + +/** + * xe_pt_update_ops_prepare() - Prepare PT update operations + * @tile: Tile of PT update operations + * @vops: VMA operationa + * + * Prepare PT update operations which includes updating internal PT state, + * allocate memory for page tables, populate page table being pruned in, and + * create PT update operations for leaf insertion / removal. + * + * Return: 0 on success, negative error code on error. + */ +int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) +{ + struct xe_vm_pgtable_update_ops *pt_update_ops = + &vops->pt_update_ops[tile->id]; + struct xe_vma_op *op; + int shift = tile->media_gt ? 1 : 0; + int err; + + lockdep_assert_held(&vops->vm->lock); + xe_vm_assert_held(vops->vm); + + xe_pt_update_ops_init(pt_update_ops); + + err = dma_resv_reserve_fences(xe_vm_resv(vops->vm), + tile_to_xe(tile)->info.tile_count << shift); + if (err) + return err; + + list_for_each_entry(op, &vops->list, link) { + err = op_prepare(vops->vm, tile, pt_update_ops, op); + + if (err) + return err; + } + + xe_tile_assert(tile, pt_update_ops->current_op <= + pt_update_ops->num_ops); + +#ifdef TEST_VM_OPS_ERROR + if (vops->inject_error && + vops->vm->xe->vm_inject_error_position == FORCE_OP_ERROR_PREPARE) + return -ENOSPC; +#endif + + return 0; +} + +static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, + struct xe_vm_pgtable_update_ops *pt_update_ops, + struct xe_vma *vma, struct dma_fence *fence, + struct dma_fence *fence2) +{ + if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { + dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); + if (fence2) + dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); + } + vma->tile_present |= BIT(tile->id); + vma->tile_staged &= ~BIT(tile->id); + if (xe_vma_is_userptr(vma)) { + lockdep_assert_held_read(&vm->userptr.notifier_lock); + to_userptr_vma(vma)->userptr.initial_bind = true; + } + + /* + * Kick rebind worker if this bind triggers preempt fences and not in + * the rebind worker + */ + if (pt_update_ops->wait_vm_bookkeep && + xe_vm_in_preempt_fence_mode(vm) && + !current->mm) + xe_vm_queue_rebind_worker(vm); +} + +static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, + struct xe_vm_pgtable_update_ops *pt_update_ops, + struct xe_vma *vma, struct dma_fence *fence, + struct dma_fence *fence2) +{ + if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { + dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); + if (fence2) + dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); + } + vma->tile_present &= ~BIT(tile->id); + if (!vma->tile_present) { + list_del_init(&vma->combined_links.rebind); + if (xe_vma_is_userptr(vma)) { + lockdep_assert_held_read(&vm->userptr.notifier_lock); + + spin_lock(&vm->userptr.invalidated_lock); + list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); + spin_unlock(&vm->userptr.invalidated_lock); + } + } +} + +static void op_commit(struct xe_vm *vm, + struct xe_tile *tile, + struct xe_vm_pgtable_update_ops *pt_update_ops, + struct xe_vma_op *op, struct dma_fence *fence, + struct dma_fence *fence2) +{ + xe_vm_assert_held(vm); + + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + if (!op->map.immediate && xe_vm_in_fault_mode(vm)) + break; + + bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, + fence2); + break; + case DRM_GPUVA_OP_REMAP: + unbind_op_commit(vm, tile, pt_update_ops, + gpuva_to_vma(op->base.remap.unmap->va), fence, + fence2); + + if (op->remap.prev) + bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, + fence, fence2); + if (op->remap.next) + bind_op_commit(vm, tile, pt_update_ops, op->remap.next, + fence, fence2); + break; + case DRM_GPUVA_OP_UNMAP: + unbind_op_commit(vm, tile, pt_update_ops, + gpuva_to_vma(op->base.unmap.va), fence, fence2); + break; + case DRM_GPUVA_OP_PREFETCH: + bind_op_commit(vm, tile, pt_update_ops, + gpuva_to_vma(op->base.prefetch.va), fence, fence2); + break; + default: + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); + } +} + +static const struct xe_migrate_pt_update_ops migrate_ops = { + .populate = xe_vm_populate_pgtable, + .clear = xe_migrate_clear_pgtable_callback, .pre_commit = xe_pt_pre_commit, }; -static const struct xe_migrate_pt_update_ops userptr_unbind_ops = { - .populate = xe_migrate_clear_pgtable_callback, +static const struct xe_migrate_pt_update_ops userptr_migrate_ops = { + .populate = xe_vm_populate_pgtable, + .clear = xe_migrate_clear_pgtable_callback, .pre_commit = xe_pt_userptr_pre_commit, }; /** - * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma - * address range. - * @tile: The tile to unbind for. - * @vma: The vma to unbind. - * @q: The exec_queue with which to do pipelined page-table updates. - * @syncs: Entries to sync on before disconnecting the tree to be destroyed. - * @num_syncs: Number of @sync entries. + * xe_pt_update_ops_run() - Run PT update operations + * @tile: Tile of PT update operations + * @vops: VMA operationa * - * This function builds a the xe_vm_pgtable_update entries abstracting the - * operations needed to detach the page-table tree to be destroyed from the - * man vm tree. - * It then takes the relevant locks and submits the operations for - * pipelined detachment of the gpu page-table from the vm main tree, - * (which can be done either by the cpu and the GPU), Finally it frees the - * detached page-table tree. + * Run PT update operations which includes committing internal PT state changes, + * creating job for PT update operations for leaf insertion / removal, and + * installing job fence in various places. * - * Return: A valid dma-fence representing the pipelined detachment operation - * on success, an error pointer on error. + * Return: fence on success, negative ERR_PTR on error. */ struct dma_fence * -__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs) +xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) { - struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; - struct xe_pt_migrate_pt_update unbind_pt_update = { - .base = { - .ops = xe_vma_is_userptr(vma) ? &userptr_unbind_ops : - &unbind_ops, - .vma = vma, - .tile_id = tile->id, - }, - }; - struct xe_vm *vm = xe_vma_vm(vma); - u32 num_entries; - struct dma_fence *fence = NULL; - struct invalidation_fence *ifence; + struct xe_vm *vm = vops->vm; + struct xe_vm_pgtable_update_ops *pt_update_ops = + &vops->pt_update_ops[tile->id]; + struct dma_fence *fence; + struct invalidation_fence *ifence = NULL, *mfence = NULL; + struct dma_fence **fences = NULL; + struct dma_fence_array *cf = NULL; struct xe_range_fence *rfence; - int err; - - LLIST_HEAD(deferred); + struct xe_vma_op *op; + int err = 0, i; + struct xe_migrate_pt_update update = { + .ops = pt_update_ops->needs_userptr_lock ? + &userptr_migrate_ops : + &migrate_ops, + .vops = vops, + .tile_id = tile->id, + }; - xe_bo_assert_held(xe_vma_bo(vma)); + lockdep_assert_held(&vm->lock); xe_vm_assert_held(vm); - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx) engine %p.\n", - xe_vma_start(vma), xe_vma_end(vma), q); + if (!pt_update_ops->current_op) { + xe_tile_assert(tile, xe_vm_in_fault_mode(vm)); - num_entries = xe_pt_stage_unbind(tile, vma, entries); - xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); - - xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); - xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries, - num_entries); + return dma_fence_get_stub(); + } - err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); - if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); - if (err) - return ERR_PTR(err); +#ifdef TEST_VM_OPS_ERROR + if (vops->inject_error && + vm->xe->vm_inject_error_position == FORCE_OP_ERROR_RUN) + return ERR_PTR(-ENOSPC); +#endif - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) - return ERR_PTR(-ENOMEM); + if (pt_update_ops->needs_invalidation) { + ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); + if (!ifence) { + err = -ENOMEM; + goto kill_vm_tile1; + } + if (tile->media_gt) { + mfence = kzalloc(sizeof(*ifence), GFP_KERNEL); + if (!mfence) { + err = -ENOMEM; + goto free_ifence; + } + fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL); + if (!fences) { + err = -ENOMEM; + goto free_ifence; + } + cf = dma_fence_array_alloc(2); + if (!cf) { + err = -ENOMEM; + goto free_ifence; + } + } + } rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); if (!rfence) { - kfree(ifence); - return ERR_PTR(-ENOMEM); + err = -ENOMEM; + goto free_ifence; } - /* - * Even if we were already evicted and unbind to destroy, we need to - * clear again here. The eviction may have updated pagetables at a - * lower level, because it needs to be more conservative. - */ - fence = xe_migrate_update_pgtables(tile->migrate, - vm, NULL, q ? q : - vm->q[tile->id], - entries, num_entries, - syncs, num_syncs, - &unbind_pt_update.base); - if (!IS_ERR(fence)) { - int err; - - err = xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - unbind_pt_update.base.start, - unbind_pt_update.base.last, fence); - if (err) - dma_fence_wait(fence, false); + fence = xe_migrate_update_pgtables(tile->migrate, &update); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto free_rfence; + } - /* TLB invalidation must be done before signaling unbind */ - err = invalidation_fence_init(tile->primary_gt, ifence, fence, - xe_vma_start(vma), - xe_vma_end(vma), - xe_vma_vm(vma)->usm.asid); - if (err) { - dma_fence_put(fence); - kfree(ifence); - return ERR_PTR(err); + /* Point of no return - VM killed if failure after this */ + for (i = 0; i < pt_update_ops->current_op; ++i) { + struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i]; + + xe_pt_commit(pt_op->vma, pt_op->entries, + pt_op->num_entries, &pt_update_ops->deferred); + pt_op->vma = NULL; /* skip in xe_pt_update_ops_abort */ + } + + if (xe_range_fence_insert(&vm->rftree[tile->id], rfence, + &xe_range_fence_kfree_ops, + pt_update_ops->start, + pt_update_ops->last, fence)) + dma_fence_wait(fence, false); + + /* tlb invalidation must be done before signaling rebind */ + if (ifence) { + if (mfence) + dma_fence_get(fence); + invalidation_fence_init(tile->primary_gt, ifence, fence, + pt_update_ops->start, + pt_update_ops->last, vm->usm.asid); + if (mfence) { + invalidation_fence_init(tile->media_gt, mfence, fence, + pt_update_ops->start, + pt_update_ops->last, vm->usm.asid); + fences[0] = &ifence->base.base; + fences[1] = &mfence->base.base; + dma_fence_array_init(cf, 2, fences, + vm->composite_fence_ctx, + vm->composite_fence_seqno++, + false); + fence = &cf->base; + } else { + fence = &ifence->base.base; } - fence = &ifence->base.base; + } - /* add shared fence now for pagetable delayed destroy */ + if (!mfence) { dma_resv_add_fence(xe_vm_resv(vm), fence, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); - /* This fence will be installed by caller when doing eviction */ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - DMA_RESV_USAGE_BOOKKEEP); - xe_pt_commit_unbind(vma, entries, num_entries, - unbind_pt_update.locked ? &deferred : NULL); - vma->tile_present &= ~BIT(tile->id); + list_for_each_entry(op, &vops->list, link) + op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); } else { - kfree(rfence); - kfree(ifence); - } + dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); - if (!vma->tile_present) - list_del_init(&vma->combined_links.rebind); + dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); - if (unbind_pt_update.locked) { - xe_tile_assert(tile, xe_vma_is_userptr(vma)); + list_for_each_entry(op, &vops->list, link) + op_commit(vops->vm, tile, pt_update_ops, op, + &ifence->base.base, &mfence->base.base); + } - if (!vma->tile_present) { - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } + if (pt_update_ops->needs_userptr_lock) up_read(&vm->userptr.notifier_lock); - xe_bo_put_commit(&deferred); - } return fence; + +free_rfence: + kfree(rfence); +free_ifence: + kfree(cf); + kfree(fences); + kfree(mfence); + kfree(ifence); +kill_vm_tile1: + if (err != -EAGAIN && tile->id) + xe_vm_kill(vops->vm, false); + + return ERR_PTR(err); +} + +/** + * xe_pt_update_ops_fini() - Finish PT update operations + * @tile: Tile of PT update operations + * @vops: VMA operations + * + * Finish PT update operations by committing to destroy page table memory + */ +void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops) +{ + struct xe_vm_pgtable_update_ops *pt_update_ops = + &vops->pt_update_ops[tile->id]; + int i; + + lockdep_assert_held(&vops->vm->lock); + xe_vm_assert_held(vops->vm); + + for (i = 0; i < pt_update_ops->current_op; ++i) { + struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i]; + + xe_pt_free_bind(pt_op->entries, pt_op->num_entries); + } + xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); +} + +/** + * xe_pt_update_ops_abort() - Abort PT update operations + * @tile: Tile of PT update operations + * @vops: VMA operationa + * + * Abort PT update operations by unwinding internal PT state + */ +void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops) +{ + struct xe_vm_pgtable_update_ops *pt_update_ops = + &vops->pt_update_ops[tile->id]; + int i; + + lockdep_assert_held(&vops->vm->lock); + xe_vm_assert_held(vops->vm); + + for (i = pt_update_ops->num_ops - 1; i >= 0; --i) { + struct xe_vm_pgtable_update_op *pt_op = + &pt_update_ops->ops[i]; + + if (!pt_op->vma || i >= pt_update_ops->current_op) + continue; + + if (pt_op->bind) + xe_pt_abort_bind(pt_op->vma, pt_op->entries, + pt_op->num_entries, + pt_op->rebind); + else + xe_pt_abort_unbind(pt_op->vma, pt_op->entries, + pt_op->num_entries); + } + + xe_pt_update_ops_fini(tile, vops); } diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h index 71a4fbfcff43..9ab386431cad 100644 --- a/drivers/gpu/drm/xe/xe_pt.h +++ b/drivers/gpu/drm/xe/xe_pt.h @@ -17,6 +17,7 @@ struct xe_sync_entry; struct xe_tile; struct xe_vm; struct xe_vma; +struct xe_vma_ops; /* Largest huge pte is currently 1GiB. May become device dependent. */ #define MAX_HUGEPTE_LEVEL 2 @@ -34,14 +35,11 @@ void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred); -struct dma_fence * -__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs, - bool rebind); - -struct dma_fence * -__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs); +int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops); +struct dma_fence *xe_pt_update_ops_run(struct xe_tile *tile, + struct xe_vma_ops *vops); +void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops); +void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops); bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma); diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h index cee70cb0f014..384cc04de719 100644 --- a/drivers/gpu/drm/xe/xe_pt_types.h +++ b/drivers/gpu/drm/xe/xe_pt_types.h @@ -74,4 +74,52 @@ struct xe_vm_pgtable_update { u32 flags; }; +/** struct xe_vm_pgtable_update_op - Page table update operation */ +struct xe_vm_pgtable_update_op { + /** @entries: entries to update for this operation */ + struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; + /** @vma: VMA for operation, operation not valid if NULL */ + struct xe_vma *vma; + /** @num_entries: number of entries for this update operation */ + u32 num_entries; + /** @bind: is a bind */ + bool bind; + /** @rebind: is a rebind */ + bool rebind; +}; + +/** struct xe_vm_pgtable_update_ops: page table update operations */ +struct xe_vm_pgtable_update_ops { + /** @ops: operations */ + struct xe_vm_pgtable_update_op *ops; + /** @deferred: deferred list to destroy PT entries */ + struct llist_head deferred; + /** @q: exec queue for PT operations */ + struct xe_exec_queue *q; + /** @start: start address of ops */ + u64 start; + /** @last: last address of ops */ + u64 last; + /** @num_ops: number of operations */ + u32 num_ops; + /** @current_op: current operations */ + u32 current_op; + /** @needs_userptr_lock: Needs userptr lock */ + bool needs_userptr_lock; + /** @needs_invalidation: Needs invalidation */ + bool needs_invalidation; + /** + * @wait_vm_bookkeep: PT operations need to wait until VM is idle + * (bookkeep dma-resv slots are idle) and stage all future VM activity + * behind these operations (install PT operations into VM kernel + * dma-resv slot). + */ + bool wait_vm_bookkeep; + /** + * @wait_vm_kernel: PT operations need to wait until VM kernel dma-resv + * slots are idle. + */ + bool wait_vm_kernel; +}; + #endif diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 4e01df6b1b7a..848da8e68c7a 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -9,7 +9,7 @@ #include <linux/sched/clock.h> #include <drm/ttm/ttm_placement.h> -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" @@ -161,7 +161,11 @@ query_engine_cycles(struct xe_device *xe, cpu_clock); xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - resp.width = 36; + + if (GRAPHICS_VER(xe) >= 20) + resp.width = 64; + else + resp.width = 36; /* Only write to the output fields of user query */ if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp)) @@ -518,7 +522,9 @@ static int query_gt_topology(struct xe_device *xe, if (err) return err; - topo.type = DRM_XE_TOPO_EU_PER_DSS; + topo.type = gt->fuse_topo.eu_type == XE_GT_EU_TYPE_SIMD16 ? + DRM_XE_TOPO_SIMD16_EU_PER_DSS : + DRM_XE_TOPO_EU_PER_DSS; err = copy_mask(&query_ptr, &topo, gt->fuse_topo.eu_mask_per_dss, sizeof(gt->fuse_topo.eu_mask_per_dss)); diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h index 655af89b31a9..dca374b6521c 100644 --- a/drivers/gpu/drm/xe/xe_res_cursor.h +++ b/drivers/gpu/drm/xe/xe_res_cursor.h @@ -26,7 +26,6 @@ #include <linux/scatterlist.h> -#include <drm/drm_mm.h> #include <drm/ttm/ttm_placement.h> #include <drm/ttm/ttm_range_manager.h> #include <drm/ttm/ttm_resource.h> diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 5efe83cc82ab..86c705d18c0d 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -7,7 +7,7 @@ #include <kunit/visibility.h> -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> #include "xe_gt.h" #include "xe_gt_topology.h" @@ -217,21 +217,19 @@ void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx, ctx->active_entries = active_entries; ctx->n_entries = n_entries; } +EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_ctx_enable_active_tracking); static void rtp_mark_active(struct xe_device *xe, struct xe_rtp_process_ctx *ctx, - unsigned int first, unsigned int last) + unsigned int idx) { if (!ctx->active_entries) return; - if (drm_WARN_ON(&xe->drm, last > ctx->n_entries)) + if (drm_WARN_ON(&xe->drm, idx >= ctx->n_entries)) return; - if (first == last) - bitmap_set(ctx->active_entries, first, 1); - else - bitmap_set(ctx->active_entries, first, last - first + 1); + bitmap_set(ctx->active_entries, idx, 1); } /** @@ -276,8 +274,7 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, } if (match) - rtp_mark_active(xe, ctx, entry - entries, - entry - entries); + rtp_mark_active(xe, ctx, entry - entries); } } EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_to_sr); @@ -288,44 +285,29 @@ EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_to_sr); * @entries: Table with RTP definitions * * Walk the table pointed by @entries (with an empty sentinel), executing the - * rules. A few differences from xe_rtp_process_to_sr(): - * - * 1. There is no action associated with each entry since this uses - * struct xe_rtp_entry. Its main use is for marking active workarounds via - * xe_rtp_process_ctx_enable_active_tracking(). - * 2. There is support for OR operations by having entries with no name. + * rules. One difference from xe_rtp_process_to_sr(): there is no action + * associated with each entry since this uses struct xe_rtp_entry. Its main use + * is for marking active workarounds via + * xe_rtp_process_ctx_enable_active_tracking(). */ void xe_rtp_process(struct xe_rtp_process_ctx *ctx, const struct xe_rtp_entry *entries) { - const struct xe_rtp_entry *entry, *first_entry; + const struct xe_rtp_entry *entry; struct xe_hw_engine *hwe; struct xe_gt *gt; struct xe_device *xe; rtp_get_context(ctx, &hwe, >, &xe); - first_entry = entries; - if (drm_WARN_ON(&xe->drm, !first_entry->name)) - return; - for (entry = entries; entry && entry->rules; entry++) { - if (entry->name) - first_entry = entry; - if (!rule_matches(xe, gt, hwe, entry->rules, entry->n_rules)) continue; - /* Fast-forward entry, eliminating the OR'ed entries */ - for (entry++; entry && entry->rules; entry++) - if (entry->name) - break; - entry--; - - rtp_mark_active(xe, ctx, first_entry - entries, - entry - entries); + rtp_mark_active(xe, ctx, entry - entries); } } +EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process); bool xe_rtp_match_even_instance(const struct xe_gt *gt, const struct xe_hw_engine *hwe) diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index ad446731192c..827d932b6908 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -374,7 +374,7 @@ struct xe_reg_sr; * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry_sr entry * @...: Rules * - * At least one rule is needed and up to 6 are supported. Multiple rules are + * At least one rule is needed and up to 12 are supported. Multiple rules are * AND'ed together, i.e. all the rules must evaluate to true for the entry to * be processed. See XE_RTP_MATCH_* for the possible match rules. Example: * @@ -399,7 +399,7 @@ struct xe_reg_sr; * XE_RTP_ACTIONS - Helper to set multiple actions to a struct xe_rtp_entry_sr * @...: Actions to be taken * - * At least one action is needed and up to 6 are supported. See XE_RTP_ACTION_* + * At least one action is needed and up to 12 are supported. See XE_RTP_ACTION_* * for the possible actions. Example: * * .. code-block:: c diff --git a/drivers/gpu/drm/xe/xe_rtp_helpers.h b/drivers/gpu/drm/xe/xe_rtp_helpers.h index c59e40fd7fff..a33b0ae98bbc 100644 --- a/drivers/gpu/drm/xe/xe_rtp_helpers.h +++ b/drivers/gpu/drm/xe/xe_rtp_helpers.h @@ -60,6 +60,12 @@ #define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_) #define XE_RTP_PASTE_5(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_4(prefix_, sep_, _XE_TUPLE_TAIL args_) #define XE_RTP_PASTE_6(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_5(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_7(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_6(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_8(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_7(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_9(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_8(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_10(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_9(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_11(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_10(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_12(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_11(prefix_, sep_, _XE_TUPLE_TAIL args_) /* * XE_RTP_DROP_CAST - Drop cast to convert a compound statement to a initializer diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index 8941522b7705..fe2cb2a96f78 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -25,10 +25,9 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg) drm_suballoc_manager_fini(&sa_manager->base); - if (bo->vmap.is_iomem) + if (sa_manager->is_iomem) kvfree(sa_manager->cpu_ptr); - xe_bo_unpin_map_no_vm(bo); sa_manager->bo = NULL; } @@ -47,16 +46,17 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 sa_manager->bo = NULL; - bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + bo = xe_managed_bo_create_pin_map(xe, tile, size, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(bo)) { drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n", PTR_ERR(bo)); return (struct xe_sa_manager *)bo; } sa_manager->bo = bo; + sa_manager->is_iomem = bo->vmap.is_iomem; drm_suballoc_manager_init(&sa_manager->base, managed_size, align); sa_manager->gpu_addr = xe_bo_ggtt_addr(bo); @@ -64,7 +64,6 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 if (bo->vmap.is_iomem) { sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL); if (!sa_manager->cpu_ptr) { - xe_bo_unpin_map_no_vm(sa_manager->bo); sa_manager->bo = NULL; return ERR_PTR(-ENOMEM); } @@ -84,6 +83,13 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, unsigned int size) { + /* + * BB to large, return -ENOBUFS indicating user should split + * array of binds into smaller chunks. + */ + if (size > sa_manager->base.size) + return ERR_PTR(-ENOBUFS); + return drm_suballoc_new(&sa_manager->base, size, GFP_KERNEL, true, 0); } diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h index 2ef896aeca1d..2b070ff1292e 100644 --- a/drivers/gpu/drm/xe/xe_sa_types.h +++ b/drivers/gpu/drm/xe/xe_sa_types.h @@ -14,6 +14,7 @@ struct xe_sa_manager { struct xe_bo *bo; u64 gpu_addr; void *cpu_ptr; + bool is_iomem; }; #endif diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 44d534e362cd..eeccc1c318ae 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -5,7 +5,7 @@ #include "xe_sched_job.h" -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> #include <linux/dma-fence-chain.h> #include <linux/slab.h> @@ -89,8 +89,7 @@ static void xe_sched_job_free_fences(struct xe_sched_job *job) if (ptrs->lrc_fence) xe_lrc_free_seqno_fence(ptrs->lrc_fence); - if (ptrs->chain_fence) - dma_fence_chain_free(ptrs->chain_fence); + dma_fence_chain_free(ptrs->chain_fence); } } @@ -171,12 +170,13 @@ void xe_sched_job_destroy(struct kref *ref) struct xe_sched_job *job = container_of(ref, struct xe_sched_job, refcount); struct xe_device *xe = job_to_xe(job); + struct xe_exec_queue *q = job->q; xe_sched_job_free_fences(job); - xe_exec_queue_put(job->q); dma_fence_put(job->fence); drm_sched_job_cleanup(&job->drm); job_free(job); + xe_exec_queue_put(q); xe_pm_runtime_put(xe); } diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index a274a5fb1401..5a1d65e4f19f 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -5,7 +5,7 @@ #include <drm/drm_managed.h> -#include "regs/xe_sriov_regs.h" +#include "regs/xe_regs.h" #include "xe_assert.h" #include "xe_device.h" diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c index eaf1b718f26c..c77b5c317fa0 100644 --- a/drivers/gpu/drm/xe/xe_step.c +++ b/drivers/gpu/drm/xe/xe_step.c @@ -28,23 +28,17 @@ * use a macro to define these to make it easier to identify the platforms * where the two steppings can deviate. */ -#define COMMON_GT_MEDIA_STEP(x_) \ - .graphics = STEP_##x_, \ - .media = STEP_##x_ - #define COMMON_STEP(x_) \ - COMMON_GT_MEDIA_STEP(x_), \ .graphics = STEP_##x_, \ - .media = STEP_##x_, \ - .display = STEP_##x_ + .media = STEP_##x_ __diag_push(); __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); /* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */ static const struct xe_step_info tgl_revids[] = { - [0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 }, - [1] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_D0 }, + [0] = { COMMON_STEP(A0) }, + [1] = { COMMON_STEP(B0) }, }; static const struct xe_step_info dg1_revids[] = { @@ -53,49 +47,49 @@ static const struct xe_step_info dg1_revids[] = { }; static const struct xe_step_info adls_revids[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, - [0x1] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A2 }, - [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 }, - [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_B0 }, - [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x1] = { COMMON_STEP(A0) }, + [0x4] = { COMMON_STEP(B0) }, + [0x8] = { COMMON_STEP(C0) }, + [0xC] = { COMMON_STEP(D0) }, }; static const struct xe_step_info adls_rpls_revids[] = { - [0x4] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_D0 }, - [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 }, + [0x4] = { COMMON_STEP(D0) }, + [0xC] = { COMMON_STEP(D0) }, }; static const struct xe_step_info adlp_revids[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, - [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 }, - [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_C0 }, - [0xC] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_D0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x4] = { COMMON_STEP(B0) }, + [0x8] = { COMMON_STEP(C0) }, + [0xC] = { COMMON_STEP(C0) }, }; static const struct xe_step_info adlp_rpl_revids[] = { - [0x4] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_E0 }, + [0x4] = { COMMON_STEP(C0) }, }; static const struct xe_step_info adln_revids[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_D0 }, + [0x0] = { COMMON_STEP(A0) }, }; static const struct xe_step_info dg2_g10_revid_step_tbl[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, - [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_A0 }, - [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 }, - [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_C0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x1] = { COMMON_STEP(A1) }, + [0x4] = { COMMON_STEP(B0) }, + [0x8] = { COMMON_STEP(C0) }, }; static const struct xe_step_info dg2_g11_revid_step_tbl[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 }, - [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_C0 }, - [0x5] = { COMMON_GT_MEDIA_STEP(B1), .display = STEP_C0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x4] = { COMMON_STEP(B0) }, + [0x5] = { COMMON_STEP(B1) }, }; static const struct xe_step_info dg2_g12_revid_step_tbl[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_C0 }, - [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_C0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x1] = { COMMON_STEP(A1) }, }; static const struct xe_step_info pvc_revid_step_tbl[] = { @@ -195,7 +189,6 @@ struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe) } else { drm_dbg(&xe->drm, "Using future steppings\n"); step.graphics = STEP_FUTURE; - step.display = STEP_FUTURE; } } diff --git a/drivers/gpu/drm/xe/xe_step_types.h b/drivers/gpu/drm/xe/xe_step_types.h index ccc9b4795e95..d978cc2512f2 100644 --- a/drivers/gpu/drm/xe/xe_step_types.h +++ b/drivers/gpu/drm/xe/xe_step_types.h @@ -11,12 +11,15 @@ struct xe_step_info { u8 graphics; u8 media; - u8 display; u8 basedie; }; #define STEP_ENUM_VAL(name) STEP_##name, +/* + * Always define four minor steppings 0-3 for each stepping to match GMD ID + * spacing of values. See xe_step_gmdid_get(). + */ #define STEP_NAME_LIST(func) \ func(A0) \ func(A1) \ @@ -34,7 +37,30 @@ struct xe_step_info { func(D1) \ func(D2) \ func(D3) \ - func(E0) + func(E0) \ + func(E1) \ + func(E2) \ + func(E3) \ + func(F0) \ + func(F1) \ + func(F2) \ + func(F3) \ + func(G0) \ + func(G1) \ + func(G2) \ + func(G3) \ + func(H0) \ + func(H1) \ + func(H2) \ + func(H3) \ + func(I0) \ + func(I1) \ + func(I2) \ + func(I3) \ + func(J0) \ + func(J1) \ + func(J2) \ + func(J3) /* * Symbolic steppings that do not match the hardware. These are valid both as gt diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index e8d31e010860..2e72c06fd40d 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -12,7 +12,7 @@ #include <drm/drm_print.h> #include <drm/drm_syncobj.h> -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> #include "xe_device_types.h" #include "xe_exec_queue.h" @@ -54,11 +54,12 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, { struct xe_user_fence *ufence; u64 __user *ptr = u64_to_user_ptr(addr); + u64 __maybe_unused prefetch_val; - if (!access_ok(ptr, sizeof(ptr))) + if (get_user(prefetch_val, ptr)) return ERR_PTR(-EFAULT); - ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); + ufence = kzalloc(sizeof(*ufence), GFP_KERNEL); if (!ufence) return ERR_PTR(-ENOMEM); @@ -204,26 +205,11 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, return 0; } -int xe_sync_entry_wait(struct xe_sync_entry *sync) -{ - if (sync->fence) - dma_fence_wait(sync->fence, true); - - return 0; -} - int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job) { - int err; - - if (sync->fence) { - err = drm_sched_job_add_dependency(&job->drm, - dma_fence_get(sync->fence)); - if (err) { - dma_fence_put(sync->fence); - return err; - } - } + if (sync->fence) + return drm_sched_job_add_dependency(&job->drm, + dma_fence_get(sync->fence)); return 0; } @@ -264,10 +250,8 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync) { if (sync->syncobj) drm_syncobj_put(sync->syncobj); - if (sync->fence) - dma_fence_put(sync->fence); - if (sync->chain_fence) - dma_fence_chain_free(sync->chain_fence); + dma_fence_put(sync->fence); + dma_fence_chain_free(sync->chain_fence); if (sync->ufence) user_fence_put(sync->ufence); } diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index 006dbf780793..256ffc1e54dc 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -22,7 +22,6 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, struct xe_sync_entry *sync, struct drm_xe_sync __user *sync_user, unsigned int flags); -int xe_sync_entry_wait(struct xe_sync_entry *sync); int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job); void xe_sync_entry_signal(struct xe_sync_entry *sync, diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 15ea0a942f67..dda5268507d8 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -9,6 +9,7 @@ #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_migrate.h" +#include "xe_pcode.h" #include "xe_sa.h" #include "xe_tile.h" #include "xe_tile_sysfs.h" @@ -124,6 +125,8 @@ int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id) if (IS_ERR(tile->primary_gt)) return PTR_ERR(tile->primary_gt); + xe_pcode_init(tile); + return 0; } diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index baba14fb1e32..8573d7a87d84 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -309,7 +309,7 @@ DECLARE_EVENT_CLASS(xe_hw_fence, TP_ARGS(fence), TP_STRUCT__entry( - __string(dev, __dev_name_gt(fence->ctx->gt)) + __string(dev, __dev_name_xe(fence->xe)) __field(u64, ctx) __field(u32, seqno) __field(struct xe_hw_fence *, fence) @@ -369,6 +369,58 @@ TRACE_EVENT(xe_reg_rw, (u32)(__entry->val >> 32)) ); +DECLARE_EVENT_CLASS(xe_pm_runtime, + TP_PROTO(struct xe_device *xe, void *caller), + TP_ARGS(xe, caller), + + TP_STRUCT__entry( + __string(dev, __dev_name_xe(xe)) + __field(void *, caller) + ), + + TP_fast_assign( + __assign_str(dev); + __entry->caller = caller; + ), + + TP_printk("dev=%s caller_function=%pS", __get_str(dev), __entry->caller) +); + +DEFINE_EVENT(xe_pm_runtime, xe_pm_runtime_get, + TP_PROTO(struct xe_device *xe, void *caller), + TP_ARGS(xe, caller) +); + +DEFINE_EVENT(xe_pm_runtime, xe_pm_runtime_put, + TP_PROTO(struct xe_device *xe, void *caller), + TP_ARGS(xe, caller) +); + +DEFINE_EVENT(xe_pm_runtime, xe_pm_resume, + TP_PROTO(struct xe_device *xe, void *caller), + TP_ARGS(xe, caller) +); + +DEFINE_EVENT(xe_pm_runtime, xe_pm_suspend, + TP_PROTO(struct xe_device *xe, void *caller), + TP_ARGS(xe, caller) +); + +DEFINE_EVENT(xe_pm_runtime, xe_pm_runtime_resume, + TP_PROTO(struct xe_device *xe, void *caller), + TP_ARGS(xe, caller) +); + +DEFINE_EVENT(xe_pm_runtime, xe_pm_runtime_suspend, + TP_PROTO(struct xe_device *xe, void *caller), + TP_ARGS(xe, caller) +); + +DEFINE_EVENT(xe_pm_runtime, xe_pm_runtime_get_ioctl, + TP_PROTO(struct xe_device *xe, void *caller), + TP_ARGS(xe, caller) +); + #endif /* This part must be outside protection */ diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h index f39f09ed3495..9b1a1d4304ae 100644 --- a/drivers/gpu/drm/xe/xe_trace_bo.h +++ b/drivers/gpu/drm/xe/xe_trace_bo.h @@ -117,11 +117,6 @@ DEFINE_EVENT(xe_vma, xe_vma_acc, TP_ARGS(vma) ); -DEFINE_EVENT(xe_vma, xe_vma_fail, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - DEFINE_EVENT(xe_vma, xe_vma_bind, TP_PROTO(struct xe_vma *vma), TP_ARGS(vma) @@ -237,6 +232,11 @@ DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit, TP_ARGS(vm) ); +DEFINE_EVENT(xe_vm, xe_vm_ops_fail, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + #endif /* This part must be outside protection */ diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index f46fd2df84de..f7113cf6109d 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -5,7 +5,6 @@ */ #include <drm/drm_managed.h> -#include <drm/drm_mm.h> #include <drm/ttm/ttm_device.h> #include <drm/ttm/ttm_placement.h> diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index d4e6fa918942..0d5e04158917 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -39,12 +39,51 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { }, { XE_RTP_NAME("Tuning: Compression Overfetch"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), - XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX)), + XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX), + SET(CCCHKNREG1, L3CMPCTRL)) + }, + { XE_RTP_NAME("Tuning: Compression Overfetch - media"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(CLR(XE2LPM_CCCHKNREG1, ENCOMPPERFFIX), + SET(XE2LPM_CCCHKNREG1, L3CMPCTRL)) }, { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN)) }, + { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN)) + }, + { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(SET(L3SQCREG2, + COMPMEMRD256BOVRFETCHEN)) + }, + { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2, + COMPMEMRD256BOVRFETCHEN)) + }, + { XE_RTP_NAME("Tuning: Stateless compression control"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, + REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) + }, + { XE_RTP_NAME("Tuning: Stateless compression control - media"), + XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, 2000)), + XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, + REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) + }, + { XE_RTP_NAME("Tuning: L3 RW flush all Cache"), + XE_RTP_RULES(GRAPHICS_VERSION(2004)), + XE_RTP_ACTIONS(SET(SCRATCH3_LBCF, RWFLUSHALLEN)) + }, + { XE_RTP_NAME("Tuning: L3 RW flush all cache - media"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN)) + }, + {} }; @@ -93,6 +132,14 @@ static const struct xe_rtp_entry_sr lrc_tunings[] = { REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) }, + /* Xe2_HPG */ + + { XE_RTP_NAME("Tuning: vs hit max value"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(FIELD_SET(FF_MODE, VS_HIT_MAX_VALUE_MASK, + REG_FIELD_PREP(VS_HIT_MAX_VALUE_MASK, 0x3f))) + }, + {} }; diff --git a/drivers/gpu/drm/xe/xe_uc_debugfs.c b/drivers/gpu/drm/xe/xe_uc_debugfs.c index 78eb8db73791..24a4209051ee 100644 --- a/drivers/gpu/drm/xe/xe_uc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_uc_debugfs.c @@ -8,6 +8,7 @@ #include <drm/drm_debugfs.h> #include "xe_gt.h" +#include "xe_gsc_debugfs.h" #include "xe_guc_debugfs.h" #include "xe_huc_debugfs.h" #include "xe_macros.h" @@ -23,6 +24,7 @@ void xe_uc_debugfs_register(struct xe_uc *uc, struct dentry *parent) return; } + xe_gsc_debugfs_register(&uc->gsc, root); xe_guc_debugfs_register(&uc->guc, root); xe_huc_debugfs_register(&uc->huc, root); } diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 5f23ecd98376..d431d0031185 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -15,6 +15,7 @@ #include "xe_gsc.h" #include "xe_gt.h" #include "xe_gt_printk.h" +#include "xe_guc.h" #include "xe_map.h" #include "xe_mmio.h" #include "xe_module.h" @@ -105,17 +106,20 @@ struct fw_blobs_by_type { }; #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ - fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 19, 2)) \ - fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 19, 2)) \ - fw_def(DG2, major_ver(i915, guc, dg2, 70, 19, 2)) \ - fw_def(DG1, major_ver(i915, guc, dg1, 70, 19, 2)) \ - fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 19, 2)) \ - fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 19, 2)) \ - fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 19, 2)) \ - fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 19, 2)) \ - fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 19, 2)) + fw_def(BATTLEMAGE, major_ver(xe, guc, bmg, 70, 29, 2)) \ + fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 29, 2)) \ + fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 29, 2)) \ + fw_def(DG2, major_ver(i915, guc, dg2, 70, 29, 2)) \ + fw_def(DG1, major_ver(i915, guc, dg1, 70, 29, 2)) \ + fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 29, 2)) \ + fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 29, 2)) \ + fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 29, 2)) \ + fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 29, 2)) \ + fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 29, 2)) #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ + fw_def(BATTLEMAGE, no_ver(xe, huc, bmg)) \ + fw_def(LUNARLAKE, no_ver(xe, huc, lnl)) \ fw_def(METEORLAKE, no_ver(i915, huc_gsc, mtl)) \ fw_def(DG1, no_ver(i915, huc, dg1)) \ fw_def(ALDERLAKE_P, no_ver(i915, huc, tgl)) \ @@ -125,7 +129,8 @@ struct fw_blobs_by_type { /* for the GSC FW we match the compatibility version and not the release one */ #define XE_GSC_FIRMWARE_DEFS(fw_def, major_ver) \ - fw_def(METEORLAKE, major_ver(i915, gsc, mtl, 1, 0, 0)) + fw_def(LUNARLAKE, major_ver(xe, gsc, lnl, 104, 1, 0)) \ + fw_def(METEORLAKE, major_ver(i915, gsc, mtl, 102, 1, 0)) #define MAKE_FW_PATH(dir__, uc__, shortname__, version__) \ __stringify(dir__) "/" __stringify(shortname__) "_" __stringify(uc__) version__ ".bin" @@ -136,6 +141,8 @@ struct fw_blobs_by_type { MAKE_FW_PATH(dir_, uc_, shortname_, "_" __stringify(a)) #define fw_filename_no_ver(dir_, uc_, shortname_) \ MAKE_FW_PATH(dir_, uc_, shortname_, "") +#define fw_filename_gsc(dir_, uc_, shortname_, a, b, c) \ + MAKE_FW_PATH(dir_, uc_, shortname_, "_" __stringify(b)) #define uc_fw_entry_mmp_ver(dir_, uc_, shortname_, a, b, c) \ { fw_filename_mmp_ver(dir_, uc_, shortname_, a, b, c), \ @@ -146,6 +153,9 @@ struct fw_blobs_by_type { #define uc_fw_entry_no_ver(dir_, uc_, shortname_) \ { fw_filename_no_ver(dir_, uc_, shortname_), \ 0, 0 } +#define uc_fw_entry_gsc(dir_, uc_, shortname_, a, b, c) \ + { fw_filename_gsc(dir_, uc_, shortname_, a, b, c), \ + a, b, c } /* All blobs need to be declared via MODULE_FIRMWARE() */ #define XE_UC_MODULE_FIRMWARE(platform__, fw_filename) \ @@ -161,7 +171,7 @@ XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE, fw_filename_mmp_ver, fw_filename_major_ver) XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE, fw_filename_mmp_ver, fw_filename_no_ver) -XE_GSC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE, fw_filename_major_ver) +XE_GSC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE, fw_filename_gsc) static struct xe_gt * __uc_fw_to_gt(struct xe_uc_fw *uc_fw, enum xe_uc_fw_type type) @@ -204,7 +214,7 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw) uc_fw_entry_no_ver) }; static const struct uc_fw_entry entries_gsc[] = { - XE_GSC_FIRMWARE_DEFS(XE_UC_FW_ENTRY, uc_fw_entry_major_ver) + XE_GSC_FIRMWARE_DEFS(XE_UC_FW_ENTRY, uc_fw_entry_gsc) }; static const struct fw_blobs_by_type blobs_all[XE_UC_FW_NUM_TYPES] = { [XE_UC_FW_TYPE_GUC] = { entries_guc, ARRAY_SIZE(entries_guc) }, @@ -306,10 +316,10 @@ static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) xe_gt_assert(gt, uc_fw->type == XE_UC_FW_TYPE_GUC); - /* We don't support GuC releases older than 70.19 */ - if (release->major < 70 || (release->major == 70 && release->minor < 19)) { - xe_gt_err(gt, "Unsupported GuC v%u.%u! v70.19 or newer is required\n", - release->major, release->minor); + /* We don't support GuC releases older than 70.29.2 */ + if (MAKE_GUC_VER_STRUCT(*release) < MAKE_GUC_VER(70, 29, 2)) { + xe_gt_err(gt, "Unsupported GuC v%u.%u.%u! v70.29.2 or newer is required\n", + release->major, release->minor, release->patch); return -EINVAL; } diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h index c108e9d08e70..6195e353f269 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.h +++ b/drivers/gpu/drm/xe/xe_uc_fw.h @@ -65,7 +65,7 @@ const char *xe_uc_fw_status_repr(enum xe_uc_fw_status status) return "<invalid>"; } -static inline int xe_uc_fw_status_to_error(enum xe_uc_fw_status status) +static inline int xe_uc_fw_status_to_error(const enum xe_uc_fw_status status) { switch (status) { case XE_UC_FIRMWARE_NOT_SUPPORTED: @@ -108,7 +108,7 @@ static inline const char *xe_uc_fw_type_repr(enum xe_uc_fw_type type) } static inline enum xe_uc_fw_status -__xe_uc_fw_status(struct xe_uc_fw *uc_fw) +__xe_uc_fw_status(const struct xe_uc_fw *uc_fw) { /* shouldn't call this before checking hw/blob availability */ XE_WARN_ON(uc_fw->status == XE_UC_FIRMWARE_UNINITIALIZED); @@ -156,6 +156,11 @@ static inline bool xe_uc_fw_is_overridden(const struct xe_uc_fw *uc_fw) return uc_fw->user_overridden; } +static inline bool xe_uc_fw_is_in_error_state(const struct xe_uc_fw *uc_fw) +{ + return xe_uc_fw_status_to_error(__xe_uc_fw_status(uc_fw)) < 0; +} + static inline void xe_uc_fw_sanitize(struct xe_uc_fw *uc_fw) { if (xe_uc_fw_is_loadable(uc_fw)) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index c7561a56abaf..c99380271de6 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -12,7 +12,7 @@ #include <drm/drm_print.h> #include <drm/ttm/ttm_execbuf_util.h> #include <drm/ttm/ttm_tt.h> -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> #include <linux/ascii85.h> #include <linux/delay.h> #include <linux/kthread.h> @@ -133,8 +133,10 @@ static int wait_for_existing_preempt_fences(struct xe_vm *vm) if (q->lr.pfence) { long timeout = dma_fence_wait(q->lr.pfence, false); - if (timeout < 0) + /* Only -ETIME on fence indicates VM needs to be killed */ + if (timeout < 0 || q->lr.pfence->error == -ETIME) return -ETIME; + dma_fence_put(q->lr.pfence); q->lr.pfence = NULL; } @@ -273,6 +275,8 @@ out_up_write: * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM * @vm: The VM. * @q: The exec_queue + * + * Note that this function might be called multiple times on the same queue. */ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) { @@ -280,8 +284,10 @@ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) return; down_write(&vm->lock); - list_del(&q->lr.link); - --vm->preempt.num_exec_queues; + if (!list_empty(&q->lr.link)) { + list_del_init(&q->lr.link); + --vm->preempt.num_exec_queues; + } if (q->lr.pfence) { dma_fence_enable_sw_signaling(q->lr.pfence); dma_fence_put(q->lr.pfence); @@ -311,7 +317,15 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm) #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 -static void xe_vm_kill(struct xe_vm *vm, bool unlocked) +/** + * xe_vm_kill() - VM Kill + * @vm: The VM. + * @unlocked: Flag indicates the VM's dma-resv is not held + * + * Kill the VM by setting banned flag indicated VM is no longer available for + * use. If in preempt fence mode, also kill all exec queue attached to the VM. + */ +void xe_vm_kill(struct xe_vm *vm, bool unlocked) { struct xe_exec_queue *q; @@ -708,6 +722,42 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm) list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN; } +static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) +{ + int i; + + for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { + if (!vops->pt_update_ops[i].num_ops) + continue; + + vops->pt_update_ops[i].ops = + kmalloc_array(vops->pt_update_ops[i].num_ops, + sizeof(*vops->pt_update_ops[i].ops), + GFP_KERNEL); + if (!vops->pt_update_ops[i].ops) + return array_of_binds ? -ENOBUFS : -ENOMEM; + } + + return 0; +} + +static void xe_vma_ops_fini(struct xe_vma_ops *vops) +{ + int i; + + for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) + kfree(vops->pt_update_ops[i].ops); +} + +static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask) +{ + int i; + + for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) + if (BIT(i) & tile_mask) + ++vops->pt_update_ops[i].num_ops; +} + static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, u8 tile_mask) { @@ -735,6 +785,7 @@ static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, xe_vm_populate_rebind(op, vma, tile_mask); list_add_tail(&op->link, &vops->list); + xe_vma_ops_incr_pt_update_ops(vops, tile_mask); return 0; } @@ -751,7 +802,7 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) struct xe_vma *vma, *next; struct xe_vma_ops vops; struct xe_vma_op *op, *next_op; - int err; + int err, i; lockdep_assert_held(&vm->lock); if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || @@ -759,6 +810,8 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) return 0; xe_vma_ops_init(&vops, vm, NULL, NULL, 0); + for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) + vops.pt_update_ops[i].wait_vm_bookkeep = true; xe_vm_assert_held(vm); list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { @@ -775,6 +828,10 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) goto free_ops; } + err = xe_vma_ops_alloc(&vops, false); + if (err) + goto free_ops; + fence = ops_execute(vm, &vops); if (IS_ERR(fence)) { err = PTR_ERR(fence); @@ -789,6 +846,7 @@ free_ops: list_del(&op->link); kfree(op); } + xe_vma_ops_fini(&vops); return err; } @@ -798,6 +856,8 @@ struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_ma struct dma_fence *fence = NULL; struct xe_vma_ops vops; struct xe_vma_op *op, *next_op; + struct xe_tile *tile; + u8 id; int err; lockdep_assert_held(&vm->lock); @@ -805,17 +865,30 @@ struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_ma xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); xe_vma_ops_init(&vops, vm, NULL, NULL, 0); + for_each_tile(tile, vm->xe, id) { + vops.pt_update_ops[id].wait_vm_bookkeep = true; + vops.pt_update_ops[tile->id].q = + xe_tile_migrate_exec_queue(tile); + } err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); if (err) return ERR_PTR(err); + err = xe_vma_ops_alloc(&vops, false); + if (err) { + fence = ERR_PTR(err); + goto free_ops; + } + fence = ops_execute(vm, &vops); +free_ops: list_for_each_entry_safe(op, next_op, &vops.list, link) { list_del(&op->link); kfree(op); } + xe_vma_ops_fini(&vops); return fence; } @@ -1122,7 +1195,7 @@ static const struct drm_gpuvm_ops gpuvm_ops = { .vm_free = xe_vm_free, }; -static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index) +static u64 pde_encode_pat_index(u16 pat_index) { u64 pte = 0; @@ -1135,8 +1208,7 @@ static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index) return pte; } -static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index, - u32 pt_level) +static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) { u64 pte = 0; @@ -1177,12 +1249,11 @@ static u64 pte_encode_ps(u32 pt_level) static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, const u16 pat_index) { - struct xe_device *xe = xe_bo_device(bo); u64 pde; pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); pde |= XE_PAGE_PRESENT | XE_PAGE_RW; - pde |= pde_encode_pat_index(xe, pat_index); + pde |= pde_encode_pat_index(pat_index); return pde; } @@ -1190,12 +1261,11 @@ static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, u16 pat_index, u32 pt_level) { - struct xe_device *xe = xe_bo_device(bo); u64 pte; pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - pte |= pte_encode_pat_index(xe, pat_index, pt_level); + pte |= pte_encode_pat_index(pat_index, pt_level); pte |= pte_encode_ps(pt_level); if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) @@ -1207,14 +1277,12 @@ static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, u16 pat_index, u32 pt_level) { - struct xe_device *xe = xe_vma_vm(vma)->xe; - pte |= XE_PAGE_PRESENT; if (likely(!xe_vma_read_only(vma))) pte |= XE_PAGE_RW; - pte |= pte_encode_pat_index(xe, pat_index, pt_level); + pte |= pte_encode_pat_index(pat_index, pt_level); pte |= pte_encode_ps(pt_level); if (unlikely(xe_vma_is_null(vma))) @@ -1234,7 +1302,7 @@ static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, pte = addr; pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - pte |= pte_encode_pat_index(xe, pat_index, pt_level); + pte |= pte_encode_pat_index(pat_index, pt_level); pte |= pte_encode_ps(pt_level); if (devmem) @@ -1333,6 +1401,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) init_rwsem(&vm->userptr.notifier_lock); spin_lock_init(&vm->userptr.invalidated_lock); + ttm_lru_bulk_move_init(&vm->lru_bulk_move); + INIT_WORK(&vm->destroy_work, vm_destroy_work_func); INIT_LIST_HEAD(&vm->preempt.exec_queues); @@ -1412,19 +1482,13 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) /* Kernel migration VM shouldn't have a circular loop.. */ if (!(flags & XE_VM_FLAG_MIGRATION)) { for_each_tile(tile, xe, id) { - struct xe_gt *gt = tile->primary_gt; - struct xe_vm *migrate_vm; struct xe_exec_queue *q; u32 create_flags = EXEC_QUEUE_FLAG_VM; if (!vm->pt_root[id]) continue; - migrate_vm = xe_migrate_get_vm(tile->migrate); - q = xe_exec_queue_create_class(xe, gt, migrate_vm, - XE_ENGINE_CLASS_COPY, - create_flags); - xe_vm_put(migrate_vm); + q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); if (IS_ERR(q)) { err = PTR_ERR(q); goto err_close; @@ -1437,13 +1501,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) if (number_tiles > 1) vm->composite_fence_ctx = dma_fence_context_alloc(1); - mutex_lock(&xe->usm.lock); - if (flags & XE_VM_FLAG_FAULT_MODE) - xe->usm.num_vm_in_fault_mode++; - else if (!(flags & XE_VM_FLAG_MIGRATION)) - xe->usm.num_vm_in_non_fault_mode++; - mutex_unlock(&xe->usm.lock); - trace_xe_vm_create(vm); return vm; @@ -1458,6 +1515,7 @@ err_no_resv: mutex_destroy(&vm->snap_mutex); for_each_tile(tile, xe, id) xe_range_fence_tree_fini(&vm->rftree[id]); + ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); kfree(vm); if (flags & XE_VM_FLAG_LR_MODE) xe_pm_runtime_put(xe); @@ -1555,12 +1613,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) up_write(&vm->lock); - mutex_lock(&xe->usm.lock); - if (vm->flags & XE_VM_FLAG_FAULT_MODE) - xe->usm.num_vm_in_fault_mode--; - else if (!(vm->flags & XE_VM_FLAG_MIGRATION)) - xe->usm.num_vm_in_non_fault_mode--; - + down_write(&xe->usm.lock); if (vm->usm.asid) { void *lookup; @@ -1570,7 +1623,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); xe_assert(xe, lookup == vm); } - mutex_unlock(&xe->usm.lock); + up_write(&xe->usm.lock); for_each_tile(tile, xe, id) xe_range_fence_tree_fini(&vm->rftree[id]); @@ -1602,6 +1655,8 @@ static void vm_destroy_work_func(struct work_struct *w) trace_xe_vm_free(vm); + ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); + if (vm->xef) xe_file_put(vm->xef); @@ -1641,147 +1696,6 @@ to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) return q ? q : vm->q[0]; } -static struct dma_fence * -xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs, - bool first_op, bool last_op) -{ - struct xe_vm *vm = xe_vma_vm(vma); - struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); - struct xe_tile *tile; - struct dma_fence *fence = NULL; - struct dma_fence **fences = NULL; - struct dma_fence_array *cf = NULL; - int cur_fence = 0; - int number_tiles = hweight8(vma->tile_present); - int err; - u8 id; - - trace_xe_vma_unbind(vma); - - if (number_tiles > 1) { - fences = kmalloc_array(number_tiles, sizeof(*fences), - GFP_KERNEL); - if (!fences) - return ERR_PTR(-ENOMEM); - } - - for_each_tile(tile, vm->xe, id) { - if (!(vma->tile_present & BIT(id))) - goto next; - - fence = __xe_pt_unbind_vma(tile, vma, q ? q : vm->q[id], - first_op ? syncs : NULL, - first_op ? num_syncs : 0); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - goto err_fences; - } - - if (fences) - fences[cur_fence++] = fence; - -next: - if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list)) - q = list_next_entry(q, multi_gt_list); - } - - if (fences) { - cf = dma_fence_array_create(number_tiles, fences, - vm->composite_fence_ctx, - vm->composite_fence_seqno++, - false); - if (!cf) { - --vm->composite_fence_seqno; - err = -ENOMEM; - goto err_fences; - } - } - - fence = cf ? &cf->base : !fence ? - xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence; - - return fence; - -err_fences: - if (fences) { - while (cur_fence) - dma_fence_put(fences[--cur_fence]); - kfree(fences); - } - - return ERR_PTR(err); -} - -static struct dma_fence * -xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs, - u8 tile_mask, bool first_op, bool last_op) -{ - struct xe_tile *tile; - struct dma_fence *fence; - struct dma_fence **fences = NULL; - struct dma_fence_array *cf = NULL; - struct xe_vm *vm = xe_vma_vm(vma); - int cur_fence = 0; - int number_tiles = hweight8(tile_mask); - int err; - u8 id; - - trace_xe_vma_bind(vma); - - if (number_tiles > 1) { - fences = kmalloc_array(number_tiles, sizeof(*fences), - GFP_KERNEL); - if (!fences) - return ERR_PTR(-ENOMEM); - } - - for_each_tile(tile, vm->xe, id) { - if (!(tile_mask & BIT(id))) - goto next; - - fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id], - first_op ? syncs : NULL, - first_op ? num_syncs : 0, - vma->tile_present & BIT(id)); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - goto err_fences; - } - - if (fences) - fences[cur_fence++] = fence; - -next: - if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list)) - q = list_next_entry(q, multi_gt_list); - } - - if (fences) { - cf = dma_fence_array_create(number_tiles, fences, - vm->composite_fence_ctx, - vm->composite_fence_seqno++, - false); - if (!cf) { - --vm->composite_fence_seqno; - err = -ENOMEM; - goto err_fences; - } - } - - return cf ? &cf->base : fence; - -err_fences: - if (fences) { - while (cur_fence) - dma_fence_put(fences[--cur_fence]); - kfree(fences); - } - - return ERR_PTR(err); -} - static struct xe_user_fence * find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) { @@ -1797,48 +1711,6 @@ find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) return NULL; } -static struct dma_fence * -xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs, - u8 tile_mask, bool immediate, bool first_op, bool last_op) -{ - struct dma_fence *fence; - struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); - - xe_vm_assert_held(vm); - xe_bo_assert_held(bo); - - if (immediate) { - fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, tile_mask, - first_op, last_op); - if (IS_ERR(fence)) - return fence; - } else { - xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); - - fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm); - } - - return fence; -} - -static struct dma_fence * -xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, - struct xe_exec_queue *q, struct xe_sync_entry *syncs, - u32 num_syncs, bool first_op, bool last_op) -{ - struct dma_fence *fence; - - xe_vm_assert_held(vm); - xe_bo_assert_held(xe_vma_bo(vma)); - - fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op); - if (IS_ERR(fence)) - return fence; - - return fence; -} - #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ DRM_XE_VM_CREATE_FLAG_LR_MODE | \ DRM_XE_VM_CREATE_FLAG_FAULT_MODE) @@ -1879,14 +1751,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && - xe_device_in_non_fault_mode(xe))) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) && - xe_device_in_fault_mode(xe))) - return -EINVAL; - if (XE_IOCTL_DBG(xe, args->extensions)) return -EINVAL; @@ -1901,25 +1765,18 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(vm)) return PTR_ERR(vm); - mutex_lock(&xef->vm.lock); - err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); - mutex_unlock(&xef->vm.lock); - if (err) - goto err_close_and_put; - if (xe->info.has_asid) { - mutex_lock(&xe->usm.lock); + down_write(&xe->usm.lock); err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, XA_LIMIT(1, XE_MAX_ASID - 1), &xe->usm.next_asid, GFP_KERNEL); - mutex_unlock(&xe->usm.lock); + up_write(&xe->usm.lock); if (err < 0) - goto err_free_id; + goto err_close_and_put; vm->usm.asid = asid; } - args->vm_id = id; vm->xef = xe_file_get(xef); /* Record BO memory for VM pagetable created against client */ @@ -1932,12 +1789,15 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); #endif + /* user id alloc must always be last in ioctl to prevent UAF */ + err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); + if (err) + goto err_close_and_put; + + args->vm_id = id; + return 0; -err_free_id: - mutex_lock(&xef->vm.lock); - xa_erase(&xef->vm.xa, id); - mutex_unlock(&xef->vm.lock); err_close_and_put: xe_vm_close_and_put(vm); @@ -1979,21 +1839,6 @@ static const u32 region_to_mem_type[] = { XE_PL_VRAM1, }; -static struct dma_fence * -xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, - struct xe_exec_queue *q, struct xe_sync_entry *syncs, - u32 num_syncs, bool first_op, bool last_op) -{ - struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); - - if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated)) { - return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs, - vma->tile_mask, true, first_op, last_op); - } else { - return xe_exec_queue_last_fence_get(wait_exec_queue, vm); - } -} - static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, bool post_commit) { @@ -2281,14 +2126,10 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) return err; } - -static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, - struct drm_gpuva_ops *ops, - struct xe_sync_entry *syncs, u32 num_syncs, - struct xe_vma_ops *vops, bool last) +static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, + struct xe_vma_ops *vops) { struct xe_device *xe = vm->xe; - struct xe_vma_op *last_op = NULL; struct drm_gpuva_op *__op; struct xe_tile *tile; u8 id, tile_mask = 0; @@ -2302,19 +2143,10 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, drm_gpuva_for_each_op(__op, ops) { struct xe_vma_op *op = gpuva_op_to_vma_op(__op); struct xe_vma *vma; - bool first = list_empty(&vops->list); unsigned int flags = 0; INIT_LIST_HEAD(&op->link); list_add_tail(&op->link, &vops->list); - - if (first) { - op->flags |= XE_VMA_OP_FIRST; - op->num_syncs = num_syncs; - op->syncs = syncs; - } - - op->q = q; op->tile_mask = tile_mask; switch (op->base.op) { @@ -2333,6 +2165,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, return PTR_ERR(vma); op->map.vma = vma; + if (op->map.immediate || !xe_vm_in_fault_mode(vm)) + xe_vma_ops_incr_pt_update_ops(vops, + op->tile_mask); break; } case DRM_GPUVA_OP_REMAP: @@ -2377,6 +2212,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", (ULL)op->remap.start, (ULL)op->remap.range); + } else { + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); } } @@ -2413,203 +2250,30 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", (ULL)op->remap.start, (ULL)op->remap.range); + } else { + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); } } + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); break; } case DRM_GPUVA_OP_UNMAP: case DRM_GPUVA_OP_PREFETCH: - /* Nothing to do */ + /* FIXME: Need to skip some prefetch ops */ + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); break; default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); } - last_op = op; - err = xe_vma_op_commit(vm, op); if (err) return err; } - /* FIXME: Unhandled corner case */ - XE_WARN_ON(!last_op && last && !list_empty(&vops->list)); - - if (!last_op) - return 0; - - if (last) { - last_op->flags |= XE_VMA_OP_LAST; - last_op->num_syncs = num_syncs; - last_op->syncs = syncs; - } - return 0; } -static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma, - struct xe_vma_op *op) -{ - struct dma_fence *fence = NULL; - - lockdep_assert_held(&vm->lock); - - xe_vm_assert_held(vm); - xe_bo_assert_held(xe_vma_bo(vma)); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), - op->syncs, op->num_syncs, - op->tile_mask, - op->map.immediate || !xe_vm_in_fault_mode(vm), - op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST); - break; - case DRM_GPUVA_OP_REMAP: - { - bool prev = !!op->remap.prev; - bool next = !!op->remap.next; - - if (!op->remap.unmap_done) { - if (prev || next) - vma->gpuva.flags |= XE_VMA_FIRST_REBIND; - fence = xe_vm_unbind(vm, vma, op->q, op->syncs, - op->num_syncs, - op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST && - !prev && !next); - if (IS_ERR(fence)) - break; - op->remap.unmap_done = true; - } - - if (prev) { - op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND; - dma_fence_put(fence); - fence = xe_vm_bind(vm, op->remap.prev, op->q, - xe_vma_bo(op->remap.prev), op->syncs, - op->num_syncs, - op->remap.prev->tile_mask, true, - false, - op->flags & XE_VMA_OP_LAST && !next); - op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND; - if (IS_ERR(fence)) - break; - op->remap.prev = NULL; - } - - if (next) { - op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND; - dma_fence_put(fence); - fence = xe_vm_bind(vm, op->remap.next, op->q, - xe_vma_bo(op->remap.next), - op->syncs, op->num_syncs, - op->remap.next->tile_mask, true, - false, op->flags & XE_VMA_OP_LAST); - op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND; - if (IS_ERR(fence)) - break; - op->remap.next = NULL; - } - - break; - } - case DRM_GPUVA_OP_UNMAP: - fence = xe_vm_unbind(vm, vma, op->q, op->syncs, - op->num_syncs, op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST); - break; - case DRM_GPUVA_OP_PREFETCH: - fence = xe_vm_prefetch(vm, vma, op->q, op->syncs, op->num_syncs, - op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - if (IS_ERR(fence)) - trace_xe_vma_fail(vma); - - return fence; -} - -static struct dma_fence * -__xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma, - struct xe_vma_op *op) -{ - struct dma_fence *fence; - int err; - -retry_userptr: - fence = op_execute(vm, vma, op); - if (IS_ERR(fence) && PTR_ERR(fence) == -EAGAIN) { - lockdep_assert_held_write(&vm->lock); - - if (op->base.op == DRM_GPUVA_OP_REMAP) { - if (!op->remap.unmap_done) - vma = gpuva_to_vma(op->base.remap.unmap->va); - else if (op->remap.prev) - vma = op->remap.prev; - else - vma = op->remap.next; - } - - if (xe_vma_is_userptr(vma)) { - err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); - if (!err) - goto retry_userptr; - - fence = ERR_PTR(err); - trace_xe_vma_fail(vma); - } - } - - return fence; -} - -static struct dma_fence * -xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) -{ - struct dma_fence *fence = ERR_PTR(-ENOMEM); - - lockdep_assert_held(&vm->lock); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - fence = __xe_vma_op_execute(vm, op->map.vma, op); - break; - case DRM_GPUVA_OP_REMAP: - { - struct xe_vma *vma; - - if (!op->remap.unmap_done) - vma = gpuva_to_vma(op->base.remap.unmap->va); - else if (op->remap.prev) - vma = op->remap.prev; - else - vma = op->remap.next; - - fence = __xe_vma_op_execute(vm, vma, op); - break; - } - case DRM_GPUVA_OP_UNMAP: - fence = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va), - op); - break; - case DRM_GPUVA_OP_PREFETCH: - fence = __xe_vma_op_execute(vm, - gpuva_to_vma(op->base.prefetch.va), - op); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return fence; -} - static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, bool post_commit, bool prev_post_commit, bool next_post_commit) @@ -2792,27 +2456,158 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, return err; } +#ifdef TEST_VM_OPS_ERROR + if (vops->inject_error && + vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) + return -ENOSPC; +#endif + return 0; } +static void op_trace(struct xe_vma_op *op) +{ + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + trace_xe_vma_bind(op->map.vma); + break; + case DRM_GPUVA_OP_REMAP: + trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va)); + if (op->remap.prev) + trace_xe_vma_bind(op->remap.prev); + if (op->remap.next) + trace_xe_vma_bind(op->remap.next); + break; + case DRM_GPUVA_OP_UNMAP: + trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va)); + break; + case DRM_GPUVA_OP_PREFETCH: + trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va)); + break; + default: + XE_WARN_ON("NOT POSSIBLE"); + } +} + +static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) +{ + struct xe_vma_op *op; + + list_for_each_entry(op, &vops->list, link) + op_trace(op); +} + +static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) +{ + struct xe_exec_queue *q = vops->q; + struct xe_tile *tile; + int number_tiles = 0; + u8 id; + + for_each_tile(tile, vm->xe, id) { + if (vops->pt_update_ops[id].num_ops) + ++number_tiles; + + if (vops->pt_update_ops[id].q) + continue; + + if (q) { + vops->pt_update_ops[id].q = q; + if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) + q = list_next_entry(q, multi_gt_list); + } else { + vops->pt_update_ops[id].q = vm->q[id]; + } + } + + return number_tiles; +} + static struct dma_fence *ops_execute(struct xe_vm *vm, struct xe_vma_ops *vops) { - struct xe_vma_op *op, *next; + struct xe_tile *tile; struct dma_fence *fence = NULL; + struct dma_fence **fences = NULL; + struct dma_fence_array *cf = NULL; + int number_tiles = 0, current_fence = 0, err; + u8 id; - list_for_each_entry_safe(op, next, &vops->list, link) { - dma_fence_put(fence); - fence = xe_vma_op_execute(vm, op); - if (IS_ERR(fence)) { - drm_warn(&vm->xe->drm, "VM op(%d) failed with %ld", - op->base.op, PTR_ERR(fence)); - fence = ERR_PTR(-ENOSPC); - break; + number_tiles = vm_ops_setup_tile_args(vm, vops); + if (number_tiles == 0) + return ERR_PTR(-ENODATA); + + if (number_tiles > 1) { + fences = kmalloc_array(number_tiles, sizeof(*fences), + GFP_KERNEL); + if (!fences) { + fence = ERR_PTR(-ENOMEM); + goto err_trace; + } + } + + for_each_tile(tile, vm->xe, id) { + if (!vops->pt_update_ops[id].num_ops) + continue; + + err = xe_pt_update_ops_prepare(tile, vops); + if (err) { + fence = ERR_PTR(err); + goto err_out; + } + } + + trace_xe_vm_ops_execute(vops); + + for_each_tile(tile, vm->xe, id) { + if (!vops->pt_update_ops[id].num_ops) + continue; + + fence = xe_pt_update_ops_run(tile, vops); + if (IS_ERR(fence)) + goto err_out; + + if (fences) + fences[current_fence++] = fence; + } + + if (fences) { + cf = dma_fence_array_create(number_tiles, fences, + vm->composite_fence_ctx, + vm->composite_fence_seqno++, + false); + if (!cf) { + --vm->composite_fence_seqno; + fence = ERR_PTR(-ENOMEM); + goto err_out; } + fence = &cf->base; + } + + for_each_tile(tile, vm->xe, id) { + if (!vops->pt_update_ops[id].num_ops) + continue; + + xe_pt_update_ops_fini(tile, vops); } return fence; + +err_out: + for_each_tile(tile, vm->xe, id) { + if (!vops->pt_update_ops[id].num_ops) + continue; + + xe_pt_update_ops_abort(tile, vops); + } + while (current_fence) + dma_fence_put(fences[--current_fence]); + kfree(fences); + kfree(cf); + +err_trace: + trace_xe_vm_ops_fail(vm); + return fence; } static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) @@ -2892,12 +2687,10 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, fence = ops_execute(vm, vops); if (IS_ERR(fence)) { err = PTR_ERR(fence); - /* FIXME: Killing VM rather than proper error handling */ - xe_vm_kill(vm, false); goto unlock; - } else { - vm_bind_ioctl_ops_fini(vm, vops, fence); } + + vm_bind_ioctl_ops_fini(vm, vops, fence); } unlock: @@ -2905,11 +2698,18 @@ unlock: return err; } -#define SUPPORTED_FLAGS \ +#define SUPPORTED_FLAGS_STUB \ (DRM_XE_VM_BIND_FLAG_READONLY | \ DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ DRM_XE_VM_BIND_FLAG_NULL | \ DRM_XE_VM_BIND_FLAG_DUMPABLE) + +#ifdef TEST_VM_OPS_ERROR +#define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) +#else +#define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB +#endif + #define XE_64K_PAGE_MASK 0xffffull #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) @@ -2935,7 +2735,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, sizeof(struct drm_xe_vm_bind_op), GFP_KERNEL | __GFP_ACCOUNT); if (!*bind_ops) - return -ENOMEM; + return args->num_binds > 1 ? -ENOBUFS : -ENOMEM; err = __copy_from_user(*bind_ops, bind_user, sizeof(struct drm_xe_vm_bind_op) * @@ -3074,7 +2874,16 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, return -EINVAL; } - if (bo->flags & XE_BO_FLAG_INTERNAL_64K) { + /* + * Some platforms require 64k VM_BIND alignment, + * specifically those with XE_VRAM_FLAGS_NEED64K. + * + * Other platforms may have BO's set to 64k physical placement, + * but can be mapped at 4k offsets anyway. This check is only + * there for the former case. + */ + if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) && + (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) { if (XE_IOCTL_DBG(xe, obj_offset & XE_64K_PAGE_MASK) || XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || @@ -3254,10 +3063,18 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto unwind_ops; } - err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs, - &vops, i == args->num_binds - 1); + err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); if (err) goto unwind_ops; + +#ifdef TEST_VM_OPS_ERROR + if (flags & FORCE_OP_ERROR) { + vops.inject_error = true; + vm->xe->vm_inject_error_position = + (vm->xe->vm_inject_error_position + 1) % + FORCE_OP_ERROR_COUNT; + } +#endif } /* Nothing to do */ @@ -3266,11 +3083,16 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto unwind_ops; } + err = xe_vma_ops_alloc(&vops, args->num_binds > 1); + if (err) + goto unwind_ops; + err = vm_bind_ioctl_ops_execute(vm, &vops); unwind_ops: if (err && err != -ENODATA) vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); + xe_vma_ops_fini(&vops); for (i = args->num_binds - 1; i >= 0; --i) if (ops[i]) drm_gpuva_ops_free(&vm->gpuvm, ops[i]); @@ -3341,9 +3163,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) { struct xe_device *xe = xe_vma_vm(vma)->xe; struct xe_tile *tile; - struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE]; - u32 tile_needs_invalidate = 0; + struct xe_gt_tlb_invalidation_fence + fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; u8 id; + u32 fence_id = 0; int ret = 0; xe_assert(xe, !xe_vma_is_null(vma)); @@ -3371,27 +3194,33 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) if (xe_pt_zap_ptes(tile, vma)) { xe_device_wmb(xe); xe_gt_tlb_invalidation_fence_init(tile->primary_gt, - &fence[id], true); + &fence[fence_id], + true); - /* - * FIXME: We potentially need to invalidate multiple - * GTs within the tile - */ ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, - &fence[id], vma); - if (ret < 0) { - xe_gt_tlb_invalidation_fence_fini(&fence[id]); + &fence[fence_id], vma); + if (ret) goto wait; - } + ++fence_id; - tile_needs_invalidate |= BIT(id); + if (!tile->media_gt) + continue; + + xe_gt_tlb_invalidation_fence_init(tile->media_gt, + &fence[fence_id], + true); + + ret = xe_gt_tlb_invalidation_vma(tile->media_gt, + &fence[fence_id], vma); + if (ret) + goto wait; + ++fence_id; } } wait: - for_each_tile(tile, xe, id) - if (tile_needs_invalidate & BIT(id)) - xe_gt_tlb_invalidation_fence_wait(&fence[id]); + for (id = 0; id < fence_id; ++id) + xe_gt_tlb_invalidation_fence_wait(&fence[id]); vma->tile_invalidated = vma->tile_mask; diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index b481608b12f1..c864dba35e1d 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -259,6 +259,8 @@ static inline struct dma_resv *xe_vm_resv(struct xe_vm *vm) return drm_gpuvm_resv(&vm->gpuvm); } +void xe_vm_kill(struct xe_vm *vm, bool unlocked); + /** * xe_vm_assert_held(vm) - Assert that the vm's reservation object is held. * @vm: The vm diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index ce1a63a5e3e7..7f9a303e51d8 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -21,18 +21,27 @@ struct xe_bo; struct xe_sync_entry; struct xe_user_fence; struct xe_vm; +struct xe_vm_pgtable_update_op; + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +#define TEST_VM_OPS_ERROR +#define FORCE_OP_ERROR BIT(31) + +#define FORCE_OP_ERROR_LOCK 0 +#define FORCE_OP_ERROR_PREPARE 1 +#define FORCE_OP_ERROR_RUN 2 +#define FORCE_OP_ERROR_COUNT 3 +#endif #define XE_VMA_READ_ONLY DRM_GPUVA_USERBITS #define XE_VMA_DESTROYED (DRM_GPUVA_USERBITS << 1) #define XE_VMA_ATOMIC_PTE_BIT (DRM_GPUVA_USERBITS << 2) -#define XE_VMA_FIRST_REBIND (DRM_GPUVA_USERBITS << 3) -#define XE_VMA_LAST_REBIND (DRM_GPUVA_USERBITS << 4) -#define XE_VMA_PTE_4K (DRM_GPUVA_USERBITS << 5) -#define XE_VMA_PTE_2M (DRM_GPUVA_USERBITS << 6) -#define XE_VMA_PTE_1G (DRM_GPUVA_USERBITS << 7) -#define XE_VMA_PTE_64K (DRM_GPUVA_USERBITS << 8) -#define XE_VMA_PTE_COMPACT (DRM_GPUVA_USERBITS << 9) -#define XE_VMA_DUMPABLE (DRM_GPUVA_USERBITS << 10) +#define XE_VMA_PTE_4K (DRM_GPUVA_USERBITS << 3) +#define XE_VMA_PTE_2M (DRM_GPUVA_USERBITS << 4) +#define XE_VMA_PTE_1G (DRM_GPUVA_USERBITS << 5) +#define XE_VMA_PTE_64K (DRM_GPUVA_USERBITS << 6) +#define XE_VMA_PTE_COMPACT (DRM_GPUVA_USERBITS << 7) +#define XE_VMA_DUMPABLE (DRM_GPUVA_USERBITS << 8) /** struct xe_userptr - User pointer */ struct xe_userptr { @@ -99,6 +108,9 @@ struct xe_vma { */ u8 tile_present; + /** @tile_staged: bind is staged for this VMA */ + u8 tile_staged; + /** * @pat_index: The pat index to use when encoding the PTEs for this vma. */ @@ -314,31 +326,18 @@ struct xe_vma_op_prefetch { /** enum xe_vma_op_flags - flags for VMA operation */ enum xe_vma_op_flags { - /** @XE_VMA_OP_FIRST: first VMA operation for a set of syncs */ - XE_VMA_OP_FIRST = BIT(0), - /** @XE_VMA_OP_LAST: last VMA operation for a set of syncs */ - XE_VMA_OP_LAST = BIT(1), /** @XE_VMA_OP_COMMITTED: VMA operation committed */ - XE_VMA_OP_COMMITTED = BIT(2), + XE_VMA_OP_COMMITTED = BIT(0), /** @XE_VMA_OP_PREV_COMMITTED: Previous VMA operation committed */ - XE_VMA_OP_PREV_COMMITTED = BIT(3), + XE_VMA_OP_PREV_COMMITTED = BIT(1), /** @XE_VMA_OP_NEXT_COMMITTED: Next VMA operation committed */ - XE_VMA_OP_NEXT_COMMITTED = BIT(4), + XE_VMA_OP_NEXT_COMMITTED = BIT(2), }; /** struct xe_vma_op - VMA operation */ struct xe_vma_op { /** @base: GPUVA base operation */ struct drm_gpuva_op base; - /** @q: exec queue for this operation */ - struct xe_exec_queue *q; - /** - * @syncs: syncs for this operation, only used on first and last - * operation - */ - struct xe_sync_entry *syncs; - /** @num_syncs: number of syncs */ - u32 num_syncs; /** @link: async operation link */ struct list_head link; /** @flags: operation flags */ @@ -362,12 +361,18 @@ struct xe_vma_ops { struct list_head list; /** @vm: VM */ struct xe_vm *vm; - /** @q: exec queue these operations */ + /** @q: exec queue for VMA operations */ struct xe_exec_queue *q; /** @syncs: syncs these operation */ struct xe_sync_entry *syncs; /** @num_syncs: number of syncs */ u32 num_syncs; + /** @pt_update_ops: page table update operations */ + struct xe_vm_pgtable_update_ops pt_update_ops[XE_MAX_TILES_PER_DEVICE]; +#ifdef TEST_VM_OPS_ERROR + /** @inject_error: inject error to test error handling */ + bool inject_error; +#endif }; #endif diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index 5bcd59190353..80ba2fc78837 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -182,6 +182,7 @@ static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size) offset = offset_hi << 32; /* HW view bits 39:32 */ offset |= offset_lo << 6; /* HW view bits 31:6 */ offset *= num_enabled; /* convert to SW view */ + offset = round_up(offset, SZ_128K); /* SW must round up to nearest 128K */ /* We don't expect any holes */ xe_assert_msg(xe, offset == (xe_mmio_read64_2x32(gt, GSMBASE) - ccs_size), diff --git a/drivers/gpu/drm/xe/xe_vram_freq.c b/drivers/gpu/drm/xe/xe_vram_freq.c index 99ff95e408e0..b26e26d73dae 100644 --- a/drivers/gpu/drm/xe/xe_vram_freq.c +++ b/drivers/gpu/drm/xe/xe_vram_freq.c @@ -34,7 +34,6 @@ static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_tile *tile = dev_to_tile(dev); - struct xe_gt *gt = tile->primary_gt; u32 val, mbox; int err; @@ -42,7 +41,7 @@ static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr, | REG_FIELD_PREP(PCODE_MB_PARAM1, PCODE_MBOX_FC_SC_READ_FUSED_P0) | REG_FIELD_PREP(PCODE_MB_PARAM2, PCODE_MBOX_DOMAIN_HBM); - err = xe_pcode_read(gt, mbox, &val, NULL); + err = xe_pcode_read(tile, mbox, &val, NULL); if (err) return err; @@ -57,7 +56,6 @@ static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_tile *tile = dev_to_tile(dev); - struct xe_gt *gt = tile->primary_gt; u32 val, mbox; int err; @@ -65,7 +63,7 @@ static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr, | REG_FIELD_PREP(PCODE_MB_PARAM1, PCODE_MBOX_FC_SC_READ_FUSED_PN) | REG_FIELD_PREP(PCODE_MB_PARAM2, PCODE_MBOX_DOMAIN_HBM); - err = xe_pcode_read(gt, mbox, &val, NULL); + err = xe_pcode_read(tile, mbox, &val, NULL); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index c7bf0862b231..353936a0f877 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -486,6 +486,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE)) }, + { XE_RTP_NAME("14021402888"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) + }, /* Xe2_HPG */ @@ -538,6 +542,20 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) }, + { XE_RTP_NAME("14021821874"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, STK_ID_RESTRICT)) + }, + + /* Xe2_LPM */ + + { XE_RTP_NAME("16021639441"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), + GHWSP_CSB_REPORT_DIS | + PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, /* Xe2_HPM */ @@ -692,6 +710,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_PARTIAL_AUTOSTRIP | DIS_AUTOSTRIP)) }, + { XE_RTP_NAME("15016589081"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) + }, /* Xe2_HPG */ { XE_RTP_NAME("15010599737"), @@ -715,6 +737,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_PARTIAL_AUTOSTRIP | DIS_AUTOSTRIP)) }, + { XE_RTP_NAME("15016589081"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) + }, {} }; @@ -741,6 +767,7 @@ void xe_wa_process_oob(struct xe_gt *gt) xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.oob, ARRAY_SIZE(oob_was)); + gt->wa_active.oob_initialized = true; xe_rtp_process(&ctx, oob_was); } diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h index db9ddeaf69bf..52337405b5bc 100644 --- a/drivers/gpu/drm/xe/xe_wa.h +++ b/drivers/gpu/drm/xe/xe_wa.h @@ -6,6 +6,8 @@ #ifndef _XE_WA_ #define _XE_WA_ +#include "xe_assert.h" + struct drm_printer; struct xe_gt; struct xe_hw_engine; @@ -25,6 +27,9 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p); * @gt__: gt instance * @id__: XE_OOB_<id__>, as generated by build system in generated/xe_wa_oob.h */ -#define XE_WA(gt__, id__) test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob) +#define XE_WA(gt__, id__) ({ \ + xe_gt_assert(gt__, (gt__)->wa_active.oob_initialized); \ + test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob); \ +}) #endif diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 26066beb4f6f..920ca5060146 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -27,5 +27,13 @@ 16022287689 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) 13011645652 GRAPHICS_VERSION(2004) +14022293748 GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2004) +22019794406 GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2004) 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001) +22019338487_display PLATFORM(LUNARLAKE) +16023588340 GRAPHICS_VERSION(2001) +14019789679 GRAPHICS_VERSION(1255) + GRAPHICS_VERSION_RANGE(1270, 2004) diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index f69721339201..5b4264ea38bd 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -8,7 +8,7 @@ #include <drm/drm_device.h> #include <drm/drm_file.h> #include <drm/drm_utils.h> -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> #include "xe_device.h" #include "xe_gt.h" @@ -155,6 +155,13 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, } if (!timeout) { + LNL_FLUSH_WORKQUEUE(xe->ordered_wq); + err = do_compare(addr, args->value, args->mask, + args->op); + if (err <= 0) { + drm_dbg(&xe->drm, "LNL_FLUSH_WORKQUEUE resolved ufence timeout\n"); + break; + } err = -ETIME; break; } @@ -169,9 +176,6 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, args->timeout = 0; } - if (!timeout && !(err < 0)) - err = -ETIME; - if (q) xe_exec_queue_put(q); |