diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 148 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 29 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process.c | 10 |
4 files changed, 190 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index ee086a0a46df..826a99acb6fb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1488,6 +1488,11 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep,
 		goto out_unlock;
 	}
 
+	if (!kfd_dbg_has_gws_support(dev) && p->debug_trap_enabled) {
+		retval = -EBUSY; /* debug trap is enabled and this device lacks GWS support with it */
+		goto out_unlock;
+	}
+
 	retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
 	mutex_unlock(&p->mutex);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 898cc1fe3d13..73b07b5f17f1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -21,13 +21,78 @@
  */
 
 #include "kfd_debug.h"
+#include "kfd_device_queue_manager.h"
 #include <linux/file.h>
 
+static int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd)
+{
+	uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
+	uint32_t flags = pdd->process->dbg_flags;
+
+	if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
+		return 0; /* no-op when per-VMID debug is not supported */
+
+	return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd->proc_ctx_gpu_addr, spi_dbg_cntl,
+						pdd->watch_points, flags);
+}
+
+/* kfd_dbg_trap_deactivate: disable the debug trap on the target's devices
+ *	target: target process
+ *	unwind: true when unwinding a failed kfd_dbg_trap_activate()
+ *	unwind_count:
+ *		If unwind == true, number of leading pdds to
+ *			unwind (the loop stops at this index)
+ *		else: ignored
+ */
+static void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind_count)
+{
+	int i;
+
+	for (i = 0; i < target->n_pdds; i++) {
+		struct kfd_process_device *pdd = target->pdds[i];
+
+		/* If this is an unwind, and we have unwound the required
+		 * enable calls on the pdd list, we need to stop now
+		 * otherwise we may mess up another debugger session.
+		 */
+		if (unwind && i == unwind_count)
+			break;
+
+		/* GFX off is already disabled by debug activate if not RLC restore supported. */
+		if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
+			amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+		pdd->spi_dbg_override =
+				pdd->dev->kfd2kgd->disable_debug_trap(
+				pdd->dev->adev,
+				target->runtime_info.ttmp_setup,
+				pdd->dev->vm_info.last_vmid_kfd);
+		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+
+		if (!kfd_dbg_is_per_vmid_supported(pdd->dev) &&
+				release_debug_trap_vmid(pdd->dev->dqm, &pdd->qpd))
+			pr_err("Failed to release debug vmid on [%i]\n", pdd->dev->id);
+
+		if (!pdd->dev->kfd->shared_resources.enable_mes)
+			debug_refresh_runlist(pdd->dev->dqm);
+		else
+			kfd_dbg_set_mes_debug_mode(pdd);
+	}
+}
+
 int kfd_dbg_trap_disable(struct kfd_process *target)
 {
 	if (!target->debug_trap_enabled)
 		return 0;
 
+	/*
+	 * Deactivate now only when the runtime state is ENABLED. Any other state
+	 * except DISABLED is reset to ENABLED so that a debugger can re-attach.
+	 */
+	if (target->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED)
+		kfd_dbg_trap_deactivate(target, false, 0);
+	else if (target->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_DISABLED)
+		target->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
+
 	fput(target->dbg_ev_file);
 	target->dbg_ev_file = NULL;
 
@@ -42,16 +107,89 @@ int kfd_dbg_trap_disable(struct kfd_process *target)
 	return 0;
 }
 
+static int kfd_dbg_trap_activate(struct kfd_process *target)
+{
+	int i, r = 0;
+
+	for (i = 0; i < target->n_pdds; i++) {
+		struct kfd_process_device *pdd = target->pdds[i];
+
+		if (!kfd_dbg_is_per_vmid_supported(pdd->dev)) {
+			r = reserve_debug_trap_vmid(pdd->dev->dqm, &pdd->qpd);
+
+			if (r) {
+				target->runtime_info.runtime_state = (r == -EBUSY) ?
+							DEBUG_RUNTIME_STATE_ENABLED_BUSY :
+							DEBUG_RUNTIME_STATE_ENABLED_ERROR;
+
+				goto unwind_err;
+			}
+		}
+
+		/* Disable GFX OFF to prevent garbage read/writes to debug registers.
+		 * If RLC restore of debug registers is not supported and runtime enable
+		 * hasn't done so already on ttmp setup request, restore the trap config registers.
+		 *
+		 * If RLC restore of debug registers is not supported, keep gfx off disabled for
+		 * the debug session.
+		 */
+		amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+		if (!(kfd_dbg_is_rlc_restore_supported(pdd->dev) ||
+						target->runtime_info.ttmp_setup))
+			pdd->dev->kfd2kgd->enable_debug_trap(pdd->dev->adev, true,
+								pdd->dev->vm_info.last_vmid_kfd);
+
+		pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap(
+					pdd->dev->adev,
+					false,
+					pdd->dev->vm_info.last_vmid_kfd);
+
+		if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
+			amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+
+		if (!pdd->dev->kfd->shared_resources.enable_mes)
+			r = debug_refresh_runlist(pdd->dev->dqm);
+		else
+			r = kfd_dbg_set_mes_debug_mode(pdd);
+
+		if (r) {
+			target->runtime_info.runtime_state =
+					DEBUG_RUNTIME_STATE_ENABLED_ERROR;
+			goto unwind_err;
+		}
+	}
+
+	return 0;
+
+unwind_err:
+	/* Enabling debug failed: deactivate devices [0, i) so the enable is all or nothing.
+	 * NOTE(review): device i itself is skipped even if its vmid was reserved - confirm.
+	 */
+	kfd_dbg_trap_deactivate(target, true, i);
+	return r;
+}
+
 int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
 			void __user *runtime_info, uint32_t *runtime_size)
 {
 	struct file *f;
 	uint32_t copy_size;
-	int r = 0;
+	int i, r = 0;
 
 	if (target->debug_trap_enabled)
 		return -EALREADY;
 
+	/* Pre-checks: reject non-SOC15 devices, and devices using GWS without debug GWS support */
+	for (i = 0; i < target->n_pdds; i++) {
+		struct kfd_process_device *pdd = target->pdds[i];
+
+		if (!KFD_IS_SOC15(pdd->dev))
+			return -ENODEV;
+
+		if (!kfd_dbg_has_gws_support(pdd->dev) && pdd->qpd.num_gws)
+			return -EBUSY;
+	}
+
 	copy_size = min((size_t)(*runtime_size), sizeof(target->runtime_info));
 
 	f = fget(fd);
@@ -62,6 +200,10 @@ int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
 
 	target->dbg_ev_file = f;
 
+	/* Activate now only if the runtime state is already ENABLED; otherwise it is deferred */
+	if (target->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED)
+		kfd_dbg_trap_activate(target); /* result unchecked; failures are recorded in runtime_state */
+
 	/* We already hold the process reference but hold another one for the
 	 * debug session.
 	 */
@@ -71,8 +213,10 @@ int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
 	if (target->debugger_process)
 		atomic_inc(&target->debugger_process->debugged_process_count);
 
-	if (copy_to_user(runtime_info, (void *)&target->runtime_info, copy_size))
+	if (copy_to_user(runtime_info, (void *)&target->runtime_info, copy_size)) {
+		kfd_dbg_trap_deactivate(target, false, 0); /* NOTE(review): runs even if activation was deferred above - confirm */
 		r = -EFAULT;
+	}
 
 	*runtime_size = sizeof(target->runtime_info);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index db6d72e7930f..17481f824647 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -34,4 +34,33 @@ static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
 	return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2);
 }
 
+/*
+ * If GFX off is enabled, chips that do not support RLC restore for the debug
+ * registers will disable GFX off temporarily for the entire debug session.
+ * See disable_on_trap_action_entry and enable_on_trap_action_exit for details.
+ */
+static inline bool kfd_dbg_is_rlc_restore_supported(struct kfd_node *dev)
+{
+	return !(KFD_GC_VERSION(dev) == IP_VERSION(10, 1, 10) ||
+		 KFD_GC_VERSION(dev) == IP_VERSION(10, 1, 1));
+}
+
+static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev)
+{
+	if ((KFD_GC_VERSION(dev) == IP_VERSION(9, 0, 1)
+			&& dev->kfd->mec2_fw_version < 0x81b6) ||
+		(KFD_GC_VERSION(dev) >= IP_VERSION(9, 1, 0)
+			&& KFD_GC_VERSION(dev) <= IP_VERSION(9, 2, 2)
+			&& dev->kfd->mec2_fw_version < 0x1b6) ||
+		(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0)
+			&& dev->kfd->mec2_fw_version < 0x1b6) ||
+		(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1)
+			&& dev->kfd->mec2_fw_version < 0x30) ||
+		(KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
+			KFD_GC_VERSION(dev) < IP_VERSION(12, 0, 0)))
+		return false;
+
+	/* Any GC version / MEC2 firmware not excluded above is assumed to support debugging with GWS. */
+	return true;
+}
 #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 725d936b2cc7..e77cadadb09b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1165,9 +1165,19 @@ static void kfd_process_free_notifier(struct mmu_notifier *mn)
 
 static void kfd_process_notifier_release_internal(struct kfd_process *p)
 {
+	int i;
+
 	cancel_delayed_work_sync(&p->eviction_work);
 	cancel_delayed_work_sync(&p->restore_work);
 
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+
+		/* re-enable GFX OFF since runtime enable with ttmp setup disabled it. */
+		if (!kfd_dbg_is_rlc_restore_supported(pdd->dev) && p->runtime_info.ttmp_setup)
+			amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+	}
+
 	/* Indicate to other users that MM is no longer valid */
 	p->mm = NULL;
 	kfd_dbg_trap_disable(p); |