summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd
diff options
context:
space:
mode:
authorLijo Lazar <lijo.lazar@amd.com>2024-03-21 19:19:48 +0530
committerAlex Deucher <alexander.deucher@amd.com>2024-04-09 22:13:50 -0400
commit301661b00a0582555d38c7ea0f94235729b00963 (patch)
treecbe5d0f42ce088bd40a0e00b4fbf85bfdf776f27 /drivers/gpu/drm/amd
parent7b8081ea67533628310bb23b84f887fca4319a86 (diff)
downloadlinux-stable-301661b00a0582555d38c7ea0f94235729b00963.tar.gz
linux-stable-301661b00a0582555d38c7ea0f94235729b00963.tar.bz2
linux-stable-301661b00a0582555d38c7ea0f94235729b00963.zip
drm/amd/pm: Add special handling for RAS messages
When a RAS fatal error is detected, PMFW will only process priority messages. Other messages won't be taken up for processing and therefore won't get any response in such a state. Add logic to filter out non-priority messages when RAS error is detected. Also, don't poll response response status register before sending priority messages. Use firmware capability flag to determine whether to filter priority messages. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Reviewed-by: Asad Kamal <asad.kamal@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd')
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c65
1 files changed, 59 insertions, 6 deletions
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 3227e514e8ae..6d1c3af927ca 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -235,6 +235,50 @@ static void __smu_cmn_send_msg(struct smu_context *smu,
WREG32(smu->msg_reg, msg);
}
+static inline uint32_t __smu_cmn_get_msg_flags(struct smu_context *smu,
+ enum smu_message_type msg)
+{
+ return smu->message_map[msg].flags;
+}
+
+static int __smu_cmn_ras_filter_msg(struct smu_context *smu,
+ enum smu_message_type msg, bool *poll)
+{
+ struct amdgpu_device *adev = smu->adev;
+ uint32_t flags, resp;
+ bool fed_status;
+
+ flags = __smu_cmn_get_msg_flags(smu, msg);
+ *poll = true;
+
+ /* When there is RAS fatal error, FW won't process non-RAS priority
+ * messages. Don't allow any messages other than RAS priority messages.
+ */
+ fed_status = amdgpu_ras_get_fed_status(adev);
+ if (fed_status) {
+ if (!(flags & SMU_MSG_RAS_PRI)) {
+ dev_dbg(adev->dev,
+ "RAS error detected, skip sending %s",
+ smu_get_message_name(smu, msg));
+ return -EACCES;
+ }
+
+ /* FW will ignore non-priority messages when a RAS fatal error
+ * is detected. Hence it is possible that a previous message
+ * wouldn't have got response. Allow to continue without polling
+ * for response status for priority messages.
+ */
+ resp = RREG32(smu->resp_reg);
+ dev_dbg(adev->dev,
+ "Sending RAS priority message %s response status: %x",
+ smu_get_message_name(smu, msg), resp);
+ if (resp == 0)
+ *poll = false;
+ }
+
+ return 0;
+}
+
static int __smu_cmn_send_debug_msg(struct smu_context *smu,
u32 msg,
u32 param)
@@ -354,6 +398,7 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu,
{
struct amdgpu_device *adev = smu->adev;
int res, index;
+ bool poll = true;
u32 reg;
if (adev->no_hw_access)
@@ -366,12 +411,20 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu,
return index == -EACCES ? 0 : index;
mutex_lock(&smu->message_lock);
- reg = __smu_cmn_poll_stat(smu);
- res = __smu_cmn_reg2errno(smu, reg);
- if (reg == SMU_RESP_NONE ||
- res == -EREMOTEIO) {
- __smu_cmn_reg_print_error(smu, reg, index, param, msg);
- goto Out;
+
+ if (smu->smc_fw_caps & SMU_FW_CAP_RAS_PRI) {
+ res = __smu_cmn_ras_filter_msg(smu, msg, &poll);
+ if (res)
+ goto Out;
+ }
+
+ if (poll) {
+ reg = __smu_cmn_poll_stat(smu);
+ res = __smu_cmn_reg2errno(smu, reg);
+ if (reg == SMU_RESP_NONE || res == -EREMOTEIO) {
+ __smu_cmn_reg_print_error(smu, reg, index, param, msg);
+ goto Out;
+ }
}
__smu_cmn_send_msg(smu, (uint16_t) index, param);
reg = __smu_cmn_poll_stat(smu);