summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
diff options
context:
space:
mode:
authorTao Zhou <tao.zhou1@amd.com>2024-10-18 18:58:54 +0800
committerAlex Deucher <alexander.deucher@amd.com>2024-12-10 10:26:46 -0500
commitc3d4acf0c3bbba4eb24812f12412d642fa3d5378 (patch)
treeb8b5db1c5dc8f608d16e73f700223a036964340f /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parente1ee2111ca48169a9fdc5075f7863f5d4d591e2f (diff)
downloadlinux-c3d4acf0c3bbba4eb24812f12412d642fa3d5378.tar.gz
linux-c3d4acf0c3bbba4eb24812f12412d642fa3d5378.tar.bz2
linux-c3d4acf0c3bbba4eb24812f12412d642fa3d5378.zip
drm/amdgpu: store only one RAS bad page record for all pages in one row
So eeprom space can be saved, compatible with legacy way. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c35
1 files changed, 27 insertions, 8 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 4df9a8dfe9eb..882a33e134d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2849,7 +2849,7 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data;
struct amdgpu_ras_eeprom_control *control;
- int save_count;
+ int save_count, unit_num, bad_page_num, i;
if (!con || !con->eh_data) {
if (new_cnt)
@@ -2861,19 +2861,38 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
mutex_lock(&con->recovery_lock);
control = &con->eeprom_control;
data = con->eh_data;
- save_count = data->count - control->ras_num_recs;
+ bad_page_num = control->ras_num_recs;
+ /* one record on eeprom stands for all pages in one memory row
+ * in this mode
+ */
+ if (control->rec_type == AMDGPU_RAS_EEPROM_REC_MCA)
+ bad_page_num = control->ras_num_recs * adev->umc.retire_unit;
+
+ save_count = data->count - bad_page_num;
mutex_unlock(&con->recovery_lock);
+ unit_num = save_count / adev->umc.retire_unit;
if (new_cnt)
- *new_cnt = save_count / adev->umc.retire_unit;
+ *new_cnt = unit_num;
/* only new entries are saved */
if (save_count > 0) {
- if (amdgpu_ras_eeprom_append(control,
- &data->bps[control->ras_num_recs],
- save_count)) {
- dev_err(adev->dev, "Failed to save EEPROM table data!");
- return -EIO;
+ if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA) {
+ if (amdgpu_ras_eeprom_append(control,
+ &data->bps[control->ras_num_recs],
+ save_count)) {
+ dev_err(adev->dev, "Failed to save EEPROM table data!");
+ return -EIO;
+ }
+ } else {
+ for (i = 0; i < unit_num; i++) {
+ if (amdgpu_ras_eeprom_append(control,
+ &data->bps[bad_page_num + i * adev->umc.retire_unit],
+ 1)) {
+ dev_err(adev->dev, "Failed to save EEPROM table data!");
+ return -EIO;
+ }
+ }
}
dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count);