diff options
author | Tao Zhou <tao.zhou1@amd.com> | 2024-10-18 18:58:54 +0800 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2024-12-10 10:26:46 -0500 |
commit | c3d4acf0c3bbba4eb24812f12412d642fa3d5378 (patch) | |
tree | b8b5db1c5dc8f608d16e73f700223a036964340f /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | |
parent | e1ee2111ca48169a9fdc5075f7863f5d4d591e2f (diff) | |
download | linux-c3d4acf0c3bbba4eb24812f12412d642fa3d5378.tar.gz linux-c3d4acf0c3bbba4eb24812f12412d642fa3d5378.tar.bz2 linux-c3d4acf0c3bbba4eb24812f12412d642fa3d5378.zip |
drm/amdgpu: store only one RAS bad page record for all pages in one row
So eeprom space can be saved, compatible with legacy way.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 35 |
1 files changed, 27 insertions, 8 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 4df9a8dfe9eb..882a33e134d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2849,7 +2849,7 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; struct amdgpu_ras_eeprom_control *control; - int save_count; + int save_count, unit_num, bad_page_num, i; if (!con || !con->eh_data) { if (new_cnt) @@ -2861,19 +2861,38 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, mutex_lock(&con->recovery_lock); control = &con->eeprom_control; data = con->eh_data; - save_count = data->count - control->ras_num_recs; + bad_page_num = control->ras_num_recs; + /* one record on eeprom stands for all pages in one memory row + * in this mode + */ + if (control->rec_type == AMDGPU_RAS_EEPROM_REC_MCA) + bad_page_num = control->ras_num_recs * adev->umc.retire_unit; + + save_count = data->count - bad_page_num; mutex_unlock(&con->recovery_lock); + unit_num = save_count / adev->umc.retire_unit; if (new_cnt) - *new_cnt = save_count / adev->umc.retire_unit; + *new_cnt = unit_num; /* only new entries are saved */ if (save_count > 0) { - if (amdgpu_ras_eeprom_append(control, - &data->bps[control->ras_num_recs], - save_count)) { - dev_err(adev->dev, "Failed to save EEPROM table data!"); - return -EIO; + if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA) { + if (amdgpu_ras_eeprom_append(control, + &data->bps[control->ras_num_recs], + save_count)) { + dev_err(adev->dev, "Failed to save EEPROM table data!"); + return -EIO; + } + } else { + for (i = 0; i < unit_num; i++) { + if (amdgpu_ras_eeprom_append(control, + &data->bps[bad_page_num + i * adev->umc.retire_unit], + 1)) { + dev_err(adev->dev, "Failed to save EEPROM table data!"); + return -EIO; + } + } } dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count); |