summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
diff options
context:
space:
mode:
authorganglxie <ganglxie@amd.com>2025-02-24 15:06:51 +0800
committerAlex Deucher <alexander.deucher@amd.com>2025-02-25 11:45:12 -0500
commita8f921a10a8c2e2b209ad9ed1f1b7f48192c30aa (patch)
tree1827acc7e00f65372c6569c2af7214414c5fa67b /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parent0153d27673ac5c122d2437c1e573923963abd181 (diff)
downloadlinux-a8f921a10a8c2e2b209ad9ed1f1b7f48192c30aa.tar.gz
linux-a8f921a10a8c2e2b209ad9ed1f1b7f48192c30aa.tar.bz2
linux-a8f921a10a8c2e2b209ad9ed1f1b7f48192c30aa.zip
drm/amdgpu: Change page/record number calculation based on nps
Save only one record to save EEPROM space, and bad_page_num = pa_rec_num + mca_rec_num * 16.

Signed-off-by: ganglxie <ganglxie@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 49
1 files changed, 22 insertions, 27 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index f0349094f8c9..493dd004d6fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2981,24 +2981,14 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
/* only new entries are saved */
if (save_count > 0) {
- if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA) {
+ for (i = 0; i < unit_num; i++) {
if (amdgpu_ras_eeprom_append(control,
- &data->bps[control->ras_num_recs],
- save_count)) {
+ &data->bps[bad_page_num + i * adev->umc.retire_unit],
+ 1)) {
dev_err(adev->dev, "Failed to save EEPROM table data!");
return -EIO;
}
- } else {
- for (i = 0; i < unit_num; i++) {
- if (amdgpu_ras_eeprom_append(control,
- &data->bps[bad_page_num + i * adev->umc.retire_unit],
- 1)) {
- dev_err(adev->dev, "Failed to save EEPROM table data!");
- return -EIO;
- }
- }
}
-
dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count);
}
@@ -3014,7 +3004,7 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
struct amdgpu_ras_eeprom_control *control =
&adev->psp.ras_context.ras->eeprom_control;
struct eeprom_table_record *bps;
- int ret;
+ int ret, i = 0;
/* no bad page record, skip eeprom access */
if (control->ras_num_recs == 0 || amdgpu_bad_page_threshold == 0)
@@ -3028,13 +3018,23 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
if (ret) {
dev_err(adev->dev, "Failed to load EEPROM table records!");
} else {
- if (control->ras_num_recs > 1 &&
- adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
- if ((bps[0].address == bps[1].address) &&
- (bps[0].mem_channel == bps[1].mem_channel))
- control->rec_type = AMDGPU_RAS_EEPROM_REC_PA;
- else
- control->rec_type = AMDGPU_RAS_EEPROM_REC_MCA;
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
+ for (i = 0; i < control->ras_num_recs; i++) {
+ if ((control->ras_num_recs - i) >= adev->umc.retire_unit) {
+ if ((bps[i].address == bps[i + 1].address) &&
+ (bps[i].mem_channel == bps[i + 1].mem_channel)) {
+ control->ras_num_pa_recs += adev->umc.retire_unit;
+ i += (adev->umc.retire_unit - 1);
+ } else {
+ control->ras_num_mca_recs +=
+ (control->ras_num_recs - i);
+ break;
+ }
+ } else {
+ control->ras_num_mca_recs += (control->ras_num_recs - i);
+ break;
+ }
+ }
}
ret = amdgpu_ras_eeprom_check(control);
@@ -3440,12 +3440,7 @@ int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev)
return ret;
if (!adev->umc.ras || !adev->umc.ras->convert_ras_err_addr)
- control->rec_type = AMDGPU_RAS_EEPROM_REC_PA;
-
- /* default status is MCA storage */
- if (control->ras_num_recs <= 1 &&
- adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
- control->rec_type = AMDGPU_RAS_EEPROM_REC_MCA;
+ control->ras_num_pa_recs = control->ras_num_recs;
if (control->ras_num_recs) {
ret = amdgpu_ras_load_bad_pages(adev);