summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
diff options
context:
space:
mode:
authorChristian König <christian.koenig@amd.com>2021-03-18 14:04:06 +0100
committerAlex Deucher <alexander.deucher@amd.com>2021-03-23 23:40:06 -0400
commite5c04edfcde373b093d2a07322873b3ce1c5b88e (patch)
treee8eb34a03fa13a26c8568206987e31d7e0244ae8 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parent6b44b667e24cf89603ebdaa31b939c034d425162 (diff)
downloadlinux-stable-e5c04edfcde373b093d2a07322873b3ce1c5b88e.tar.gz
linux-stable-e5c04edfcde373b093d2a07322873b3ce1c5b88e.tar.bz2
linux-stable-e5c04edfcde373b093d2a07322873b3ce1c5b88e.zip
drm/amdgpu: revert "reserve backup pages for bad page retirment"
As noted during the review this approach doesn't make sense at all. We should not apply any limitation on the VRAM applications can use inside the kernel. If an application or end user wants to reserve a certain amount of VRAM for bad pages handling we should do this in the upper layer. This reverts commit f89b881c81d9a6481fc17b46b351ca38f5dd6f3a. Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c29
1 file changed, 11 insertions, 18 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index a90bf33358d3..0e16683876aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1790,14 +1790,13 @@ static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
return ret;
}
-static uint32_t
-amdgpu_ras_calculate_badpags_threshold(struct amdgpu_device *adev)
+static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
+ uint32_t max_length)
{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
int tmp_threshold = amdgpu_bad_page_threshold;
u64 val;
- uint32_t max_length = 0;
- max_length = amdgpu_ras_eeprom_get_record_max_length();
/*
* Justification of value bad_page_cnt_threshold in ras structure
*
@@ -1823,18 +1822,20 @@ amdgpu_ras_calculate_badpags_threshold(struct amdgpu_device *adev)
tmp_threshold = max_length;
if (tmp_threshold == -1) {
- val = adev->gmc.real_vram_size;
+ val = adev->gmc.mc_vram_size;
do_div(val, RAS_BAD_PAGE_RATE);
- tmp_threshold = min(lower_32_bits(val), max_length);
+ con->bad_page_cnt_threshold = min(lower_32_bits(val),
+ max_length);
+ } else {
+ con->bad_page_cnt_threshold = tmp_threshold;
}
-
- return tmp_threshold;
}
int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data **data;
+ uint32_t max_eeprom_records_len = 0;
bool exc_err_limit = false;
int ret;
@@ -1854,16 +1855,8 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
atomic_set(&con->in_recovery, 0);
con->adev = adev;
- if (!con->bad_page_cnt_threshold) {
- con->bad_page_cnt_threshold =
- amdgpu_ras_calculate_badpags_threshold(adev);
-
- ret = amdgpu_vram_mgr_reserve_backup_pages(
- ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM),
- con->bad_page_cnt_threshold);
- if (ret)
- goto out;
- }
+ max_eeprom_records_len = amdgpu_ras_eeprom_get_record_max_length();
+ amdgpu_ras_validate_threshold(adev, max_eeprom_records_len);
/* Todo: During test the SMU might fail to read the eeprom through I2C
* when the GPU is pending on XGMI reset during probe time