summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
diff options
context:
space:
mode:
authorChristian König <christian.koenig@amd.com>2021-03-18 14:04:06 +0100
committerAlex Deucher <alexander.deucher@amd.com>2021-03-23 23:40:06 -0400
commite5c04edfcde373b093d2a07322873b3ce1c5b88e (patch)
treee8eb34a03fa13a26c8568206987e31d7e0244ae8 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parent6b44b667e24cf89603ebdaa31b939c034d425162 (diff)
downloadlinux-stable-e5c04edfcde373b093d2a07322873b3ce1c5b88e.tar.gz
linux-stable-e5c04edfcde373b093d2a07322873b3ce1c5b88e.tar.bz2
linux-stable-e5c04edfcde373b093d2a07322873b3ce1c5b88e.zip
drm/amdgpu: revert "reserve backup pages for bad page retirment"
As noted during the review this approach doesn't make sense at all. We should not apply any limitation on the VRAM applications can use inside the kernel. If an application or end user wants to reserve a certain amount of VRAM for bad pages handling we should do this in the upper layer. This reverts commit f89b881c81d9a6481fc17b46b351ca38f5dd6f3a. Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c29
1 file changed, 11 insertions, 18 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index a90bf33358d3..0e16683876aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1790,14 +1790,13 @@ static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
return ret;
}
-static uint32_t
-amdgpu_ras_calculate_badpags_threshold(struct amdgpu_device *adev)
+static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
+ uint32_t max_length)
{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
int tmp_threshold = amdgpu_bad_page_threshold;
u64 val;
- uint32_t max_length = 0;
- max_length = amdgpu_ras_eeprom_get_record_max_length();
/*
* Justification of value bad_page_cnt_threshold in ras structure
*
@@ -1823,18 +1822,20 @@ amdgpu_ras_calculate_badpags_threshold(struct amdgpu_device *adev)
tmp_threshold = max_length;
if (tmp_threshold == -1) {
- val = adev->gmc.real_vram_size;
+ val = adev->gmc.mc_vram_size;
do_div(val, RAS_BAD_PAGE_RATE);
- tmp_threshold = min(lower_32_bits(val), max_length);
+ con->bad_page_cnt_threshold = min(lower_32_bits(val),
+ max_length);
+ } else {
+ con->bad_page_cnt_threshold = tmp_threshold;
}
-
- return tmp_threshold;
}
int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data **data;
+ uint32_t max_eeprom_records_len = 0;
bool exc_err_limit = false;
int ret;
@@ -1854,16 +1855,8 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
atomic_set(&con->in_recovery, 0);
con->adev = adev;
- if (!con->bad_page_cnt_threshold) {
- con->bad_page_cnt_threshold =
- amdgpu_ras_calculate_badpags_threshold(adev);
-
- ret = amdgpu_vram_mgr_reserve_backup_pages(
- ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM),
- con->bad_page_cnt_threshold);
- if (ret)
- goto out;
- }
+ max_eeprom_records_len = amdgpu_ras_eeprom_get_record_max_length();
+ amdgpu_ras_validate_threshold(adev, max_eeprom_records_len);
/* Todo: During test the SMU might fail to read the eeprom through I2C
* when the GPU is pending on XGMI reset during probe time