summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2025-09-22 08:44:52 +1000
committerDave Airlie <airlied@redhat.com>2025-09-22 08:45:51 +1000
commit342f141ba9f4c9e39de342d047a5245e8f4cda19 (patch)
tree4cf6d75acda32087f69739988f0a4eece260f243 /drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
parent0faeb8cf99c040886ac4917b0d7f4684dc9ae846 (diff)
parenta490c8d77d500b5981e739be3d59c60cfe382536 (diff)
Merge tag 'amd-drm-next-6.18-2025-09-19' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.18-2025-09-19: amdgpu: - Fence drv clean up fix - DPC fixes - Misc display fixes - Support the MMIO remap page as a ttm pool - JPEG parser updates - UserQ updates - VCN ctx handling fixes - Documentation updates - Misc cleanups - SMU 13.0.x updates - SI DPM updates - GC 11.x cleaner shader updates - DMCUB updates - DML fixes - Improve fallback handling for pixel encoding - VCN reset improvements - DCE6 DC updates - DSC fixes - Use devm for i2c buses - GPUVM locking updates - GPUVM documentation improvements - Drop non-DC DCE11 code - S0ix fixes - Backlight fix - SR-IOV fixes amdkfd: - SVM updates Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexander.deucher@amd.com> Link: https://lore.kernel.org/r/20250919193354.2989255-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c55
1 files changed, 55 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 13f0cdeb59c4..3328ab63376b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -828,11 +828,14 @@ static void amdgpu_virt_init_ras(struct amdgpu_device *adev)
{
ratelimit_state_init(&adev->virt.ras.ras_error_cnt_rs, 5 * HZ, 1);
ratelimit_state_init(&adev->virt.ras.ras_cper_dump_rs, 5 * HZ, 1);
+ ratelimit_state_init(&adev->virt.ras.ras_chk_criti_rs, 5 * HZ, 1);
ratelimit_set_flags(&adev->virt.ras.ras_error_cnt_rs,
RATELIMIT_MSG_ON_RELEASE);
ratelimit_set_flags(&adev->virt.ras.ras_cper_dump_rs,
RATELIMIT_MSG_ON_RELEASE);
+ ratelimit_set_flags(&adev->virt.ras.ras_chk_criti_rs,
+ RATELIMIT_MSG_ON_RELEASE);
mutex_init(&adev->virt.ras.ras_telemetry_mutex);
@@ -1501,3 +1504,55 @@ void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev)
if (virt->ops && virt->ops->req_bad_pages)
virt->ops->req_bad_pages(adev);
}
+
+static int amdgpu_virt_cache_chk_criti_hit(struct amdgpu_device *adev,
+ struct amdsriov_ras_telemetry *host_telemetry,
+ bool *hit)
+{
+ struct amd_sriov_ras_chk_criti *tmp = NULL;
+ uint32_t checksum, used_size;
+
+ checksum = host_telemetry->header.checksum;
+ used_size = host_telemetry->header.used_size;
+
+ if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+ return 0;
+
+ tmp = kmemdup(&host_telemetry->body.chk_criti, used_size, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ if (checksum != amd_sriov_msg_checksum(tmp, used_size, 0, 0))
+ goto out;
+
+ if (hit)
+ *hit = tmp->hit ? true : false;
+
+out:
+ kfree(tmp);
+
+ return 0;
+}
+
+int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, bool *hit)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ int r = -EPERM;
+
+ if (!virt->ops || !virt->ops->req_ras_chk_criti)
+ return -EOPNOTSUPP;
+
+ /* Host allows 15 ras telemetry requests per 60 seconds. Afterwhich, the Host
+ * will ignore incoming guest messages. Ratelimit the guest messages to
+ * prevent guest self DOS.
+ */
+ if (__ratelimit(&virt->ras.ras_chk_criti_rs)) {
+ mutex_lock(&virt->ras.ras_telemetry_mutex);
+ if (!virt->ops->req_ras_chk_criti(adev, addr))
+ r = amdgpu_virt_cache_chk_criti_hit(
+ adev, virt->fw_reserve.ras_telemetry, hit);
+ mutex_unlock(&virt->ras.ras_telemetry_mutex);
+ }
+
+ return r;
+}