diff options
author | Jason Gunthorpe <jgg@nvidia.com> | 2021-08-30 09:49:59 -0300 |
---|---|---|
committer | Jason Gunthorpe <jgg@nvidia.com> | 2021-08-30 09:49:59 -0300 |
commit | 6a217437f9f5482a3f6f2dc5fcd27cf0f62409ac (patch) | |
tree | c82270181daeb43eb9984b586784b70f13ef1df4 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | |
parent | 65f90c8e38c9900692338864156e7824cf8a6501 (diff) | |
parent | 79fbd3e1241cea83dded06db2b8bcd5893d877d7 (diff) |
Merge branch 'sg_nents' into rdma.git for-next
From Maor Gottlieb
====================
Fix the use of nents and orig_nents in the sg table append helpers. The
nents should be used by the DMA layer to store the number of DMA mapped
sges, the orig_nents is the number of CPU sges.
Since the sg append logic doesn't always create a SGL with exactly
orig_nents entries store a total_nents as well to allow the table to be
properly free'd and reorganize the freeing logic to share across all the
use cases.
====================
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
* 'sg_nents':
RDMA: Use the sg_table directly and remove the opencoded version from umem
lib/scatterlist: Fix wrong update of orig_nents
lib/scatterlist: Provide a dedicated function to support table append
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 49 |
1 files changed, 35 insertions, 14 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index c13b02caf8c3..fc66aca28594 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -809,7 +809,7 @@ static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev, /* query/inject/cure begin */ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, - struct ras_query_if *info) + struct ras_query_if *info) { struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); struct ras_err_data err_data = {0, 0, 0, NULL}; @@ -1043,17 +1043,32 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, return ret; } -/* get the total error counts on all IPs */ -void amdgpu_ras_query_error_count(struct amdgpu_device *adev, - unsigned long *ce_count, - unsigned long *ue_count) +/** + * amdgpu_ras_query_error_count -- Get error counts of all IPs + * adev: pointer to AMD GPU device + * ce_count: pointer to an integer to be set to the count of correctible errors. + * ue_count: pointer to an integer to be set to the count of uncorrectible + * errors. + * + * If set, @ce_count or @ue_count, count and return the corresponding + * error counts in those integer pointers. Return 0 if the device + * supports RAS. Return -EOPNOTSUPP if the device doesn't support RAS. + */ +int amdgpu_ras_query_error_count(struct amdgpu_device *adev, + unsigned long *ce_count, + unsigned long *ue_count) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj; unsigned long ce, ue; if (!adev->ras_enabled || !con) - return; + return -EOPNOTSUPP; + + /* Don't count since no reporting. + */ + if (!ce_count && !ue_count) + return 0; ce = 0; ue = 0; @@ -1061,9 +1076,11 @@ void amdgpu_ras_query_error_count(struct amdgpu_device *adev, struct ras_query_if info = { .head = obj->head, }; + int res; - if (amdgpu_ras_query_error_status(adev, &info)) - return; + res = amdgpu_ras_query_error_status(adev, &info); + if (res) + return res; ce += info.ce_count; ue += info.ue_count; @@ -1074,6 +1091,8 @@ void amdgpu_ras_query_error_count(struct amdgpu_device *adev, if (ue_count) *ue_count = ue; + + return 0; } /* query/inject/cure end */ @@ -2137,9 +2156,10 @@ static void amdgpu_ras_counte_dw(struct work_struct *work) /* Cache new values. */ - amdgpu_ras_query_error_count(adev, &ce_count, &ue_count); - atomic_set(&con->ras_ce_count, ce_count); - atomic_set(&con->ras_ue_count, ue_count); + if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count) == 0) { + atomic_set(&con->ras_ce_count, ce_count); + atomic_set(&con->ras_ue_count, ue_count); + } pm_runtime_mark_last_busy(dev->dev); Out: @@ -2312,9 +2332,10 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev, /* Those are the cached values at init. */ - amdgpu_ras_query_error_count(adev, &ce_count, &ue_count); - atomic_set(&con->ras_ce_count, ce_count); - atomic_set(&con->ras_ue_count, ue_count); + if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count) == 0) { + atomic_set(&con->ras_ce_count, ce_count); + atomic_set(&con->ras_ue_count, ue_count); + } return 0; cleanup: |