diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 240 | 
1 files changed, 83 insertions, 157 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index f808841310fd..b6015157763a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -22,6 +22,7 @@   */  #include <linux/firmware.h> +#include <drm/drm_exec.h>  #include "amdgpu_mes.h"  #include "amdgpu.h" @@ -38,120 +39,70 @@ int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)  		       PAGE_SIZE);  } -int amdgpu_mes_alloc_process_doorbells(struct amdgpu_device *adev, -				      unsigned int *doorbell_index) -{ -	int r = ida_simple_get(&adev->mes.doorbell_ida, 2, -			       adev->mes.max_doorbell_slices, -			       GFP_KERNEL); -	if (r > 0) -		*doorbell_index = r; - -	return r; -} - -void amdgpu_mes_free_process_doorbells(struct amdgpu_device *adev, -				      unsigned int doorbell_index) -{ -	if (doorbell_index) -		ida_simple_remove(&adev->mes.doorbell_ida, doorbell_index); -} - -unsigned int amdgpu_mes_get_doorbell_dw_offset_in_bar( -					struct amdgpu_device *adev, -					uint32_t doorbell_index, -					unsigned int doorbell_id) -{ -	return ((doorbell_index * -		amdgpu_mes_doorbell_process_slice(adev)) / sizeof(u32) + -		doorbell_id * 2); -} - -static int amdgpu_mes_queue_doorbell_get(struct amdgpu_device *adev, +static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev,  					 struct amdgpu_mes_process *process,  					 int ip_type, uint64_t *doorbell_index)  {  	unsigned int offset, found; +	struct amdgpu_mes *mes = &adev->mes; -	if (ip_type == AMDGPU_RING_TYPE_SDMA) { +	if (ip_type == AMDGPU_RING_TYPE_SDMA)  		offset = adev->doorbell_index.sdma_engine[0]; -		found = find_next_zero_bit(process->doorbell_bitmap, -					   AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS, -					   offset); -	} else { -		found = find_first_zero_bit(process->doorbell_bitmap, -					    AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS); -	} +	else +		offset = 0; -	if (found >= AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS) { +	found = find_next_zero_bit(mes->doorbell_bitmap, mes->num_mes_dbs, offset); +	if (found >= mes->num_mes_dbs) {  		DRM_WARN("No doorbell available\n");  		return -ENOSPC;  	} -	set_bit(found, process->doorbell_bitmap); - -	*doorbell_index = amdgpu_mes_get_doorbell_dw_offset_in_bar(adev, -				process->doorbell_index, found); +	set_bit(found, mes->doorbell_bitmap); +	/* Get the absolute doorbell index on BAR */ +	*doorbell_index = mes->db_start_dw_offset + found * 2;  	return 0;  } -static void amdgpu_mes_queue_doorbell_free(struct amdgpu_device *adev, +static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev,  					   struct amdgpu_mes_process *process,  					   uint32_t doorbell_index)  { -	unsigned int old, doorbell_id; - -	doorbell_id = doorbell_index - -		(process->doorbell_index * -		 amdgpu_mes_doorbell_process_slice(adev)) / sizeof(u32); -	doorbell_id /= 2; +	unsigned int old, rel_index; +	struct amdgpu_mes *mes = &adev->mes; -	old = test_and_clear_bit(doorbell_id, process->doorbell_bitmap); +	/* Find the relative index of the doorbell in this object */ +	rel_index = (doorbell_index - mes->db_start_dw_offset) / 2; +	old = test_and_clear_bit(rel_index, mes->doorbell_bitmap);  	WARN_ON(!old);  }  static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)  { -	size_t doorbell_start_offset; -	size_t doorbell_aperture_size; -	size_t doorbell_process_limit; -	size_t aggregated_doorbell_start;  	int i; +	struct amdgpu_mes *mes = &adev->mes; -	aggregated_doorbell_start = (adev->doorbell_index.max_assignment + 1) * sizeof(u32); -	aggregated_doorbell_start = -		roundup(aggregated_doorbell_start, PAGE_SIZE); - -	doorbell_start_offset = aggregated_doorbell_start + PAGE_SIZE; -	doorbell_start_offset = -		roundup(doorbell_start_offset, -			amdgpu_mes_doorbell_process_slice(adev)); - -	doorbell_aperture_size = adev->doorbell.size; -	doorbell_aperture_size = -			rounddown(doorbell_aperture_size, -				  amdgpu_mes_doorbell_process_slice(adev)); - -	if (doorbell_aperture_size > doorbell_start_offset) -		doorbell_process_limit = -			(doorbell_aperture_size - doorbell_start_offset) / -			amdgpu_mes_doorbell_process_slice(adev); -	else -		return -ENOSPC; - -	adev->mes.doorbell_id_offset = doorbell_start_offset / sizeof(u32); -	adev->mes.max_doorbell_slices = doorbell_process_limit; +	/* Bitmap for dynamic allocation of kernel doorbells */ +	mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL); +	if (!mes->doorbell_bitmap) { +		DRM_ERROR("Failed to allocate MES doorbell bitmap\n"); +		return -ENOMEM; +	} -	/* allocate Qword range for aggregated doorbell */ -	for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) -		adev->mes.aggregated_doorbells[i] = -			aggregated_doorbell_start / sizeof(u32) + i * 2; +	mes->num_mes_dbs = PAGE_SIZE / AMDGPU_ONE_DOORBELL_SIZE; +	for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) { +		adev->mes.aggregated_doorbells[i] = mes->db_start_dw_offset + i * 2; +		set_bit(i, mes->doorbell_bitmap); +	} -	DRM_INFO("max_doorbell_slices=%zu\n", doorbell_process_limit);  	return 0;  } +static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev) +{ +	bitmap_free(adev->mes.doorbell_bitmap); +} +  int amdgpu_mes_init(struct amdgpu_device *adev)  {  	int i, r; @@ -250,6 +201,7 @@ void amdgpu_mes_fini(struct amdgpu_device *adev)  	amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);  	amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);  	amdgpu_device_wb_free(adev, adev->mes.read_val_offs); +	amdgpu_mes_doorbell_free(adev);  	idr_destroy(&adev->mes.pasid_idr);  	idr_destroy(&adev->mes.gang_id_idr); @@ -278,15 +230,6 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,  		return -ENOMEM;  	} -	process->doorbell_bitmap = -		kzalloc(DIV_ROUND_UP(AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS, -				     BITS_PER_BYTE), GFP_KERNEL); -	if (!process->doorbell_bitmap) { -		DRM_ERROR("failed to allocate doorbell bitmap\n"); -		kfree(process); -		return -ENOMEM; -	} -  	/* allocate the process context bo and map it */  	r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE,  				    AMDGPU_GEM_DOMAIN_GTT, @@ -313,15 +256,6 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,  		goto clean_up_ctx;  	} -	/* allocate the starting doorbell index of the process */ -	r = amdgpu_mes_alloc_process_doorbells(adev, &process->doorbell_index); -	if (r < 0) { -		DRM_ERROR("failed to allocate doorbell for process\n"); -		goto clean_up_pasid; -	} - -	DRM_DEBUG("process doorbell index = %d\n", process->doorbell_index); -  	INIT_LIST_HEAD(&process->gang_list);  	process->vm = vm;  	process->pasid = pasid; @@ -331,15 +265,12 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,  	amdgpu_mes_unlock(&adev->mes);  	return 0; -clean_up_pasid: -	idr_remove(&adev->mes.pasid_idr, pasid); -	amdgpu_mes_unlock(&adev->mes);  clean_up_ctx: +	amdgpu_mes_unlock(&adev->mes);  	amdgpu_bo_free_kernel(&process->proc_ctx_bo,  			      &process->proc_ctx_gpu_addr,  			      &process->proc_ctx_cpu_ptr);  clean_up_memory: -	kfree(process->doorbell_bitmap);  	kfree(process);  	return r;  } @@ -385,7 +316,6 @@ void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid)  		idr_remove(&adev->mes.gang_id_idr, gang->gang_id);  	} -	amdgpu_mes_free_process_doorbells(adev, process->doorbell_index);  	idr_remove(&adev->mes.pasid_idr, pasid);  	amdgpu_mes_unlock(&adev->mes); @@ -407,7 +337,6 @@ void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid)  	amdgpu_bo_free_kernel(&process->proc_ctx_bo,  			      &process->proc_ctx_gpu_addr,  			      &process->proc_ctx_cpu_ptr); -	kfree(process->doorbell_bitmap);  	kfree(process);  } @@ -642,6 +571,8 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,  	unsigned long flags;  	int r; +	memset(&queue_input, 0, sizeof(struct mes_add_queue_input)); +  	/* allocate the mes queue buffer */  	queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL);  	if (!queue) { @@ -679,7 +610,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,  	*queue_id = queue->queue_id = r;  	/* allocate a doorbell index for the queue */ -	r = amdgpu_mes_queue_doorbell_get(adev, gang->process, +	r = amdgpu_mes_kernel_doorbell_get(adev, gang->process,  					  qprops->queue_type,  					  &qprops->doorbell_off);  	if (r) @@ -737,7 +668,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,  	return 0;  clean_up_doorbell: -	amdgpu_mes_queue_doorbell_free(adev, gang->process, +	amdgpu_mes_kernel_doorbell_free(adev, gang->process,  				       qprops->doorbell_off);  clean_up_queue_id:  	spin_lock_irqsave(&adev->mes.queue_id_lock, flags); @@ -792,7 +723,7 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id)  			  queue_id);  	list_del(&queue->list); -	amdgpu_mes_queue_doorbell_free(adev, gang->process, +	amdgpu_mes_kernel_doorbell_free(adev, gang->process,  				       queue->doorbell_off);  	amdgpu_mes_unlock(&adev->mes); @@ -1168,34 +1099,31 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,  				 struct amdgpu_mes_ctx_data *ctx_data)  {  	struct amdgpu_bo_va *bo_va; -	struct ww_acquire_ctx ticket; -	struct list_head list; -	struct amdgpu_bo_list_entry pd; -	struct ttm_validate_buffer csa_tv;  	struct amdgpu_sync sync; +	struct drm_exec exec;  	int r;  	amdgpu_sync_create(&sync); -	INIT_LIST_HEAD(&list); -	INIT_LIST_HEAD(&csa_tv.head); -	csa_tv.bo = &ctx_data->meta_data_obj->tbo; -	csa_tv.num_shared = 1; - -	list_add(&csa_tv.head, &list); -	amdgpu_vm_get_pd_bo(vm, &list, &pd); - -	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); -	if (r) { -		DRM_ERROR("failed to reserve meta data BO: err=%d\n", r); -		return r; +	drm_exec_init(&exec, 0); +	drm_exec_until_all_locked(&exec) { +		r = drm_exec_lock_obj(&exec, +				      &ctx_data->meta_data_obj->tbo.base); +		drm_exec_retry_on_contention(&exec); +		if (unlikely(r)) +			goto error_fini_exec; + +		r = amdgpu_vm_lock_pd(vm, &exec, 0); +		drm_exec_retry_on_contention(&exec); +		if (unlikely(r)) +			goto error_fini_exec;  	}  	bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj);  	if (!bo_va) { -		ttm_eu_backoff_reservation(&ticket, &list);  		DRM_ERROR("failed to create bo_va for meta data BO\n"); -		return -ENOMEM; +		r = -ENOMEM; +		goto error_fini_exec;  	}  	r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0, @@ -1205,33 +1133,35 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,  	if (r) {  		DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r); -		goto error; +		goto error_del_bo_va;  	}  	r = amdgpu_vm_bo_update(adev, bo_va, false);  	if (r) {  		DRM_ERROR("failed to do vm_bo_update on meta data\n"); -		goto error; +		goto error_del_bo_va;  	}  	amdgpu_sync_fence(&sync, bo_va->last_pt_update);  	r = amdgpu_vm_update_pdes(adev, vm, false);  	if (r) {  		DRM_ERROR("failed to update pdes on meta data\n"); -		goto error; +		goto error_del_bo_va;  	}  	amdgpu_sync_fence(&sync, vm->last_update);  	amdgpu_sync_wait(&sync, false); -	ttm_eu_backoff_reservation(&ticket, &list); +	drm_exec_fini(&exec);  	amdgpu_sync_free(&sync);  	ctx_data->meta_data_va = bo_va;  	return 0; -error: +error_del_bo_va:  	amdgpu_vm_bo_del(adev, bo_va); -	ttm_eu_backoff_reservation(&ticket, &list); + +error_fini_exec: +	drm_exec_fini(&exec);  	amdgpu_sync_free(&sync);  	return r;  } @@ -1242,34 +1172,30 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,  	struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va;  	struct amdgpu_bo *bo = ctx_data->meta_data_obj;  	struct amdgpu_vm *vm = bo_va->base.vm; -	struct amdgpu_bo_list_entry vm_pd; -	struct list_head list, duplicates; -	struct dma_fence *fence = NULL; -	struct ttm_validate_buffer tv; -	struct ww_acquire_ctx ticket; -	long r = 0; - -	INIT_LIST_HEAD(&list); -	INIT_LIST_HEAD(&duplicates); - -	tv.bo = &bo->tbo; -	tv.num_shared = 2; -	list_add(&tv.head, &list); - -	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); - -	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates); -	if (r) { -		dev_err(adev->dev, "leaking bo va because " -			"we fail to reserve bo (%ld)\n", r); -		return r; +	struct dma_fence *fence; +	struct drm_exec exec; +	long r; + +	drm_exec_init(&exec, 0); +	drm_exec_until_all_locked(&exec) { +		r = drm_exec_lock_obj(&exec, +				      &ctx_data->meta_data_obj->tbo.base); +		drm_exec_retry_on_contention(&exec); +		if (unlikely(r)) +			goto out_unlock; + +		r = amdgpu_vm_lock_pd(vm, &exec, 0); +		drm_exec_retry_on_contention(&exec); +		if (unlikely(r)) +			goto out_unlock;  	}  	amdgpu_vm_bo_del(adev, bo_va);  	if (!amdgpu_vm_ready(vm))  		goto out_unlock; -	r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, &fence); +	r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, +				   &fence);  	if (r)  		goto out_unlock;  	if (fence) { @@ -1288,7 +1214,7 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,  out_unlock:  	if (unlikely(r < 0))  		dev_err(adev->dev, "failed to clear page tables (%ld)\n", r); -	ttm_eu_backoff_reservation(&ticket, &list); +	drm_exec_fini(&exec);  	return r;  } | 
