Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c	203
1 file changed, 108 insertions, 95 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index dc262d2c2925..b4236572eae1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -58,6 +58,7 @@
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_sdma.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_hmm.h"
 #include "amdgpu_atomfirmware.h"
 #include "amdgpu_res_cursor.h"
 #include "bif/bif_4_1_d.h"
@@ -189,7 +190,6 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
 	struct amdgpu_device *adev = ring->adev;
 	unsigned offset, num_pages, num_dw, num_bytes;
 	uint64_t src_addr, dst_addr;
-	struct dma_fence *fence;
 	struct amdgpu_job *job;
 	void *cpu_addr;
 	uint64_t flags;
@@ -229,7 +229,9 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
 	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
 	num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
 
-	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
+	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+				     AMDGPU_FENCE_OWNER_UNDEFINED,
+				     num_dw * 4 + num_bytes,
 				     AMDGPU_IB_POOL_DELAYED, &job);
 	if (r)
 		return r;
@@ -269,18 +271,8 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
 		}
 	}
 
-	r = amdgpu_job_submit(job, &adev->mman.entity,
-			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
-	if (r)
-		goto error_free;
-
-	dma_fence_put(fence);
-
-	return r;
-
-error_free:
-	amdgpu_job_free(job);
-	return r;
+	dma_fence_put(amdgpu_job_submit(job));
+	return 0;
 }
 
 /**
@@ -381,7 +373,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 	dst.offset = 0;
 
 	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
-				       new_mem->num_pages << PAGE_SHIFT,
+				       new_mem->size,
 				       amdgpu_bo_encrypted(abo),
 				       bo->base.resv, &fence);
 	if (r)
@@ -424,7 +416,7 @@ error:
 static bool amdgpu_mem_visible(struct amdgpu_device *adev,
 			       struct ttm_resource *mem)
 {
-	u64 mem_size = (u64)mem->num_pages << PAGE_SHIFT;
+	u64 mem_size = (u64)mem->size;
 	struct amdgpu_res_cursor cursor;
 	u64 end;
 
@@ -439,6 +431,9 @@ static bool amdgpu_mem_visible(struct amdgpu_device *adev,
 	while (cursor.remaining) {
 		amdgpu_res_next(&cursor, cursor.size);
 
+		if (!cursor.remaining)
+			break;
+
 		/* ttm_resource_ioremap only supports contiguous memory */
 		if (end != cursor.start)
 			return false;
@@ -568,7 +563,7 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
 				     struct ttm_resource *mem)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
-	size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT;
+	size_t bus_size = (size_t)mem->size;
 
 	switch (mem->mem_type) {
 	case TTM_PL_SYSTEM:
@@ -640,9 +635,6 @@ struct amdgpu_ttm_tt {
 	struct task_struct	*usertask;
 	uint32_t		userflags;
 	bool			bound;
-#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-	struct hmm_range	*range;
-#endif
 };
 
 #define ttm_to_amdgpu_ttm_tt(ptr)	container_of(ptr, struct amdgpu_ttm_tt, ttm)
@@ -655,7 +647,8 @@ struct amdgpu_ttm_tt {
  * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
  * once afterwards to stop HMM tracking
  */
-int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
+				 struct hmm_range **range)
 {
 	struct ttm_tt *ttm = bo->tbo.ttm;
 	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
@@ -665,16 +658,15 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
 	bool readonly;
 	int r = 0;
 
+	/* Make sure get_user_pages_done() can cleanup gracefully */
+	*range = NULL;
+
 	mm = bo->notifier.mm;
 	if (unlikely(!mm)) {
 		DRM_DEBUG_DRIVER("BO is not registered?\n");
 		return -EFAULT;
 	}
 
-	/* Another get_user_pages is running at the same time?? */
-	if (WARN_ON(gtt->range))
-		return -EFAULT;
-
 	if (!mmget_not_zero(mm)) /* Happens during process shutdown */
 		return -ESRCH;
 
@@ -691,9 +683,8 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
 	}
 
 	readonly = amdgpu_ttm_tt_is_readonly(ttm);
-	r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
-				       ttm->num_pages, &gtt->range, readonly,
-				       true, NULL);
+	r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages,
+				       readonly, NULL, pages, range);
 out_unlock:
 	mmap_read_unlock(mm);
 	if (r)
@@ -710,30 +701,24 @@ out_unlock:
  *
  * Returns: true if pages are still valid
  */
-bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
+				       struct hmm_range *range)
 {
 	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
-	bool r = false;
 
-	if (!gtt || !gtt->userptr)
+	if (!gtt || !gtt->userptr || !range)
 		return false;
 
 	DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n",
 		gtt->userptr, ttm->num_pages);
 
-	WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns,
-		"No user pages to check\n");
-
-	if (gtt->range) {
-		/*
-		 * FIXME: Must always hold notifier_lock for this, and must
-		 * not ignore the return code.
-		 */
-		r = amdgpu_hmm_range_get_pages_done(gtt->range);
-		gtt->range = NULL;
-	}
+	WARN_ONCE(!range->hmm_pfns, "No user pages to check\n");
 
-	return !r;
+	/*
+	 * FIXME: Must always hold notifier_lock for this, and must
+	 * not ignore the return code.
	 */
+	return !amdgpu_hmm_range_get_pages_done(range);
 }
 #endif
 
@@ -810,20 +795,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
 	/* unmap the pages mapped to the device */
 	dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
 	sg_free_table(ttm->sg);
-
-#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-	if (gtt->range) {
-		unsigned long i;
-
-		for (i = 0; i < ttm->num_pages; i++) {
-			if (ttm->pages[i] !=
-			    hmm_pfn_to_page(gtt->range->hmm_pfns[i]))
-				break;
-		}
-
-		WARN((i == ttm->num_pages), "Missing get_user_page_done\n");
-	}
-#endif
 }
 
 static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
@@ -1174,8 +1145,9 @@ int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
  * @addr:  The address in the current tasks VM space to use
  * @flags: Requirements of userptr object.
  *
- * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages
- * to current task
+ * Called by amdgpu_gem_userptr_ioctl() and kfd_ioctl_alloc_memory_of_gpu() to
+ * bind userptr pages to current task and by kfd_ioctl_acquire_vm() to
+ * initialize GPU VM for a KFD process.
  */
 int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
 			      uint64_t addr, uint32_t flags)
@@ -1414,7 +1386,8 @@ static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
 }
 
 static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
-					unsigned long offset, void *buf, int len, int write)
+					unsigned long offset, void *buf,
+					int len, int write)
 {
 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
@@ -1438,26 +1411,27 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
 		memcpy(adev->mman.sdma_access_ptr, buf, len);
 
 	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
-	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED, &job);
+	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+				     AMDGPU_FENCE_OWNER_UNDEFINED,
+				     num_dw * 4, AMDGPU_IB_POOL_DELAYED,
+				     &job);
 	if (r)
 		goto out;
 
 	amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
-	src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) + src_mm.start;
+	src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
+		src_mm.start;
 	dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo);
 	if (write)
 		swap(src_addr, dst_addr);
 
-	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, PAGE_SIZE, false);
+	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
+				PAGE_SIZE, false);
 
 	amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
 	WARN_ON(job->ibs[0].length_dw > num_dw);
 
-	r = amdgpu_job_submit(job, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
-	if (r) {
-		amdgpu_job_free(job);
-		goto out;
-	}
+	fence = amdgpu_job_submit(job);
 
 	if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
 		r = -ETIMEDOUT;
@@ -1557,6 +1531,23 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
 		NULL, &adev->mman.fw_vram_usage_va);
 }
 
+/*
+ * Driver Reservation functions
+ */
+/**
+ * amdgpu_ttm_drv_reserve_vram_fini - free drv reserved vram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * free drv reserved vram if it has been reserved.
+ */
+static void amdgpu_ttm_drv_reserve_vram_fini(struct amdgpu_device *adev)
+{
+	amdgpu_bo_free_kernel(&adev->mman.drv_vram_usage_reserved_bo,
+						  NULL,
+						  &adev->mman.drv_vram_usage_va);
+}
+
 /**
  * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
  *
@@ -1583,6 +1574,32 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
 					  &adev->mman.fw_vram_usage_va);
 }
 
+/**
+ * amdgpu_ttm_drv_reserve_vram_init - create bo vram reservation from driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * create bo vram reservation from drv.
+ */
+static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
+{
+	u64 vram_size = adev->gmc.visible_vram_size;
+
+	adev->mman.drv_vram_usage_va = NULL;
+	adev->mman.drv_vram_usage_reserved_bo = NULL;
+
+	if (adev->mman.drv_vram_usage_size == 0 ||
+	    adev->mman.drv_vram_usage_size > vram_size)
+		return 0;
+
+	return amdgpu_bo_create_kernel_at(adev,
+					  adev->mman.drv_vram_usage_start_offset,
+					  adev->mman.drv_vram_usage_size,
+					  AMDGPU_GEM_DOMAIN_VRAM,
+					  &adev->mman.drv_vram_usage_reserved_bo,
+					  &adev->mman.drv_vram_usage_va);
+}
+
 /*
  * Memoy training reservation functions
  */
@@ -1751,6 +1768,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	}
 
 	/*
+	 *The reserved vram for driver must be pinned to the specified
+	 *place on the VRAM, so reserve it early.
+	 */
+	r = amdgpu_ttm_drv_reserve_vram_init(adev);
+	if (r)
+		return r;
+
+	/*
 	 * only NAVI10 and onwards ASIC support for IP discovery.
 	 * If IP discovery enabled, a block of memory should be
 	 * reserved for IP discovey.
@@ -1875,6 +1900,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 	amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
 					&adev->mman.sdma_access_ptr);
 	amdgpu_ttm_fw_reserve_vram_fini(adev);
+	amdgpu_ttm_drv_reserve_vram_fini(adev);
 
 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
@@ -1956,7 +1982,9 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
 		AMDGPU_IB_POOL_DELAYED;
 	int r;
 
-	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, job);
+	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+				     AMDGPU_FENCE_OWNER_UNDEFINED,
+				     num_dw * 4, pool, job);
 	if (r)
 		return r;
 
@@ -1966,17 +1994,11 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
 							adev->gart.bo);
 		(*job)->vm_needs_flush = true;
 	}
-	if (resv) {
-		r = amdgpu_sync_resv(adev, &(*job)->sync, resv,
-				     AMDGPU_SYNC_ALWAYS,
-				     AMDGPU_FENCE_OWNER_UNDEFINED);
-		if (r) {
-			DRM_ERROR("sync failed (%d).\n", r);
-			amdgpu_job_free(*job);
-			return r;
-		}
-	}
-	return 0;
+	if (!resv)
+		return 0;
+
+	return drm_sched_job_add_resv_dependencies(&(*job)->base, resv,
+						   DMA_RESV_USAGE_BOOKKEEP);
 }
 
 int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
@@ -2021,8 +2043,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 	if (direct_submit)
 		r = amdgpu_job_submit_direct(job, ring, fence);
 	else
-		r = amdgpu_job_submit(job, &adev->mman.entity,
-				      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
+		*fence = amdgpu_job_submit(job);
 	if (r)
 		goto error_free;
 
@@ -2067,16 +2088,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 	WARN_ON(job->ibs[0].length_dw > num_dw);
 
-	r = amdgpu_job_submit(job, &adev->mman.entity,
-			      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
-	if (r)
-		goto error_free;
-
+	*fence = amdgpu_job_submit(job);
 	return 0;
-
-error_free:
-	amdgpu_job_free(job);
-	return r;
 }
 
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
@@ -2292,9 +2305,9 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
 		if (p->mapping != adev->mman.bdev.dev_mapping)
 			return -EPERM;
 
-		ptr = kmap(p);
+		ptr = kmap_local_page(p);
 		r = copy_to_user(buf, ptr + off, bytes);
-		kunmap(p);
+		kunmap_local(ptr);
 		if (r)
 			return -EFAULT;
 
@@ -2343,9 +2356,9 @@ static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
 		if (p->mapping != adev->mman.bdev.dev_mapping)
 			return -EPERM;
 
-		ptr = kmap(p);
+		ptr = kmap_local_page(p);
 		r = copy_from_user(ptr + off, buf, bytes);
-		kunmap(p);
+		kunmap_local(ptr);
 		if (r)
 			return -EFAULT;
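The job-submission hunks above all apply the same conversion that is visible in this diff: the scheduler entity and fence owner move into amdgpu_job_alloc_with_ib(), and amdgpu_job_submit() now returns the scheduler fence directly instead of an error code, so callers no longer need an error_free path after allocation. Below is a minimal sketch of the resulting call pattern, not part of the patch; the wrapper name example_sdma_copy is hypothetical, and only helpers that appear in the hunks above (plus the standard dma_fence calls) are used.

/* Sketch only: illustrates the new allocation/submit flow shown in the diff. */
static int example_sdma_copy(struct amdgpu_device *adev,
			     uint64_t src_addr, uint64_t dst_addr)
{
	unsigned int num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
	struct dma_fence *fence;
	struct amdgpu_job *job;
	int r = 0;

	/* Entity and fence owner are now bound at allocation time. */
	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
				     AMDGPU_FENCE_OWNER_UNDEFINED,
				     num_dw * 4, AMDGPU_IB_POOL_DELAYED, &job);
	if (r)
		return r;

	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
				PAGE_SIZE, false);
	amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);

	/* Submission can no longer fail; it hands back the scheduler fence. */
	fence = amdgpu_job_submit(job);
	if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
		r = -ETIMEDOUT;
	dma_fence_put(fence);

	return r;
}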
