Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c	171
1 file changed, 102 insertions(+), 69 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index b64938ed8cb6..55e0284b2bdd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -58,6 +58,7 @@
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_sdma.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_hmm.h"
 #include "amdgpu_atomfirmware.h"
 #include "amdgpu_res_cursor.h"
 #include "bif/bif_4_1_d.h"
@@ -189,7 +190,6 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
 	struct amdgpu_device *adev = ring->adev;
 	unsigned offset, num_pages, num_dw, num_bytes;
 	uint64_t src_addr, dst_addr;
-	struct dma_fence *fence;
 	struct amdgpu_job *job;
 	void *cpu_addr;
 	uint64_t flags;
@@ -229,7 +229,9 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
 	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
 	num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
 
-	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
+	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+				     AMDGPU_FENCE_OWNER_UNDEFINED,
+				     num_dw * 4 + num_bytes,
 				     AMDGPU_IB_POOL_DELAYED, &job);
 	if (r)
 		return r;
@@ -269,18 +271,8 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
 		}
 	}
 
-	r = amdgpu_job_submit(job, &adev->mman.entity,
-			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
-	if (r)
-		goto error_free;
-
-	dma_fence_put(fence);
-
-	return r;
-
-error_free:
-	amdgpu_job_free(job);
-	return r;
+	dma_fence_put(amdgpu_job_submit(job));
+	return 0;
 }
 
 /**
@@ -381,7 +373,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 	dst.offset = 0;
 
 	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
-				       new_mem->num_pages << PAGE_SHIFT,
+				       new_mem->size,
 				       amdgpu_bo_encrypted(abo),
 				       bo->base.resv, &fence);
 	if (r)
@@ -424,7 +416,7 @@ error:
 static bool amdgpu_mem_visible(struct amdgpu_device *adev,
 			       struct ttm_resource *mem)
 {
-	u64 mem_size = (u64)mem->num_pages << PAGE_SHIFT;
+	u64 mem_size = (u64)mem->size;
 	struct amdgpu_res_cursor cursor;
 	u64 end;
 
@@ -571,7 +563,7 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
 				     struct ttm_resource *mem)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
-	size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT;
+	size_t bus_size = (size_t)mem->size;
 
 	switch (mem->mem_type) {
 	case TTM_PL_SYSTEM:
@@ -691,9 +683,8 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
 	}
 
 	readonly = amdgpu_ttm_tt_is_readonly(ttm);
-	r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
-				       ttm->num_pages, range, readonly,
-				       true, NULL);
+	r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages,
+				       readonly, NULL, pages, range);
 out_unlock:
 	mmap_read_unlock(mm);
 	if (r)
@@ -704,8 +695,19 @@ out_unlock:
 	return r;
 }
 
+/* amdgpu_ttm_tt_discard_user_pages - Discard range and pfn array allocations
+ */
+void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
+				      struct hmm_range *range)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+
+	if (gtt && gtt->userptr && range)
+		amdgpu_hmm_range_get_pages_done(range);
+}
+
 /*
- * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
+ * amdgpu_ttm_tt_get_user_pages_done - stop HMM track the CPU page table change
  * Check if the pages backing this ttm range have been invalidated
  *
  * Returns: true if pages are still valid
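The common thread in the hunks above is the new job-submission contract: amdgpu_job_alloc_with_ib() now takes the scheduler entity and fence owner at allocation time, and amdgpu_job_submit() can no longer fail and returns the scheduler fence directly, which is what lets all of the error_free unwind paths disappear. A minimal sketch of the resulting fire-and-forget pattern, using only the post-patch prototypes visible in this diff (IB setup elided, num_dw as computed by the caller):

	struct amdgpu_job *job;
	int r;

	/* The entity and fence owner are now bound at allocation time. */
	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
				     AMDGPU_FENCE_OWNER_UNDEFINED,
				     num_dw * 4, AMDGPU_IB_POOL_DELAYED, &job);
	if (r)
		return r;

	/* ... fill job->ibs[0] with copy/fill commands ... */

	/*
	 * Submission cannot fail anymore and hands back the scheduler
	 * fence; a caller that does not wait simply drops the reference.
	 */
	dma_fence_put(amdgpu_job_submit(job));
	return 0;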
@@ -723,10 +725,6 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
 
 	WARN_ONCE(!range->hmm_pfns, "No user pages to check\n");
 
-	/*
-	 * FIXME: Must always hold notifier_lock for this, and must
-	 * not ignore the return code.
-	 */
 	return !amdgpu_hmm_range_get_pages_done(range);
 }
 #endif
@@ -1154,8 +1152,9 @@ int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
  * @addr:  The address in the current tasks VM space to use
  * @flags: Requirements of userptr object.
  *
- * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages
- * to current task
+ * Called by amdgpu_gem_userptr_ioctl() and kfd_ioctl_alloc_memory_of_gpu() to
+ * bind userptr pages to current task and by kfd_ioctl_acquire_vm() to
+ * initialize GPU VM for a KFD process.
  */
 int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
 			      uint64_t addr, uint32_t flags)
@@ -1394,7 +1393,8 @@ static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
 }
 
 static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
-					unsigned long offset, void *buf, int len, int write)
+					unsigned long offset, void *buf,
+					int len, int write)
 {
 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
@@ -1418,26 +1418,27 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
 		memcpy(adev->mman.sdma_access_ptr, buf, len);
 
 	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
-	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED, &job);
+	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+				     AMDGPU_FENCE_OWNER_UNDEFINED,
+				     num_dw * 4, AMDGPU_IB_POOL_DELAYED,
+				     &job);
 	if (r)
 		goto out;
 
 	amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
-	src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) + src_mm.start;
+	src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
+		src_mm.start;
 	dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo);
 	if (write)
 		swap(src_addr, dst_addr);
 
-	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, PAGE_SIZE, false);
+	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
+				PAGE_SIZE, false);
 
 	amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
 	WARN_ON(job->ibs[0].length_dw > num_dw);
 
-	r = amdgpu_job_submit(job, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
-	if (r) {
-		amdgpu_job_free(job);
-		goto out;
-	}
+	fence = amdgpu_job_submit(job);
 
 	if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
 		r = -ETIMEDOUT;
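The amdgpu_ttm_access_memory_sdma() hunk above illustrates the bounce-buffer scheme used for CPU access to VRAM that is not CPU-visible: the data always moves between the BO and the pinned staging buffer adev->mman.sdma_access_bo in a single SDMA copy, and a write is simply the same copy with source and destination swapped. Condensed from the post-patch code, with allocation and the staging-buffer memcpy elided:

	amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
	src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
		src_mm.start;
	dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo);
	if (write)
		swap(src_addr, dst_addr);	/* a write is the reversed copy */

	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
				PAGE_SIZE, false);

	/* The access is synchronous: submit, then wait for the copy. */
	fence = amdgpu_job_submit(job);
	if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
		r = -ETIMEDOUT;
	dma_fence_put(fence);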
@@ -1537,6 +1538,23 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
 		NULL, &adev->mman.fw_vram_usage_va);
 }
 
+/*
+ * Driver Reservation functions
+ */
+/**
+ * amdgpu_ttm_drv_reserve_vram_fini - free drv reserved vram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * free drv reserved vram if it has been reserved.
+ */
+static void amdgpu_ttm_drv_reserve_vram_fini(struct amdgpu_device *adev)
+{
+	amdgpu_bo_free_kernel(&adev->mman.drv_vram_usage_reserved_bo,
+						  NULL,
+						  &adev->mman.drv_vram_usage_va);
+}
+
 /**
  * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
  *
@@ -1558,11 +1576,35 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
 	return amdgpu_bo_create_kernel_at(adev,
 					  adev->mman.fw_vram_usage_start_offset,
 					  adev->mman.fw_vram_usage_size,
-					  AMDGPU_GEM_DOMAIN_VRAM,
 					  &adev->mman.fw_vram_usage_reserved_bo,
 					  &adev->mman.fw_vram_usage_va);
 }
 
+/**
+ * amdgpu_ttm_drv_reserve_vram_init - create bo vram reservation from driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * create bo vram reservation from drv.
+ */
+static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
+{
+	u64 vram_size = adev->gmc.visible_vram_size;
+
+	adev->mman.drv_vram_usage_va = NULL;
+	adev->mman.drv_vram_usage_reserved_bo = NULL;
+
+	if (adev->mman.drv_vram_usage_size == 0 ||
+	    adev->mman.drv_vram_usage_size > vram_size)
+		return 0;
+
+	return amdgpu_bo_create_kernel_at(adev,
+					  adev->mman.drv_vram_usage_start_offset,
+					  adev->mman.drv_vram_usage_size,
+					  &adev->mman.drv_vram_usage_reserved_bo,
+					  &adev->mman.drv_vram_usage_va);
+}
+
 /*
  * Memoy training reservation functions
  */
@@ -1639,7 +1681,6 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
 		ret = amdgpu_bo_create_kernel_at(adev,
 					 ctx->c2p_train_data_offset,
 					 ctx->train_data_size,
-					 AMDGPU_GEM_DOMAIN_VRAM,
 					 &ctx->c2p_bo,
 					 NULL);
 		if (ret) {
@@ -1653,7 +1694,6 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
 	ret = amdgpu_bo_create_kernel_at(adev,
 				adev->gmc.real_vram_size - adev->mman.discovery_tmr_size,
 				adev->mman.discovery_tmr_size,
-				AMDGPU_GEM_DOMAIN_VRAM,
 				&adev->mman.discovery_memory,
 				NULL);
 	if (ret) {
@@ -1731,6 +1771,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	}
 
 	/*
+	 *The reserved vram for driver must be pinned to the specified
+	 *place on the VRAM, so reserve it early.
+	 */
+	r = amdgpu_ttm_drv_reserve_vram_init(adev);
+	if (r)
+		return r;
+
+	/*
 	 * only NAVI10 and onwards ASIC support for IP discovery.
 	 * If IP discovery enabled, a block of memory should be
 	 * reserved for IP discovey.
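All of these reservations funnel through amdgpu_bo_create_kernel_at(), which loses its domain argument in this series: pinning a buffer at an explicit byte offset only ever made sense in VRAM, so AMDGPU_GEM_DOMAIN_VRAM is now implied. A hedged sketch of such a fixed-offset reservation, modeled on the fw/drv paths above; reserved_offset and reserved_size are placeholder names, not driver fields:

	struct amdgpu_bo *reserved_bo = NULL;
	void *reserved_va = NULL;
	int r;

	/*
	 * Pin a kernel BO at a fixed VRAM offset (the domain is implicitly
	 * VRAM after this patch).  reserved_offset/reserved_size are
	 * hypothetical and must not overlap other early reservations.
	 */
	r = amdgpu_bo_create_kernel_at(adev, reserved_offset, reserved_size,
				       &reserved_bo, &reserved_va);
	if (r)
		return r;

	/* ... and release it exactly like the fini paths above ... */
	amdgpu_bo_free_kernel(&reserved_bo, NULL, &reserved_va);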
@@ -1746,21 +1794,18 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	 * avoid display artifacts while transitioning between pre-OS
 	 * and driver.
 	 */
 	r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size,
-				       AMDGPU_GEM_DOMAIN_VRAM,
 				       &adev->mman.stolen_vga_memory,
 				       NULL);
 	if (r)
 		return r;
 	r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
 				       adev->mman.stolen_extended_size,
-				       AMDGPU_GEM_DOMAIN_VRAM,
 				       &adev->mman.stolen_extended_memory,
 				       NULL);
 	if (r)
 		return r;
 	r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset,
 				       adev->mman.stolen_reserved_size,
-				       AMDGPU_GEM_DOMAIN_VRAM,
 				       &adev->mman.stolen_reserved_memory,
 				       NULL);
 	if (r)
@@ -1855,6 +1900,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 	amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
 					&adev->mman.sdma_access_ptr);
 	amdgpu_ttm_fw_reserve_vram_fini(adev);
+	amdgpu_ttm_drv_reserve_vram_fini(adev);
 
 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
 
@@ -1936,7 +1982,9 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
 		AMDGPU_IB_POOL_DELAYED;
 	int r;
 
-	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, job);
+	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+				     AMDGPU_FENCE_OWNER_UNDEFINED,
+				     num_dw * 4, pool, job);
 	if (r)
 		return r;
 
@@ -1946,17 +1994,11 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
 							adev->gart.bo);
 		(*job)->vm_needs_flush = true;
 	}
-	if (resv) {
-		r = amdgpu_sync_resv(adev, &(*job)->sync, resv,
-				     AMDGPU_SYNC_ALWAYS,
-				     AMDGPU_FENCE_OWNER_UNDEFINED);
-		if (r) {
-			DRM_ERROR("sync failed (%d).\n", r);
-			amdgpu_job_free(*job);
-			return r;
-		}
-	}
-	return 0;
+	if (!resv)
+		return 0;
+
+	return drm_sched_job_add_resv_dependencies(&(*job)->base, resv,
+						   DMA_RESV_USAGE_BOOKKEEP);
 }
 
 int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
@@ -2001,8 +2043,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 	if (direct_submit)
 		r = amdgpu_job_submit_direct(job, ring, fence);
 	else
-		r = amdgpu_job_submit(job, &adev->mman.entity,
-				      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
+		*fence = amdgpu_job_submit(job);
 	if (r)
 		goto error_free;
 
@@ -2047,16 +2088,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 	WARN_ON(job->ibs[0].length_dw > num_dw);
 
-	r = amdgpu_job_submit(job, &adev->mman.entity,
-			      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
-	if (r)
-		goto error_free;
-
+	*fence = amdgpu_job_submit(job);
 	return 0;
-
-error_free:
-	amdgpu_job_free(job);
-	return r;
 }
 
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
@@ -2272,9 +2305,9 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
 		if (p->mapping != adev->mman.bdev.dev_mapping)
 			return -EPERM;
 
-		ptr = kmap(p);
+		ptr = kmap_local_page(p);
 		r = copy_to_user(buf, ptr + off, bytes);
-		kunmap(p);
+		kunmap_local(ptr);
 		if (r)
 			return -EFAULT;
 
@@ -2323,9 +2356,9 @@ static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
 		if (p->mapping != adev->mman.bdev.dev_mapping)
 			return -EPERM;
 
-		ptr = kmap(p);
+		ptr = kmap_local_page(p);
 		r = copy_from_user(ptr + off, buf, bytes);
-		kunmap(p);
+		kunmap_local(ptr);
 		if (r)
 			return -EFAULT;
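The amdgpu_ttm_prepare_job() hunk is the most interesting of the submission cleanups: the driver-private amdgpu_sync_resv() walk is replaced by the common scheduler helper, which attaches every fence of the dma_resv object to the job as a scheduler dependency, and DMA_RESV_USAGE_BOOKKEEP selects all fences, including bookkeeping ones that implicit sync would skip. A condensed sketch of the post-patch flow for a job that must respect a reservation object:

	/* Turn every fence in the resv into a scheduler dependency. */
	if (resv) {
		r = drm_sched_job_add_resv_dependencies(&job->base, resv,
							DMA_RESV_USAGE_BOOKKEEP);
		if (r)
			return r;
	}

	/* The scheduler runs the job only after all dependencies signal. */
	fence = amdgpu_job_submit(job);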

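Finally, the two debugfs iomem hunks convert kmap()/kunmap() to kmap_local_page()/kunmap_local(). The local variant creates a short-lived, CPU-local highmem mapping without the global kmap lock, and page faults remain allowed inside the mapped section, so copy_to_user()/copy_from_user() are safe there. Note the asymmetry: kunmap_local() takes the mapped pointer, whereas kunmap() took the page. The pattern in isolation:

	void *ptr;
	int r;

	ptr = kmap_local_page(p);	/* cheap, CPU-local mapping */
	r = copy_to_user(buf, ptr + off, bytes);
	kunmap_local(ptr);		/* unmap by pointer, not by page */
	if (r)
		return -EFAULT;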