diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c')
-rw-r--r--	drivers/gpu/drm/amd/amdkfd/kfd_svm.c	124
1 file changed, 73 insertions(+), 51 deletions(-)
| diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 16137c4247bb..3cb4681c5f53 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1574,7 +1574,6 @@ retry_flush_work:  static void svm_range_restore_work(struct work_struct *work)  {  	struct delayed_work *dwork = to_delayed_work(work); -	struct amdkfd_process_info *process_info;  	struct svm_range_list *svms;  	struct svm_range *prange;  	struct kfd_process *p; @@ -1594,12 +1593,10 @@ static void svm_range_restore_work(struct work_struct *work)  	 * the lifetime of this thread, kfd_process and mm will be valid.  	 */  	p = container_of(svms, struct kfd_process, svms); -	process_info = p->kgd_process_info;  	mm = p->mm;  	if (!mm)  		return; -	mutex_lock(&process_info->lock);  	svm_range_list_lock_and_flush_work(svms, mm);  	mutex_lock(&svms->lock); @@ -1652,7 +1649,6 @@ static void svm_range_restore_work(struct work_struct *work)  out_reschedule:  	mutex_unlock(&svms->lock);  	mmap_write_unlock(mm); -	mutex_unlock(&process_info->lock);  	/* If validation failed, reschedule another attempt */  	if (evicted_ranges) { @@ -1968,10 +1964,16 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)  	struct kfd_process_device *pdd;  	struct amdgpu_device *adev;  	struct kfd_process *p; +	int drain;  	uint32_t i;  	p = container_of(svms, struct kfd_process, svms); +restart: +	drain = atomic_read(&svms->drain_pagefaults); +	if (!drain) +		return; +  	for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {  		pdd = p->pdds[i];  		if (!pdd) @@ -1983,6 +1985,8 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)  		amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1);  		pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);  	} +	if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain) +		goto restart;  }  static void svm_range_deferred_list_work(struct work_struct *work) @@ 
-1990,43 +1994,41 @@ static void svm_range_deferred_list_work(struct work_struct *work)  	struct svm_range_list *svms;  	struct svm_range *prange;  	struct mm_struct *mm; +	struct kfd_process *p;  	svms = container_of(work, struct svm_range_list, deferred_list_work);  	pr_debug("enter svms 0x%p\n", svms); +	p = container_of(svms, struct kfd_process, svms); +	/* Avoid mm is gone when inserting mmu notifier */ +	mm = get_task_mm(p->lead_thread); +	if (!mm) { +		pr_debug("svms 0x%p process mm gone\n", svms); +		return; +	} +retry: +	mmap_write_lock(mm); + +	/* Checking for the need to drain retry faults must be inside +	 * mmap write lock to serialize with munmap notifiers. +	 */ +	if (unlikely(atomic_read(&svms->drain_pagefaults))) { +		mmap_write_unlock(mm); +		svm_range_drain_retry_fault(svms); +		goto retry; +	} +  	spin_lock(&svms->deferred_list_lock);  	while (!list_empty(&svms->deferred_range_list)) {  		prange = list_first_entry(&svms->deferred_range_list,  					  struct svm_range, deferred_list); +		list_del_init(&prange->deferred_list);  		spin_unlock(&svms->deferred_list_lock); +  		pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,  			 prange->start, prange->last, prange->work_item.op); -		mm = prange->work_item.mm; -retry: -		mmap_write_lock(mm);  		mutex_lock(&svms->lock); - -		/* Checking for the need to drain retry faults must be in -		 * mmap write lock to serialize with munmap notifiers. -		 * -		 * Remove from deferred_list must be inside mmap write lock, -		 * otherwise, svm_range_list_lock_and_flush_work may hold mmap -		 * write lock, and continue because deferred_list is empty, then -		 * deferred_list handle is blocked by mmap write lock. 
-		 */ -		spin_lock(&svms->deferred_list_lock); -		if (unlikely(svms->drain_pagefaults)) { -			svms->drain_pagefaults = false; -			spin_unlock(&svms->deferred_list_lock); -			mutex_unlock(&svms->lock); -			mmap_write_unlock(mm); -			svm_range_drain_retry_fault(svms); -			goto retry; -		} -		list_del_init(&prange->deferred_list); -		spin_unlock(&svms->deferred_list_lock); -  		mutex_lock(&prange->migrate_mutex);  		while (!list_empty(&prange->child_list)) {  			struct svm_range *pchild; @@ -2042,12 +2044,13 @@ retry:  		svm_range_handle_list_op(svms, prange);  		mutex_unlock(&svms->lock); -		mmap_write_unlock(mm);  		spin_lock(&svms->deferred_list_lock);  	}  	spin_unlock(&svms->deferred_list_lock); +	mmap_write_unlock(mm); +	mmput(mm);  	pr_debug("exit svms 0x%p\n", svms);  } @@ -2056,12 +2059,6 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,  			struct mm_struct *mm, enum svm_work_list_ops op)  {  	spin_lock(&svms->deferred_list_lock); -	/* Make sure pending page faults are drained in the deferred worker -	 * before the range is freed to avoid straggler interrupts on -	 * unmapped memory causing "phantom faults". -	 */ -	if (op == SVM_OP_UNMAP_RANGE) -		svms->drain_pagefaults = true;  	/* if prange is on the deferred list */  	if (!list_empty(&prange->deferred_list)) {  		pr_debug("update exist prange 0x%p work op %d\n", prange, op); @@ -2140,6 +2137,12 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,  	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,  		 prange, prange->start, prange->last, start, last); +	/* Make sure pending page faults are drained in the deferred worker +	 * before the range is freed to avoid straggler interrupts on +	 * unmapped memory causing "phantom faults". 
+	 */ +	atomic_inc(&svms->drain_pagefaults); +  	unmap_parent = start <= prange->start && last >= prange->last;  	list_for_each_entry(pchild, &prange->child_list, child_list) { @@ -2559,20 +2562,13 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,  }  static bool -svm_fault_allowed(struct mm_struct *mm, uint64_t addr, bool write_fault) +svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)  {  	unsigned long requested = VM_READ; -	struct vm_area_struct *vma;  	if (write_fault)  		requested |= VM_WRITE; -	vma = find_vma(mm, addr << PAGE_SHIFT); -	if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) { -		pr_debug("address 0x%llx VMA is removed\n", addr); -		return true; -	} -  	pr_debug("requested 0x%lx, vma permission flags 0x%lx\n", requested,  		vma->vm_flags);  	return (vma->vm_flags & requested) == requested; @@ -2590,6 +2586,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,  	int32_t best_loc;  	int32_t gpuidx = MAX_GPU_INSTANCE;  	bool write_locked = false; +	struct vm_area_struct *vma;  	int r = 0;  	if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) { @@ -2600,7 +2597,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,  	p = kfd_lookup_process_by_pasid(pasid);  	if (!p) {  		pr_debug("kfd process not founded pasid 0x%x\n", pasid); -		return -ESRCH; +		return 0;  	}  	if (!p->xnack_enabled) {  		pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); @@ -2611,10 +2608,19 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,  	pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr); +	if (atomic_read(&svms->drain_pagefaults)) { +		pr_debug("draining retry fault, drop fault 0x%llx\n", addr); +		r = 0; +		goto out; +	} + +	/* p->lead_thread is available as kfd_process_wq_release flush the work +	 * before releasing task ref. 
+	 */  	mm = get_task_mm(p->lead_thread);  	if (!mm) {  		pr_debug("svms 0x%p failed to get mm\n", svms); -		r = -ESRCH; +		r = 0;  		goto out;  	} @@ -2652,6 +2658,7 @@ retry_write_locked:  	if (svm_range_skip_recover(prange)) {  		amdgpu_gmc_filter_faults_remove(adev, addr, pasid); +		r = 0;  		goto out_unlock_range;  	} @@ -2660,10 +2667,21 @@ retry_write_locked:  	if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {  		pr_debug("svms 0x%p [0x%lx %lx] already restored\n",  			 svms, prange->start, prange->last); +		r = 0;  		goto out_unlock_range;  	} -	if (!svm_fault_allowed(mm, addr, write_fault)) { +	/* __do_munmap removed VMA, return success as we are handling stale +	 * retry fault. +	 */ +	vma = find_vma(mm, addr << PAGE_SHIFT); +	if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) { +		pr_debug("address 0x%llx VMA is removed\n", addr); +		r = 0; +		goto out_unlock_range; +	} + +	if (!svm_fault_allowed(vma, write_fault)) {  		pr_debug("fault addr 0x%llx no %s permission\n", addr,  			write_fault ? "write" : "read");  		r = -EPERM; @@ -2741,6 +2759,14 @@ void svm_range_list_fini(struct kfd_process *p)  	/* Ensure list work is finished before process is destroyed */  	flush_work(&p->svms.deferred_list_work); +	/* +	 * Ensure no retry fault comes in afterwards, as page fault handler will +	 * not find kfd process and take mm lock to recover fault. 
+	 */ +	atomic_inc(&p->svms.drain_pagefaults); +	svm_range_drain_retry_fault(&p->svms); + +  	list_for_each_entry_safe(prange, next, &p->svms.list, list) {  		svm_range_unlink(prange);  		svm_range_remove_notifier(prange); @@ -2761,6 +2787,7 @@ int svm_range_list_init(struct kfd_process *p)  	mutex_init(&svms->lock);  	INIT_LIST_HEAD(&svms->list);  	atomic_set(&svms->evicted_ranges, 0); +	atomic_set(&svms->drain_pagefaults, 0);  	INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);  	INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);  	INIT_LIST_HEAD(&svms->deferred_range_list); @@ -3150,7 +3177,6 @@ static int  svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,  		   uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)  { -	struct amdkfd_process_info *process_info = p->kgd_process_info;  	struct mm_struct *mm = current->mm;  	struct list_head update_list;  	struct list_head insert_list; @@ -3169,8 +3195,6 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,  	svms = &p->svms; -	mutex_lock(&process_info->lock); -  	svm_range_list_lock_and_flush_work(svms, mm);  	r = svm_range_is_valid(p, start, size); @@ -3246,8 +3270,6 @@ out_unlock_range:  	mutex_unlock(&svms->lock);  	mmap_read_unlock(mm);  out: -	mutex_unlock(&process_info->lock); -  	pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,  		 &p->svms, start, start + size - 1, r); | 
