Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
 -rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 973
 1 file changed, 238 insertions(+), 735 deletions(-)
| diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 216f52b744a6..6728ea5c71d4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -25,18 +25,9 @@   *   */ -#include <drm/drmP.h>  #include <drm/drm_vma_manager.h> +#include <drm/drm_pci.h>  #include <drm/i915_drm.h> -#include "i915_drv.h" -#include "i915_gem_clflush.h" -#include "i915_vgpu.h" -#include "i915_trace.h" -#include "intel_drv.h" -#include "intel_frontbuffer.h" -#include "intel_mocs.h" -#include "intel_workarounds.h" -#include "i915_gemfs.h"  #include <linux/dma-fence-array.h>  #include <linux/kthread.h>  #include <linux/reservation.h> @@ -46,6 +37,19 @@  #include <linux/swap.h>  #include <linux/pci.h>  #include <linux/dma-buf.h> +#include <linux/mman.h> + +#include "i915_drv.h" +#include "i915_gem_clflush.h" +#include "i915_gemfs.h" +#include "i915_reset.h" +#include "i915_trace.h" +#include "i915_vgpu.h" + +#include "intel_drv.h" +#include "intel_frontbuffer.h" +#include "intel_mocs.h" +#include "intel_workarounds.h"  static void i915_gem_flush_free_objects(struct drm_i915_private *i915); @@ -139,6 +143,8 @@ int i915_mutex_lock_interruptible(struct drm_device *dev)  static u32 __i915_gem_park(struct drm_i915_private *i915)  { +	intel_wakeref_t wakeref; +  	GEM_TRACE("\n");  	lockdep_assert_held(&i915->drm.struct_mutex); @@ -169,14 +175,13 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)  	i915_pmu_gt_parked(i915);  	i915_vma_parked(i915); -	i915->gt.awake = false; +	wakeref = fetch_and_zero(&i915->gt.awake); +	GEM_BUG_ON(!wakeref);  	if (INTEL_GEN(i915) >= 6)  		gen6_rps_idle(i915); -	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ); - -	intel_runtime_pm_put(i915); +	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);  	return i915->gt.epoch;  } @@ -201,12 +206,11 @@ void i915_gem_unpark(struct drm_i915_private *i915)  	lockdep_assert_held(&i915->drm.struct_mutex);  	GEM_BUG_ON(!i915->gt.active_requests); +	assert_rpm_wakelock_held(i915);  	if (i915->gt.awake)  		return; -	intel_runtime_pm_get_noresume(i915); -  	/*  	 * It seems that the DMC likes to transition between the DC states a lot  	 * when there are no connected displays (no active power domains) during @@ -218,9 +222,9 @@ void i915_gem_unpark(struct drm_i915_private *i915)  	 * Work around it by grabbing a GT IRQ power domain whilst there is any  	 * GT activity, preventing any DC state transitions.  	 
*/ -	intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); +	i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); +	GEM_BUG_ON(!i915->gt.awake); -	i915->gt.awake = true;  	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */  		i915->gt.epoch = 1; @@ -243,21 +247,19 @@ int  i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,  			    struct drm_file *file)  { -	struct drm_i915_private *dev_priv = to_i915(dev); -	struct i915_ggtt *ggtt = &dev_priv->ggtt; +	struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;  	struct drm_i915_gem_get_aperture *args = data;  	struct i915_vma *vma;  	u64 pinned; +	mutex_lock(&ggtt->vm.mutex); +  	pinned = ggtt->vm.reserved; -	mutex_lock(&dev->struct_mutex); -	list_for_each_entry(vma, &ggtt->vm.active_list, vm_link) -		if (i915_vma_is_pinned(vma)) -			pinned += vma->node.size; -	list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link) +	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)  		if (i915_vma_is_pinned(vma))  			pinned += vma->node.size; -	mutex_unlock(&dev->struct_mutex); + +	mutex_unlock(&ggtt->vm.mutex);  	args->aper_size = ggtt->vm.total;  	args->aper_available_size = args->aper_size - pinned; @@ -437,15 +439,19 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)  	if (ret)  		return ret; -	while ((vma = list_first_entry_or_null(&obj->vma_list, -					       struct i915_vma, -					       obj_link))) { +	spin_lock(&obj->vma.lock); +	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, +						       struct i915_vma, +						       obj_link))) {  		list_move_tail(&vma->obj_link, &still_in_list); +		spin_unlock(&obj->vma.lock); +  		ret = i915_vma_unbind(vma); -		if (ret) -			break; + +		spin_lock(&obj->vma.lock);  	} -	list_splice(&still_in_list, &obj->vma_list); +	list_splice(&still_in_list, &obj->vma.list); +	spin_unlock(&obj->vma.lock);  	return ret;  } @@ -655,11 +661,6 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj,  		     struct intel_rps_client *rps_client)  {  	might_sleep(); -#if IS_ENABLED(CONFIG_LOCKDEP) -	GEM_BUG_ON(debug_locks && -		   !!lockdep_is_held(&obj->base.dev->struct_mutex) != -		   !!(flags & I915_WAIT_LOCKED)); -#endif  	GEM_BUG_ON(timeout < 0);  	timeout = i915_gem_object_wait_reservation(obj->resv, @@ -711,8 +712,8 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj)  static int  i915_gem_create(struct drm_file *file,  		struct drm_i915_private *dev_priv, -		uint64_t size, -		uint32_t *handle_p) +		u64 size, +		u32 *handle_p)  {  	struct drm_i915_gem_object *obj;  	int ret; @@ -783,6 +784,8 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)  void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)  { +	intel_wakeref_t wakeref; +  	/*  	 * No actual flushing is required for the GTT write domain for reads  	 * from the GTT domain. 
Writes to it "immediately" go to main memory @@ -809,13 +812,13 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)  	i915_gem_chipset_flush(dev_priv); -	intel_runtime_pm_get(dev_priv); -	spin_lock_irq(&dev_priv->uncore.lock); +	with_intel_runtime_pm(dev_priv, wakeref) { +		spin_lock_irq(&dev_priv->uncore.lock); -	POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); +		POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); -	spin_unlock_irq(&dev_priv->uncore.lock); -	intel_runtime_pm_put(dev_priv); +		spin_unlock_irq(&dev_priv->uncore.lock); +	}  }  static void @@ -859,58 +862,6 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)  	obj->write_domain = 0;  } -static inline int -__copy_to_user_swizzled(char __user *cpu_vaddr, -			const char *gpu_vaddr, int gpu_offset, -			int length) -{ -	int ret, cpu_offset = 0; - -	while (length > 0) { -		int cacheline_end = ALIGN(gpu_offset + 1, 64); -		int this_length = min(cacheline_end - gpu_offset, length); -		int swizzled_gpu_offset = gpu_offset ^ 64; - -		ret = __copy_to_user(cpu_vaddr + cpu_offset, -				     gpu_vaddr + swizzled_gpu_offset, -				     this_length); -		if (ret) -			return ret + length; - -		cpu_offset += this_length; -		gpu_offset += this_length; -		length -= this_length; -	} - -	return 0; -} - -static inline int -__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, -			  const char __user *cpu_vaddr, -			  int length) -{ -	int ret, cpu_offset = 0; - -	while (length > 0) { -		int cacheline_end = ALIGN(gpu_offset + 1, 64); -		int this_length = min(cacheline_end - gpu_offset, length); -		int swizzled_gpu_offset = gpu_offset ^ 64; - -		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, -				       cpu_vaddr + cpu_offset, -				       this_length); -		if (ret) -			return ret + length; - -		cpu_offset += this_length; -		gpu_offset += this_length; -		length -= this_length; -	} - -	return 0; -} -  /*   * Pins the specified object's pages and synchronizes the object with   * GPU accesses. Sets needs_clflush to non-zero if the caller should @@ -1030,72 +981,23 @@ err_unpin:  	return ret;  } -static void -shmem_clflush_swizzled_range(char *addr, unsigned long length, -			     bool swizzled) -{ -	if (unlikely(swizzled)) { -		unsigned long start = (unsigned long) addr; -		unsigned long end = (unsigned long) addr + length; - -		/* For swizzling simply ensure that we always flush both -		 * channels. Lame, but simple and it works. Swizzled -		 * pwrite/pread is far from a hotpath - current userspace -		 * doesn't use it at all. */ -		start = round_down(start, 128); -		end = round_up(end, 128); - -		drm_clflush_virt_range((void *)start, end - start); -	} else { -		drm_clflush_virt_range(addr, length); -	} - -} - -/* Only difference to the fast-path function is that this can handle bit17 - * and uses non-atomic copy and kmap functions. */  static int -shmem_pread_slow(struct page *page, int offset, int length, -		 char __user *user_data, -		 bool page_do_bit17_swizzling, bool needs_clflush) +shmem_pread(struct page *page, int offset, int len, char __user *user_data, +	    bool needs_clflush)  {  	char *vaddr;  	int ret;  	vaddr = kmap(page); -	if (needs_clflush) -		shmem_clflush_swizzled_range(vaddr + offset, length, -					     page_do_bit17_swizzling); - -	if (page_do_bit17_swizzling) -		ret = __copy_to_user_swizzled(user_data, vaddr, offset, length); -	else -		ret = __copy_to_user(user_data, vaddr + offset, length); -	kunmap(page); -	return ret ? 
- EFAULT : 0; -} - -static int -shmem_pread(struct page *page, int offset, int length, char __user *user_data, -	    bool page_do_bit17_swizzling, bool needs_clflush) -{ -	int ret; +	if (needs_clflush) +		drm_clflush_virt_range(vaddr + offset, len); -	ret = -ENODEV; -	if (!page_do_bit17_swizzling) { -		char *vaddr = kmap_atomic(page); +	ret = __copy_to_user(user_data, vaddr + offset, len); -		if (needs_clflush) -			drm_clflush_virt_range(vaddr + offset, length); -		ret = __copy_to_user_inatomic(user_data, vaddr + offset, length); -		kunmap_atomic(vaddr); -	} -	if (ret == 0) -		return 0; +	kunmap(page); -	return shmem_pread_slow(page, offset, length, user_data, -				page_do_bit17_swizzling, needs_clflush); +	return ret ? -EFAULT : 0;  }  static int @@ -1104,15 +1006,10 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,  {  	char __user *user_data;  	u64 remain; -	unsigned int obj_do_bit17_swizzling;  	unsigned int needs_clflush;  	unsigned int idx, offset;  	int ret; -	obj_do_bit17_swizzling = 0; -	if (i915_gem_object_needs_bit17_swizzle(obj)) -		obj_do_bit17_swizzling = BIT(17); -  	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);  	if (ret)  		return ret; @@ -1130,7 +1027,6 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,  		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);  		ret = shmem_pread(page, offset, length, user_data, -				  page_to_phys(page) & obj_do_bit17_swizzling,  				  needs_clflush);  		if (ret)  			break; @@ -1174,6 +1070,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,  {  	struct drm_i915_private *i915 = to_i915(obj->base.dev);  	struct i915_ggtt *ggtt = &i915->ggtt; +	intel_wakeref_t wakeref;  	struct drm_mm_node node;  	struct i915_vma *vma;  	void __user *user_data; @@ -1184,7 +1081,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,  	if (ret)  		return ret; -	intel_runtime_pm_get(i915); +	wakeref = intel_runtime_pm_get(i915);  	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,  				       PIN_MAPPABLE |  				       PIN_NONFAULT | @@ -1257,7 +1154,7 @@ out_unpin:  		i915_vma_unpin(vma);  	}  out_unlock: -	intel_runtime_pm_put(i915); +	intel_runtime_pm_put(i915, wakeref);  	mutex_unlock(&i915->drm.struct_mutex);  	return ret; @@ -1358,6 +1255,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,  {  	struct drm_i915_private *i915 = to_i915(obj->base.dev);  	struct i915_ggtt *ggtt = &i915->ggtt; +	intel_wakeref_t wakeref;  	struct drm_mm_node node;  	struct i915_vma *vma;  	u64 remain, offset; @@ -1376,13 +1274,14 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,  		 * This easily dwarfs any performance advantage from  		 * using the cache bypass of indirect GGTT access.  		 
*/ -		if (!intel_runtime_pm_get_if_in_use(i915)) { +		wakeref = intel_runtime_pm_get_if_in_use(i915); +		if (!wakeref) {  			ret = -EFAULT;  			goto out_unlock;  		}  	} else {  		/* No backing pages, no fallback, we must force GGTT access */ -		intel_runtime_pm_get(i915); +		wakeref = intel_runtime_pm_get(i915);  	}  	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, @@ -1464,39 +1363,12 @@ out_unpin:  		i915_vma_unpin(vma);  	}  out_rpm: -	intel_runtime_pm_put(i915); +	intel_runtime_pm_put(i915, wakeref);  out_unlock:  	mutex_unlock(&i915->drm.struct_mutex);  	return ret;  } -static int -shmem_pwrite_slow(struct page *page, int offset, int length, -		  char __user *user_data, -		  bool page_do_bit17_swizzling, -		  bool needs_clflush_before, -		  bool needs_clflush_after) -{ -	char *vaddr; -	int ret; - -	vaddr = kmap(page); -	if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) -		shmem_clflush_swizzled_range(vaddr + offset, length, -					     page_do_bit17_swizzling); -	if (page_do_bit17_swizzling) -		ret = __copy_from_user_swizzled(vaddr, offset, user_data, -						length); -	else -		ret = __copy_from_user(vaddr + offset, user_data, length); -	if (needs_clflush_after) -		shmem_clflush_swizzled_range(vaddr + offset, length, -					     page_do_bit17_swizzling); -	kunmap(page); - -	return ret ? -EFAULT : 0; -} -  /* Per-page copy function for the shmem pwrite fastpath.   * Flushes invalid cachelines before writing to the target if   * needs_clflush_before is set and flushes out any written cachelines after @@ -1504,31 +1376,24 @@ shmem_pwrite_slow(struct page *page, int offset, int length,   */  static int  shmem_pwrite(struct page *page, int offset, int len, char __user *user_data, -	     bool page_do_bit17_swizzling,  	     bool needs_clflush_before,  	     bool needs_clflush_after)  { +	char *vaddr;  	int ret; -	ret = -ENODEV; -	if (!page_do_bit17_swizzling) { -		char *vaddr = kmap_atomic(page); +	vaddr = kmap(page); -		if (needs_clflush_before) -			drm_clflush_virt_range(vaddr + offset, len); -		ret = __copy_from_user_inatomic(vaddr + offset, user_data, len); -		if (needs_clflush_after) -			drm_clflush_virt_range(vaddr + offset, len); +	if (needs_clflush_before) +		drm_clflush_virt_range(vaddr + offset, len); -		kunmap_atomic(vaddr); -	} -	if (ret == 0) -		return ret; +	ret = __copy_from_user(vaddr + offset, user_data, len); +	if (!ret && needs_clflush_after) +		drm_clflush_virt_range(vaddr + offset, len); -	return shmem_pwrite_slow(page, offset, len, user_data, -				 page_do_bit17_swizzling, -				 needs_clflush_before, -				 needs_clflush_after); +	kunmap(page); + +	return ret ? -EFAULT : 0;  }  static int @@ -1538,7 +1403,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,  	struct drm_i915_private *i915 = to_i915(obj->base.dev);  	void __user *user_data;  	u64 remain; -	unsigned int obj_do_bit17_swizzling;  	unsigned int partial_cacheline_write;  	unsigned int needs_clflush;  	unsigned int offset, idx; @@ -1553,10 +1417,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,  	if (ret)  		return ret; -	obj_do_bit17_swizzling = 0; -	if (i915_gem_object_needs_bit17_swizzle(obj)) -		obj_do_bit17_swizzling = BIT(17); -  	/* If we don't overwrite a cacheline completely we need to be  	 * careful to have up-to-date data by first clflushing. Don't  	 * overcomplicate things and flush the entire patch. 
@@ -1573,7 +1433,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,  		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);  		ret = shmem_pwrite(page, offset, length, user_data, -				   page_to_phys(page) & obj_do_bit17_swizzling,  				   (offset | length) & partial_cacheline_write,  				   needs_clflush & CLFLUSH_AFTER);  		if (ret) @@ -1677,23 +1536,21 @@ err:  static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)  { -	struct drm_i915_private *i915; +	struct drm_i915_private *i915 = to_i915(obj->base.dev);  	struct list_head *list;  	struct i915_vma *vma;  	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); +	mutex_lock(&i915->ggtt.vm.mutex);  	for_each_ggtt_vma(vma, obj) { -		if (i915_vma_is_active(vma)) -			continue; -  		if (!drm_mm_node_allocated(&vma->node))  			continue; -		list_move_tail(&vma->vm_link, &vma->vm->inactive_list); +		list_move_tail(&vma->vm_link, &vma->vm->bound_list);  	} +	mutex_unlock(&i915->ggtt.vm.mutex); -	i915 = to_i915(obj->base.dev);  	spin_lock(&i915->mm.obj_lock);  	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;  	list_move_tail(&obj->mm.link, list); @@ -1713,8 +1570,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,  {  	struct drm_i915_gem_set_domain *args = data;  	struct drm_i915_gem_object *obj; -	uint32_t read_domains = args->read_domains; -	uint32_t write_domain = args->write_domain; +	u32 read_domains = args->read_domains; +	u32 write_domain = args->write_domain;  	int err;  	/* Only handle setting domains to types used by the CPU. */ @@ -1824,6 +1681,16 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,  	return 0;  } +static inline bool +__vma_matches(struct vm_area_struct *vma, struct file *filp, +	      unsigned long addr, unsigned long size) +{ +	if (vma->vm_file != filp) +		return false; + +	return vma->vm_start == addr && (vma->vm_end - vma->vm_start) == size; +} +  /**   * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address   *			 it is mapped to. 
@@ -1873,6 +1740,9 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,  	addr = vm_mmap(obj->base.filp, 0, args->size,  		       PROT_READ | PROT_WRITE, MAP_SHARED,  		       args->offset); +	if (IS_ERR_VALUE(addr)) +		goto err; +  	if (args->flags & I915_MMAP_WC) {  		struct mm_struct *mm = current->mm;  		struct vm_area_struct *vma; @@ -1882,23 +1752,28 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,  			return -EINTR;  		}  		vma = find_vma(mm, addr); -		if (vma) +		if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))  			vma->vm_page_prot =  				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));  		else  			addr = -ENOMEM;  		up_write(&mm->mmap_sem); +		if (IS_ERR_VALUE(addr)) +			goto err;  		/* This may race, but that's ok, it only gets set */  		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);  	}  	i915_gem_object_put(obj); -	if (IS_ERR((void *)addr)) -		return addr; -	args->addr_ptr = (uint64_t) addr; +	args->addr_ptr = (u64)addr;  	return 0; + +err: +	i915_gem_object_put(obj); + +	return addr;  }  static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) @@ -2009,6 +1884,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)  	struct drm_i915_private *dev_priv = to_i915(dev);  	struct i915_ggtt *ggtt = &dev_priv->ggtt;  	bool write = area->vm_flags & VM_WRITE; +	intel_wakeref_t wakeref;  	struct i915_vma *vma;  	pgoff_t page_offset;  	int ret; @@ -2038,7 +1914,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)  	if (ret)  		goto err; -	intel_runtime_pm_get(dev_priv); +	wakeref = intel_runtime_pm_get(dev_priv);  	ret = i915_mutex_lock_interruptible(dev);  	if (ret) @@ -2116,7 +1992,7 @@ err_unpin:  err_unlock:  	mutex_unlock(&dev->struct_mutex);  err_rpm: -	intel_runtime_pm_put(dev_priv); +	intel_runtime_pm_put(dev_priv, wakeref);  	i915_gem_object_unpin_pages(obj);  err:  	switch (ret) { @@ -2189,6 +2065,7 @@ void  i915_gem_release_mmap(struct drm_i915_gem_object *obj)  {  	struct drm_i915_private *i915 = to_i915(obj->base.dev); +	intel_wakeref_t wakeref;  	/* Serialisation between user GTT access and our code depends upon  	 * revoking the CPU's PTE whilst the mutex is held. The next user @@ -2199,7 +2076,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)  	 * wakeref.  	 
*/  	lockdep_assert_held(&i915->drm.struct_mutex); -	intel_runtime_pm_get(i915); +	wakeref = intel_runtime_pm_get(i915);  	if (!obj->userfault_count)  		goto out; @@ -2216,7 +2093,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)  	wmb();  out: -	intel_runtime_pm_put(i915); +	intel_runtime_pm_put(i915, wakeref);  }  void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) @@ -2296,8 +2173,8 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)  int  i915_gem_mmap_gtt(struct drm_file *file,  		  struct drm_device *dev, -		  uint32_t handle, -		  uint64_t *offset) +		  u32 handle, +		  u64 *offset)  {  	struct drm_i915_gem_object *obj;  	int ret; @@ -2444,8 +2321,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)  	struct sg_table *pages;  	pages = fetch_and_zero(&obj->mm.pages); -	if (!pages) -		return NULL; +	if (IS_ERR_OR_NULL(pages)) +		return pages;  	spin_lock(&i915->mm.obj_lock);  	list_del(&obj->mm.link); @@ -2469,22 +2346,23 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)  	return pages;  } -void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, -				 enum i915_mm_subclass subclass) +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, +				enum i915_mm_subclass subclass)  {  	struct sg_table *pages; +	int ret;  	if (i915_gem_object_has_pinned_pages(obj)) -		return; +		return -EBUSY;  	GEM_BUG_ON(obj->bind_count); -	if (!i915_gem_object_has_pages(obj)) -		return;  	/* May be called by shrinker from within get_pages() (on another bo) */  	mutex_lock_nested(&obj->mm.lock, subclass); -	if (unlikely(atomic_read(&obj->mm.pages_pin_count))) +	if (unlikely(atomic_read(&obj->mm.pages_pin_count))) { +		ret = -EBUSY;  		goto unlock; +	}  	/*  	 * ->put_pages might need to allocate memory for the bit17 swizzle @@ -2492,11 +2370,24 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,  	 * lists early.  	 */  	pages = __i915_gem_object_unset_pages(obj); + +	/* +	 * XXX Temporary hijinx to avoid updating all backends to handle +	 * NULL pages. In the future, when we have more asynchronous +	 * get_pages backends we should be better able to handle the +	 * cancellation of the async task in a more uniform manner. 
+	 */ +	if (!pages && !i915_gem_object_needs_async_cancel(obj)) +		pages = ERR_PTR(-EINVAL); +  	if (!IS_ERR(pages))  		obj->ops->put_pages(obj, pages); +	ret = 0;  unlock:  	mutex_unlock(&obj->mm.lock); + +	return ret;  }  bool i915_sg_trim(struct sg_table *orig_st) @@ -3000,59 +2891,12 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,  	return 0;  } -static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv, -					const struct i915_gem_context *ctx) +static bool match_ring(struct i915_request *rq)  { -	unsigned int score; -	unsigned long prev_hang; +	struct drm_i915_private *dev_priv = rq->i915; +	u32 ring = I915_READ(RING_START(rq->engine->mmio_base)); -	if (i915_gem_context_is_banned(ctx)) -		score = I915_CLIENT_SCORE_CONTEXT_BAN; -	else -		score = 0; - -	prev_hang = xchg(&file_priv->hang_timestamp, jiffies); -	if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) -		score += I915_CLIENT_SCORE_HANG_FAST; - -	if (score) { -		atomic_add(score, &file_priv->ban_score); - -		DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n", -				 ctx->name, score, -				 atomic_read(&file_priv->ban_score)); -	} -} - -static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) -{ -	unsigned int score; -	bool banned, bannable; - -	atomic_inc(&ctx->guilty_count); - -	bannable = i915_gem_context_is_bannable(ctx); -	score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score); -	banned = score >= CONTEXT_SCORE_BAN_THRESHOLD; - -	/* Cool contexts don't accumulate client ban score */ -	if (!bannable) -		return; - -	if (banned) { -		DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n", -				 ctx->name, atomic_read(&ctx->guilty_count), -				 score); -		i915_gem_context_set_banned(ctx); -	} - -	if (!IS_ERR_OR_NULL(ctx->file_priv)) -		i915_gem_client_mark_guilty(ctx->file_priv, ctx); -} - -static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) -{ -	atomic_inc(&ctx->active_count); +	return ring == i915_ggtt_offset(rq->ring->vma);  }  struct i915_request * @@ -3074,9 +2918,16 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)  	 */  	spin_lock_irqsave(&engine->timeline.lock, flags);  	list_for_each_entry(request, &engine->timeline.requests, link) { -		if (__i915_request_completed(request, request->global_seqno)) +		if (i915_request_completed(request))  			continue; +		if (!i915_request_started(request)) +			break; + +		/* More than one preemptible request may match! */ +		if (!match_ring(request)) +			break; +  		active = request;  		break;  	} @@ -3085,366 +2936,6 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)  	return active;  } -/* - * Ensure irq handler finishes, and not run again. - * Also return the active request so that we only search for it once. - */ -struct i915_request * -i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) -{ -	struct i915_request *request; - -	/* -	 * During the reset sequence, we must prevent the engine from -	 * entering RC6. As the context state is undefined until we restart -	 * the engine, if it does enter RC6 during the reset, the state -	 * written to the powercontext is undefined and so we may lose -	 * GPU state upon resume, i.e. fail to restart after a reset. -	 */ -	intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); - -	request = engine->reset.prepare(engine); -	if (request && request->fence.error == -EIO) -		request = ERR_PTR(-EIO); /* Previous reset failed! 
*/ - -	return request; -} - -int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) -{ -	struct intel_engine_cs *engine; -	struct i915_request *request; -	enum intel_engine_id id; -	int err = 0; - -	for_each_engine(engine, dev_priv, id) { -		request = i915_gem_reset_prepare_engine(engine); -		if (IS_ERR(request)) { -			err = PTR_ERR(request); -			continue; -		} - -		engine->hangcheck.active_request = request; -	} - -	i915_gem_revoke_fences(dev_priv); -	intel_uc_sanitize(dev_priv); - -	return err; -} - -static void engine_skip_context(struct i915_request *request) -{ -	struct intel_engine_cs *engine = request->engine; -	struct i915_gem_context *hung_ctx = request->gem_context; -	struct i915_timeline *timeline = request->timeline; -	unsigned long flags; - -	GEM_BUG_ON(timeline == &engine->timeline); - -	spin_lock_irqsave(&engine->timeline.lock, flags); -	spin_lock(&timeline->lock); - -	list_for_each_entry_continue(request, &engine->timeline.requests, link) -		if (request->gem_context == hung_ctx) -			i915_request_skip(request, -EIO); - -	list_for_each_entry(request, &timeline->requests, link) -		i915_request_skip(request, -EIO); - -	spin_unlock(&timeline->lock); -	spin_unlock_irqrestore(&engine->timeline.lock, flags); -} - -/* Returns the request if it was guilty of the hang */ -static struct i915_request * -i915_gem_reset_request(struct intel_engine_cs *engine, -		       struct i915_request *request, -		       bool stalled) -{ -	/* The guilty request will get skipped on a hung engine. -	 * -	 * Users of client default contexts do not rely on logical -	 * state preserved between batches so it is safe to execute -	 * queued requests following the hang. Non default contexts -	 * rely on preserved state, so skipping a batch loses the -	 * evolution of the state and it needs to be considered corrupted. -	 * Executing more queued batches on top of corrupted state is -	 * risky. But we take the risk by trying to advance through -	 * the queued requests in order to make the client behaviour -	 * more predictable around resets, by not throwing away random -	 * amount of batches it has prepared for execution. Sophisticated -	 * clients can use gem_reset_stats_ioctl and dma fence status -	 * (exported via sync_file info ioctl on explicit fences) to observe -	 * when it loses the context state and should rebuild accordingly. -	 * -	 * The context ban, and ultimately the client ban, mechanism are safety -	 * valves if client submission ends up resulting in nothing more than -	 * subsequent hangs. -	 */ - -	if (i915_request_completed(request)) { -		GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n", -			  engine->name, request->global_seqno, -			  request->fence.context, request->fence.seqno, -			  intel_engine_get_seqno(engine)); -		stalled = false; -	} - -	if (stalled) { -		i915_gem_context_mark_guilty(request->gem_context); -		i915_request_skip(request, -EIO); - -		/* If this context is now banned, skip all pending requests. */ -		if (i915_gem_context_is_banned(request->gem_context)) -			engine_skip_context(request); -	} else { -		/* -		 * Since this is not the hung engine, it may have advanced -		 * since the hang declaration. Double check by refinding -		 * the active request at the time of the reset. 
-		 */ -		request = i915_gem_find_active_request(engine); -		if (request) { -			unsigned long flags; - -			i915_gem_context_mark_innocent(request->gem_context); -			dma_fence_set_error(&request->fence, -EAGAIN); - -			/* Rewind the engine to replay the incomplete rq */ -			spin_lock_irqsave(&engine->timeline.lock, flags); -			request = list_prev_entry(request, link); -			if (&request->link == &engine->timeline.requests) -				request = NULL; -			spin_unlock_irqrestore(&engine->timeline.lock, flags); -		} -	} - -	return request; -} - -void i915_gem_reset_engine(struct intel_engine_cs *engine, -			   struct i915_request *request, -			   bool stalled) -{ -	/* -	 * Make sure this write is visible before we re-enable the interrupt -	 * handlers on another CPU, as tasklet_enable() resolves to just -	 * a compiler barrier which is insufficient for our purpose here. -	 */ -	smp_store_mb(engine->irq_posted, 0); - -	if (request) -		request = i915_gem_reset_request(engine, request, stalled); - -	/* Setup the CS to resume from the breadcrumb of the hung request */ -	engine->reset.reset(engine, request); -} - -void i915_gem_reset(struct drm_i915_private *dev_priv, -		    unsigned int stalled_mask) -{ -	struct intel_engine_cs *engine; -	enum intel_engine_id id; - -	lockdep_assert_held(&dev_priv->drm.struct_mutex); - -	i915_retire_requests(dev_priv); - -	for_each_engine(engine, dev_priv, id) { -		struct intel_context *ce; - -		i915_gem_reset_engine(engine, -				      engine->hangcheck.active_request, -				      stalled_mask & ENGINE_MASK(id)); -		ce = fetch_and_zero(&engine->last_retired_context); -		if (ce) -			intel_context_unpin(ce); - -		/* -		 * Ostensibily, we always want a context loaded for powersaving, -		 * so if the engine is idle after the reset, send a request -		 * to load our scratch kernel_context. -		 * -		 * More mysteriously, if we leave the engine idle after a reset, -		 * the next userspace batch may hang, with what appears to be -		 * an incoherent read by the CS (presumably stale TLB). An -		 * empty request appears sufficient to paper over the glitch. 
-		 */ -		if (intel_engine_is_idle(engine)) { -			struct i915_request *rq; - -			rq = i915_request_alloc(engine, -						dev_priv->kernel_context); -			if (!IS_ERR(rq)) -				i915_request_add(rq); -		} -	} - -	i915_gem_restore_fences(dev_priv); -} - -void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) -{ -	engine->reset.finish(engine); - -	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); -} - -void i915_gem_reset_finish(struct drm_i915_private *dev_priv) -{ -	struct intel_engine_cs *engine; -	enum intel_engine_id id; - -	lockdep_assert_held(&dev_priv->drm.struct_mutex); - -	for_each_engine(engine, dev_priv, id) { -		engine->hangcheck.active_request = NULL; -		i915_gem_reset_finish_engine(engine); -	} -} - -static void nop_submit_request(struct i915_request *request) -{ -	unsigned long flags; - -	GEM_TRACE("%s fence %llx:%d -> -EIO\n", -		  request->engine->name, -		  request->fence.context, request->fence.seqno); -	dma_fence_set_error(&request->fence, -EIO); - -	spin_lock_irqsave(&request->engine->timeline.lock, flags); -	__i915_request_submit(request); -	intel_engine_init_global_seqno(request->engine, request->global_seqno); -	spin_unlock_irqrestore(&request->engine->timeline.lock, flags); -} - -void i915_gem_set_wedged(struct drm_i915_private *i915) -{ -	struct intel_engine_cs *engine; -	enum intel_engine_id id; - -	GEM_TRACE("start\n"); - -	if (GEM_SHOW_DEBUG()) { -		struct drm_printer p = drm_debug_printer(__func__); - -		for_each_engine(engine, i915, id) -			intel_engine_dump(engine, &p, "%s\n", engine->name); -	} - -	if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags)) -		goto out; - -	/* -	 * First, stop submission to hw, but do not yet complete requests by -	 * rolling the global seqno forward (since this would complete requests -	 * for which we haven't set the fence error to EIO yet). -	 */ -	for_each_engine(engine, i915, id) -		i915_gem_reset_prepare_engine(engine); - -	/* Even if the GPU reset fails, it should still stop the engines */ -	if (INTEL_GEN(i915) >= 5) -		intel_gpu_reset(i915, ALL_ENGINES); - -	for_each_engine(engine, i915, id) { -		engine->submit_request = nop_submit_request; -		engine->schedule = NULL; -	} -	i915->caps.scheduler = 0; - -	/* -	 * Make sure no request can slip through without getting completed by -	 * either this call here to intel_engine_init_global_seqno, or the one -	 * in nop_submit_request. -	 */ -	synchronize_rcu(); - -	/* Mark all executing requests as skipped */ -	for_each_engine(engine, i915, id) -		engine->cancel_requests(engine); - -	for_each_engine(engine, i915, id) { -		i915_gem_reset_finish_engine(engine); -		intel_engine_wakeup(engine); -	} - -out: -	GEM_TRACE("end\n"); - -	wake_up_all(&i915->gpu_error.reset_queue); -} - -bool i915_gem_unset_wedged(struct drm_i915_private *i915) -{ -	struct i915_timeline *tl; - -	lockdep_assert_held(&i915->drm.struct_mutex); -	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) -		return true; - -	GEM_TRACE("start\n"); - -	/* -	 * Before unwedging, make sure that all pending operations -	 * are flushed and errored out - we may have requests waiting upon -	 * third party fences. We marked all inflight requests as EIO, and -	 * every execbuf since returned EIO, for consistency we want all -	 * the currently pending requests to also be marked as EIO, which -	 * is done inside our nop_submit_request - and so we must wait. -	 * -	 * No more can be submitted until we reset the wedged bit. 
-	 */ -	list_for_each_entry(tl, &i915->gt.timelines, link) { -		struct i915_request *rq; - -		rq = i915_gem_active_peek(&tl->last_request, -					  &i915->drm.struct_mutex); -		if (!rq) -			continue; - -		/* -		 * We can't use our normal waiter as we want to -		 * avoid recursively trying to handle the current -		 * reset. The basic dma_fence_default_wait() installs -		 * a callback for dma_fence_signal(), which is -		 * triggered by our nop handler (indirectly, the -		 * callback enables the signaler thread which is -		 * woken by the nop_submit_request() advancing the seqno -		 * and when the seqno passes the fence, the signaler -		 * then signals the fence waking us up). -		 */ -		if (dma_fence_default_wait(&rq->fence, true, -					   MAX_SCHEDULE_TIMEOUT) < 0) -			return false; -	} -	i915_retire_requests(i915); -	GEM_BUG_ON(i915->gt.active_requests); - -	if (!intel_gpu_reset(i915, ALL_ENGINES)) -		intel_engines_sanitize(i915); - -	/* -	 * Undo nop_submit_request. We prevent all new i915 requests from -	 * being queued (by disallowing execbuf whilst wedged) so having -	 * waited for all active requests above, we know the system is idle -	 * and do not have to worry about a thread being inside -	 * engine->submit_request() as we swap over. So unlike installing -	 * the nop_submit_request on reset, we can do this from normal -	 * context and do not require stop_machine(). -	 */ -	intel_engines_reset_default_submission(i915); -	i915_gem_contexts_lost(i915); - -	GEM_TRACE("end\n"); - -	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ -	clear_bit(I915_WEDGED, &i915->gpu_error.flags); - -	return true; -} -  static void  i915_gem_retire_work_handler(struct work_struct *work)  { @@ -3547,7 +3038,7 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915)  	GEM_BUG_ON(i915->gt.active_requests);  	for_each_engine(engine, i915, id) { -		GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); +		GEM_BUG_ON(__i915_active_request_peek(&engine->timeline.last_request));  		GEM_BUG_ON(engine->last_retired_context !=  			   to_intel_context(i915->kernel_context, engine));  	} @@ -3766,33 +3257,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)  	return ret;  } -static long wait_for_timeline(struct i915_timeline *tl, -			      unsigned int flags, long timeout) -{ -	struct i915_request *rq; - -	rq = i915_gem_active_get_unlocked(&tl->last_request); -	if (!rq) -		return timeout; - -	/* -	 * "Race-to-idle". -	 * -	 * Switching to the kernel context is often used a synchronous -	 * step prior to idling, e.g. in suspend for flushing all -	 * current operations to memory before sleeping. These we -	 * want to complete as quickly as possible to avoid prolonged -	 * stalls, so allow the gpu to boost to maximum clocks. 
-	 */ -	if (flags & I915_WAIT_FOR_IDLE_BOOST) -		gen6_rps_boost(rq, NULL); - -	timeout = i915_request_wait(rq, flags, timeout); -	i915_request_put(rq); - -	return timeout; -} -  static int wait_for_engines(struct drm_i915_private *i915)  {  	if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { @@ -3806,6 +3270,52 @@ static int wait_for_engines(struct drm_i915_private *i915)  	return 0;  } +static long +wait_for_timelines(struct drm_i915_private *i915, +		   unsigned int flags, long timeout) +{ +	struct i915_gt_timelines *gt = &i915->gt.timelines; +	struct i915_timeline *tl; + +	if (!READ_ONCE(i915->gt.active_requests)) +		return timeout; + +	mutex_lock(>->mutex); +	list_for_each_entry(tl, >->active_list, link) { +		struct i915_request *rq; + +		rq = i915_active_request_get_unlocked(&tl->last_request); +		if (!rq) +			continue; + +		mutex_unlock(>->mutex); + +		/* +		 * "Race-to-idle". +		 * +		 * Switching to the kernel context is often used a synchronous +		 * step prior to idling, e.g. in suspend for flushing all +		 * current operations to memory before sleeping. These we +		 * want to complete as quickly as possible to avoid prolonged +		 * stalls, so allow the gpu to boost to maximum clocks. +		 */ +		if (flags & I915_WAIT_FOR_IDLE_BOOST) +			gen6_rps_boost(rq, NULL); + +		timeout = i915_request_wait(rq, flags, timeout); +		i915_request_put(rq); +		if (timeout < 0) +			return timeout; + +		/* restart after reacquiring the lock */ +		mutex_lock(>->mutex); +		tl = list_entry(>->active_list, typeof(*tl), link); +	} +	mutex_unlock(>->mutex); + +	return timeout; +} +  int i915_gem_wait_for_idle(struct drm_i915_private *i915,  			   unsigned int flags, long timeout)  { @@ -3817,17 +3327,15 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,  	if (!READ_ONCE(i915->gt.awake))  		return 0; +	timeout = wait_for_timelines(i915, flags, timeout); +	if (timeout < 0) +		return timeout; +  	if (flags & I915_WAIT_LOCKED) { -		struct i915_timeline *tl;  		int err;  		lockdep_assert_held(&i915->drm.struct_mutex); -		list_for_each_entry(tl, &i915->gt.timelines, link) { -			timeout = wait_for_timeline(tl, flags, timeout); -			if (timeout < 0) -				return timeout; -		}  		if (GEM_SHOW_DEBUG() && !timeout) {  			/* Presume that timeout was non-zero to begin with! */  			dev_warn(&i915->drm.pdev->dev, @@ -3841,17 +3349,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,  		i915_retire_requests(i915);  		GEM_BUG_ON(i915->gt.active_requests); -	} else { -		struct intel_engine_cs *engine; -		enum intel_engine_id id; - -		for_each_engine(engine, i915, id) { -			struct i915_timeline *tl = &engine->timeline; - -			timeout = wait_for_timeline(tl, flags, timeout); -			if (timeout < 0) -				return timeout; -		}  	}  	return 0; @@ -4037,7 +3534,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,  	 * reading an invalid PTE on older architectures.  	 
*/  restart: -	list_for_each_entry(vma, &obj->vma_list, obj_link) { +	list_for_each_entry(vma, &obj->vma.list, obj_link) {  		if (!drm_mm_node_allocated(&vma->node))  			continue; @@ -4115,7 +3612,7 @@ restart:  			 */  		} -		list_for_each_entry(vma, &obj->vma_list, obj_link) { +		list_for_each_entry(vma, &obj->vma.list, obj_link) {  			if (!drm_mm_node_allocated(&vma->node))  				continue; @@ -4125,7 +3622,7 @@ restart:  		}  	} -	list_for_each_entry(vma, &obj->vma_list, obj_link) +	list_for_each_entry(vma, &obj->vma.list, obj_link)  		vma->node.color = cache_level;  	i915_gem_object_set_cache_coherency(obj, cache_level);  	obj->cache_dirty = true; /* Always invalidate stale cachelines */ @@ -4688,7 +4185,8 @@ out:  }  static void -frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request) +frontbuffer_retire(struct i915_active_request *active, +		   struct i915_request *request)  {  	struct drm_i915_gem_object *obj =  		container_of(active, typeof(*obj), frontbuffer_write); @@ -4701,7 +4199,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,  {  	mutex_init(&obj->mm.lock); -	INIT_LIST_HEAD(&obj->vma_list); +	spin_lock_init(&obj->vma.lock); +	INIT_LIST_HEAD(&obj->vma.list); +  	INIT_LIST_HEAD(&obj->lut_list);  	INIT_LIST_HEAD(&obj->batch_pool_link); @@ -4713,7 +4213,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,  	obj->resv = &obj->__builtin_resv;  	obj->frontbuffer_ggtt_origin = ORIGIN_GTT; -	init_request_active(&obj->frontbuffer_write, frontbuffer_retire); +	i915_active_request_init(&obj->frontbuffer_write, +				 NULL, frontbuffer_retire);  	obj->mm.madv = I915_MADV_WILLNEED;  	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); @@ -4856,8 +4357,9 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,  				    struct llist_node *freed)  {  	struct drm_i915_gem_object *obj, *on; +	intel_wakeref_t wakeref; -	intel_runtime_pm_get(i915); +	wakeref = intel_runtime_pm_get(i915);  	llist_for_each_entry_safe(obj, on, freed, freed) {  		struct i915_vma *vma, *vn; @@ -4866,14 +4368,13 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,  		mutex_lock(&i915->drm.struct_mutex);  		GEM_BUG_ON(i915_gem_object_is_active(obj)); -		list_for_each_entry_safe(vma, vn, -					 &obj->vma_list, obj_link) { +		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {  			GEM_BUG_ON(i915_vma_is_active(vma));  			vma->flags &= ~I915_VMA_PIN_MASK;  			i915_vma_destroy(vma);  		} -		GEM_BUG_ON(!list_empty(&obj->vma_list)); -		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); +		GEM_BUG_ON(!list_empty(&obj->vma.list)); +		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));  		/* This serializes freeing with the shrinker. 
Since the free  		 * is delayed, first by RCU then by the workqueue, we want the @@ -4918,7 +4419,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,  		if (on)  			cond_resched();  	} -	intel_runtime_pm_put(i915); +	intel_runtime_pm_put(i915, wakeref);  }  static void i915_gem_flush_free_objects(struct drm_i915_private *i915) @@ -5027,13 +4528,11 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)  void i915_gem_sanitize(struct drm_i915_private *i915)  { -	int err; +	intel_wakeref_t wakeref;  	GEM_TRACE("\n"); -	mutex_lock(&i915->drm.struct_mutex); - -	intel_runtime_pm_get(i915); +	wakeref = intel_runtime_pm_get(i915);  	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);  	/* @@ -5053,28 +4552,28 @@ void i915_gem_sanitize(struct drm_i915_private *i915)  	 * it may impact the display and we are uncertain about the stability  	 * of the reset, so this could be applied to even earlier gen.  	 */ -	err = -ENODEV; -	if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) -		err = WARN_ON(intel_gpu_reset(i915, ALL_ENGINES)); -	if (!err) -		intel_engines_sanitize(i915); +	intel_engines_sanitize(i915, false);  	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); -	intel_runtime_pm_put(i915); +	intel_runtime_pm_put(i915, wakeref); +	mutex_lock(&i915->drm.struct_mutex);  	i915_gem_contexts_lost(i915);  	mutex_unlock(&i915->drm.struct_mutex);  }  int i915_gem_suspend(struct drm_i915_private *i915)  { +	intel_wakeref_t wakeref;  	int ret;  	GEM_TRACE("\n"); -	intel_runtime_pm_get(i915); +	wakeref = intel_runtime_pm_get(i915);  	intel_suspend_gt_powersave(i915); +	flush_workqueue(i915->wq); +  	mutex_lock(&i915->drm.struct_mutex);  	/* @@ -5104,11 +4603,9 @@ int i915_gem_suspend(struct drm_i915_private *i915)  	i915_retire_requests(i915); /* ensure we flush after wedging */  	mutex_unlock(&i915->drm.struct_mutex); +	i915_reset_flush(i915); -	intel_uc_suspend(i915); - -	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); -	cancel_delayed_work_sync(&i915->gt.retire_work); +	drain_delayed_work(&i915->gt.retire_work);  	/*  	 * As the idle_work is rearming if it detects a race, play safe and @@ -5116,6 +4613,8 @@ int i915_gem_suspend(struct drm_i915_private *i915)  	 */  	drain_delayed_work(&i915->gt.idle_work); +	intel_uc_suspend(i915); +  	/*  	 * Assert that we successfully flushed all the work and  	 * reset the GPU back to its idle, low power state. 
@@ -5124,12 +4623,12 @@ int i915_gem_suspend(struct drm_i915_private *i915)  	if (WARN_ON(!intel_engines_are_idle(i915)))  		i915_gem_set_wedged(i915); /* no hope, discard everything */ -	intel_runtime_pm_put(i915); +	intel_runtime_pm_put(i915, wakeref);  	return 0;  err_unlock:  	mutex_unlock(&i915->drm.struct_mutex); -	intel_runtime_pm_put(i915); +	intel_runtime_pm_put(i915, wakeref);  	return ret;  } @@ -5223,15 +4722,15 @@ void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)  	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |  				 DISP_TILE_SURFACE_SWIZZLING); -	if (IS_GEN5(dev_priv)) +	if (IS_GEN(dev_priv, 5))  		return;  	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); -	if (IS_GEN6(dev_priv)) +	if (IS_GEN(dev_priv, 6))  		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); -	else if (IS_GEN7(dev_priv)) +	else if (IS_GEN(dev_priv, 7))  		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); -	else if (IS_GEN8(dev_priv)) +	else if (IS_GEN(dev_priv, 8))  		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));  	else  		BUG(); @@ -5253,10 +4752,10 @@ static void init_unused_rings(struct drm_i915_private *dev_priv)  		init_unused_ring(dev_priv, SRB1_BASE);  		init_unused_ring(dev_priv, SRB2_BASE);  		init_unused_ring(dev_priv, SRB3_BASE); -	} else if (IS_GEN2(dev_priv)) { +	} else if (IS_GEN(dev_priv, 2)) {  		init_unused_ring(dev_priv, SRB0_BASE);  		init_unused_ring(dev_priv, SRB1_BASE); -	} else if (IS_GEN3(dev_priv)) { +	} else if (IS_GEN(dev_priv, 3)) {  		init_unused_ring(dev_priv, PRB1_BASE);  		init_unused_ring(dev_priv, PRB2_BASE);  	} @@ -5552,6 +5051,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)  		dev_priv->gt.cleanup_engine = intel_engine_cleanup;  	} +	i915_timelines_init(dev_priv); +  	ret = i915_gem_init_userptr(dev_priv);  	if (ret)  		return ret; @@ -5580,7 +5081,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)  	}  	ret = i915_gem_init_scratch(dev_priv, -				    IS_GEN2(dev_priv) ? SZ_256K : PAGE_SIZE); +				    IS_GEN(dev_priv, 2) ? 
SZ_256K : PAGE_SIZE);  	if (ret) {  		GEM_BUG_ON(ret == -EIO);  		goto err_ggtt; @@ -5674,8 +5175,10 @@ err_unlock:  err_uc_misc:  	intel_uc_fini_misc(dev_priv); -	if (ret != -EIO) +	if (ret != -EIO) {  		i915_gem_cleanup_userptr(dev_priv); +		i915_timelines_fini(dev_priv); +	}  	if (ret == -EIO) {  		mutex_lock(&dev_priv->drm.struct_mutex); @@ -5726,6 +5229,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)  	intel_uc_fini_misc(dev_priv);  	i915_gem_cleanup_userptr(dev_priv); +	i915_timelines_fini(dev_priv);  	i915_gem_drain_freed_objects(dev_priv); @@ -5828,7 +5332,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)  	if (!dev_priv->priorities)  		goto err_dependencies; -	INIT_LIST_HEAD(&dev_priv->gt.timelines);  	INIT_LIST_HEAD(&dev_priv->gt.active_rings);  	INIT_LIST_HEAD(&dev_priv->gt.closed_vma); @@ -5840,6 +5343,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)  			  i915_gem_idle_work_handler);  	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);  	init_waitqueue_head(&dev_priv->gpu_error.reset_queue); +	mutex_init(&dev_priv->gpu_error.wedge_mutex);  	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); @@ -5871,7 +5375,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)  	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));  	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));  	WARN_ON(dev_priv->mm.object_count); -	WARN_ON(!list_empty(&dev_priv->gt.timelines));  	kmem_cache_destroy(dev_priv->priorities);  	kmem_cache_destroy(dev_priv->dependencies); | 
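
A recurring change throughout the hunks above is that runtime-PM and display-power references become tracked cookies: intel_runtime_pm_get() and intel_display_power_get() now return an intel_wakeref_t that must be handed back to the matching _put(). The sketch below illustrates only that usage pattern, assuming the signatures visible in this diff; example_with_rpm() and do_work() are hypothetical placeholders, not functions added by the patch.

/*
 * Minimal usage sketch of the tracked-wakeref pattern used in this patch.
 * do_work() is a hypothetical placeholder; the PM calls mirror the hunks above.
 */
static void example_with_rpm(struct drm_i915_private *i915)
{
	intel_wakeref_t wakeref;

	/* Explicit form: keep the cookie from _get() and hand it to _put(). */
	wakeref = intel_runtime_pm_get(i915);
	do_work(i915);
	intel_runtime_pm_put(i915, wakeref);

	/*
	 * Scoped form: with_intel_runtime_pm() acquires and releases the
	 * wakeref around the block, as in i915_gem_flush_ggtt_writes() above.
	 */
	with_intel_runtime_pm(i915, wakeref)
		do_work(i915);
}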
