Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h')
-rw-r--r--	drivers/gpu/drm/i915/intel_ringbuffer.h	298
1 file changed, 94 insertions, 204 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 72edaa7ff411..710ffb221775 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -5,6 +5,7 @@
 #include <drm/drm_util.h>
 
 #include <linux/hashtable.h>
+#include <linux/irq_work.h>
 #include <linux/seqlock.h>
 
 #include "i915_gem_batch_pool.h"
@@ -28,12 +29,11 @@ struct i915_sched_attr;
  * workarounds!
  */
 #define CACHELINE_BYTES 64
-#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t))
+#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))
 
 struct intel_hw_status_page {
 	struct i915_vma *vma;
-	u32 *page_addr;
-	u32 ggtt_offset;
+	u32 *addr;
 };
 
 #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
@@ -94,12 +94,12 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
 #define I915_MAX_SUBSLICES 8
 
 #define instdone_slice_mask(dev_priv__) \
-	(IS_GEN7(dev_priv__) ? \
-	 1 : INTEL_INFO(dev_priv__)->sseu.slice_mask)
+	(IS_GEN(dev_priv__, 7) ? \
+	 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
 
 #define instdone_subslice_mask(dev_priv__) \
-	(IS_GEN7(dev_priv__) ? \
-	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0])
+	(IS_GEN(dev_priv__, 7) ? \
+	 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])
 
 #define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
 	for ((slice__) = 0, (subslice__) = 0; \
@@ -120,13 +120,8 @@ struct intel_instdone {
 struct intel_engine_hangcheck {
 	u64 acthd;
 	u32 seqno;
-	enum intel_engine_hangcheck_action action;
 	unsigned long action_timestamp;
-	int deadlock;
 	struct intel_instdone instdone;
-	struct i915_request *active_request;
-	bool stalled:1;
-	bool wedged:1;
 };
 
 struct intel_ring {
@@ -209,6 +204,7 @@ struct i915_priolist {
 
 struct st_preempt_hang {
 	struct completion completion;
+	unsigned int count;
 	bool inject_hang;
 };
 
@@ -299,14 +295,18 @@ struct intel_engine_execlists {
 	unsigned int port_mask;
 
 	/**
-	 * @queue_priority: Highest pending priority.
+	 * @queue_priority_hint: Highest pending priority.
 	 *
 	 * When we add requests into the queue, or adjust the priority of
 	 * executing requests, we compute the maximum priority of those
 	 * pending requests. We can then use this value to determine if
 	 * we need to preempt the executing requests to service the queue.
+	 * However, since the we may have recorded the priority of an inflight
+	 * request we wanted to preempt but since completed, at the time of
+	 * dequeuing the priority hint may no longer may match the highest
+	 * available request priority.
 	 */
-	int queue_priority;
+	int queue_priority_hint;
 
 	/**
 	 * @queue: queue of requests, in priority lists
@@ -365,9 +365,6 @@ struct intel_engine_cs {
 	struct drm_i915_gem_object *default_state;
 	void *pinned_default_state;
 
-	unsigned long irq_posted;
-#define ENGINE_IRQ_BREADCRUMB 0
-
 	/* Rather than have every client wait upon all user interrupts,
 	 * with the herd waking after every interrupt and each doing the
 	 * heavyweight seqno dance, we delegate the task (of being the
@@ -385,23 +382,14 @@ struct intel_engine_cs {
 	 * the overhead of waking that client is much preferred.
 	 */
 	struct intel_breadcrumbs {
-		spinlock_t irq_lock; /* protects irq_*; irqsafe */
-		struct intel_wait *irq_wait; /* oldest waiter by retirement */
-
-		spinlock_t rb_lock; /* protects the rb and wraps irq_lock */
-		struct rb_root waiters; /* sorted by retirement, priority */
-		struct list_head signals; /* sorted by retirement */
-		struct task_struct *signaler; /* used for fence signalling */
+		spinlock_t irq_lock;
+		struct list_head signalers;
 
-		struct timer_list fake_irq; /* used after a missed interrupt */
-		struct timer_list hangcheck; /* detect missed interrupts */
+		struct irq_work irq_work; /* for use from inside irq_lock */
 
-		unsigned int hangcheck_interrupts;
 		unsigned int irq_enabled;
-		unsigned int irq_count;
 
-		bool irq_armed : 1;
-		I915_SELFTEST_DECLARE(bool mock : 1);
+		bool irq_armed;
 	} breadcrumbs;
 
 	struct {
@@ -415,16 +403,17 @@ struct intel_engine_cs {
 		/**
 		 * @enable_count: Reference count for the enabled samplers.
 		 *
-		 * Index number corresponds to the bit number from @enable.
+		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
 		 */
-		unsigned int enable_count[I915_PMU_SAMPLE_BITS];
+		unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT];
 		/**
 		 * @sample: Counter values for sampling events.
 		 *
 		 * Our internal timer stores the current counters in this field.
+		 *
+		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
 		 */
-#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
-		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
+		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
 	} pmu;
 
 	/*
@@ -448,9 +437,8 @@ struct intel_engine_cs {
 	int		(*init_hw)(struct intel_engine_cs *engine);
 
 	struct {
-		struct i915_request *(*prepare)(struct intel_engine_cs *engine);
-		void (*reset)(struct intel_engine_cs *engine,
-			      struct i915_request *rq);
+		void (*prepare)(struct intel_engine_cs *engine);
+		void (*reset)(struct intel_engine_cs *engine, bool stalled);
 		void (*finish)(struct intel_engine_cs *engine);
 	} reset;
 
@@ -474,8 +462,10 @@ struct intel_engine_cs {
 					 unsigned int dispatch_flags);
 #define I915_DISPATCH_SECURE BIT(0)
 #define I915_DISPATCH_PINNED BIT(1)
-	void		(*emit_breadcrumb)(struct i915_request *rq, u32 *cs);
-	int		emit_breadcrumb_sz;
+	int		 (*emit_init_breadcrumb)(struct i915_request *rq);
+	u32		*(*emit_fini_breadcrumb)(struct i915_request *rq,
+						 u32 *cs);
+	unsigned int	emit_fini_breadcrumb_dw;
 
 	/* Pass the request to the hardware queue (e.g. directly into
 	 * the legacy ringbuffer or to the end of an execlist).
@@ -501,69 +491,8 @@ struct intel_engine_cs {
 	 */
 	void		(*cancel_requests)(struct intel_engine_cs *engine);
 
-	/* Some chipsets are not quite as coherent as advertised and need
-	 * an expensive kick to force a true read of the up-to-date seqno.
-	 * However, the up-to-date seqno is not always required and the last
-	 * seen value is good enough. Note that the seqno will always be
-	 * monotonic, even if not coherent.
-	 */
-	void		(*irq_seqno_barrier)(struct intel_engine_cs *engine);
 	void		(*cleanup)(struct intel_engine_cs *engine);
 
-	/* GEN8 signal/wait table - never trust comments!
-	 *	  signal to	signal to    signal to   signal to      signal to
-	 *	    RCS		   VCS          BCS        VECS		 VCS2
-	 *      --------------------------------------------------------------------
-	 *  RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
-	 *	|-------------------------------------------------------------------
-	 *  VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
-	 *	|-------------------------------------------------------------------
-	 *  BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
-	 *	|-------------------------------------------------------------------
-	 * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) |  NOP (0x90) | VCS2 (0x98) |
-	 *	|-------------------------------------------------------------------
-	 * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP  (0xc0) |
-	 *	|-------------------------------------------------------------------
-	 *
-	 * Generalization:
-	 *  f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
-	 *  ie. transpose of g(x, y)
-	 *
-	 *	 sync from	sync from    sync from    sync from	sync from
-	 *	    RCS		   VCS          BCS        VECS		 VCS2
-	 *      --------------------------------------------------------------------
-	 *  RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
-	 *	|-------------------------------------------------------------------
-	 *  VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
-	 *	|-------------------------------------------------------------------
-	 *  BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
-	 *	|-------------------------------------------------------------------
-	 * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) |  NOP (0x90) | VCS2 (0xb8) |
-	 *	|-------------------------------------------------------------------
-	 * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) |  NOP (0xc0) |
-	 *	|-------------------------------------------------------------------
-	 *
-	 * Generalization:
-	 *  g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
-	 *  ie. transpose of f(x, y)
-	 */
-	struct {
-#define GEN6_SEMAPHORE_LAST	VECS_HW
-#define GEN6_NUM_SEMAPHORES	(GEN6_SEMAPHORE_LAST + 1)
-#define GEN6_SEMAPHORES_MASK	GENMASK(GEN6_SEMAPHORE_LAST, 0)
-		struct {
-			/* our mbox written by others */
-			u32		wait[GEN6_NUM_SEMAPHORES];
-			/* mboxes this ring signals to */
-			i915_reg_t	signal[GEN6_NUM_SEMAPHORES];
-		} mbox;
-
-		/* AKA wait() */
-		int	(*sync_to)(struct i915_request *rq,
-				   struct i915_request *signal);
-		u32	*(*signal)(struct i915_request *rq, u32 *cs);
-	} semaphore;
-
 	struct intel_engine_execlists execlists;
 
 	/* Contexts are pinned whilst they are active on the GPU. The last
@@ -664,7 +593,20 @@ intel_engine_has_preemption(const struct intel_engine_cs *engine)
 
 static inline bool __execlists_need_preempt(int prio, int last)
 {
-	return prio > max(0, last);
+	/*
+	 * Allow preemption of low -> normal -> high, but we do
+	 * not allow low priority tasks to preempt other low priority
+	 * tasks under the impression that latency for low priority
+	 * tasks does not matter (as much as background throughput),
+	 * so kiss.
+	 *
+	 * More naturally we would write
+	 *	prio >= max(0, last);
+	 * except that we wish to prevent triggering preemption at the same
+	 * priority level: the task that is running should remain running
+	 * to preserve FIFO ordering of dependencies.
+	 */
+	return prio > max(I915_PRIORITY_NORMAL - 1, last);
 }
 
 static inline void
@@ -742,7 +684,7 @@ static inline u32
 intel_read_status_page(const struct intel_engine_cs *engine, int reg)
 {
 	/* Ensure that the compiler doesn't optimize away the load. */
-	return READ_ONCE(engine->status_page.page_addr[reg]);
+	return READ_ONCE(engine->status_page.addr[reg]);
 }
 
 static inline void
@@ -755,12 +697,12 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 	 */
 	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		mb();
-		clflush(&engine->status_page.page_addr[reg]);
-		engine->status_page.page_addr[reg] = value;
-		clflush(&engine->status_page.page_addr[reg]);
+		clflush(&engine->status_page.addr[reg]);
+		engine->status_page.addr[reg] = value;
+		clflush(&engine->status_page.addr[reg]);
 		mb();
 	} else {
-		WRITE_ONCE(engine->status_page.page_addr[reg], value);
+		WRITE_ONCE(engine->status_page.addr[reg], value);
 	}
 }
 
@@ -781,11 +723,13 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
  * The area from dword 0x30 to 0x3ff is available for driver usage.
  */
 #define I915_GEM_HWS_INDEX		0x30
-#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
-#define I915_GEM_HWS_PREEMPT_INDEX	0x32
-#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
-#define I915_GEM_HWS_SCRATCH_INDEX	0x40
-#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
+#define I915_GEM_HWS_INDEX_ADDR		(I915_GEM_HWS_INDEX * sizeof(u32))
+#define I915_GEM_HWS_PREEMPT		0x32
+#define I915_GEM_HWS_PREEMPT_ADDR	(I915_GEM_HWS_PREEMPT * sizeof(u32))
+#define I915_GEM_HWS_SEQNO		0x40
+#define I915_GEM_HWS_SEQNO_ADDR		(I915_GEM_HWS_SEQNO * sizeof(u32))
+#define I915_GEM_HWS_SCRATCH		0x80
+#define I915_GEM_HWS_SCRATCH_ADDR	(I915_GEM_HWS_SCRATCH * sizeof(u32))
 
 #define I915_HWS_CSB_BUF0_INDEX		0x10
 #define I915_HWS_CSB_WRITE_INDEX	0x1f
@@ -808,7 +752,6 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
 
 int __must_check intel_ring_cacheline_align(struct i915_request *rq);
 
-int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes);
 u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
 
 static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
@@ -889,9 +832,21 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
 	return tail;
 }
 
-void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
+static inline unsigned int
+__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
+{
+	/*
+	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+	 * same cacheline, the Head Pointer must not be greater than the Tail
+	 * Pointer."
+	 */
+	GEM_BUG_ON(!is_power_of_2(size));
+	return (head - tail - CACHELINE_BYTES) & (size - 1);
+}
+
+void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno);
 
-void intel_engine_setup_common(struct intel_engine_cs *engine);
+int intel_engine_setup_common(struct intel_engine_cs *engine);
 int intel_engine_init_common(struct intel_engine_cs *engine);
 void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 
@@ -903,6 +858,8 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);
 int intel_engine_stop_cs(struct intel_engine_cs *engine);
 void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);
 
+void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);
+
 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
 u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);
 
@@ -947,102 +904,29 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
 void intel_engine_get_instdone(struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone);
 
-/*
- * Arbitrary size for largest possible 'add request' sequence. The code paths
- * are complex and variable. Empirical measurement shows that the worst case
- * is BDW at 192 bytes (6 + 6 + 36 dwords), then ILK at 136 bytes. However,
- * we need to allocate double the largest single packet within that emission
- * to account for tail wraparound (so 6 + 6 + 72 dwords for BDW).
- */
-#define MIN_SPACE_FOR_ADD_REQUEST 336
-
-static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
-{
-	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
-}
-
-static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
-{
-	return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
-}
-
-/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
-int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
-
-static inline void intel_wait_init(struct intel_wait *wait)
-{
-	wait->tsk = current;
-	wait->request = NULL;
-}
-
-static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
-{
-	wait->tsk = current;
-	wait->seqno = seqno;
-}
-
-static inline bool intel_wait_has_seqno(const struct intel_wait *wait)
-{
-	return wait->seqno;
-}
-
-static inline bool
-intel_wait_update_seqno(struct intel_wait *wait, u32 seqno)
-{
-	wait->seqno = seqno;
-	return intel_wait_has_seqno(wait);
-}
-
-static inline bool
-intel_wait_update_request(struct intel_wait *wait,
-			  const struct i915_request *rq)
-{
-	return intel_wait_update_seqno(wait, i915_request_global_seqno(rq));
-}
-
-static inline bool
-intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno)
-{
-	return wait->seqno == seqno;
-}
-
-static inline bool
-intel_wait_check_request(const struct intel_wait *wait,
-			 const struct i915_request *rq)
-{
-	return intel_wait_check_seqno(wait, i915_request_global_seqno(rq));
-}
+void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
+void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 
-static inline bool intel_wait_complete(const struct intel_wait *wait)
-{
-	return RB_EMPTY_NODE(&wait->node);
-}
+void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
+void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);
 
-bool intel_engine_add_wait(struct intel_engine_cs *engine,
-			   struct intel_wait *wait);
-void intel_engine_remove_wait(struct intel_engine_cs *engine,
-			      struct intel_wait *wait);
-bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
-void intel_engine_cancel_signaling(struct i915_request *request);
+bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine);
+void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
 
-static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
+static inline void
+intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine)
 {
-	return READ_ONCE(engine->breadcrumbs.irq_wait);
+	irq_work_queue(&engine->breadcrumbs.irq_work);
 }
 
-unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
-#define ENGINE_WAKEUP_WAITER BIT(0)
-#define ENGINE_WAKEUP_ASLEEP BIT(1)
-
-void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
-void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);
-
-void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
-void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
+bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine);
 
 void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 
+void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
+				    struct drm_printer *p);
+
 static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
 {
 	memset(batch, 0, 6 * sizeof(u32));
@@ -1055,7 +939,7 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
 }
 
 static inline u32 *
-gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset)
+gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
 {
 	/* We're using qword write, offset should be aligned to 8 bytes. */
 	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
@@ -1065,8 +949,7 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset)
 	 * following the batch.
 	 */
 	*cs++ = GFX_OP_PIPE_CONTROL(6);
-	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
-		PIPE_CONTROL_QW_WRITE;
+	*cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
 	*cs++ = gtt_offset;
 	*cs++ = 0;
 	*cs++ = value;
@@ -1092,7 +975,14 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
 	return cs;
 }
 
-void intel_engines_sanitize(struct drm_i915_private *i915);
+static inline void intel_engine_reset(struct intel_engine_cs *engine,
+				      bool stalled)
+{
+	if (engine->reset.reset)
+		engine->reset.reset(engine, stalled);
+}
+
+void intel_engines_sanitize(struct drm_i915_private *i915, bool force);
 
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
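
For reference, here is a small userspace sketch (not kernel code) of two helpers visible in the diff above: the ring-space calculation added as __intel_ring_space() and the preemption check in __execlists_need_preempt(). The constants are assumptions for illustration only: CACHELINE_BYTES follows the 64-byte value in this header, and I915_PRIORITY_NORMAL is taken to be 0, matching the old "max(0, last)" form of the check.

/*
 * Standalone sketch, compiled with a plain C compiler. Constants below are
 * assumptions made for illustration; see the lead-in note.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define CACHELINE_BYTES 64
#define I915_PRIORITY_NORMAL 0 /* assumed default context priority */

/* Mirrors __intel_ring_space(): usable bytes between tail and head, keeping
 * one cacheline free so head and tail never land in the same cacheline. */
static unsigned int ring_space(unsigned int head, unsigned int tail,
			       unsigned int size)
{
	assert((size & (size - 1)) == 0); /* ring size must be a power of two */
	return (head - tail - CACHELINE_BYTES) & (size - 1);
}

/* Mirrors __execlists_need_preempt(): only a priority strictly above both the
 * normal floor and the last executing request triggers preemption, so equal
 * priorities keep FIFO ordering. */
static bool need_preempt(int prio, int last)
{
	int floor = I915_PRIORITY_NORMAL - 1;

	return prio > (last > floor ? last : floor);
}

int main(void)
{
	/* Empty ring (head == tail): one cacheline stays reserved. */
	printf("space(0, 0, 4096)      = %u\n", ring_space(0, 0, 4096));
	/* Wrapped case: tail ahead of head modulo the ring size. */
	printf("space(128, 4032, 4096) = %u\n", ring_space(128, 4032, 4096));

	printf("preempt(normal over low)  = %d\n", need_preempt(0, -10));
	printf("preempt(equal priorities) = %d\n", need_preempt(0, 0));
	return 0;
}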
