diff options
Diffstat (limited to 'drivers/gpu/drm/i915/intel_pm.c')
| -rw-r--r-- | drivers/gpu/drm/i915/intel_pm.c | 378 | 
1 files changed, 268 insertions, 110 deletions
| diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index fa4ccb346389..eadc15cddbeb 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -67,7 +67,7 @@ static void skl_init_clock_gating(struct drm_device *dev)  	gen9_init_clock_gating(dev); -	if (INTEL_REVID(dev) == SKL_REVID_A0) { +	if (INTEL_REVID(dev) <= SKL_REVID_B0) {  		/*  		 * WaDisableSDEUnitClockGating:skl  		 * WaSetGAPSunitClckGateDisable:skl @@ -75,6 +75,10 @@ static void skl_init_clock_gating(struct drm_device *dev)  		I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |  			   GEN8_GAPSUNIT_CLOCK_GATE_DISABLE |  			   GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + +		/* WaDisableVFUnitClockGating:skl */ +		I915_WRITE(GEN6_UCGCTL2, I915_READ(GEN6_UCGCTL2) | +			   GEN6_VFUNIT_CLOCK_GATE_DISABLE);  	}  	if (INTEL_REVID(dev) <= SKL_REVID_D0) { @@ -84,8 +88,7 @@ static void skl_init_clock_gating(struct drm_device *dev)  		/* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */  		I915_WRITE(FF_SLICE_CS_CHICKEN2, -			   I915_READ(FF_SLICE_CS_CHICKEN2) | -			   GEN9_TSG_BARRIER_ACK_DISABLE); +			   _MASKED_BIT_ENABLE(GEN9_TSG_BARRIER_ACK_DISABLE));  	}  	if (INTEL_REVID(dev) <= SKL_REVID_E0) @@ -94,6 +97,26 @@ static void skl_init_clock_gating(struct drm_device *dev)  			   GEN8_LQSC_RO_PERF_DIS);  } +static void bxt_init_clock_gating(struct drm_device *dev) +{ +	struct drm_i915_private *dev_priv = dev->dev_private; + +	gen9_init_clock_gating(dev); + +	/* +	 * FIXME: +	 * GEN8_SDEUNIT_CLOCK_GATE_DISABLE applies on A0 only. +	 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only. +	 */ +	 /* WaDisableSDEUnitClockGating:bxt */ +	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | +		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE | +		   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ); + +	/* FIXME: apply on A0 only */ +	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF); +} +  static void i915_pineview_get_mem_freq(struct drm_device *dev)  {  	struct drm_i915_private *dev_priv = dev->dev_private; @@ -1792,7 +1815,7 @@ hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)  	linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,  				     mode->crtc_clock);  	ips_linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8, -					 intel_ddi_get_cdclk_freq(dev_priv)); +					 dev_priv->display.get_display_clock_speed(dev_priv->dev));  	return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |  	       PIPE_WM_LINETIME_TIME(linetime); @@ -1923,7 +1946,7 @@ static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5])  int ilk_wm_max_level(const struct drm_device *dev)  {  	/* how many WM levels are we expecting */ -	if (IS_GEN9(dev)) +	if (INTEL_INFO(dev)->gen >= 9)  		return 7;  	else if (IS_HASWELL(dev) || IS_BROADWELL(dev))  		return 4; @@ -2045,22 +2068,20 @@ static void ilk_compute_wm_parameters(struct drm_crtc *crtc,  	p->pipe_htotal = intel_crtc->config->base.adjusted_mode.crtc_htotal;  	p->pixel_rate = ilk_pipe_pixel_rate(dev, crtc); -	if (crtc->primary->state->fb) { -		p->pri.enabled = true; +	if (crtc->primary->state->fb)  		p->pri.bytes_per_pixel =  			crtc->primary->state->fb->bits_per_pixel / 8; -	} else { -		p->pri.enabled = false; -		p->pri.bytes_per_pixel = 0; -	} +	else +		p->pri.bytes_per_pixel = 4; + +	p->cur.bytes_per_pixel = 4; +	/* +	 * TODO: for now, assume primary and cursor planes are always enabled. +	 * Setting them to false makes the screen flicker. +	 */ +	p->pri.enabled = true; +	p->cur.enabled = true; -	if (crtc->cursor->state->fb) { -		p->cur.enabled = true; -		p->cur.bytes_per_pixel = 4; -	} else { -		p->cur.enabled = false; -		p->cur.bytes_per_pixel = 0; -	}  	p->pri.horiz_pixels = intel_crtc->config->pipe_src_w;  	p->cur.horiz_pixels = intel_crtc->base.cursor->state->crtc_w; @@ -2538,6 +2559,7 @@ static bool ilk_disable_lp_wm(struct drm_device *dev)   */  #define SKL_DDB_SIZE		896	/* in blocks */ +#define BXT_DDB_SIZE		512  static void  skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, @@ -2556,7 +2578,10 @@ skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,  		return;  	} -	ddb_size = SKL_DDB_SIZE; +	if (IS_BROXTON(dev)) +		ddb_size = BXT_DDB_SIZE; +	else +		ddb_size = SKL_DDB_SIZE;  	ddb_size -= 4; /* 4 blocks for bypass path allocation */ @@ -2612,8 +2637,18 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,  }  static unsigned int -skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p) +skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p, int y)  { + +	/* for planar format */ +	if (p->y_bytes_per_pixel) { +		if (y)  /* y-plane data rate */ +			return p->horiz_pixels * p->vert_pixels * p->y_bytes_per_pixel; +		else    /* uv-plane data rate */ +			return (p->horiz_pixels/2) * (p->vert_pixels/2) * p->bytes_per_pixel; +	} + +	/* for packed formats */  	return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel;  } @@ -2636,7 +2671,10 @@ skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc,  		if (!p->enabled)  			continue; -		total_data_rate += skl_plane_relative_data_rate(p); +		total_data_rate += skl_plane_relative_data_rate(p, 0); /* packed/uv */ +		if (p->y_bytes_per_pixel) { +			total_data_rate += skl_plane_relative_data_rate(p, 1); /* y-plane */ +		}  	}  	return total_data_rate; @@ -2655,6 +2693,7 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,  	struct skl_ddb_entry *alloc = &ddb->pipe[pipe];  	uint16_t alloc_size, start, cursor_blocks;  	uint16_t minimum[I915_MAX_PLANES]; +	uint16_t y_minimum[I915_MAX_PLANES];  	unsigned int total_data_rate;  	int plane; @@ -2683,6 +2722,8 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,  		minimum[plane] = 8;  		alloc_size -= minimum[plane]; +		y_minimum[plane] = p->y_bytes_per_pixel ? 8 : 0; +		alloc_size -= y_minimum[plane];  	}  	/* @@ -2696,16 +2737,17 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,  	start = alloc->start;  	for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {  		const struct intel_plane_wm_parameters *p; -		unsigned int data_rate; -		uint16_t plane_blocks; +		unsigned int data_rate, y_data_rate; +		uint16_t plane_blocks, y_plane_blocks = 0;  		p = ¶ms->plane[plane];  		if (!p->enabled)  			continue; -		data_rate = skl_plane_relative_data_rate(p); +		data_rate = skl_plane_relative_data_rate(p, 0);  		/* +		 * allocation for (packed formats) or (uv-plane part of planar format):  		 * promote the expression to 64 bits to avoid overflowing, the  		 * result is < available as data_rate / total_data_rate < 1  		 */ @@ -2717,6 +2759,22 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,  		ddb->plane[pipe][plane].end = start + plane_blocks;  		start += plane_blocks; + +		/* +		 * allocation for y_plane part of planar format: +		 */ +		if (p->y_bytes_per_pixel) { +			y_data_rate = skl_plane_relative_data_rate(p, 1); +			y_plane_blocks = y_minimum[plane]; +			y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate, +						total_data_rate); + +			ddb->y_plane[pipe][plane].start = start; +			ddb->y_plane[pipe][plane].end = start + y_plane_blocks; + +			start += y_plane_blocks; +		} +  	}  } @@ -2829,13 +2887,18 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,  		p->pixel_rate = skl_pipe_pixel_rate(intel_crtc->config);  		fb = crtc->primary->state->fb; +		/* For planar: Bpp is for uv plane, y_Bpp is for y plane */  		if (fb) {  			p->plane[0].enabled = true; -			p->plane[0].bytes_per_pixel = fb->bits_per_pixel / 8; +			p->plane[0].bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ? +				drm_format_plane_cpp(fb->pixel_format, 1) : fb->bits_per_pixel / 8; +			p->plane[0].y_bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ? +				drm_format_plane_cpp(fb->pixel_format, 0) : 0;  			p->plane[0].tiling = fb->modifier[0];  		} else {  			p->plane[0].enabled = false;  			p->plane[0].bytes_per_pixel = 0; +			p->plane[0].y_bytes_per_pixel = 0;  			p->plane[0].tiling = DRM_FORMAT_MOD_NONE;  		}  		p->plane[0].horiz_pixels = intel_crtc->config->pipe_src_w; @@ -2843,6 +2906,7 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,  		p->plane[0].rotation = crtc->primary->state->rotation;  		fb = crtc->cursor->state->fb; +		p->cursor.y_bytes_per_pixel = 0;  		if (fb) {  			p->cursor.enabled = true;  			p->cursor.bytes_per_pixel = fb->bits_per_pixel / 8; @@ -2878,22 +2942,25 @@ static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv,  	uint32_t plane_bytes_per_line, plane_blocks_per_line;  	uint32_t res_blocks, res_lines;  	uint32_t selected_result; +	uint8_t bytes_per_pixel;  	if (latency == 0 || !p->active || !p_params->enabled)  		return false; +	bytes_per_pixel = p_params->y_bytes_per_pixel ? +		p_params->y_bytes_per_pixel : +		p_params->bytes_per_pixel;  	method1 = skl_wm_method1(p->pixel_rate, -				 p_params->bytes_per_pixel, +				 bytes_per_pixel,  				 latency);  	method2 = skl_wm_method2(p->pixel_rate,  				 p->pipe_htotal,  				 p_params->horiz_pixels, -				 p_params->bytes_per_pixel, +				 bytes_per_pixel,  				 p_params->tiling,  				 latency); -	plane_bytes_per_line = p_params->horiz_pixels * -					p_params->bytes_per_pixel; +	plane_bytes_per_line = p_params->horiz_pixels * bytes_per_pixel;  	plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);  	if (p_params->tiling == I915_FORMAT_MOD_Y_TILED || @@ -3110,10 +3177,14 @@ static void skl_write_wm_values(struct drm_i915_private *dev_priv,  				   new->plane_trans[pipe][i]);  		I915_WRITE(CUR_WM_TRANS(pipe), new->cursor_trans[pipe]); -		for (i = 0; i < intel_num_planes(crtc); i++) +		for (i = 0; i < intel_num_planes(crtc); i++) {  			skl_ddb_entry_write(dev_priv,  					    PLANE_BUF_CFG(pipe, i),  					    &new->ddb.plane[pipe][i]); +			skl_ddb_entry_write(dev_priv, +					    PLANE_NV12_BUF_CFG(pipe, i), +					    &new->ddb.y_plane[pipe][i]); +		}  		skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),  				    &new->ddb.cursor[pipe]); @@ -3178,7 +3249,7 @@ static void skl_flush_wm_values(struct drm_i915_private *dev_priv,  {  	struct drm_device *dev = dev_priv->dev;  	struct skl_ddb_allocation *cur_ddb, *new_ddb; -	bool reallocated[I915_MAX_PIPES] = {false, false, false}; +	bool reallocated[I915_MAX_PIPES] = {};  	struct intel_crtc *crtc;  	enum pipe pipe; @@ -3271,6 +3342,7 @@ static bool skl_update_pipe_wm(struct drm_crtc *crtc,  		return false;  	intel_crtc->wm.skl_active = *pipe_wm; +  	return true;  } @@ -3364,8 +3436,16 @@ skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,  	intel_plane->wm.scaled = scaled;  	intel_plane->wm.horiz_pixels = sprite_width;  	intel_plane->wm.vert_pixels = sprite_height; -	intel_plane->wm.bytes_per_pixel = pixel_size;  	intel_plane->wm.tiling = DRM_FORMAT_MOD_NONE; + +	/* For planar: Bpp is for UV plane, y_Bpp is for Y plane */ +	intel_plane->wm.bytes_per_pixel = +		(fb && fb->pixel_format == DRM_FORMAT_NV12) ? +		drm_format_plane_cpp(plane->state->fb->pixel_format, 1) : pixel_size; +	intel_plane->wm.y_bytes_per_pixel = +		(fb && fb->pixel_format == DRM_FORMAT_NV12) ? +		drm_format_plane_cpp(plane->state->fb->pixel_format, 0) : 0; +  	/*  	 * Framebuffer can be NULL on plane disable, but it does not  	 * matter for watermarks if we assume no tiling in that case. @@ -3930,6 +4010,8 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)  		    GEN6_RP_DOWN_IDLE_AVG);  	dev_priv->rps.power = new_power; +	dev_priv->rps.up_threshold = threshold_up; +	dev_priv->rps.down_threshold = threshold_down;  	dev_priv->rps.last_adj = 0;  } @@ -4001,8 +4083,11 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)  		      "Odd GPU freq value\n"))  		val &= ~1; -	if (val != dev_priv->rps.cur_freq) +	if (val != dev_priv->rps.cur_freq) {  		vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); +		if (!IS_CHERRYVIEW(dev_priv)) +			gen6_set_rps_thresholds(dev_priv, val); +	}  	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); @@ -4010,50 +4095,25 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)  	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));  } -/* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down +/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down   *   * * If Gfx is Idle, then - * 1. Mask Turbo interrupts - * 2. Bring up Gfx clock - * 3. Change the freq to Rpn and wait till P-Unit updates freq - * 4. Clear the Force GFX CLK ON bit so that Gfx can down - * 5. Unmask Turbo interrupts + * 1. Forcewake Media well. + * 2. Request idle freq. + * 3. Release Forcewake of Media well.  */  static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)  { -	struct drm_device *dev = dev_priv->dev;  	u32 val = dev_priv->rps.idle_freq; -	/* CHV and latest VLV don't need to force the gfx clock */ -	if (IS_CHERRYVIEW(dev) || dev->pdev->revision >= 0xd) { -		valleyview_set_rps(dev_priv->dev, val); -		return; -	} - -	/* -	 * When we are idle.  Drop to min voltage state. -	 */ -  	if (dev_priv->rps.cur_freq <= val)  		return; -	/* Mask turbo interrupt so that they will not come in between */ -	I915_WRITE(GEN6_PMINTRMSK, -		   gen6_sanitize_rps_pm_mask(dev_priv, ~0)); - -	vlv_force_gfx_clock(dev_priv, true); - -	dev_priv->rps.cur_freq = val; - -	vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); - -	if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS)) -				& GENFREQSTATUS) == 0, 100)) -		DRM_ERROR("timed out waiting for Punit\n"); - -	vlv_force_gfx_clock(dev_priv, false); - -	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); +	/* Wake up the media well, as that takes a lot less +	 * power than the Render well. */ +	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA); +	valleyview_set_rps(dev_priv->dev, val); +	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);  }  void gen6_rps_busy(struct drm_i915_private *dev_priv) @@ -4082,21 +4142,47 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)  		I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);  	}  	mutex_unlock(&dev_priv->rps.hw_lock); + +	spin_lock(&dev_priv->rps.client_lock); +	while (!list_empty(&dev_priv->rps.clients)) +		list_del_init(dev_priv->rps.clients.next); +	spin_unlock(&dev_priv->rps.client_lock);  } -void gen6_rps_boost(struct drm_i915_private *dev_priv) +void gen6_rps_boost(struct drm_i915_private *dev_priv, +		    struct intel_rps_client *rps, +		    unsigned long submitted)  { -	u32 val; +	/* This is intentionally racy! We peek at the state here, then +	 * validate inside the RPS worker. +	 */ +	if (!(dev_priv->mm.busy && +	      dev_priv->rps.enabled && +	      dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)) +		return; -	mutex_lock(&dev_priv->rps.hw_lock); -	val = dev_priv->rps.max_freq_softlimit; -	if (dev_priv->rps.enabled && -	    dev_priv->mm.busy && -	    dev_priv->rps.cur_freq < val) { -		intel_set_rps(dev_priv->dev, val); -		dev_priv->rps.last_adj = 0; +	/* Force a RPS boost (and don't count it against the client) if +	 * the GPU is severely congested. +	 */ +	if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES)) +		rps = NULL; + +	spin_lock(&dev_priv->rps.client_lock); +	if (rps == NULL || list_empty(&rps->link)) { +		spin_lock_irq(&dev_priv->irq_lock); +		if (dev_priv->rps.interrupts_enabled) { +			dev_priv->rps.client_boost = true; +			queue_work(dev_priv->wq, &dev_priv->rps.work); +		} +		spin_unlock_irq(&dev_priv->irq_lock); + +		if (rps != NULL) { +			list_add(&rps->link, &dev_priv->rps.clients); +			rps->boosts++; +		} else +			dev_priv->rps.boosts++;  	} -	mutex_unlock(&dev_priv->rps.hw_lock); +	spin_unlock(&dev_priv->rps.client_lock);  }  void intel_set_rps(struct drm_device *dev, u8 val) @@ -4250,8 +4336,8 @@ static void gen6_init_rps_frequencies(struct drm_device *dev)  	if (dev_priv->rps.min_freq_softlimit == 0) {  		if (IS_HASWELL(dev) || IS_BROADWELL(dev))  			dev_priv->rps.min_freq_softlimit = -				/* max(RPe, 450 MHz) */ -				max(dev_priv->rps.efficient_freq, (u8) 9); +				max_t(int, dev_priv->rps.efficient_freq, +				      intel_freq_opcode(dev_priv, 450));  		else  			dev_priv->rps.min_freq_softlimit =  				dev_priv->rps.min_freq; @@ -4325,8 +4411,13 @@ static void gen9_enable_rc6(struct drm_device *dev)  				   GEN6_RC_CTL_EI_MODE(1) |  				   rc6_mask); -	/* 3b: Enable Coarse Power Gating only when RC6 is enabled */ -	I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? 3 : 0); +	/* +	 * 3b: Enable Coarse Power Gating only when RC6 is enabled. +	 * WaDisableRenderPowerGating:skl,bxt - Render PG need to be disabled with RC6. +	 */ +	I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? +			GEN9_MEDIA_PG_ENABLE : 0); +  	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); @@ -4665,24 +4756,6 @@ static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)  	return rp1;  } -static int cherryview_rps_min_freq(struct drm_i915_private *dev_priv) -{ -	struct drm_device *dev = dev_priv->dev; -	u32 val, rpn; - -	if (dev->pdev->revision >= 0x20) { -		val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE); -		rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) & -		       FB_GFX_FREQ_FUSE_MASK); -	} else { /* For pre-production hardware */ -		val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG); -		rpn = ((val >> PUNIT_GPU_STATIS_GFX_MIN_FREQ_SHIFT) & -		       PUNIT_GPU_STATUS_GFX_MIN_FREQ_MASK); -	} - -	return rpn; -} -  static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)  {  	u32 val, rp1; @@ -4889,9 +4962,9 @@ static void cherryview_init_gt_powersave(struct drm_device *dev)  	mutex_lock(&dev_priv->rps.hw_lock); -	mutex_lock(&dev_priv->dpio_lock); +	mutex_lock(&dev_priv->sb_lock);  	val = vlv_cck_read(dev_priv, CCK_FUSE_REG); -	mutex_unlock(&dev_priv->dpio_lock); +	mutex_unlock(&dev_priv->sb_lock);  	switch ((val >> 2) & 0x7) {  	case 0: @@ -4934,7 +5007,8 @@ static void cherryview_init_gt_powersave(struct drm_device *dev)  			 intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),  			 dev_priv->rps.rp1_freq); -	dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv); +	/* PUnit validated range is only [RPe, RP0] */ +	dev_priv->rps.min_freq = dev_priv->rps.efficient_freq;  	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",  			 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),  			 dev_priv->rps.min_freq); @@ -4996,8 +5070,8 @@ static void cherryview_enable_rps(struct drm_device *dev)  		I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);  	I915_WRITE(GEN6_RC_SLEEP, 0); -	/* TO threshold set to 1750 us ( 0x557 * 1.28 us) */ -	I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); +	/* TO threshold set to 500 us ( 0x186 * 1.28 us) */ +	I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);  	/* allows RC6 residency counter to work */  	I915_WRITE(VLV_COUNTER_CONTROL, @@ -5032,6 +5106,12 @@ static void cherryview_enable_rps(struct drm_device *dev)  		   GEN6_RP_UP_BUSY_AVG |  		   GEN6_RP_DOWN_IDLE_AVG); +	/* Setting Fixed Bias */ +	val = VLV_OVERRIDE_EN | +		  VLV_SOC_TDP_EN | +		  CHV_BIAS_CPU_50_SOC_50; +	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); +  	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);  	/* RPS code assumes GPLL is used */ @@ -5116,6 +5196,12 @@ static void valleyview_enable_rps(struct drm_device *dev)  	I915_WRITE(GEN6_RC_CONTROL, rc6_mode); +	/* Setting Fixed Bias */ +	val = VLV_OVERRIDE_EN | +		  VLV_SOC_TDP_EN | +		  VLV_BIAS_CPU_125_SOC_875; +	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); +  	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);  	/* RPS code assumes GPLL is used */ @@ -5798,13 +5884,15 @@ static void ibx_init_clock_gating(struct drm_device *dev)  static void g4x_disable_trickle_feed(struct drm_device *dev)  {  	struct drm_i915_private *dev_priv = dev->dev_private; -	int pipe; +	enum pipe pipe;  	for_each_pipe(dev_priv, pipe) {  		I915_WRITE(DSPCNTR(pipe),  			   I915_READ(DSPCNTR(pipe)) |  			   DISPPLANE_TRICKLE_FEED_DISABLE); -		intel_flush_primary_plane(dev_priv, pipe); + +		I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe))); +		POSTING_READ(DSPSURF(pipe));  	}  } @@ -6094,10 +6182,9 @@ static void broadwell_init_clock_gating(struct drm_device *dev)  {  	struct drm_i915_private *dev_priv = dev->dev_private;  	enum pipe pipe; +	uint32_t misccpctl; -	I915_WRITE(WM3_LP_ILK, 0); -	I915_WRITE(WM2_LP_ILK, 0); -	I915_WRITE(WM1_LP_ILK, 0); +	ilk_init_lp_watermarks(dev);  	/* WaSwitchSolVfFArbitrationPriority:bdw */  	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); @@ -6126,6 +6213,22 @@ static void broadwell_init_clock_gating(struct drm_device *dev)  	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |  		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE); +	/* +	 * WaProgramL3SqcReg1Default:bdw +	 * WaTempDisableDOPClkGating:bdw +	 */ +	misccpctl = I915_READ(GEN7_MISCCPCTL); +	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); +	I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT); +	I915_WRITE(GEN7_MISCCPCTL, misccpctl); + +	/* +	 * WaGttCachingOffByDefault:bdw +	 * GTT cache may not work with big pages, so if those +	 * are ever enabled GTT cache may need to be disabled. +	 */ +	I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); +  	lpt_init_clock_gating(dev);  } @@ -6401,6 +6504,12 @@ static void cherryview_init_clock_gating(struct drm_device *dev)  	/* WaDisableSDEUnitClockGating:chv */  	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |  		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + +	/* +	 * GTT cache may not work with big pages, so if those +	 * are ever enabled GTT cache may need to be disabled. +	 */ +	I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);  }  static void g4x_init_clock_gating(struct drm_device *dev) @@ -6544,7 +6653,12 @@ void intel_init_pm(struct drm_device *dev)  	if (INTEL_INFO(dev)->gen >= 9) {  		skl_setup_wm_latency(dev); -		dev_priv->display.init_clock_gating = skl_init_clock_gating; +		if (IS_BROXTON(dev)) +			dev_priv->display.init_clock_gating = +				bxt_init_clock_gating; +		else if (IS_SKYLAKE(dev)) +			dev_priv->display.init_clock_gating = +				skl_init_clock_gating;  		dev_priv->display.update_wm = skl_update_wm;  		dev_priv->display.update_sprite_wm = skl_update_sprite_wm;  	} else if (HAS_PCH_SPLIT(dev)) { @@ -6762,14 +6876,58 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)  		return val / GT_FREQUENCY_MULTIPLIER;  } +struct request_boost { +	struct work_struct work; +	struct drm_i915_gem_request *req; +}; + +static void __intel_rps_boost_work(struct work_struct *work) +{ +	struct request_boost *boost = container_of(work, struct request_boost, work); +	struct drm_i915_gem_request *req = boost->req; + +	if (!i915_gem_request_completed(req, true)) +		gen6_rps_boost(to_i915(req->ring->dev), NULL, +			       req->emitted_jiffies); + +	i915_gem_request_unreference__unlocked(req); +	kfree(boost); +} + +void intel_queue_rps_boost_for_request(struct drm_device *dev, +				       struct drm_i915_gem_request *req) +{ +	struct request_boost *boost; + +	if (req == NULL || INTEL_INFO(dev)->gen < 6) +		return; + +	if (i915_gem_request_completed(req, true)) +		return; + +	boost = kmalloc(sizeof(*boost), GFP_ATOMIC); +	if (boost == NULL) +		return; + +	i915_gem_request_reference(req); +	boost->req = req; + +	INIT_WORK(&boost->work, __intel_rps_boost_work); +	queue_work(to_i915(dev)->wq, &boost->work); +} +  void intel_pm_setup(struct drm_device *dev)  {  	struct drm_i915_private *dev_priv = dev->dev_private;  	mutex_init(&dev_priv->rps.hw_lock); +	spin_lock_init(&dev_priv->rps.client_lock);  	INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,  			  intel_gen6_powersave_work); +	INIT_LIST_HEAD(&dev_priv->rps.clients); +	INIT_LIST_HEAD(&dev_priv->rps.semaphores.link); +	INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);  	dev_priv->pm.suspended = false;  } | 
