Diffstat (limited to 'drivers/gpu/drm/i915/intel_pm.c')
-rw-r--r--	drivers/gpu/drm/i915/intel_pm.c	245
1 file changed, 177 insertions, 68 deletions
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index adca00783e61..de3b0dc5658b 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2460,10 +2460,14 @@ void gen6_set_rps(struct drm_device *dev, u8 val)
 	if (val == dev_priv->rps.cur_delay)
 		return;
 
-	I915_WRITE(GEN6_RPNSWREQ,
-		   GEN6_FREQUENCY(val) |
-		   GEN6_OFFSET(0) |
-		   GEN6_AGGRESSIVE_TURBO);
+	if (IS_HASWELL(dev))
+		I915_WRITE(GEN6_RPNSWREQ,
+			   HSW_FREQUENCY(val));
+	else
+		I915_WRITE(GEN6_RPNSWREQ,
+			   GEN6_FREQUENCY(val) |
+			   GEN6_OFFSET(0) |
+			   GEN6_AGGRESSIVE_TURBO);
 
 	/* Make sure we continue to get interrupts
 	 * until we hit the minimum or maximum frequencies.
@@ -2554,8 +2558,8 @@ static void gen6_enable_rps(struct drm_device *dev)
 	rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
 	gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
 
-	/* In units of 100MHz */
-	dev_priv->rps.max_delay = rp_state_cap & 0xff;
+	/* In units of 50MHz */
+	dev_priv->rps.hw_max = dev_priv->rps.max_delay = rp_state_cap & 0xff;
 	dev_priv->rps.min_delay = (rp_state_cap & 0xff0000) >> 16;
 	dev_priv->rps.cur_delay = 0;
 
@@ -2601,12 +2605,19 @@ static void gen6_enable_rps(struct drm_device *dev)
 		   GEN6_RC_CTL_EI_MODE(1) |
 		   GEN6_RC_CTL_HW_ENABLE);
 
-	I915_WRITE(GEN6_RPNSWREQ,
-		   GEN6_FREQUENCY(10) |
-		   GEN6_OFFSET(0) |
-		   GEN6_AGGRESSIVE_TURBO);
-	I915_WRITE(GEN6_RC_VIDEO_FREQ,
-		   GEN6_FREQUENCY(12));
+	if (IS_HASWELL(dev)) {
+		I915_WRITE(GEN6_RPNSWREQ,
+			   HSW_FREQUENCY(10));
+		I915_WRITE(GEN6_RC_VIDEO_FREQ,
+			   HSW_FREQUENCY(12));
+	} else {
+		I915_WRITE(GEN6_RPNSWREQ,
+			   GEN6_FREQUENCY(10) |
+			   GEN6_OFFSET(0) |
+			   GEN6_AGGRESSIVE_TURBO);
+		I915_WRITE(GEN6_RC_VIDEO_FREQ,
+			   GEN6_FREQUENCY(12));
+	}
 
 	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
 	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
@@ -2631,9 +2642,11 @@ static void gen6_enable_rps(struct drm_device *dev)
 	if (!ret) {
 		pcu_mbox = 0;
 		ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox);
-		if (ret && pcu_mbox & (1<<31)) { /* OC supported */
-			dev_priv->rps.max_delay = pcu_mbox & 0xff;
-			DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 50);
+		if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */
+			DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n",
+					 (dev_priv->rps.max_delay & 0xff) * 50,
+					 (pcu_mbox & 0xff) * 50);
+			dev_priv->rps.hw_max = pcu_mbox & 0xff;
 		}
 	} else {
 		DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
@@ -2671,8 +2684,8 @@ static void gen6_update_ring_freq(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int min_freq = 15;
-	int gpu_freq;
-	unsigned int ia_freq, max_ia_freq;
+	unsigned int gpu_freq;
+	unsigned int max_ia_freq, min_ring_freq;
 	int scaling_factor = 180;
 
 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
@@ -2688,6 +2701,10 @@ static void gen6_update_ring_freq(struct drm_device *dev)
 	/* Convert from kHz to MHz */
 	max_ia_freq /= 1000;
 
+	min_ring_freq = I915_READ(MCHBAR_MIRROR_BASE_SNB + DCLK);
+	/* convert DDR frequency from units of 133.3MHz to bandwidth */
+	min_ring_freq = (2 * 4 * min_ring_freq + 2) / 3;
+
 	/*
 	 * For each potential GPU frequency, load a ring frequency we'd like
 	 * to use for memory access.  We do this by specifying the IA frequency
@@ -2696,21 +2713,32 @@ static void gen6_update_ring_freq(struct drm_device *dev)
 	for (gpu_freq = dev_priv->rps.max_delay; gpu_freq >= dev_priv->rps.min_delay;
 	     gpu_freq--) {
 		int diff = dev_priv->rps.max_delay - gpu_freq;
-
-		/*
-		 * For GPU frequencies less than 750MHz, just use the lowest
-		 * ring freq.
-		 */
-		if (gpu_freq < min_freq)
-			ia_freq = 800;
-		else
-			ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
-		ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
-		ia_freq <<= GEN6_PCODE_FREQ_IA_RATIO_SHIFT;
+		unsigned int ia_freq = 0, ring_freq = 0;
+
+		if (IS_HASWELL(dev)) {
+			ring_freq = (gpu_freq * 5 + 3) / 4;
+			ring_freq = max(min_ring_freq, ring_freq);
+			/* leave ia_freq as the default, chosen by cpufreq */
+		} else {
+			/* On older processors, there is no separate ring
+			 * clock domain, so in order to boost the bandwidth
+			 * of the ring, we need to upclock the CPU (ia_freq).
+			 *
+			 * For GPU frequencies less than 750MHz,
+			 * just use the lowest ring freq.
+			 */
+			if (gpu_freq < min_freq)
+				ia_freq = 800;
+			else
+				ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
+			ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
+		}
 
 		sandybridge_pcode_write(dev_priv,
					GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
-					ia_freq | gpu_freq);
+					ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
+					ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
+					gpu_freq);
 	}
 }
 
@@ -2821,7 +2849,7 @@ static void ironlake_enable_rc6(struct drm_device *dev)
 	ret = intel_ring_idle(ring);
 	dev_priv->mm.interruptible = was_interruptible;
 	if (ret) {
-		DRM_ERROR("failed to enable ironlake power power savings\n");
+		DRM_ERROR("failed to enable ironlake power savings\n");
 		ironlake_teardown_rc6(dev);
 		return;
 	}
@@ -3562,6 +3590,7 @@ static void cpt_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe;
+	uint32_t val;
 
 	/*
 	 * On Ibex Peak and Cougar Point, we need to disable clock
@@ -3574,8 +3603,17 @@
 	/* The below fixes the weird display corruption, a few pixels shifted
 	 * downward, on (only) LVDS of some HP laptops with IVY.
 	 */
-	for_each_pipe(pipe)
-		I915_WRITE(TRANS_CHICKEN2(pipe), TRANS_CHICKEN2_TIMING_OVERRIDE);
+	for_each_pipe(pipe) {
+		val = I915_READ(TRANS_CHICKEN2(pipe));
+		val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
+		val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
+		if (dev_priv->fdi_rx_polarity_inverted)
+			val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
+		val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
+		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
+		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
+		I915_WRITE(TRANS_CHICKEN2(pipe), val);
+	}
 	/* WADP0ClockGatingDisable */
 	for_each_pipe(pipe) {
 		I915_WRITE(TRANS_CHICKEN1(pipe),
@@ -3768,6 +3806,9 @@ static void haswell_init_clock_gating(struct drm_device *dev)
 	I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
 		   GEN6_MBCTL_ENABLE_BOOT_FETCH);
 
+	/* WaSwitchSolVfFArbitrationPriority */
+	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
+
 	/* XXX: This is a workaround for early silicon revisions and should be
 	 * removed later.
 	 */
@@ -3874,7 +3915,8 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
 	snpcr |= GEN6_MBC_SNPCR_MED;
 	I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
 
-	cpt_init_clock_gating(dev);
+	if (!HAS_PCH_NOP(dev))
+		cpt_init_clock_gating(dev);
 
 	gen6_check_mch_setup(dev);
 }
@@ -3899,8 +3941,10 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
 		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
 		   CHICKEN3_DGMG_DONE_FIX_DISABLE);
 
+	/* WaDisablePSDDualDispatchEnable */
 	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
-		   _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
+		   _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
+				      GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
 
 	/* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */
 	I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
@@ -3968,24 +4012,20 @@
 		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
 
 	/*
-	 * On ValleyView, the GUnit needs to signal the GT
-	 * when flip and other events complete.  So enable
-	 * all the GUnit->GT interrupts here
-	 */
-	I915_WRITE(VLV_DPFLIPSTAT, PIPEB_LINE_COMPARE_INT_EN |
-		   PIPEB_HLINE_INT_EN | PIPEB_VBLANK_INT_EN |
-		   SPRITED_FLIPDONE_INT_EN | SPRITEC_FLIPDONE_INT_EN |
-		   PLANEB_FLIPDONE_INT_EN | PIPEA_LINE_COMPARE_INT_EN |
-		   PIPEA_HLINE_INT_EN | PIPEA_VBLANK_INT_EN |
-		   SPRITEB_FLIPDONE_INT_EN | SPRITEA_FLIPDONE_INT_EN |
-		   PLANEA_FLIPDONE_INT_EN);
-
-	/*
 	 * WaDisableVLVClockGating_VBIIssue
 	 * Disable clock gating on th GCFG unit to prevent a delay
 	 * in the reporting of vblank events.
 	 */
-	I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
+	I915_WRITE(VLV_GUNIT_CLOCK_GATE, 0xffffffff);
+
+	/* Conservative clock gating settings for now */
+	I915_WRITE(0x9400, 0xffffffff);
+	I915_WRITE(0x9404, 0xffffffff);
+	I915_WRITE(0x9408, 0xffffffff);
+	I915_WRITE(0x940c, 0xffffffff);
+	I915_WRITE(0x9410, 0xffffffff);
+	I915_WRITE(0x9414, 0xffffffff);
+	I915_WRITE(0x9418, 0xffffffff);
 }
 
 static void g4x_init_clock_gating(struct drm_device *dev)
@@ -4070,13 +4110,29 @@ void intel_init_clock_gating(struct drm_device *dev)
 	dev_priv->display.init_clock_gating(dev);
 }
 
+/**
+ * We should only use the power well if we explicitly asked the hardware to
+ * enable it, so check if it's enabled and also check if we've requested it to
+ * be enabled.
+ */
+bool intel_using_power_well(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (IS_HASWELL(dev))
+		return I915_READ(HSW_PWR_WELL_DRIVER) ==
+		       (HSW_PWR_WELL_ENABLE | HSW_PWR_WELL_STATE);
+	else
+		return true;
+}
+
 void intel_set_power_well(struct drm_device *dev, bool enable)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	bool is_enabled, enable_requested;
 	uint32_t tmp;
 
-	if (!IS_HASWELL(dev))
+	if (!HAS_POWER_WELL(dev))
 		return;
 
 	if (!i915_disable_power_well && !enable)
@@ -4114,7 +4170,7 @@ void intel_init_power_well(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	if (!IS_HASWELL(dev))
+	if (!HAS_POWER_WELL(dev))
 		return;
 
 	/* For now, we need the power well to be always enabled. */
@@ -4176,7 +4232,6 @@ void intel_init_pm(struct drm_device *dev)
 			}
 			dev_priv->display.init_clock_gating = gen6_init_clock_gating;
 		} else if (IS_IVYBRIDGE(dev)) {
-			/* FIXME: detect B0+ stepping and use auto training */
 			if (SNB_READ_WM0_LATENCY()) {
 				dev_priv->display.update_wm = ivybridge_update_wm;
 				dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
@@ -4274,21 +4329,14 @@ static void __gen6_gt_force_wake_reset(struct drm_i915_private *dev_priv)
 
 static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
 {
-	u32 forcewake_ack;
-
-	if (IS_HASWELL(dev_priv->dev))
-		forcewake_ack = FORCEWAKE_ACK_HSW;
-	else
-		forcewake_ack = FORCEWAKE_ACK;
-
-	if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & 1) == 0,
+	if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK) & 1) == 0,
 			    FORCEWAKE_ACK_TIMEOUT_MS))
 		DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
 
-	I915_WRITE_NOTRACE(FORCEWAKE, FORCEWAKE_KERNEL);
+	I915_WRITE_NOTRACE(FORCEWAKE, 1);
 	POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE */
 
-	if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & 1),
+	if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK) & 1),
			    FORCEWAKE_ACK_TIMEOUT_MS))
 		DRM_ERROR("Timed out waiting for forcewake to ack request.\n");
 
@@ -4311,7 +4359,7 @@ static void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv)
 	else
 		forcewake_ack = FORCEWAKE_MT_ACK;
 
-	if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & 1) == 0,
+	if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & FORCEWAKE_KERNEL) == 0,
			    FORCEWAKE_ACK_TIMEOUT_MS))
 		DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
 
@@ -4319,7 +4367,7 @@
 	/* something from same cacheline, but !FORCEWAKE_MT */
 	POSTING_READ(ECOBUS);
 
-	if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & 1),
+	if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & FORCEWAKE_KERNEL),
			    FORCEWAKE_ACK_TIMEOUT_MS))
 		DRM_ERROR("Timed out waiting for forcewake to ack request.\n");
 
@@ -4409,15 +4457,22 @@ static void vlv_force_wake_reset(struct drm_i915_private *dev_priv)
 
 static void vlv_force_wake_get(struct drm_i915_private *dev_priv)
 {
-	if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & 1) == 0,
+	if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & FORCEWAKE_KERNEL) == 0,
			    FORCEWAKE_ACK_TIMEOUT_MS))
 		DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
 
 	I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
+	I915_WRITE_NOTRACE(FORCEWAKE_MEDIA_VLV,
+			   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
 
-	if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & 1),
+	if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & FORCEWAKE_KERNEL),
			    FORCEWAKE_ACK_TIMEOUT_MS))
-		DRM_ERROR("Timed out waiting for forcewake to ack request.\n");
+		DRM_ERROR("Timed out waiting for GT to ack forcewake request.\n");
+
+	if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_MEDIA_VLV) &
+			     FORCEWAKE_KERNEL),
+			    FORCEWAKE_ACK_TIMEOUT_MS))
+		DRM_ERROR("Timed out waiting for media to ack forcewake request.\n");
 
 	__gen6_gt_wait_for_thread_c0(dev_priv);
 }
@@ -4425,8 +4480,9 @@ static void vlv_force_wake_get(struct drm_i915_private *dev_priv)
 
 static void vlv_force_wake_put(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
-	/* something from same cacheline, but !FORCEWAKE_VLV */
-	POSTING_READ(FORCEWAKE_ACK_VLV);
+	I915_WRITE_NOTRACE(FORCEWAKE_MEDIA_VLV,
+			   _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
+	/* The below doubles as a POSTING_READ */
 	gen6_gt_check_fifodbg(dev_priv);
 }
 
@@ -4511,3 +4567,56 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val)
 
 	return 0;
 }
+
+static int vlv_punit_rw(struct drm_i915_private *dev_priv, u8 opcode,
+			u8 addr, u32 *val)
+{
+	u32 cmd, devfn, port, be, bar;
+
+	bar = 0;
+	be = 0xf;
+	port = IOSF_PORT_PUNIT;
+	devfn = PCI_DEVFN(2, 0);
+
+	cmd = (devfn << IOSF_DEVFN_SHIFT) | (opcode << IOSF_OPCODE_SHIFT) |
+		(port << IOSF_PORT_SHIFT) | (be << IOSF_BYTE_ENABLES_SHIFT) |
+		(bar << IOSF_BAR_SHIFT);
+
+	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+
+	if (I915_READ(VLV_IOSF_DOORBELL_REQ) & IOSF_SB_BUSY) {
+		DRM_DEBUG_DRIVER("warning: pcode (%s) mailbox access failed\n",
+				 opcode == PUNIT_OPCODE_REG_READ ?
+				 "read" : "write");
+		return -EAGAIN;
+	}
+
+	I915_WRITE(VLV_IOSF_ADDR, addr);
+	if (opcode == PUNIT_OPCODE_REG_WRITE)
+		I915_WRITE(VLV_IOSF_DATA, *val);
+	I915_WRITE(VLV_IOSF_DOORBELL_REQ, cmd);
+
+	if (wait_for((I915_READ(VLV_IOSF_DOORBELL_REQ) & IOSF_SB_BUSY) == 0,
+		     500)) {
+		DRM_ERROR("timeout waiting for pcode %s (%d) to finish\n",
+			  opcode == PUNIT_OPCODE_REG_READ ? "read" : "write",
+			  addr);
+		return -ETIMEDOUT;
+	}
+
+	if (opcode == PUNIT_OPCODE_REG_READ)
+		*val = I915_READ(VLV_IOSF_DATA);
+	I915_WRITE(VLV_IOSF_DATA, 0);
+
+	return 0;
+}
+
+int valleyview_punit_read(struct drm_i915_private *dev_priv, u8 addr, u32 *val)
+{
+	return vlv_punit_rw(dev_priv, PUNIT_OPCODE_REG_READ, addr, val);
+}
+
+int valleyview_punit_write(struct drm_i915_private *dev_priv, u8 addr, u32 val)
+{
+	return vlv_punit_rw(dev_priv, PUNIT_OPCODE_REG_WRITE, addr, &val);
+}
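
For readers tracing the gen6_update_ring_freq() hunks above, the sketch below mirrors the frequency-table arithmetic in plain C so it can be compiled and run outside the kernel. It is a minimal illustration, not driver code: print_freq_table(), the DIV_ROUND_CLOSEST stand-in, and the sample numbers in main() are invented for the example; only the formulas come from the patch (the Haswell round-up ring_freq = (gpu_freq * 5 + 3) / 4, the DCLK conversion (2 * 4 * dclk + 2) / 3, and the SNB/IVB IA scaling with its 800MHz floor below 750MHz).

/* Standalone sketch of the ring/IA frequency table math from
 * gen6_update_ring_freq() above.  GPU frequencies are in units of
 * 50MHz, matching the rps delay encoding used by the driver. */
#include <stdbool.h>
#include <stdio.h>

/* userspace stand-in for the kernel macro (unsigned operands only) */
#define DIV_ROUND_CLOSEST(x, d) (((x) + ((d) / 2)) / (d))

static void print_freq_table(bool is_haswell, unsigned int max_delay,
			     unsigned int min_delay, unsigned int max_ia_freq,
			     unsigned int min_ring_freq)
{
	const unsigned int min_freq = 15;	/* 750MHz in 50MHz units */
	const unsigned int scaling_factor = 180;
	unsigned int gpu_freq;

	for (gpu_freq = max_delay; gpu_freq >= min_delay; gpu_freq--) {
		unsigned int diff = max_delay - gpu_freq;
		unsigned int ia_freq = 0, ring_freq = 0;

		if (is_haswell) {
			/* ring clock scales with the GPU clock (5/4,
			 * rounded up) but never drops below the
			 * DDR-derived minimum */
			ring_freq = (gpu_freq * 5 + 3) / 4;
			if (ring_freq < min_ring_freq)
				ring_freq = min_ring_freq;
		} else {
			/* no separate ring clock domain: upclock the
			 * CPU instead, with an 800MHz floor for GPU
			 * frequencies below 750MHz */
			if (gpu_freq < min_freq)
				ia_freq = 800;
			else
				ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
			/* IA ratio is programmed in 100MHz units */
			ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
		}

		printf("gpu %4uMHz -> ia ratio %2u, ring ratio %2u\n",
		       gpu_freq * 50, ia_freq, ring_freq);
	}
}

int main(void)
{
	/* Made-up example inputs: a 350-1100MHz GPU (units of 50MHz),
	 * a 3.4GHz CPU, and a DCLK reading of 5 (units of 133.3MHz). */
	unsigned int dclk = 5;
	unsigned int min_ring_freq = (2 * 4 * dclk + 2) / 3;

	print_freq_table(true, 22, 7, 3400, min_ring_freq);	/* HSW path */
	print_freq_table(false, 22, 7, 3400, min_ring_freq);	/* SNB/IVB path */
	return 0;
}

In the patch itself the three fields go into a single GEN6_PCODE_WRITE_MIN_FREQ_TABLE write, with ia_freq and ring_freq shifted by GEN6_PCODE_FREQ_IA_RATIO_SHIFT and GEN6_PCODE_FREQ_RING_RATIO_SHIFT; on Haswell ia_freq is left at 0 so cpufreq keeps choosing the CPU clock.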
