Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/core.c               | 26
-rw-r--r--  kernel/sched/cpufreq_schedutil.c  | 20
-rw-r--r--  kernel/sched/deadline.c           | 97
-rw-r--r--  kernel/sched/debug.c              | 12
-rw-r--r--  kernel/sched/fair.c               | 73
-rw-r--r--  kernel/sched/idle.c               | 28
6 files changed, 159 insertions(+), 97 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d2003a7d5ab5..e7e453492cff 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2501,7 +2501,12 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
 #ifdef CONFIG_SMP
 	if (wake_flags & WF_MIGRATED)
 		en_flags |= ENQUEUE_MIGRATED;
+	else
 #endif
+	if (p->in_iowait) {
+		delayacct_blkio_end(p);
+		atomic_dec(&task_rq(p)->nr_iowait);
+	}
 
 	activate_task(rq, p, en_flags);
 	ttwu_do_wakeup(rq, p, wake_flags, rf);
@@ -2888,11 +2893,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
 		goto unlock;
 
-	if (p->in_iowait) {
-		delayacct_blkio_end(p);
-		atomic_dec(&task_rq(p)->nr_iowait);
-	}
-
 #ifdef CONFIG_SMP
 	/*
 	 * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
@@ -2963,6 +2963,11 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 
 	cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
 	if (task_cpu(p) != cpu) {
+		if (p->in_iowait) {
+			delayacct_blkio_end(p);
+			atomic_dec(&task_rq(p)->nr_iowait);
+		}
+
 		wake_flags |= WF_MIGRATED;
 		psi_ttwu_dequeue(p);
 		set_task_cpu(p, cpu);
@@ -4907,20 +4912,21 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
 		if (!dl_prio(p->normal_prio) ||
 		    (pi_task && dl_prio(pi_task->prio) &&
 		     dl_entity_preempt(&pi_task->dl, &p->dl))) {
-			p->dl.dl_boosted = 1;
+			p->dl.pi_se = pi_task->dl.pi_se;
 			queue_flag |= ENQUEUE_REPLENISH;
-		} else
-			p->dl.dl_boosted = 0;
+		} else {
+			p->dl.pi_se = &p->dl;
+		}
 		p->sched_class = &dl_sched_class;
 	} else if (rt_prio(prio)) {
 		if (dl_prio(oldprio))
-			p->dl.dl_boosted = 0;
+			p->dl.pi_se = &p->dl;
 		if (oldprio < prio)
 			queue_flag |= ENQUEUE_HEAD;
 		p->sched_class = &rt_sched_class;
 	} else {
 		if (dl_prio(oldprio))
-			p->dl.dl_boosted = 0;
+			p->dl.pi_se = &p->dl;
 		if (rt_prio(oldprio))
 			p->rt.timeout = 0;
 		p->sched_class = &fair_sched_class;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index e254745a82cb..97d318b0cd0c 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -102,8 +102,12 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
 static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
 				   unsigned int next_freq)
 {
-	if (sg_policy->next_freq == next_freq)
-		return false;
+	if (!sg_policy->need_freq_update) {
+		if (sg_policy->next_freq == next_freq)
+			return false;
+	} else {
+		sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
+	}
 
 	sg_policy->next_freq = next_freq;
 	sg_policy->last_freq_update_time = time;
@@ -164,7 +168,6 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
 	if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
 		return sg_policy->next_freq;
 
-	sg_policy->need_freq_update = false;
 	sg_policy->cached_raw_freq = freq;
 	return cpufreq_driver_resolve_freq(policy, freq);
 }
@@ -440,7 +443,6 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	unsigned long util, max;
 	unsigned int next_f;
-	bool busy;
 	unsigned int cached_freq = sg_policy->cached_raw_freq;
 
 	sugov_iowait_boost(sg_cpu, time, flags);
@@ -451,9 +453,6 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	if (!sugov_should_update_freq(sg_policy, time))
 		return;
 
-	/* Limits may have changed, don't skip frequency update */
-	busy = !sg_policy->need_freq_update && sugov_cpu_is_busy(sg_cpu);
-
 	util = sugov_get_util(sg_cpu);
 	max = sg_cpu->max;
 	util = sugov_iowait_apply(sg_cpu, time, util, max);
@@ -462,7 +461,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	 * Do not reduce the frequency if the CPU has not been idle
 	 * recently, as the reduction is likely to be premature then.
 	 */
-	if (busy && next_f < sg_policy->next_freq) {
+	if (sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) {
 		next_f = sg_policy->next_freq;
 
 		/* Restore cached freq as next_freq has changed */
@@ -827,9 +826,10 @@ static int sugov_start(struct cpufreq_policy *policy)
 	sg_policy->next_freq			= 0;
 	sg_policy->work_in_progress		= false;
 	sg_policy->limits_changed		= false;
-	sg_policy->need_freq_update		= false;
 	sg_policy->cached_raw_freq		= 0;
 
+	sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
+
 	for_each_cpu(cpu, policy->cpus) {
 		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
 
@@ -881,7 +881,7 @@ static void sugov_limits(struct cpufreq_policy *policy)
 struct cpufreq_governor schedutil_gov = {
 	.name			= "schedutil",
 	.owner			= THIS_MODULE,
-	.dynamic_switching	= true,
+	.flags			= CPUFREQ_GOV_DYNAMIC_SWITCHING,
 	.init			= sugov_init,
 	.exit			= sugov_exit,
 	.start			= sugov_start,
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index f232305dcefe..1d3c97268ec0 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -43,6 +43,28 @@ static inline int on_dl_rq(struct sched_dl_entity *dl_se)
 	return !RB_EMPTY_NODE(&dl_se->rb_node);
 }
 
+#ifdef CONFIG_RT_MUTEXES
+static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
+{
+	return dl_se->pi_se;
+}
+
+static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
+{
+	return pi_of(dl_se) != dl_se;
+}
+#else
+static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
+{
+	return dl_se;
+}
+
+static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
+{
+	return false;
+}
+#endif
+
 #ifdef CONFIG_SMP
 static inline struct dl_bw *dl_bw_of(int i)
 {
@@ -698,7 +720,7 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
-	WARN_ON(dl_se->dl_boosted);
+	WARN_ON(is_dl_boosted(dl_se));
 	WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
 
 	/*
@@ -736,21 +758,20 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
  * could happen are, typically, a entity voluntarily trying to overcome its
  * runtime, or it just underestimated it during sched_setattr().
  */
-static void replenish_dl_entity(struct sched_dl_entity *dl_se,
-				struct sched_dl_entity *pi_se)
+static void replenish_dl_entity(struct sched_dl_entity *dl_se)
 {
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
-	BUG_ON(pi_se->dl_runtime <= 0);
+	BUG_ON(pi_of(dl_se)->dl_runtime <= 0);
 
 	/*
 	 * This could be the case for a !-dl task that is boosted.
 	 * Just go with full inherited parameters.
 	 */
 	if (dl_se->dl_deadline == 0) {
-		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
-		dl_se->runtime = pi_se->dl_runtime;
+		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
+		dl_se->runtime = pi_of(dl_se)->dl_runtime;
 	}
 
 	if (dl_se->dl_yielded && dl_se->runtime > 0)
@@ -763,8 +784,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
 	 * arbitrary large.
 	 */
 	while (dl_se->runtime <= 0) {
-		dl_se->deadline += pi_se->dl_period;
-		dl_se->runtime += pi_se->dl_runtime;
+		dl_se->deadline += pi_of(dl_se)->dl_period;
+		dl_se->runtime += pi_of(dl_se)->dl_runtime;
 	}
 
 	/*
@@ -778,8 +799,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
 	 */
 	if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
 		printk_deferred_once("sched: DL replenish lagged too much\n");
-		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
-		dl_se->runtime = pi_se->dl_runtime;
+		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
+		dl_se->runtime = pi_of(dl_se)->dl_runtime;
 	}
 
 	if (dl_se->dl_yielded)
@@ -812,8 +833,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
  * task with deadline equal to period this is the same of using
  * dl_period instead of dl_deadline in the equation above.
  */
-static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
-			       struct sched_dl_entity *pi_se, u64 t)
+static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
 {
 	u64 left, right;
 
@@ -835,9 +855,9 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
 	 * of anything below microseconds resolution is actually fiction
 	 * (but still we want to give the user that illusion >;).
 	 */
-	left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
+	left = (pi_of(dl_se)->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
 	right = ((dl_se->deadline - t) >> DL_SCALE) *
-		(pi_se->dl_runtime >> DL_SCALE);
+		(pi_of(dl_se)->dl_runtime >> DL_SCALE);
 
 	return dl_time_before(right, left);
 }
@@ -922,24 +942,23 @@ static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
  * Please refer to the comments update_dl_revised_wakeup() function to find
  * more about the Revised CBS rule.
  */
-static void update_dl_entity(struct sched_dl_entity *dl_se,
-			     struct sched_dl_entity *pi_se)
+static void update_dl_entity(struct sched_dl_entity *dl_se)
 {
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
 	if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
-	    dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
+	    dl_entity_overflow(dl_se, rq_clock(rq))) {
 
 		if (unlikely(!dl_is_implicit(dl_se) &&
 			     !dl_time_before(dl_se->deadline, rq_clock(rq)) &&
-			     !dl_se->dl_boosted)){
+			     !is_dl_boosted(dl_se))) {
 			update_dl_revised_wakeup(dl_se, rq);
 			return;
 		}
 
-		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
-		dl_se->runtime = pi_se->dl_runtime;
+		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
+		dl_se->runtime = pi_of(dl_se)->dl_runtime;
 	}
 }
 
@@ -1038,7 +1057,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 	 * The task might have been boosted by someone else and might be in the
 	 * boosting/deboosting path, its not throttled.
 	 */
-	if (dl_se->dl_boosted)
+	if (is_dl_boosted(dl_se))
 		goto unlock;
 
 	/*
@@ -1066,7 +1085,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 	 * but do not enqueue -- wait for our wakeup to do that.
 	 */
 	if (!task_on_rq_queued(p)) {
-		replenish_dl_entity(dl_se, dl_se);
+		replenish_dl_entity(dl_se);
 		goto unlock;
 	}
 
@@ -1156,7 +1175,7 @@ static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
 
 	if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
 	    dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
-		if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
+		if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(p)))
 			return;
 		dl_se->dl_throttled = 1;
 		if (dl_se->runtime > 0)
@@ -1287,7 +1306,7 @@ throttle:
 			dl_se->dl_overrun = 1;
 
 		__dequeue_task_dl(rq, curr, 0);
-		if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
+		if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(curr)))
 			enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
 
 		if (!is_leftmost(curr, &rq->dl))
@@ -1481,8 +1500,7 @@ static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
 }
 
 static void
-enqueue_dl_entity(struct sched_dl_entity *dl_se,
-		  struct sched_dl_entity *pi_se, int flags)
+enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
 {
 	BUG_ON(on_dl_rq(dl_se));
 
@@ -1493,9 +1511,9 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se,
 	 */
 	if (flags & ENQUEUE_WAKEUP) {
 		task_contending(dl_se, flags);
-		update_dl_entity(dl_se, pi_se);
+		update_dl_entity(dl_se);
 	} else if (flags & ENQUEUE_REPLENISH) {
-		replenish_dl_entity(dl_se, pi_se);
+		replenish_dl_entity(dl_se);
 	} else if ((flags & ENQUEUE_RESTORE) &&
 		  dl_time_before(dl_se->deadline,
 				 rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
@@ -1512,19 +1530,7 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
 
 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
-	struct task_struct *pi_task = rt_mutex_get_top_task(p);
-	struct sched_dl_entity *pi_se = &p->dl;
-
-	/*
-	 * Use the scheduling parameters of the top pi-waiter task if:
-	 * - we have a top pi-waiter which is a SCHED_DEADLINE task AND
-	 * - our dl_boosted is set (i.e. the pi-waiter's (absolute) deadline is
-	 *   smaller than our deadline OR we are a !SCHED_DEADLINE task getting
-	 *   boosted due to a SCHED_DEADLINE pi-waiter).
-	 * Otherwise we keep our runtime and deadline.
-	 */
-	if (pi_task && dl_prio(pi_task->normal_prio) && p->dl.dl_boosted) {
-		pi_se = &pi_task->dl;
+	if (is_dl_boosted(&p->dl)) {
 		/*
 		 * Because of delays in the detection of the overrun of a
 		 * thread's runtime, it might be the case that a thread
@@ -1557,7 +1563,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 		 * the throttle.
 		 */
 		p->dl.dl_throttled = 0;
-		BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH);
+		BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH);
 		return;
 	}
 
@@ -1594,7 +1600,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 		return;
 	}
 
-	enqueue_dl_entity(&p->dl, pi_se, flags);
+	enqueue_dl_entity(&p->dl, flags);
 
 	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
 		enqueue_pushable_dl_task(rq, p);
@@ -2787,11 +2793,14 @@ void __dl_clear_params(struct task_struct *p)
 	dl_se->dl_bw			= 0;
 	dl_se->dl_density		= 0;
 
-	dl_se->dl_boosted		= 0;
 	dl_se->dl_throttled		= 0;
 	dl_se->dl_yielded		= 0;
 	dl_se->dl_non_contending	= 0;
 	dl_se->dl_overrun		= 0;
+
+#ifdef CONFIG_RT_MUTEXES
+	dl_se->pi_se			= dl_se;
+#endif
 }
 
 bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 0655524700d2..2357921580f9 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -251,7 +251,7 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
 	unsigned long flags = *(unsigned long *)table->data;
 	size_t data_size = 0;
 	size_t len = 0;
-	char *tmp;
+	char *tmp, *buf;
 	int idx;
 
 	if (write)
@@ -269,17 +269,17 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
 		return 0;
 	}
 
-	tmp = kcalloc(data_size + 1, sizeof(*tmp), GFP_KERNEL);
-	if (!tmp)
+	buf = kcalloc(data_size + 1, sizeof(*buf), GFP_KERNEL);
+	if (!buf)
 		return -ENOMEM;
 
 	for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
 		char *name = sd_flag_debug[idx].name;
 
-		len += snprintf(tmp + len, strlen(name) + 2, "%s ", name);
+		len += snprintf(buf + len, strlen(name) + 2, "%s ", name);
 	}
 
-	tmp += *ppos;
+	tmp = buf + *ppos;
 	len -= *ppos;
 
 	if (len > *lenp)
@@ -294,7 +294,7 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
 	*lenp = len;
 	*ppos += len;
 
-	kfree(tmp);
+	kfree(buf);
 
 	return 0;
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 290f9e38378c..ae7ceba8fd4f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5477,6 +5477,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
 	int idle_h_nr_running = task_has_idle_policy(p);
+	int task_new = !(flags & ENQUEUE_WAKEUP);
 
 	/*
 	 * The code below (indirectly) updates schedutil which looks at
@@ -5549,7 +5550,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	 * into account, but that is not straightforward to implement,
 	 * and the following generally works well enough in practice.
 	 */
-	if (flags & ENQUEUE_WAKEUP)
+	if (!task_new)
 		update_overutilized_status(rq);
 
 enqueue_throttle:
@@ -6172,21 +6173,21 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 static int
 select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
 {
-	unsigned long best_cap = 0;
+	unsigned long task_util, best_cap = 0;
 	int cpu, best_cpu = -1;
 	struct cpumask *cpus;
 
-	sync_entity_load_avg(&p->se);
-
 	cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
 	cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
 
+	task_util = uclamp_task_util(p);
+
 	for_each_cpu_wrap(cpu, cpus, target) {
 		unsigned long cpu_cap = capacity_of(cpu);
 
 		if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu))
 			continue;
-		if (task_fits_capacity(p, cpu_cap))
+		if (fits_capacity(task_util, cpu_cap))
 			return cpu;
 
 		if (cpu_cap > best_cap) {
@@ -6198,44 +6199,42 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
 	return best_cpu;
 }
 
+static inline bool asym_fits_capacity(int task_util, int cpu)
+{
+	if (static_branch_unlikely(&sched_asym_cpucapacity))
+		return fits_capacity(task_util, capacity_of(cpu));
+
+	return true;
+}
+
 /*
  * Try and locate an idle core/thread in the LLC cache domain.
  */
 static int select_idle_sibling(struct task_struct *p, int prev, int target)
 {
 	struct sched_domain *sd;
+	unsigned long task_util;
 	int i, recent_used_cpu;
 
 	/*
-	 * For asymmetric CPU capacity systems, our domain of interest is
-	 * sd_asym_cpucapacity rather than sd_llc.
+	 * On asymmetric system, update task utilization because we will check
+	 * that the task fits with cpu's capacity.
 	 */
 	if (static_branch_unlikely(&sched_asym_cpucapacity)) {
-		sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
-		/*
-		 * On an asymmetric CPU capacity system where an exclusive
-		 * cpuset defines a symmetric island (i.e. one unique
-		 * capacity_orig value through the cpuset), the key will be set
-		 * but the CPUs within that cpuset will not have a domain with
-		 * SD_ASYM_CPUCAPACITY. These should follow the usual symmetric
-		 * capacity path.
-		 */
-		if (!sd)
-			goto symmetric;
-
-		i = select_idle_capacity(p, sd, target);
-		return ((unsigned)i < nr_cpumask_bits) ? i : target;
+		sync_entity_load_avg(&p->se);
+		task_util = uclamp_task_util(p);
 	}
 
-symmetric:
-	if (available_idle_cpu(target) || sched_idle_cpu(target))
+	if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
+	    asym_fits_capacity(task_util, target))
 		return target;
 
 	/*
 	 * If the previous CPU is cache affine and idle, don't be stupid:
 	 */
 	if (prev != target && cpus_share_cache(prev, target) &&
-	    (available_idle_cpu(prev) || sched_idle_cpu(prev)))
+	    (available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
+	    asym_fits_capacity(task_util, prev))
 		return prev;
 
 	/*
@@ -6258,7 +6257,8 @@ symmetric:
 	    recent_used_cpu != target &&
 	    cpus_share_cache(recent_used_cpu, target) &&
 	    (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
-	    cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) {
+	    cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) &&
+	    asym_fits_capacity(task_util, recent_used_cpu)) {
 		/*
 		 * Replace recent_used_cpu with prev as it is a potential
 		 * candidate for the next wake:
@@ -6267,6 +6267,26 @@ symmetric:
 		return recent_used_cpu;
 	}
 
+	/*
+	 * For asymmetric CPU capacity systems, our domain of interest is
+	 * sd_asym_cpucapacity rather than sd_llc.
+	 */
+	if (static_branch_unlikely(&sched_asym_cpucapacity)) {
+		sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
+		/*
+		 * On an asymmetric CPU capacity system where an exclusive
+		 * cpuset defines a symmetric island (i.e. one unique
+		 * capacity_orig value through the cpuset), the key will be set
+		 * but the CPUs within that cpuset will not have a domain with
+		 * SD_ASYM_CPUCAPACITY. These should follow the usual symmetric
+		 * capacity path.
+		 */
+		if (sd) {
+			i = select_idle_capacity(p, sd, target);
+			return ((unsigned)i < nr_cpumask_bits) ? i : target;
+		}
+	}
+
 	sd = rcu_dereference(per_cpu(sd_llc, target));
 	if (!sd)
 		return target;
@@ -9031,7 +9051,8 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 	 * emptying busiest.
 	 */
 	if (local->group_type == group_has_spare) {
-		if (busiest->group_type > group_fully_busy) {
+		if ((busiest->group_type > group_fully_busy) &&
+		    !(env->sd->flags & SD_SHARE_PKG_RESOURCES)) {
 			/*
 			 * If busiest is overloaded, try to fill spare
 			 * capacity. This might end up creating spare capacity
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 24d0ee26377d..c6932b8f4467 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -78,7 +78,7 @@ void __weak arch_cpu_idle_dead(void) { }
 void __weak arch_cpu_idle(void)
 {
 	cpu_idle_force_poll = 1;
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /**
@@ -94,9 +94,35 @@ void __cpuidle default_idle_call(void)
 
 		trace_cpu_idle(1, smp_processor_id());
 		stop_critical_timings();
+
+		/*
+		 * arch_cpu_idle() is supposed to enable IRQs, however
+		 * we can't do that because of RCU and tracing.
+		 *
+		 * Trace IRQs enable here, then switch off RCU, and have
+		 * arch_cpu_idle() use raw_local_irq_enable(). Note that
+		 * rcu_idle_enter() relies on lockdep IRQ state, so switch that
+		 * last -- this is very similar to the entry code.
+		 */
+		trace_hardirqs_on_prepare();
+		lockdep_hardirqs_on_prepare(_THIS_IP_);
 		rcu_idle_enter();
+		lockdep_hardirqs_on(_THIS_IP_);
+
 		arch_cpu_idle();
+
+		/*
+		 * OK, so IRQs are enabled here, but RCU needs them disabled to
+		 * turn itself back on.. funny thing is that disabling IRQs
+		 * will cause tracing, which needs RCU. Jump through hoops to
+		 * make it 'work'.
+		 */
+		raw_local_irq_disable();
+		lockdep_hardirqs_off(_THIS_IP_);
 		rcu_idle_exit();
+		lockdep_hardirqs_on(_THIS_IP_);
+		raw_local_irq_enable();
+
 		start_critical_timings();
 		trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
 	}
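A note on the deadline.c portion of the diff above: the per-entity dl_boosted flag is replaced by a pi_se pointer that normally points back at the entity itself and is redirected at the PI donor's parameters while priority inheritance is active, so "boosted" simply means "the pointer no longer points at myself". The standalone C program below is a minimal sketch of that self-pointing indirection pattern in isolation; the struct and function names are illustrative stand-ins chosen here, not the kernel's own types, and the sketch is not kernel code.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for a deadline entity: 'pi' normally points at itself. */
struct dl_entity {
	unsigned long long runtime;
	unsigned long long deadline;
	struct dl_entity *pi;		/* donor of parameters while boosted */
};

static struct dl_entity *pi_of(struct dl_entity *e)
{
	return e->pi;			/* same idea as pi_of() in the patch */
}

static bool is_boosted(struct dl_entity *e)
{
	/* Boosted iff the pointer has been redirected away from the entity itself. */
	return pi_of(e) != e;
}

static void init_entity(struct dl_entity *e,
			unsigned long long runtime, unsigned long long deadline)
{
	e->runtime = runtime;
	e->deadline = deadline;
	e->pi = e;			/* analogous to dl_se->pi_se = dl_se in __dl_clear_params() */
}

int main(void)
{
	struct dl_entity waiter, owner;

	init_entity(&waiter, 5, 100);
	init_entity(&owner, 10, 1000);

	/* Boost: inherit the waiter's parameters, in the spirit of
	 * p->dl.pi_se = pi_task->dl.pi_se in rt_mutex_setprio(). */
	owner.pi = waiter.pi;
	printf("boosted=%d, replenish with deadline=%llu runtime=%llu\n",
	       is_boosted(&owner), pi_of(&owner)->deadline, pi_of(&owner)->runtime);

	/* Deboost: point back at ourselves again. */
	owner.pi = &owner;
	printf("boosted=%d\n", is_boosted(&owner));
	return 0;
}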
