diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/Kconfig.kexec | 1 | ||||
| -rw-r--r-- | kernel/cgroup/cpuset.c | 11 | ||||
| -rw-r--r-- | kernel/cgroup/rstat.c | 3 | ||||
| -rw-r--r-- | kernel/events/core.c | 6 | ||||
| -rw-r--r-- | kernel/futex/futex.h | 6 | ||||
| -rw-r--r-- | kernel/kexec_handover.c | 29 | ||||
| -rw-r--r-- | kernel/kthread.c | 1 | ||||
| -rw-r--r-- | kernel/locking/ww_mutex.h | 6 | ||||
| -rw-r--r-- | kernel/params.c | 7 | ||||
| -rw-r--r-- | kernel/rcu/tree.c | 2 | ||||
| -rw-r--r-- | kernel/rcu/tree.h | 1 | ||||
| -rw-r--r-- | kernel/rcu/tree_plugin.h | 8 | ||||
| -rw-r--r-- | kernel/sched/ext.c | 4 | ||||
| -rw-r--r-- | kernel/signal.c | 6 | ||||
| -rw-r--r-- | kernel/trace/fgraph.c | 1 | ||||
| -rw-r--r-- | kernel/trace/ftrace.c | 19 | ||||
| -rw-r--r-- | kernel/trace/ring_buffer.c | 2 | ||||
| -rw-r--r-- | kernel/trace/trace.c | 22 | ||||
| -rw-r--r-- | kernel/trace/trace.h | 10 | ||||
| -rw-r--r-- | kernel/trace/trace_functions_graph.c | 22 | 
20 files changed, 121 insertions, 46 deletions
| diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index 2ee603a98813..1224dd937df0 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -97,6 +97,7 @@ config KEXEC_JUMP  config KEXEC_HANDOVER  	bool "kexec handover"  	depends on ARCH_SUPPORTS_KEXEC_HANDOVER && ARCH_SUPPORTS_KEXEC_FILE +	depends on !DEFERRED_STRUCT_PAGE_INIT  	select MEMBLOCK_KHO_SCRATCH  	select KEXEC_FILE  	select DEBUG_FS diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index f74d04429a29..27adb04df675 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -280,7 +280,7 @@ static inline void check_insane_mems_config(nodemask_t *nodes)  {  	if (!cpusets_insane_config() &&  		movable_only_nodes(nodes)) { -		static_branch_enable(&cpusets_insane_config_key); +		static_branch_enable_cpuslocked(&cpusets_insane_config_key);  		pr_info("Unsupported (movable nodes only) cpuset configuration detected (nmask=%*pbl)!\n"  			"Cpuset allocations might fail even with a lot of memory available.\n",  			nodemask_pr_args(nodes)); @@ -1843,7 +1843,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,  			if (is_partition_valid(cs))  				adding = cpumask_and(tmp->addmask,  						xcpus, parent->effective_xcpus); -		} else if (is_partition_invalid(cs) && +		} else if (is_partition_invalid(cs) && !cpumask_empty(xcpus) &&  			   cpumask_subset(xcpus, parent->effective_xcpus)) {  			struct cgroup_subsys_state *css;  			struct cpuset *child; @@ -3358,14 +3358,12 @@ static ssize_t cpuset_partition_write(struct kernfs_open_file *of, char *buf,  	else  		return -EINVAL; -	css_get(&cs->css);  	cpus_read_lock();  	mutex_lock(&cpuset_mutex);  	if (is_cpuset_online(cs))  		retval = update_prstate(cs, val);  	mutex_unlock(&cpuset_mutex);  	cpus_read_unlock(); -	css_put(&cs->css);  	return retval ?: nbytes;  } @@ -3870,9 +3868,10 @@ retry:  		partcmd = partcmd_invalidate;  	/*  	 * On the other hand, an invalid partition root may be transitioned -	 * back to a regular one. +	 * back to a regular one with a non-empty effective xcpus.  	 */ -	else if (is_partition_valid(parent) && is_partition_invalid(cs)) +	else if (is_partition_valid(parent) && is_partition_invalid(cs) && +		 !cpumask_empty(cs->effective_xcpus))  		partcmd = partcmd_update;  	if (partcmd >= 0) { diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 981e2f77ad4e..a198e40c799b 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -479,6 +479,9 @@ void css_rstat_exit(struct cgroup_subsys_state *css)  	if (!css_uses_rstat(css))  		return; +	if (!css->rstat_cpu) +		return; +  	css_rstat_flush(css);  	/* sanity check */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 8060c2857bb2..872122e074e5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2665,6 +2665,9 @@ static void perf_log_itrace_start(struct perf_event *event);  static void perf_event_unthrottle(struct perf_event *event, bool start)  { +	if (event->state != PERF_EVENT_STATE_ACTIVE) +		return; +  	event->hw.interrupts = 0;  	if (start)  		event->pmu->start(event, 0); @@ -2674,6 +2677,9 @@ static void perf_event_unthrottle(struct perf_event *event, bool start)  static void perf_event_throttle(struct perf_event *event)  { +	if (event->state != PERF_EVENT_STATE_ACTIVE) +		return; +  	event->hw.interrupts = MAX_INTERRUPTS;  	event->pmu->stop(event, 0);  	if (event == event->group_leader) diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index c74eac572acd..2cd57096c38e 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -319,13 +319,13 @@ static __always_inline int futex_put_value(u32 val, u32 __user *to)  {  	if (can_do_masked_user_access())  		to = masked_user_access_begin(to); -	else if (!user_read_access_begin(to, sizeof(*to))) +	else if (!user_write_access_begin(to, sizeof(*to)))  		return -EFAULT;  	unsafe_put_user(val, to, Efault); -	user_read_access_end(); +	user_write_access_end();  	return 0;  Efault: -	user_read_access_end(); +	user_write_access_end();  	return -EFAULT;  } diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c index e49743ae52c5..ecd1ac210dbd 100644 --- a/kernel/kexec_handover.c +++ b/kernel/kexec_handover.c @@ -144,14 +144,34 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn,  				unsigned int order)  {  	struct kho_mem_phys_bits *bits; -	struct kho_mem_phys *physxa; +	struct kho_mem_phys *physxa, *new_physxa;  	const unsigned long pfn_high = pfn >> order;  	might_sleep(); -	physxa = xa_load_or_alloc(&track->orders, order, sizeof(*physxa)); -	if (IS_ERR(physxa)) -		return PTR_ERR(physxa); +	physxa = xa_load(&track->orders, order); +	if (!physxa) { +		int err; + +		new_physxa = kzalloc(sizeof(*physxa), GFP_KERNEL); +		if (!new_physxa) +			return -ENOMEM; + +		xa_init(&new_physxa->phys_bits); +		physxa = xa_cmpxchg(&track->orders, order, NULL, new_physxa, +				    GFP_KERNEL); + +		err = xa_err(physxa); +		if (err || physxa) { +			xa_destroy(&new_physxa->phys_bits); +			kfree(new_physxa); + +			if (err) +				return err; +		} else { +			physxa = new_physxa; +		} +	}  	bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS,  				sizeof(*bits)); @@ -544,6 +564,7 @@ err_free_scratch_areas:  err_free_scratch_desc:  	memblock_free(kho_scratch, kho_scratch_cnt * sizeof(*kho_scratch));  err_disable_kho: +	pr_warn("Failed to reserve scratch area, disabling kexec handover\n");  	kho_enable = false;  } diff --git a/kernel/kthread.c b/kernel/kthread.c index 0e98b228a8ef..31b072e8d427 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -893,6 +893,7 @@ out:  	return ret;  } +EXPORT_SYMBOL_GPL(kthread_affine_preferred);  /*   * Re-affine kthreads according to their preferences diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h index 086fd5487ca7..31a785afee6c 100644 --- a/kernel/locking/ww_mutex.h +++ b/kernel/locking/ww_mutex.h @@ -342,8 +342,12 @@ static bool __ww_mutex_wound(struct MUTEX *lock,  			 * When waking up the task to wound, be sure to clear the  			 * blocked_on pointer. Otherwise we can see circular  			 * blocked_on relationships that can't resolve. +			 * +			 * NOTE: We pass NULL here instead of lock, because we +			 * are waking the mutex owner, who may be currently +			 * blocked on a different mutex.  			 */ -			__clear_task_blocked_on(owner, lock); +			__clear_task_blocked_on(owner, NULL);  			wake_q_add(wake_q, owner);  		}  		return true; diff --git a/kernel/params.c b/kernel/params.c index b92d64161b75..b96cfd693c99 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -513,13 +513,14 @@ EXPORT_SYMBOL(param_array_ops);  int param_set_copystring(const char *val, const struct kernel_param *kp)  {  	const struct kparam_string *kps = kp->str; +	const size_t len = strnlen(val, kps->maxlen); -	if (strnlen(val, kps->maxlen) == kps->maxlen) { +	if (len == kps->maxlen) {  		pr_err("%s: string doesn't fit in %u chars.\n",  		       kp->name, kps->maxlen-1);  		return -ENOSPC;  	} -	strcpy(kps->string, val); +	memcpy(kps->string, val, len + 1);  	return 0;  }  EXPORT_SYMBOL(param_set_copystring); @@ -841,7 +842,7 @@ static void __init param_sysfs_builtin(void)  		dot = strchr(kp->name, '.');  		if (!dot) {  			/* This happens for core_param() */ -			strcpy(modname, "kernel"); +			strscpy(modname, "kernel");  			name_len = 0;  		} else {  			name_len = dot - kp->name + 1; diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 174ee243b349..8eff357b0436 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4262,6 +4262,8 @@ int rcutree_prepare_cpu(unsigned int cpu)  	rdp->rcu_iw_gp_seq = rdp->gp_seq - 1;  	trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl"));  	raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + +	rcu_preempt_deferred_qs_init(rdp);  	rcu_spawn_rnp_kthreads(rnp);  	rcu_spawn_cpu_nocb_kthread(cpu);  	ASSERT_EXCLUSIVE_WRITER(rcu_state.n_online_cpus); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index de6ca13a7b5f..b8bbe7960cda 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -488,6 +488,7 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp);  static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);  static void rcu_flavor_sched_clock_irq(int user);  static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck); +static void rcu_preempt_deferred_qs_init(struct rcu_data *rdp);  static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);  static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);  static bool rcu_is_callbacks_kthread(struct rcu_data *rdp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index fc14adf15cbb..4cd170b2d655 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -763,8 +763,6 @@ static void rcu_read_unlock_special(struct task_struct *t)  			    cpu_online(rdp->cpu)) {  				// Get scheduler to re-evaluate and call hooks.  				// If !IRQ_WORK, FQS scan will eventually IPI. -				rdp->defer_qs_iw = -					IRQ_WORK_INIT_HARD(rcu_preempt_deferred_qs_handler);  				rdp->defer_qs_iw_pending = DEFER_QS_PENDING;  				irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);  			} @@ -904,6 +902,10 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)  	}  } +static void rcu_preempt_deferred_qs_init(struct rcu_data *rdp) +{ +	rdp->defer_qs_iw = IRQ_WORK_INIT_HARD(rcu_preempt_deferred_qs_handler); +}  #else /* #ifdef CONFIG_PREEMPT_RCU */  /* @@ -1103,6 +1105,8 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)  	WARN_ON_ONCE(!list_empty(&rnp->blkd_tasks));  } +static void rcu_preempt_deferred_qs_init(struct rcu_data *rdp) { } +  #endif /* #else #ifdef CONFIG_PREEMPT_RCU */  /* diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 7dedc9a16281..4ae32ef179dd 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5749,6 +5749,9 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)  			__setscheduler_class(p->policy, p->prio);  		struct sched_enq_and_set_ctx ctx; +		if (!tryget_task_struct(p)) +			continue; +  		if (old_class != new_class && p->se.sched_delayed)  			dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED); @@ -5761,6 +5764,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)  		sched_enq_and_set_task(&ctx);  		check_class_changed(task_rq(p), p, old_class, p->prio); +		put_task_struct(p);  	}  	scx_task_iter_stop(&sti);  	percpu_up_write(&scx_fork_rwsem); diff --git a/kernel/signal.c b/kernel/signal.c index e2c928de7d2c..fe9190d84f28 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -4067,6 +4067,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,  {  	struct pid *pid;  	enum pid_type type; +	int ret;  	/* Enforce flags be set to 0 until we add an extension. */  	if (flags & ~PIDFD_SEND_SIGNAL_FLAGS) @@ -4108,7 +4109,10 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,  	}  	} -	return do_pidfd_send_signal(pid, sig, type, info, flags); +	ret = do_pidfd_send_signal(pid, sig, type, info, flags); +	put_pid(pid); + +	return ret;  }  static int diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index f4d200f0c610..2a42c1036ea8 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -1397,6 +1397,7 @@ error:  		ftrace_graph_active--;  		gops->saved_func = NULL;  		fgraph_lru_release_index(i); +		unregister_pm_notifier(&ftrace_suspend_notifier);  	}  	return ret;  } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 00b76d450a89..a69067367c29 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4661,13 +4661,17 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,  	        } else {  			iter->hash = alloc_and_copy_ftrace_hash(size_bits, hash);  		} +	} else { +		if (hash) +			iter->hash = alloc_and_copy_ftrace_hash(hash->size_bits, hash); +		else +			iter->hash = EMPTY_HASH; +	} -		if (!iter->hash) { -			trace_parser_put(&iter->parser); -			goto out_unlock; -		} -	} else -		iter->hash = hash; +	if (!iter->hash) { +		trace_parser_put(&iter->parser); +		goto out_unlock; +	}  	ret = 0; @@ -6543,9 +6547,6 @@ int ftrace_regex_release(struct inode *inode, struct file *file)  		ftrace_hash_move_and_update_ops(iter->ops, orig_hash,  						      iter->hash, filter_hash);  		mutex_unlock(&ftrace_lock); -	} else { -		/* For read only, the hash is the ops hash */ -		iter->hash = NULL;  	}  	mutex_unlock(&iter->ops->func_hash->regex_lock); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index bb71a0dc9d69..43460949ad3f 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -7666,7 +7666,7 @@ static __init int test_ringbuffer(void)  	rb_test_started = true;  	set_current_state(TASK_INTERRUPTIBLE); -	/* Just run for 10 seconds */; +	/* Just run for 10 seconds */  	schedule_timeout(10 * HZ);  	kthread_stop(rb_hammer); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4283ed4e8f59..1b7db732c0b1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1816,7 +1816,7 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf,  	ret = get_user(ch, ubuf++);  	if (ret) -		return ret; +		goto fail;  	read++;  	cnt--; @@ -1830,7 +1830,7 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf,  		while (cnt && isspace(ch)) {  			ret = get_user(ch, ubuf++);  			if (ret) -				return ret; +				goto fail;  			read++;  			cnt--;  		} @@ -1848,12 +1848,14 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf,  	while (cnt && !isspace(ch) && ch) {  		if (parser->idx < parser->size - 1)  			parser->buffer[parser->idx++] = ch; -		else -			return -EINVAL; +		else { +			ret = -EINVAL; +			goto fail; +		}  		ret = get_user(ch, ubuf++);  		if (ret) -			return ret; +			goto fail;  		read++;  		cnt--;  	} @@ -1868,11 +1870,15 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf,  		/* Make sure the parsed string always terminates with '\0'. */  		parser->buffer[parser->idx] = 0;  	} else { -		return -EINVAL; +		ret = -EINVAL; +		goto fail;  	}  	*ppos += read;  	return read; +fail: +	trace_parser_fail(parser); +	return ret;  }  /* TODO add a seq_buf_to_buffer() */ @@ -10632,10 +10638,10 @@ static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_m  			ret = print_trace_line(&iter);  			if (ret != TRACE_TYPE_NO_CONSUME)  				trace_consume(&iter); + +			trace_printk_seq(&iter.seq);  		}  		touch_nmi_watchdog(); - -		trace_printk_seq(&iter.seq);  	}  	if (!cnt) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 1dbf1d3cf2f1..5f4bed5842f9 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1292,6 +1292,7 @@ bool ftrace_event_is_function(struct trace_event_call *call);   */  struct trace_parser {  	bool		cont; +	bool		fail;  	char		*buffer;  	unsigned	idx;  	unsigned	size; @@ -1299,7 +1300,7 @@ struct trace_parser {  static inline bool trace_parser_loaded(struct trace_parser *parser)  { -	return (parser->idx != 0); +	return !parser->fail && parser->idx != 0;  }  static inline bool trace_parser_cont(struct trace_parser *parser) @@ -1313,6 +1314,11 @@ static inline void trace_parser_clear(struct trace_parser *parser)  	parser->idx = 0;  } +static inline void trace_parser_fail(struct trace_parser *parser) +{ +	parser->fail = true; +} +  extern int trace_parser_get_init(struct trace_parser *parser, int size);  extern void trace_parser_put(struct trace_parser *parser);  extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, @@ -2204,7 +2210,7 @@ static inline bool is_good_system_name(const char *name)  static inline void sanitize_event_name(char *name)  {  	while (*name++ != '\0') -		if (*name == ':' || *name == '.') +		if (*name == ':' || *name == '.' || *name == '*')  			*name = '_';  } diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 66e1a527cf1a..a7f4b9a47a71 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -27,14 +27,21 @@ struct fgraph_cpu_data {  	unsigned long	enter_funcs[FTRACE_RETFUNC_DEPTH];  }; +struct fgraph_ent_args { +	struct ftrace_graph_ent_entry	ent; +	/* Force the sizeof of args[] to have FTRACE_REGS_MAX_ARGS entries */ +	unsigned long			args[FTRACE_REGS_MAX_ARGS]; +}; +  struct fgraph_data {  	struct fgraph_cpu_data __percpu *cpu_data;  	/* Place to preserve last processed entry. */  	union { -		struct ftrace_graph_ent_entry	ent; +		struct fgraph_ent_args		ent; +		/* TODO allow retaddr to have args */  		struct fgraph_retaddr_ent_entry	rent; -	} ent; +	};  	struct ftrace_graph_ret_entry	ret;  	int				failed;  	int				cpu; @@ -627,10 +634,13 @@ get_return_for_leaf(struct trace_iterator *iter,  			 * Save current and next entries for later reference  			 * if the output fails.  			 */ -			if (unlikely(curr->ent.type == TRACE_GRAPH_RETADDR_ENT)) -				data->ent.rent = *(struct fgraph_retaddr_ent_entry *)curr; -			else -				data->ent.ent = *curr; +			if (unlikely(curr->ent.type == TRACE_GRAPH_RETADDR_ENT)) { +				data->rent = *(struct fgraph_retaddr_ent_entry *)curr; +			} else { +				int size = min((int)sizeof(data->ent), (int)iter->ent_size); + +				memcpy(&data->ent, curr, size); +			}  			/*  			 * If the next event is not a return type, then  			 * we only care about what type it is. Otherwise we can | 
