summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/backtracetest.c1
-rw-r--r--kernel/bpf/memalloc.c9
-rw-r--r--kernel/bpf/syscall.c10
-rw-r--r--kernel/bpf/verifier.c4
-rw-r--r--kernel/crash_reserve.c1
-rw-r--r--kernel/debug/kdb/kdb_bt.c2
-rw-r--r--kernel/debug/kdb/kdb_io.c6
-rw-r--r--kernel/debug/kdb/kdb_main.c18
-rw-r--r--kernel/debug/kdb/kdb_private.h2
-rw-r--r--kernel/delayacct.c2
-rw-r--r--kernel/dma/mapping.c2
-rw-r--r--kernel/events/callchain.c2
-rw-r--r--kernel/events/core.c63
-rw-r--r--kernel/events/uprobes.c2
-rw-r--r--kernel/exit.c81
-rw-r--r--kernel/fork.c27
-rw-r--r--kernel/hung_task.c4
-rw-r--r--kernel/irq/debugfs.c10
-rw-r--r--kernel/irq/devres.c41
-rw-r--r--kernel/irq/generic-chip.c111
-rw-r--r--kernel/irq/internals.h10
-rw-r--r--kernel/irq/irqdomain.c290
-rw-r--r--kernel/irq/msi.c95
-rw-r--r--kernel/irq/proc.c7
-rw-r--r--kernel/kallsyms.c5
-rw-r--r--kernel/kallsyms_internal.h6
-rw-r--r--kernel/kexec_core.c2
-rw-r--r--kernel/kprobes.c2
-rw-r--r--kernel/ksysfs.c7
-rw-r--r--kernel/latencytop.c2
-rw-r--r--kernel/livepatch/core.c17
-rw-r--r--kernel/panic.c116
-rw-r--r--kernel/pid_namespace.c2
-rw-r--r--kernel/pid_sysctl.h2
-rw-r--r--kernel/power/swap.c5
-rw-r--r--kernel/printk/internal.h2
-rw-r--r--kernel/printk/printk.c8
-rw-r--r--kernel/printk/sysctl.c2
-rw-r--r--kernel/profile.c231
-rw-r--r--kernel/resource_kunit.c1
-rw-r--r--kernel/sched/core.c6
-rw-r--r--kernel/sched/rt.c8
-rw-r--r--kernel/sched/topology.c2
-rw-r--r--kernel/seccomp.c2
-rw-r--r--kernel/stackleak.c2
-rw-r--r--kernel/sysctl.c64
-rw-r--r--kernel/task_work.c6
-rw-r--r--kernel/time/timer.c2
-rw-r--r--kernel/time/timer_migration.c393
-rw-r--r--kernel/time/timer_migration.h27
-rw-r--r--kernel/trace/ftrace.c3
-rw-r--r--kernel/trace/preemptirq_delay_test.c2
-rw-r--r--kernel/trace/trace.c2
-rw-r--r--kernel/trace/trace_events_user.c2
-rw-r--r--kernel/trace/trace_stack.c2
-rw-r--r--kernel/tsacct.c2
-rw-r--r--kernel/umh.c2
-rw-r--r--kernel/utsname_sysctl.c2
-rw-r--r--kernel/vmcore_info.c4
-rw-r--r--kernel/watchdog.c12
-rw-r--r--kernel/watchdog_perf.c11
61 files changed, 882 insertions, 884 deletions
diff --git a/kernel/backtracetest.c b/kernel/backtracetest.c
index a4181234232b..2dfe66b9ed76 100644
--- a/kernel/backtracetest.c
+++ b/kernel/backtracetest.c
@@ -74,5 +74,6 @@ static void exitf(void)
module_init(backtrace_regression_test);
module_exit(exitf);
+MODULE_DESCRIPTION("Simple stack backtrace regression test module");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index a546aba46d5d..dec892ded031 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -155,12 +155,9 @@ static void *__alloc(struct bpf_mem_cache *c, int node, gfp_t flags)
static struct mem_cgroup *get_memcg(const struct bpf_mem_cache *c)
{
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
if (c->objcg)
return get_mem_cgroup_from_objcg(c->objcg);
-#endif
-
-#ifdef CONFIG_MEMCG
return root_mem_cgroup;
#else
return NULL;
@@ -534,7 +531,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
size += LLIST_NODE_SZ; /* room for llist_node */
unit_size = size;
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
if (memcg_bpf_enabled())
objcg = get_obj_cgroup_from_current();
#endif
@@ -556,7 +553,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL);
if (!pcc)
return -ENOMEM;
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
objcg = get_obj_cgroup_from_current();
#endif
ma->objcg = objcg;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 869265852d51..bf6c5f685ea2 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -385,7 +385,7 @@ void bpf_map_free_id(struct bpf_map *map)
spin_unlock_irqrestore(&map_idr_lock, flags);
}
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
static void bpf_map_save_memcg(struct bpf_map *map)
{
/* Currently if a map is created by a process belonging to the root
@@ -486,7 +486,7 @@ int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid,
unsigned long i, j;
struct page *pg;
int ret = 0;
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
struct mem_cgroup *memcg, *old_memcg;
memcg = bpf_map_get_memcg(map);
@@ -505,7 +505,7 @@ int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid,
break;
}
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
set_active_memcg(old_memcg);
mem_cgroup_put(memcg);
#endif
@@ -5983,7 +5983,7 @@ const struct bpf_prog_ops bpf_syscall_prog_ops = {
};
#ifdef CONFIG_SYSCTL
-static int bpf_stats_handler(struct ctl_table *table, int write,
+static int bpf_stats_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct static_key *key = (struct static_key *)table->data;
@@ -6018,7 +6018,7 @@ void __weak unpriv_ebpf_notify(int new_state)
{
}
-static int bpf_unpriv_handler(struct ctl_table *table, int write,
+static int bpf_unpriv_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret, unpriv_enable = *(int *)table->data;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8da132a1ef28..4cb5441ad75f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -21132,8 +21132,12 @@ BTF_SET_START(btf_non_sleepable_error_inject)
* Assume non-sleepable from bpf safety point of view.
*/
BTF_ID(func, __filemap_add_folio)
+#ifdef CONFIG_FAIL_PAGE_ALLOC
BTF_ID(func, should_fail_alloc_page)
+#endif
+#ifdef CONFIG_FAILSLAB
BTF_ID(func, should_failslab)
+#endif
BTF_SET_END(btf_non_sleepable_error_inject)
static int check_non_sleepable_error_inject(u32 btf_id)
diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c
index 5b2722a93a48..d3b4cd12bdd1 100644
--- a/kernel/crash_reserve.c
+++ b/kernel/crash_reserve.c
@@ -13,7 +13,6 @@
#include <linux/memory.h>
#include <linux/cpuhotplug.h>
#include <linux/memblock.h>
-#include <linux/kexec.h>
#include <linux/kmemleak.h>
#include <asm/page.h>
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
index 10b454554ab0..137ba73f56fc 100644
--- a/kernel/debug/kdb/kdb_bt.c
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -144,7 +144,7 @@ kdb_bt(int argc, const char **argv)
kdb_ps_suppressed();
/* Run the active tasks first */
for_each_online_cpu(cpu) {
- p = kdb_curr_task(cpu);
+ p = curr_task(cpu);
if (kdb_bt1(p, mask, btaprompt))
return 0;
}
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 3131334d7a81..6a77f1c779c4 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -206,7 +206,7 @@ char kdb_getchar(void)
*/
static void kdb_position_cursor(char *prompt, char *buffer, char *cp)
{
- kdb_printf("\r%s", kdb_prompt_str);
+ kdb_printf("\r%s", prompt);
if (cp > buffer)
kdb_printf("%.*s", (int)(cp - buffer), buffer);
}
@@ -362,7 +362,7 @@ poll_again:
if (i >= dtab_count)
kdb_printf("...");
kdb_printf("\n");
- kdb_printf(kdb_prompt_str);
+ kdb_printf("%s", kdb_prompt_str);
kdb_printf("%s", buffer);
if (cp != lastchar)
kdb_position_cursor(kdb_prompt_str, buffer, cp);
@@ -453,7 +453,7 @@ char *kdb_getstr(char *buffer, size_t bufsize, const char *prompt)
{
if (prompt && kdb_prompt_str != prompt)
strscpy(kdb_prompt_str, prompt, CMD_BUFLEN);
- kdb_printf(kdb_prompt_str);
+ kdb_printf("%s", kdb_prompt_str);
kdb_nextline = 1; /* Prompt and input resets line number */
return kdb_read(buffer, bufsize);
}
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 664bae55f2c9..f5f7d7fb5936 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -155,16 +155,6 @@ static char *__env[31] = {
static const int __nenv = ARRAY_SIZE(__env);
-struct task_struct *kdb_curr_task(int cpu)
-{
- struct task_struct *p = curr_task(cpu);
-#ifdef _TIF_MCA_INIT
- if ((task_thread_info(p)->flags & _TIF_MCA_INIT) && KDB_TSK(cpu))
- p = krp->p;
-#endif
- return p;
-}
-
/*
* Update the permissions flags (kdb_cmd_enabled) to match the
* current lockdown state.
@@ -1228,7 +1218,7 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
char *cmdbuf;
int diag;
struct task_struct *kdb_current =
- kdb_curr_task(raw_smp_processor_id());
+ curr_task(raw_smp_processor_id());
KDB_DEBUG_STATE("kdb_local 1", reason);
@@ -2278,7 +2268,7 @@ void kdb_ps_suppressed(void)
unsigned long cpu;
const struct task_struct *p, *g;
for_each_online_cpu(cpu) {
- p = kdb_curr_task(cpu);
+ p = curr_task(cpu);
if (kdb_task_state(p, "-"))
++idle;
}
@@ -2314,7 +2304,7 @@ void kdb_ps1(const struct task_struct *p)
kdb_task_has_cpu(p), kdb_process_cpu(p),
kdb_task_state_char(p),
(void *)(&p->thread),
- p == kdb_curr_task(raw_smp_processor_id()) ? '*' : ' ',
+ p == curr_task(raw_smp_processor_id()) ? '*' : ' ',
p->comm);
if (kdb_task_has_cpu(p)) {
if (!KDB_TSK(cpu)) {
@@ -2350,7 +2340,7 @@ static int kdb_ps(int argc, const char **argv)
for_each_online_cpu(cpu) {
if (KDB_FLAG(CMD_INTERRUPT))
return 0;
- p = kdb_curr_task(cpu);
+ p = curr_task(cpu);
if (kdb_task_state(p, mask))
kdb_ps1(p);
}
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index 548fd4059bf9..d2520d72b1f5 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -210,8 +210,6 @@ extern void kdb_gdb_state_pass(char *buf);
#define KDB_TSK(cpu) kgdb_info[cpu].task
#define KDB_TSKREGS(cpu) kgdb_info[cpu].debuggerinfo
-extern struct task_struct *kdb_curr_task(int);
-
#define kdb_task_has_cpu(p) (task_curr(p))
#define GFP_KDB (in_dbg_master() ? GFP_ATOMIC : GFP_KERNEL)
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index e039b0f99a0b..dead51de8eb5 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -44,7 +44,7 @@ void delayacct_init(void)
}
#ifdef CONFIG_PROC_SYSCTL
-static int sysctl_delayacct(struct ctl_table *table, int write, void *buffer,
+static int sysctl_delayacct(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
int state = delayacct_on;
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 81de84318ccc..b1c18058d55f 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -67,8 +67,8 @@ void dmam_free_coherent(struct device *dev, size_t size, void *vaddr,
{
struct dma_devres match_data = { size, vaddr, dma_handle };
- dma_free_coherent(dev, size, vaddr, dma_handle);
WARN_ON(devres_destroy(dev, dmam_release, dmam_match, &match_data));
+ dma_free_coherent(dev, size, vaddr, dma_handle);
}
EXPORT_SYMBOL(dmam_free_coherent);
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 8d57255e5b29..8a47e52a454f 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -270,7 +270,7 @@ exit_put:
* Used for sysctl_perf_event_max_stack and
* sysctl_perf_event_max_contexts_per_stack.
*/
-int perf_event_max_stack_handler(struct ctl_table *table, int write,
+int perf_event_max_stack_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int *value = table->data;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ab6c4c942f79..aa3450bdc227 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -450,7 +450,7 @@ static void update_perf_cpu_limits(void)
static bool perf_rotate_context(struct perf_cpu_pmu_context *cpc);
-int perf_event_max_sample_rate_handler(struct ctl_table *table, int write,
+int perf_event_max_sample_rate_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
@@ -474,7 +474,7 @@ int perf_event_max_sample_rate_handler(struct ctl_table *table, int write,
int sysctl_perf_cpu_time_max_percent __read_mostly = DEFAULT_CPU_TIME_MAX_PERCENT;
-int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
+int perf_cpu_time_max_percent_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
@@ -534,7 +534,7 @@ void perf_sample_event_took(u64 sample_len_ns)
__this_cpu_write(running_sample_length, running_len);
/*
- * Note: this will be biased artifically low until we have
+ * Note: this will be biased artificially low until we have
* seen NR_ACCUMULATED_SAMPLES. Doing it this way keeps us
* from having to maintain a count.
*/
@@ -596,10 +596,10 @@ static inline u64 perf_event_clock(struct perf_event *event)
*
* Event groups make things a little more complicated, but not terribly so. The
* rules for a group are that if the group leader is OFF the entire group is
- * OFF, irrespecive of what the group member states are. This results in
+ * OFF, irrespective of what the group member states are. This results in
* __perf_effective_state().
*
- * A futher ramification is that when a group leader flips between OFF and
+ * A further ramification is that when a group leader flips between OFF and
* !OFF, we need to update all group member times.
*
*
@@ -891,7 +891,7 @@ static int perf_cgroup_ensure_storage(struct perf_event *event,
int cpu, heap_size, ret = 0;
/*
- * Allow storage to have sufficent space for an iterator for each
+ * Allow storage to have sufficient space for an iterator for each
* possibly nested cgroup plus an iterator for events with no cgroup.
*/
for (heap_size = 1; css; css = css->parent)
@@ -3671,7 +3671,7 @@ void __perf_event_task_sched_out(struct task_struct *task,
perf_cgroup_switch(next);
}
-static bool perf_less_group_idx(const void *l, const void *r)
+static bool perf_less_group_idx(const void *l, const void *r, void __always_unused *args)
{
const struct perf_event *le = *(const struct perf_event **)l;
const struct perf_event *re = *(const struct perf_event **)r;
@@ -3679,20 +3679,21 @@ static bool perf_less_group_idx(const void *l, const void *r)
return le->group_index < re->group_index;
}
-static void swap_ptr(void *l, void *r)
+static void swap_ptr(void *l, void *r, void __always_unused *args)
{
void **lp = l, **rp = r;
swap(*lp, *rp);
}
+DEFINE_MIN_HEAP(struct perf_event *, perf_event_min_heap);
+
static const struct min_heap_callbacks perf_min_heap = {
- .elem_size = sizeof(struct perf_event *),
.less = perf_less_group_idx,
.swp = swap_ptr,
};
-static void __heap_add(struct min_heap *heap, struct perf_event *event)
+static void __heap_add(struct perf_event_min_heap *heap, struct perf_event *event)
{
struct perf_event **itrs = heap->data;
@@ -3726,7 +3727,7 @@ static noinline int visit_groups_merge(struct perf_event_context *ctx,
struct perf_cpu_context *cpuctx = NULL;
/* Space for per CPU and/or any CPU event iterators. */
struct perf_event *itrs[2];
- struct min_heap event_heap;
+ struct perf_event_min_heap event_heap;
struct perf_event **evt;
int ret;
@@ -3735,7 +3736,7 @@ static noinline int visit_groups_merge(struct perf_event_context *ctx,
if (!ctx->task) {
cpuctx = this_cpu_ptr(&perf_cpu_context);
- event_heap = (struct min_heap){
+ event_heap = (struct perf_event_min_heap){
.data = cpuctx->heap,
.nr = 0,
.size = cpuctx->heap_size,
@@ -3748,7 +3749,7 @@ static noinline int visit_groups_merge(struct perf_event_context *ctx,
css = &cpuctx->cgrp->css;
#endif
} else {
- event_heap = (struct min_heap){
+ event_heap = (struct perf_event_min_heap){
.data = itrs,
.nr = 0,
.size = ARRAY_SIZE(itrs),
@@ -3770,7 +3771,7 @@ static noinline int visit_groups_merge(struct perf_event_context *ctx,
perf_assert_pmu_disabled((*evt)->pmu_ctx->pmu);
}
- min_heapify_all(&event_heap, &perf_min_heap);
+ min_heapify_all(&event_heap, &perf_min_heap, NULL);
while (event_heap.nr) {
ret = func(*evt, data);
@@ -3779,9 +3780,9 @@ static noinline int visit_groups_merge(struct perf_event_context *ctx,
*evt = perf_event_groups_next(*evt, pmu);
if (*evt)
- min_heapify(&event_heap, 0, &perf_min_heap);
+ min_heap_sift_down(&event_heap, 0, &perf_min_heap, NULL);
else
- min_heap_pop(&event_heap, &perf_min_heap);
+ min_heap_pop(&event_heap, &perf_min_heap, NULL);
}
return 0;
@@ -7634,7 +7635,7 @@ again:
pte = ptep_get_lockless(ptep);
if (pte_present(pte))
- size = pte_leaf_size(pte);
+ size = __pte_leaf_size(pmd, pte);
pte_unmap(ptep);
#endif /* CONFIG_HAVE_GUP_FAST */
@@ -9327,21 +9328,19 @@ static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD;
int i;
- if (prog->aux->func_cnt == 0) {
- perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
- (u64)(unsigned long)prog->bpf_func,
- prog->jited_len, unregister,
- prog->aux->ksym.name);
- } else {
- for (i = 0; i < prog->aux->func_cnt; i++) {
- struct bpf_prog *subprog = prog->aux->func[i];
-
- perf_event_ksymbol(
- PERF_RECORD_KSYMBOL_TYPE_BPF,
- (u64)(unsigned long)subprog->bpf_func,
- subprog->jited_len, unregister,
- subprog->aux->ksym.name);
- }
+ perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
+ (u64)(unsigned long)prog->bpf_func,
+ prog->jited_len, unregister,
+ prog->aux->ksym.name);
+
+ for (i = 1; i < prog->aux->func_cnt; i++) {
+ struct bpf_prog *subprog = prog->aux->func[i];
+
+ perf_event_ksymbol(
+ PERF_RECORD_KSYMBOL_TYPE_BPF,
+ (u64)(unsigned long)subprog->bpf_func,
+ subprog->jited_len, unregister,
+ subprog->aux->ksym.name);
}
}
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 99be2adedbc0..73cc47708679 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -181,7 +181,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
if (new_page) {
folio_get(new_folio);
- folio_add_new_anon_rmap(new_folio, vma, addr);
+ folio_add_new_anon_rmap(new_folio, vma, addr, RMAP_EXCLUSIVE);
folio_add_lru_vma(new_folio, vma);
} else
/* no new page, just dec_mm_counter for old_page */
diff --git a/kernel/exit.c b/kernel/exit.c
index be81342caf1b..7430852a8571 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -438,14 +438,46 @@ static void coredump_task_exit(struct task_struct *tsk)
}
#ifdef CONFIG_MEMCG
+/* drops tasklist_lock if succeeds */
+static bool __try_to_set_owner(struct task_struct *tsk, struct mm_struct *mm)
+{
+ bool ret = false;
+
+ task_lock(tsk);
+ if (likely(tsk->mm == mm)) {
+ /* tsk can't pass exit_mm/exec_mmap and exit */
+ read_unlock(&tasklist_lock);
+ WRITE_ONCE(mm->owner, tsk);
+ lru_gen_migrate_mm(mm);
+ ret = true;
+ }
+ task_unlock(tsk);
+ return ret;
+}
+
+static bool try_to_set_owner(struct task_struct *g, struct mm_struct *mm)
+{
+ struct task_struct *t;
+
+ for_each_thread(g, t) {
+ struct mm_struct *t_mm = READ_ONCE(t->mm);
+ if (t_mm == mm) {
+ if (__try_to_set_owner(t, mm))
+ return true;
+ } else if (t_mm)
+ break;
+ }
+
+ return false;
+}
+
/*
* A task is exiting. If it owned this mm, find a new owner for the mm.
*/
void mm_update_next_owner(struct mm_struct *mm)
{
- struct task_struct *c, *g, *p = current;
+ struct task_struct *g, *p = current;
-retry:
/*
* If the exiting or execing task is not the owner, it's
* someone else's problem.
@@ -466,19 +498,17 @@ retry:
/*
* Search in the children
*/
- list_for_each_entry(c, &p->children, sibling) {
- if (c->mm == mm)
- goto assign_new_owner;
+ list_for_each_entry(g, &p->children, sibling) {
+ if (try_to_set_owner(g, mm))
+ goto ret;
}
-
/*
* Search in the siblings
*/
- list_for_each_entry(c, &p->real_parent->children, sibling) {
- if (c->mm == mm)
- goto assign_new_owner;
+ list_for_each_entry(g, &p->real_parent->children, sibling) {
+ if (try_to_set_owner(g, mm))
+ goto ret;
}
-
/*
* Search through everything else, we should not get here often.
*/
@@ -487,12 +517,8 @@ retry:
break;
if (g->flags & PF_KTHREAD)
continue;
- for_each_thread(g, c) {
- if (c->mm == mm)
- goto assign_new_owner;
- if (c->mm)
- break;
- }
+ if (try_to_set_owner(g, mm))
+ goto ret;
}
read_unlock(&tasklist_lock);
/*
@@ -501,30 +527,9 @@ retry:
* ptrace or page migration (get_task_mm()). Mark owner as NULL.
*/
WRITE_ONCE(mm->owner, NULL);
+ ret:
return;
-assign_new_owner:
- BUG_ON(c == p);
- get_task_struct(c);
- /*
- * The task_lock protects c->mm from changing.
- * We always want mm->owner->mm == mm
- */
- task_lock(c);
- /*
- * Delay read_unlock() till we have the task_lock()
- * to ensure that c does not slip away underneath us
- */
- read_unlock(&tasklist_lock);
- if (c->mm != mm) {
- task_unlock(c);
- put_task_struct(c);
- goto retry;
- }
- WRITE_ONCE(mm->owner, c);
- lru_gen_migrate_mm(mm);
- task_unlock(c);
- put_task_struct(c);
}
#endif /* CONFIG_MEMCG */
diff --git a/kernel/fork.c b/kernel/fork.c
index 942e3d8617bf..cc760491f201 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -44,6 +44,7 @@
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mm_inline.h>
+#include <linux/memblock.h>
#include <linux/nsproxy.h>
#include <linux/capability.h>
#include <linux/cpu.h>
@@ -207,9 +208,10 @@ static bool try_release_thread_stack_to_cache(struct vm_struct *vm)
unsigned int i;
for (i = 0; i < NR_CACHED_STACKS; i++) {
- if (this_cpu_cmpxchg(cached_stacks[i], NULL, vm) != NULL)
- continue;
- return true;
+ struct vm_struct *tmp = NULL;
+
+ if (this_cpu_try_cmpxchg(cached_stacks[i], &tmp, vm))
+ return true;
}
return false;
}
@@ -992,10 +994,10 @@ void __init __weak arch_task_cache_init(void) { }
/*
* set_max_threads
*/
-static void set_max_threads(unsigned int max_threads_suggested)
+static void __init set_max_threads(unsigned int max_threads_suggested)
{
u64 threads;
- unsigned long nr_pages = totalram_pages();
+ unsigned long nr_pages = PHYS_PFN(memblock_phys_mem_size() - memblock_reserved_size());
/*
* The number of threads shall be limited such that the thread
@@ -1018,7 +1020,7 @@ static void set_max_threads(unsigned int max_threads_suggested)
int arch_task_struct_size __read_mostly;
#endif
-static void task_struct_whitelist(unsigned long *offset, unsigned long *size)
+static void __init task_struct_whitelist(unsigned long *offset, unsigned long *size)
{
/* Fetch thread_struct whitelist for the architecture. */
arch_thread_struct_whitelist(offset, size);
@@ -1519,14 +1521,13 @@ struct mm_struct *get_task_mm(struct task_struct *task)
{
struct mm_struct *mm;
+ if (task->flags & PF_KTHREAD)
+ return NULL;
+
task_lock(task);
mm = task->mm;
- if (mm) {
- if (task->flags & PF_KTHREAD)
- mm = NULL;
- else
- mmget(mm);
- }
+ if (mm)
+ mmget(mm);
task_unlock(task);
return mm;
}
@@ -3403,7 +3404,7 @@ int unshare_files(void)
return 0;
}
-int sysctl_max_threads(struct ctl_table *table, int write,
+int sysctl_max_threads(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table t;
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 1d92016b0b3c..959d99583d1c 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -127,7 +127,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
* Ok, the task did not get scheduled for more than 2 minutes,
* complain:
*/
- if (sysctl_hung_task_warnings) {
+ if (sysctl_hung_task_warnings || hung_task_call_panic) {
if (sysctl_hung_task_warnings > 0)
sysctl_hung_task_warnings--;
pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
@@ -239,7 +239,7 @@ static long hung_timeout_jiffies(unsigned long last_checked,
/*
* Process updating of timeout sysctl
*/
-static int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+static int proc_dohung_task_timeout_secs(const struct ctl_table *table, int write,
void *buffer,
size_t *lenp, loff_t *ppos)
{
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index aae0402507ed..c6ffb97966be 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -9,14 +9,8 @@
static struct dentry *irq_dir;
-struct irq_bit_descr {
- unsigned int mask;
- char *name;
-};
-#define BIT_MASK_DESCR(m) { .mask = m, .name = #m }
-
-static void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state,
- const struct irq_bit_descr *sd, int size)
+void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state,
+ const struct irq_bit_descr *sd, int size)
{
int i;
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index f6e5515ee077..b3e98668f4dd 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
#include <linux/device.h>
#include <linux/gfp.h>
#include <linux/irq.h>
@@ -282,3 +283,43 @@ int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc,
}
EXPORT_SYMBOL_GPL(devm_irq_setup_generic_chip);
#endif /* CONFIG_GENERIC_IRQ_CHIP */
+
+#ifdef CONFIG_IRQ_DOMAIN
+static void devm_irq_domain_remove(struct device *dev, void *res)
+{
+ struct irq_domain **domain = res;
+
+ irq_domain_remove(*domain);
+}
+
+/**
+ * devm_irq_domain_instantiate() - Instantiate a new irq domain data for a
+ * managed device.
+ * @dev: Device to instantiate the domain for
+ * @info: Domain information pointer pointing to the information for this
+ * domain
+ *
+ * Return: A pointer to the instantiated irq domain or an ERR_PTR value.
+ */
+struct irq_domain *devm_irq_domain_instantiate(struct device *dev,
+ const struct irq_domain_info *info)
+{
+ struct irq_domain *domain;
+ struct irq_domain **dr;
+
+ dr = devres_alloc(devm_irq_domain_remove, sizeof(*dr), GFP_KERNEL);
+ if (!dr)
+ return ERR_PTR(-ENOMEM);
+
+ domain = irq_domain_instantiate(info);
+ if (!IS_ERR(domain)) {
+ *dr = domain;
+ devres_add(dev, dr);
+ } else {
+ devres_free(dr);
+ }
+
+ return domain;
+}
+EXPORT_SYMBOL_GPL(devm_irq_domain_instantiate);
+#endif /* CONFIG_IRQ_DOMAIN */
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index d39a40bc542b..32ffcbb87fa1 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -276,21 +276,14 @@ irq_gc_init_mask_cache(struct irq_chip_generic *gc, enum irq_gc_flags flags)
}
/**
- * __irq_alloc_domain_generic_chips - Allocate generic chips for an irq domain
- * @d: irq domain for which to allocate chips
- * @irqs_per_chip: Number of interrupts each chip handles (max 32)
- * @num_ct: Number of irq_chip_type instances associated with this
- * @name: Name of the irq chip
- * @handler: Default flow handler associated with these chips
- * @clr: IRQ_* bits to clear in the mapping function
- * @set: IRQ_* bits to set in the mapping function
- * @gcflags: Generic chip specific setup flags
+ * irq_domain_alloc_generic_chips - Allocate generic chips for an irq domain
+ * @d: irq domain for which to allocate chips
+ * @info: Generic chip information
+ *
+ * Return: 0 on success, negative error code on failure
*/
-int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
- int num_ct, const char *name,
- irq_flow_handler_t handler,
- unsigned int clr, unsigned int set,
- enum irq_gc_flags gcflags)
+int irq_domain_alloc_generic_chips(struct irq_domain *d,
+ const struct irq_domain_chip_generic_info *info)
{
struct irq_domain_chip_generic *dgc;
struct irq_chip_generic *gc;
@@ -300,27 +293,29 @@ int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
size_t gc_sz;
size_t sz;
void *tmp;
+ int ret;
if (d->gc)
return -EBUSY;
- numchips = DIV_ROUND_UP(d->revmap_size, irqs_per_chip);
+ numchips = DIV_ROUND_UP(d->revmap_size, info->irqs_per_chip);
if (!numchips)
return -EINVAL;
/* Allocate a pointer, generic chip and chiptypes for each chip */
- gc_sz = struct_size(gc, chip_types, num_ct);
+ gc_sz = struct_size(gc, chip_types, info->num_ct);
dgc_sz = struct_size(dgc, gc, numchips);
sz = dgc_sz + numchips * gc_sz;
tmp = dgc = kzalloc(sz, GFP_KERNEL);
if (!dgc)
return -ENOMEM;
- dgc->irqs_per_chip = irqs_per_chip;
+ dgc->irqs_per_chip = info->irqs_per_chip;
dgc->num_chips = numchips;
- dgc->irq_flags_to_set = set;
- dgc->irq_flags_to_clear = clr;
- dgc->gc_flags = gcflags;
+ dgc->irq_flags_to_set = info->irq_flags_to_set;
+ dgc->irq_flags_to_clear = info->irq_flags_to_clear;
+ dgc->gc_flags = info->gc_flags;
+ dgc->exit = info->exit;
d->gc = dgc;
/* Calc pointer to the first generic chip */
@@ -328,15 +323,22 @@ int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
for (i = 0; i < numchips; i++) {
/* Store the pointer to the generic chip */
dgc->gc[i] = gc = tmp;
- irq_init_generic_chip(gc, name, num_ct, i * irqs_per_chip,
- NULL, handler);
+ irq_init_generic_chip(gc, info->name, info->num_ct,
+ i * dgc->irqs_per_chip, NULL,
+ info->handler);
gc->domain = d;
- if (gcflags & IRQ_GC_BE_IO) {
+ if (dgc->gc_flags & IRQ_GC_BE_IO) {
gc->reg_readl = &irq_readl_be;
gc->reg_writel = &irq_writel_be;
}
+ if (info->init) {
+ ret = info->init(gc);
+ if (ret)
+ goto err;
+ }
+
raw_spin_lock_irqsave(&gc_lock, flags);
list_add_tail(&gc->list, &gc_list);
raw_spin_unlock_irqrestore(&gc_lock, flags);
@@ -344,6 +346,69 @@ int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
tmp += gc_sz;
}
return 0;
+
+err:
+ while (i--) {
+ if (dgc->exit)
+ dgc->exit(dgc->gc[i]);
+ irq_remove_generic_chip(dgc->gc[i], ~0U, 0, 0);
+ }
+ d->gc = NULL;
+ kfree(dgc);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(irq_domain_alloc_generic_chips);
+
+/**
+ * irq_domain_remove_generic_chips - Remove generic chips from an irq domain
+ * @d: irq domain for which generic chips are to be removed
+ */
+void irq_domain_remove_generic_chips(struct irq_domain *d)
+{
+ struct irq_domain_chip_generic *dgc = d->gc;
+ unsigned int i;
+
+ if (!dgc)
+ return;
+
+ for (i = 0; i < dgc->num_chips; i++) {
+ if (dgc->exit)
+ dgc->exit(dgc->gc[i]);
+ irq_remove_generic_chip(dgc->gc[i], ~0U, 0, 0);
+ }
+ d->gc = NULL;
+ kfree(dgc);
+}
+EXPORT_SYMBOL_GPL(irq_domain_remove_generic_chips);
+
+/**
+ * __irq_alloc_domain_generic_chips - Allocate generic chips for an irq domain
+ * @d: irq domain for which to allocate chips
+ * @irqs_per_chip: Number of interrupts each chip handles (max 32)
+ * @num_ct: Number of irq_chip_type instances associated with this
+ * @name: Name of the irq chip
+ * @handler: Default flow handler associated with these chips
+ * @clr: IRQ_* bits to clear in the mapping function
+ * @set: IRQ_* bits to set in the mapping function
+ * @gcflags: Generic chip specific setup flags
+ */
+int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
+ int num_ct, const char *name,
+ irq_flow_handler_t handler,
+ unsigned int clr, unsigned int set,
+ enum irq_gc_flags gcflags)
+{
+ struct irq_domain_chip_generic_info info = {
+ .irqs_per_chip = irqs_per_chip,
+ .num_ct = num_ct,
+ .name = name,
+ .handler = handler,
+ .irq_flags_to_clear = clr,
+ .irq_flags_to_set = set,
+ .gc_flags = gcflags,
+ };
+
+ return irq_domain_alloc_generic_chips(d, &info);
}
EXPORT_SYMBOL_GPL(__irq_alloc_domain_generic_chips);
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index ed28059e9849..fe0272cd84a5 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -501,6 +501,16 @@ static inline struct irq_data *irqd_get_parent_data(struct irq_data *irqd)
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
#include <linux/debugfs.h>
+struct irq_bit_descr {
+ unsigned int mask;
+ char *name;
+};
+
+#define BIT_MASK_DESCR(m) { .mask = m, .name = #m }
+
+void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state,
+ const struct irq_bit_descr *sd, int size);
+
void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc);
static inline void irq_remove_debugfs_entry(struct irq_desc *desc)
{
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index aadc8891cc16..cea8f6874b1f 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -111,6 +111,7 @@ EXPORT_SYMBOL_GPL(__irq_domain_alloc_fwnode);
/**
* irq_domain_free_fwnode - Free a non-OF-backed fwnode_handle
+ * @fwnode: fwnode_handle to free
*
* Free a fwnode_handle allocated with irq_domain_alloc_fwnode.
*/
@@ -127,27 +128,12 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode)
}
EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
-static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode,
- unsigned int size,
- irq_hw_number_t hwirq_max,
- int direct_max,
- const struct irq_domain_ops *ops,
- void *host_data)
+static int irq_domain_set_name(struct irq_domain *domain,
+ const struct fwnode_handle *fwnode,
+ enum irq_domain_bus_token bus_token)
{
- struct irqchip_fwid *fwid;
- struct irq_domain *domain;
-
static atomic_t unknown_domains;
-
- if (WARN_ON((size && direct_max) ||
- (!IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) && direct_max) ||
- (direct_max && (direct_max != hwirq_max))))
- return NULL;
-
- domain = kzalloc_node(struct_size(domain, revmap, size),
- GFP_KERNEL, of_node_to_nid(to_of_node(fwnode)));
- if (!domain)
- return NULL;
+ struct irqchip_fwid *fwid;
if (is_fwnode_irqchip(fwnode)) {
fwid = container_of(fwnode, struct irqchip_fwid, fwnode);
@@ -155,17 +141,23 @@ static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode,
switch (fwid->type) {
case IRQCHIP_FWNODE_NAMED:
case IRQCHIP_FWNODE_NAMED_ID:
- domain->fwnode = fwnode;
- domain->name = kstrdup(fwid->name, GFP_KERNEL);
- if (!domain->name) {
- kfree(domain);
- return NULL;
- }
+ domain->name = bus_token ?
+ kasprintf(GFP_KERNEL, "%s-%d",
+ fwid->name, bus_token) :
+ kstrdup(fwid->name, GFP_KERNEL);
+ if (!domain->name)
+ return -ENOMEM;
domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
break;
default:
- domain->fwnode = fwnode;
domain->name = fwid->name;
+ if (bus_token) {
+ domain->name = kasprintf(GFP_KERNEL, "%s-%d",
+ fwid->name, bus_token);
+ if (!domain->name)
+ return -ENOMEM;
+ domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
+ }
break;
}
} else if (is_of_node(fwnode) || is_acpi_device_node(fwnode) ||
@@ -177,42 +169,68 @@ static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode,
* unhappy about. Replace them with ':', which does
* the trick and is not as offensive as '\'...
*/
- name = kasprintf(GFP_KERNEL, "%pfw", fwnode);
- if (!name) {
- kfree(domain);
- return NULL;
- }
+ name = bus_token ?
+ kasprintf(GFP_KERNEL, "%pfw-%d", fwnode, bus_token) :
+ kasprintf(GFP_KERNEL, "%pfw", fwnode);
+ if (!name)
+ return -ENOMEM;
domain->name = strreplace(name, '/', ':');
- domain->fwnode = fwnode;
domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
}
if (!domain->name) {
if (fwnode)
pr_err("Invalid fwnode type for irqdomain\n");
- domain->name = kasprintf(GFP_KERNEL, "unknown-%d",
- atomic_inc_return(&unknown_domains));
- if (!domain->name) {
- kfree(domain);
- return NULL;
- }
+ domain->name = bus_token ?
+ kasprintf(GFP_KERNEL, "unknown-%d-%d",
+ atomic_inc_return(&unknown_domains),
+ bus_token) :
+ kasprintf(GFP_KERNEL, "unknown-%d",
+ atomic_inc_return(&unknown_domains));
+ if (!domain->name)
+ return -ENOMEM;
domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
}
- fwnode_handle_get(fwnode);
- fwnode_dev_initialized(fwnode, true);
+ return 0;
+}
+
+static struct irq_domain *__irq_domain_create(const struct irq_domain_info *info)
+{
+ struct irq_domain *domain;
+ int err;
+
+ if (WARN_ON((info->size && info->direct_max) ||
+ (!IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) && info->direct_max) ||
+ (info->direct_max && info->direct_max != info->hwirq_max)))
+ return ERR_PTR(-EINVAL);
+
+ domain = kzalloc_node(struct_size(domain, revmap, info->size),
+ GFP_KERNEL, of_node_to_nid(to_of_node(info->fwnode)));
+ if (!domain)
+ return ERR_PTR(-ENOMEM);
+
+ err = irq_domain_set_name(domain, info->fwnode, info->bus_token);
+ if (err) {
+ kfree(domain);
+ return ERR_PTR(err);
+ }
+
+ domain->fwnode = fwnode_handle_get(info->fwnode);
+ fwnode_dev_initialized(domain->fwnode, true);
/* Fill structure */
INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL);
- domain->ops = ops;
- domain->host_data = host_data;
- domain->hwirq_max = hwirq_max;
+ domain->ops = info->ops;
+ domain->host_data = info->host_data;
+ domain->bus_token = info->bus_token;
+ domain->hwirq_max = info->hwirq_max;
- if (direct_max)
+ if (info->direct_max)
domain->flags |= IRQ_DOMAIN_FLAG_NO_MAP;
- domain->revmap_size = size;
+ domain->revmap_size = info->size;
/*
* Hierarchical domains use the domain lock of the root domain
@@ -240,34 +258,64 @@ static void __irq_domain_publish(struct irq_domain *domain)
pr_debug("Added domain %s\n", domain->name);
}
+static void irq_domain_free(struct irq_domain *domain)
+{
+ fwnode_dev_initialized(domain->fwnode, false);
+ fwnode_handle_put(domain->fwnode);
+ if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED)
+ kfree(domain->name);
+ kfree(domain);
+}
+
/**
- * __irq_domain_add() - Allocate a new irq_domain data structure
- * @fwnode: firmware node for the interrupt controller
- * @size: Size of linear map; 0 for radix mapping only
- * @hwirq_max: Maximum number of interrupts supported by controller
- * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
- * direct mapping
- * @ops: domain callbacks
- * @host_data: Controller private data pointer
+ * irq_domain_instantiate() - Instantiate a new irq domain data structure
+ * @info: Domain information pointer pointing to the information for this domain
*
- * Allocates and initializes an irq_domain structure.
- * Returns pointer to IRQ domain, or NULL on failure.
+ * Return: A pointer to the instantiated irq domain or an ERR_PTR value.
*/
-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
- irq_hw_number_t hwirq_max, int direct_max,
- const struct irq_domain_ops *ops,
- void *host_data)
+struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info)
{
struct irq_domain *domain;
+ int err;
+
+ domain = __irq_domain_create(info);
+ if (IS_ERR(domain))
+ return domain;
- domain = __irq_domain_create(fwnode, size, hwirq_max, direct_max,
- ops, host_data);
- if (domain)
- __irq_domain_publish(domain);
+ domain->flags |= info->domain_flags;
+ domain->exit = info->exit;
+
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+ if (info->parent) {
+ domain->root = info->parent->root;
+ domain->parent = info->parent;
+ }
+#endif
+
+ if (info->dgc_info) {
+ err = irq_domain_alloc_generic_chips(domain, info->dgc_info);
+ if (err)
+ goto err_domain_free;
+ }
+
+ if (info->init) {
+ err = info->init(domain);
+ if (err)
+ goto err_domain_gc_remove;
+ }
+
+ __irq_domain_publish(domain);
return domain;
+
+err_domain_gc_remove:
+ if (info->dgc_info)
+ irq_domain_remove_generic_chips(domain);
+err_domain_free:
+ irq_domain_free(domain);
+ return ERR_PTR(err);
}
-EXPORT_SYMBOL_GPL(__irq_domain_add);
+EXPORT_SYMBOL_GPL(irq_domain_instantiate);
/**
* irq_domain_remove() - Remove an irq domain.
@@ -279,6 +327,9 @@ EXPORT_SYMBOL_GPL(__irq_domain_add);
*/
void irq_domain_remove(struct irq_domain *domain)
{
+ if (domain->exit)
+ domain->exit(domain);
+
mutex_lock(&irq_domain_mutex);
debugfs_remove_domain_dir(domain);
@@ -294,13 +345,11 @@ void irq_domain_remove(struct irq_domain *domain)
mutex_unlock(&irq_domain_mutex);
- pr_debug("Removed domain %s\n", domain->name);
+ if (domain->flags & IRQ_DOMAIN_FLAG_DESTROY_GC)
+ irq_domain_remove_generic_chips(domain);
- fwnode_dev_initialized(domain->fwnode, false);
- fwnode_handle_put(domain->fwnode);
- if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED)
- kfree(domain->name);
- kfree(domain);
+ pr_debug("Removed domain %s\n", domain->name);
+ irq_domain_free(domain);
}
EXPORT_SYMBOL_GPL(irq_domain_remove);
@@ -360,10 +409,17 @@ struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode,
const struct irq_domain_ops *ops,
void *host_data)
{
+ struct irq_domain_info info = {
+ .fwnode = fwnode,
+ .size = size,
+ .hwirq_max = size,
+ .ops = ops,
+ .host_data = host_data,
+ };
struct irq_domain *domain;
- domain = __irq_domain_add(fwnode, size, size, 0, ops, host_data);
- if (!domain)
+ domain = irq_domain_instantiate(&info);
+ if (IS_ERR(domain))
return NULL;
if (first_irq > 0) {
@@ -416,11 +472,20 @@ struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode,
const struct irq_domain_ops *ops,
void *host_data)
{
+ struct irq_domain_info info = {
+ .fwnode = fwnode,
+ .size = first_hwirq + size,
+ .hwirq_max = first_hwirq + size,
+ .ops = ops,
+ .host_data = host_data,
+ };
struct irq_domain *domain;
- domain = __irq_domain_add(fwnode, first_hwirq + size, first_hwirq + size, 0, ops, host_data);
- if (domain)
- irq_domain_associate_many(domain, first_irq, first_hwirq, size);
+ domain = irq_domain_instantiate(&info);
+ if (IS_ERR(domain))
+ return NULL;
+
+ irq_domain_associate_many(domain, first_irq, first_hwirq, size);
return domain;
}
@@ -438,7 +503,8 @@ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
struct fwnode_handle *fwnode = fwspec->fwnode;
int rc;
- /* We might want to match the legacy controller last since
+ /*
+ * We might want to match the legacy controller last since
* it might potentially be set to match all interrupts in
* the absence of a device node. This isn't a problem so far
* yet though...
@@ -982,6 +1048,12 @@ EXPORT_SYMBOL_GPL(__irq_resolve_mapping);
/**
* irq_domain_xlate_onecell() - Generic xlate for direct one cell bindings
+ * @d: Interrupt domain involved in the translation
+ * @ctrlr: The device tree node for the device whose interrupt is translated
+ * @intspec: The interrupt specifier data from the device tree
+ * @intsize: The number of entries in @intspec
+ * @out_hwirq: Pointer to storage for the hardware interrupt number
+ * @out_type: Pointer to storage for the interrupt type
*
* Device Tree IRQ specifier translation function which works with one cell
* bindings where the cell value maps directly to the hwirq number.
@@ -1000,6 +1072,12 @@ EXPORT_SYMBOL_GPL(irq_domain_xlate_onecell);
/**
* irq_domain_xlate_twocell() - Generic xlate for direct two cell bindings
+ * @d: Interrupt domain involved in the translation
+ * @ctrlr: The device tree node for the device whose interrupt is translated
+ * @intspec: The interrupt specifier data from the device tree
+ * @intsize: The number of entries in @intspec
+ * @out_hwirq: Pointer to storage for the hardware interrupt number
+ * @out_type: Pointer to storage for the interrupt type
*
* Device Tree IRQ specifier translation function which works with two cell
* bindings where the cell values map directly to the hwirq number
@@ -1018,6 +1096,12 @@ EXPORT_SYMBOL_GPL(irq_domain_xlate_twocell);
/**
* irq_domain_xlate_onetwocell() - Generic xlate for one or two cell bindings
+ * @d: Interrupt domain involved in the translation
+ * @ctrlr: The device tree node for the device whose interrupt is translated
+ * @intspec: The interrupt specifier data from the device tree
+ * @intsize: The number of entries in @intspec
+ * @out_hwirq: Pointer to storage for the hardware interrupt number
+ * @out_type: Pointer to storage for the interrupt type
*
* Device Tree IRQ specifier translation function which works with either one
* or two cell bindings where the cell values map directly to the hwirq number
@@ -1051,6 +1135,10 @@ EXPORT_SYMBOL_GPL(irq_domain_simple_ops);
/**
* irq_domain_translate_onecell() - Generic translate for direct one cell
* bindings
+ * @d: Interrupt domain involved in the translation
+ * @fwspec: The firmware interrupt specifier to translate
+ * @out_hwirq: Pointer to storage for the hardware interrupt number
+ * @out_type: Pointer to storage for the interrupt type
*/
int irq_domain_translate_onecell(struct irq_domain *d,
struct irq_fwspec *fwspec,
@@ -1068,6 +1156,10 @@ EXPORT_SYMBOL_GPL(irq_domain_translate_onecell);
/**
* irq_domain_translate_twocell() - Generic translate for direct two cell
* bindings
+ * @d: Interrupt domain involved in the translation
+ * @fwspec: The firmware interrupt specifier to translate
+ * @out_hwirq: Pointer to storage for the hardware interrupt number
+ * @out_type: Pointer to storage for the interrupt type
*
* Device Tree IRQ specifier translation function which works with two cell
* bindings where the cell values map directly to the hwirq number
@@ -1144,23 +1236,22 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
const struct irq_domain_ops *ops,
void *host_data)
{
- struct irq_domain *domain;
-
- if (size)
- domain = __irq_domain_create(fwnode, size, size, 0, ops, host_data);
- else
- domain = __irq_domain_create(fwnode, 0, ~0, 0, ops, host_data);
-
- if (domain) {
- if (parent)
- domain->root = parent->root;
- domain->parent = parent;
- domain->flags |= flags;
+ struct irq_domain_info info = {
+ .fwnode = fwnode,
+ .size = size,
+ .hwirq_max = size,
+ .ops = ops,
+ .host_data = host_data,
+ .domain_flags = flags,
+ .parent = parent,
+ };
+ struct irq_domain *d;
- __irq_domain_publish(domain);
- }
+ if (!info.size)
+ info.hwirq_max = ~0U;
- return domain;
+ d = irq_domain_instantiate(&info);
+ return IS_ERR(d) ? NULL : d;
}
EXPORT_SYMBOL_GPL(irq_domain_create_hierarchy);
@@ -1932,13 +2023,26 @@ static void irq_domain_free_one_irq(struct irq_domain *domain, unsigned int virq
static struct dentry *domain_dir;
-static void
-irq_domain_debug_show_one(struct seq_file *m, struct irq_domain *d, int ind)
+static const struct irq_bit_descr irqdomain_flags[] = {
+ BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_HIERARCHY),
+ BIT_MASK_DESCR(IRQ_DOMAIN_NAME_ALLOCATED),
+ BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_IPI_PER_CPU),
+ BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_IPI_SINGLE),
+ BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_MSI),
+ BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_ISOLATED_MSI),
+ BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_NO_MAP),
+ BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_MSI_PARENT),
+ BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_MSI_DEVICE),
+ BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_NONCORE),
+};
+
+static void irq_domain_debug_show_one(struct seq_file *m, struct irq_domain *d, int ind)
{
seq_printf(m, "%*sname: %s\n", ind, "", d->name);
seq_printf(m, "%*ssize: %u\n", ind + 1, "", d->revmap_size);
seq_printf(m, "%*smapped: %u\n", ind + 1, "", d->mapcount);
seq_printf(m, "%*sflags: 0x%08x\n", ind +1 , "", d->flags);
+ irq_debug_show_bits(m, ind, d->flags, irqdomain_flags, ARRAY_SIZE(irqdomain_flags));
if (d->ops && d->ops->debug_show)
d->ops->debug_show(m, d, NULL, ind + 1);
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 2024f89baea4..5fa0547ece0c 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -8,18 +8,34 @@
* This file contains common code to support Message Signaled Interrupts for
* PCI compatible and non PCI compatible devices.
*/
-#include <linux/types.h>
#include <linux/device.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/msi.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
-#include <linux/pci.h>
+#include <linux/types.h>
+#include <linux/xarray.h>
#include "internals.h"
/**
+ * struct msi_device_data - MSI per device data
+ * @properties: MSI properties which are interesting to drivers
+ * @mutex: Mutex protecting the MSI descriptor store
+ * @__domains: Internal data for per device MSI domains
+ * @__iter_idx: Index to search the next entry for iterators
+ */
+struct msi_device_data {
+ unsigned long properties;
+ struct mutex mutex;
+ struct msi_dev_domain __domains[MSI_MAX_DEVICE_IRQDOMAINS];
+ unsigned long __iter_idx;
+};
+
+/**
* struct msi_ctrl - MSI internal management control structure
* @domid: ID of the domain on which management operations should be done
* @first: First (hardware) slot index to operate on
@@ -1088,8 +1104,8 @@ bool msi_match_device_irq_domain(struct device *dev, unsigned int domid,
return ret;
}
-int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
- int nvec, msi_alloc_info_t *arg)
+static int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
+ int nvec, msi_alloc_info_t *arg)
{
struct msi_domain_info *info = domain->host_data;
struct msi_domain_ops *ops = info->ops;
@@ -1097,77 +1113,6 @@ int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
return ops->msi_prepare(domain, dev, nvec, arg);
}
-int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
- int virq_base, int nvec, msi_alloc_info_t *arg)
-{
- struct msi_domain_info *info = domain->host_data;
- struct msi_domain_ops *ops = info->ops;
- struct msi_ctrl ctrl = {
- .domid = MSI_DEFAULT_DOMAIN,
- .first = virq_base,
- .last = virq_base + nvec - 1,
- };
- struct msi_desc *desc;
- struct xarray *xa;
- int ret, virq;
-
- msi_lock_descs(dev);
-
- if (!msi_ctrl_valid(dev, &ctrl)) {
- ret = -EINVAL;
- goto unlock;
- }
-
- ret = msi_domain_add_simple_msi_descs(dev, &ctrl);
- if (ret)
- goto unlock;
-
- xa = &dev->msi.data->__domains[ctrl.domid].store;
-
- for (virq = virq_base; virq < virq_base + nvec; virq++) {
- desc = xa_load(xa, virq);
- desc->irq = virq;
-
- ops->set_desc(arg, desc);
- ret = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg);
- if (ret)
- goto fail;
-
- irq_set_msi_desc(virq, desc);
- }
- msi_unlock_descs(dev);
- return 0;
-
-fail:
- for (--virq; virq >= virq_base; virq--) {
- msi_domain_depopulate_descs(dev, virq, 1);
- irq_domain_free_irqs_common(domain, virq, 1);
- }
- msi_domain_free_descs(dev, &ctrl);
-unlock:
- msi_unlock_descs(dev);
- return ret;
-}
-
-void msi_domain_depopulate_descs(struct device *dev, int virq_base, int nvec)
-{
- struct msi_ctrl ctrl = {
- .domid = MSI_DEFAULT_DOMAIN,
- .first = virq_base,
- .last = virq_base + nvec - 1,
- };
- struct msi_desc *desc;
- struct xarray *xa;
- unsigned long idx;
-
- if (!msi_ctrl_valid(dev, &ctrl))
- return;
-
- xa = &dev->msi.data->__domains[ctrl.domid].store;
- xa_for_each_range(xa, idx, desc, ctrl.first, ctrl.last)
- desc->irq = 0;
-}
-
/*
* Carefully check whether the device can use reservation mode. If
* reservation mode is enabled then the early activation will assign a
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 5c320c3f10a7..8cccdf40725a 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -461,10 +461,10 @@ int show_interrupts(struct seq_file *p, void *v)
{
static int prec;
- unsigned long flags, any_count = 0;
int i = *(loff_t *) v, j;
struct irqaction *action;
struct irq_desc *desc;
+ unsigned long flags;
if (i > ACTUAL_NR_IRQS)
return 0;
@@ -488,10 +488,7 @@ int show_interrupts(struct seq_file *p, void *v)
if (!desc || irq_settings_is_hidden(desc))
goto outsparse;
- if (desc->kstat_irqs)
- any_count = kstat_irqs_desc(desc, cpu_online_mask);
-
- if ((!desc->action || irq_desc_is_chained(desc)) && !any_count)
+ if (!desc->action || irq_desc_is_chained(desc) || !desc->kstat_irqs)
goto outsparse;
seq_printf(p, "%*d: ", prec, i);
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 98b9622d372e..fb2c77368d18 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -148,9 +148,6 @@ static unsigned int get_symbol_offset(unsigned long pos)
unsigned long kallsyms_sym_address(int idx)
{
- if (!IS_ENABLED(CONFIG_KALLSYMS_BASE_RELATIVE))
- return kallsyms_addresses[idx];
-
/* values are unsigned offsets if --absolute-percpu is not in effect */
if (!IS_ENABLED(CONFIG_KALLSYMS_ABSOLUTE_PERCPU))
return kallsyms_relative_base + (u32)kallsyms_offsets[idx];
@@ -325,7 +322,7 @@ static unsigned long get_symbol_pos(unsigned long addr,
unsigned long symbol_start = 0, symbol_end = 0;
unsigned long i, low, high, mid;
- /* Do a binary search on the sorted kallsyms_addresses array. */
+ /* Do a binary search on the sorted kallsyms_offsets array. */
low = 0;
high = kallsyms_num_syms;
diff --git a/kernel/kallsyms_internal.h b/kernel/kallsyms_internal.h
index 85480274fc8f..9633782f8250 100644
--- a/kernel/kallsyms_internal.h
+++ b/kernel/kallsyms_internal.h
@@ -4,12 +4,6 @@
#include <linux/types.h>
-/*
- * These will be re-linked against their real values during the second link
- * stage. Preliminary values must be provided in the linker script using the
- * PROVIDE() directive so that the first link stage can complete successfully.
- */
-extern const unsigned long kallsyms_addresses[];
extern const int kallsyms_offsets[];
extern const u8 kallsyms_names[];
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 9112d69d68b0..c0caa14880c3 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -888,7 +888,7 @@ struct kimage *kexec_crash_image;
static int kexec_load_disabled;
#ifdef CONFIG_SYSCTL
-static int kexec_limit_handler(struct ctl_table *table, int write,
+static int kexec_limit_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct kexec_load_limit *limit = table->data;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 6a76a8100073..e85de37d9e1e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -939,7 +939,7 @@ static void unoptimize_all_kprobes(void)
static DEFINE_MUTEX(kprobe_sysctl_mutex);
static int sysctl_kprobes_optimization;
-static int proc_kprobes_optimization_handler(struct ctl_table *table,
+static int proc_kprobes_optimization_handler(const struct ctl_table *table,
int write, void *buffer,
size_t *length, loff_t *ppos)
{
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 07fb5987b42b..1bab21b4718f 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -92,7 +92,14 @@ static ssize_t profiling_store(struct kobject *kobj,
const char *buf, size_t count)
{
int ret;
+ static DEFINE_MUTEX(lock);
+ /*
+ * We need serialization, for profile_setup() initializes prof_on
+ * value and profile_init() must not reallocate prof_buffer after
+ * once allocated.
+ */
+ guard(mutex)(&lock);
if (prof_on)
return -EEXIST;
/*
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 84c53285f499..7a75eab9c179 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -65,7 +65,7 @@ static struct latency_record latency_record[MAXLR];
int latencytop_enabled;
#ifdef CONFIG_SYSCTL
-static int sysctl_latencytop(struct ctl_table *table, int write, void *buffer,
+static int sysctl_latencytop(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
int err;
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 52426665eecc..3c21c31796db 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -346,6 +346,7 @@ int klp_apply_section_relocs(struct module *pmod, Elf_Shdr *sechdrs,
* /sys/kernel/livepatch/<patch>/enabled
* /sys/kernel/livepatch/<patch>/transition
* /sys/kernel/livepatch/<patch>/force
+ * /sys/kernel/livepatch/<patch>/replace
* /sys/kernel/livepatch/<patch>/<object>
* /sys/kernel/livepatch/<patch>/<object>/patched
* /sys/kernel/livepatch/<patch>/<object>/<function,sympos>
@@ -401,7 +402,7 @@ static ssize_t enabled_show(struct kobject *kobj,
struct klp_patch *patch;
patch = container_of(kobj, struct klp_patch, kobj);
- return snprintf(buf, PAGE_SIZE-1, "%d\n", patch->enabled);
+ return sysfs_emit(buf, "%d\n", patch->enabled);
}
static ssize_t transition_show(struct kobject *kobj,
@@ -410,8 +411,7 @@ static ssize_t transition_show(struct kobject *kobj,
struct klp_patch *patch;
patch = container_of(kobj, struct klp_patch, kobj);
- return snprintf(buf, PAGE_SIZE-1, "%d\n",
- patch == klp_transition_patch);
+ return sysfs_emit(buf, "%d\n", patch == klp_transition_patch);
}
static ssize_t force_store(struct kobject *kobj, struct kobj_attribute *attr,
@@ -443,13 +443,24 @@ static ssize_t force_store(struct kobject *kobj, struct kobj_attribute *attr,
return count;
}
+static ssize_t replace_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct klp_patch *patch;
+
+ patch = container_of(kobj, struct klp_patch, kobj);
+ return sysfs_emit(buf, "%d\n", patch->replace);
+}
+
static struct kobj_attribute enabled_kobj_attr = __ATTR_RW(enabled);
static struct kobj_attribute transition_kobj_attr = __ATTR_RO(transition);
static struct kobj_attribute force_kobj_attr = __ATTR_WO(force);
+static struct kobj_attribute replace_kobj_attr = __ATTR_RO(replace);
static struct attribute *klp_patch_attrs[] = {
&enabled_kobj_attr.attr,
&transition_kobj_attr.attr,
&force_kobj_attr.attr,
+ &replace_kobj_attr.attr,
NULL
};
ATTRIBUTE_GROUPS(klp_patch);
diff --git a/kernel/panic.c b/kernel/panic.c
index 8bff183d6180..f861bedc1925 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -35,6 +35,7 @@
#include <linux/debugfs.h>
#include <linux/sysfs.h>
#include <linux/context_tracking.h>
+#include <linux/seq_buf.h>
#include <trace/events/error_report.h>
#include <asm/sections.h>
@@ -470,32 +471,83 @@ void panic(const char *fmt, ...)
EXPORT_SYMBOL(panic);
+#define TAINT_FLAG(taint, _c_true, _c_false, _module) \
+ [ TAINT_##taint ] = { \
+ .c_true = _c_true, .c_false = _c_false, \
+ .module = _module, \
+ .desc = #taint, \
+ }
+
/*
* TAINT_FORCED_RMMOD could be a per-module flag but the module
* is being removed anyway.
*/
const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = {
- [ TAINT_PROPRIETARY_MODULE ] = { 'P', 'G', true },
- [ TAINT_FORCED_MODULE ] = { 'F', ' ', true },
- [ TAINT_CPU_OUT_OF_SPEC ] = { 'S', ' ', false },
- [ TAINT_FORCED_RMMOD ] = { 'R', ' ', false },
- [ TAINT_MACHINE_CHECK ] = { 'M', ' ', false },
- [ TAINT_BAD_PAGE ] = { 'B', ' ', false },
- [ TAINT_USER ] = { 'U', ' ', false },
- [ TAINT_DIE ] = { 'D', ' ', false },
- [ TAINT_OVERRIDDEN_ACPI_TABLE ] = { 'A', ' ', false },
- [ TAINT_WARN ] = { 'W', ' ', false },
- [ TAINT_CRAP ] = { 'C', ' ', true },
- [ TAINT_FIRMWARE_WORKAROUND ] = { 'I', ' ', false },
- [ TAINT_OOT_MODULE ] = { 'O', ' ', true },
- [ TAINT_UNSIGNED_MODULE ] = { 'E', ' ', true },
- [ TAINT_SOFTLOCKUP ] = { 'L', ' ', false },
- [ TAINT_LIVEPATCH ] = { 'K', ' ', true },
- [ TAINT_AUX ] = { 'X', ' ', true },
- [ TAINT_RANDSTRUCT ] = { 'T', ' ', true },
- [ TAINT_TEST ] = { 'N', ' ', true },
+ TAINT_FLAG(PROPRIETARY_MODULE, 'P', 'G', true),
+ TAINT_FLAG(FORCED_MODULE, 'F', ' ', true),
+ TAINT_FLAG(CPU_OUT_OF_SPEC, 'S', ' ', false),
+ TAINT_FLAG(FORCED_RMMOD, 'R', ' ', false),
+ TAINT_FLAG(MACHINE_CHECK, 'M', ' ', false),
+ TAINT_FLAG(BAD_PAGE, 'B', ' ', false),
+ TAINT_FLAG(USER, 'U', ' ', false),
+ TAINT_FLAG(DIE, 'D', ' ', false),
+ TAINT_FLAG(OVERRIDDEN_ACPI_TABLE, 'A', ' ', false),
+ TAINT_FLAG(WARN, 'W', ' ', false),
+ TAINT_FLAG(CRAP, 'C', ' ', true),
+ TAINT_FLAG(FIRMWARE_WORKAROUND, 'I', ' ', false),
+ TAINT_FLAG(OOT_MODULE, 'O', ' ', true),
+ TAINT_FLAG(UNSIGNED_MODULE, 'E', ' ', true),
+ TAINT_FLAG(SOFTLOCKUP, 'L', ' ', false),
+ TAINT_FLAG(LIVEPATCH, 'K', ' ', true),
+ TAINT_FLAG(AUX, 'X', ' ', true),
+ TAINT_FLAG(RANDSTRUCT, 'T', ' ', true),
+ TAINT_FLAG(TEST, 'N', ' ', true),
};
+#undef TAINT_FLAG
+
+static void print_tainted_seq(struct seq_buf *s, bool verbose)
+{
+ const char *sep = "";
+ int i;
+
+ if (!tainted_mask) {
+ seq_buf_puts(s, "Not tainted");
+ return;
+ }
+
+ seq_buf_printf(s, "Tainted: ");
+ for (i = 0; i < TAINT_FLAGS_COUNT; i++) {
+ const struct taint_flag *t = &taint_flags[i];
+ bool is_set = test_bit(i, &tainted_mask);
+ char c = is_set ? t->c_true : t->c_false;
+
+ if (verbose) {
+ if (is_set) {
+ seq_buf_printf(s, "%s[%c]=%s", sep, c, t->desc);
+ sep = ", ";
+ }
+ } else {
+ seq_buf_putc(s, c);
+ }
+ }
+}
+
+static const char *_print_tainted(bool verbose)
+{
+ /* FIXME: what should the size be? */
+ static char buf[sizeof(taint_flags)];
+ struct seq_buf s;
+
+ BUILD_BUG_ON(ARRAY_SIZE(taint_flags) != TAINT_FLAGS_COUNT);
+
+ seq_buf_init(&s, buf, sizeof(buf));
+
+ print_tainted_seq(&s, verbose);
+
+ return seq_buf_str(&s);
+}
+
/**
* print_tainted - return a string to represent the kernel taint state.
*
@@ -506,25 +558,15 @@ const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = {
*/
const char *print_tainted(void)
{
- static char buf[TAINT_FLAGS_COUNT + sizeof("Tainted: ")];
-
- BUILD_BUG_ON(ARRAY_SIZE(taint_flags) != TAINT_FLAGS_COUNT);
-
- if (tainted_mask) {
- char *s;
- int i;
-
- s = buf + sprintf(buf, "Tainted: ");
- for (i = 0; i < TAINT_FLAGS_COUNT; i++) {
- const struct taint_flag *t = &taint_flags[i];
- *s++ = test_bit(i, &tainted_mask) ?
- t->c_true : t->c_false;
- }
- *s = 0;
- } else
- snprintf(buf, sizeof(buf), "Not tainted");
+ return _print_tainted(false);
+}
- return buf;
+/**
+ * print_tainted_verbose - A more verbose version of print_tainted()
+ */
+const char *print_tainted_verbose(void)
+{
+ return _print_tainted(true);
}
int test_taint(unsigned flag)
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index bdf0087d6442..d70ab49d5b4a 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -261,7 +261,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
}
#ifdef CONFIG_CHECKPOINT_RESTORE
-static int pid_ns_ctl_handler(struct ctl_table *table, int write,
+static int pid_ns_ctl_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct pid_namespace *pid_ns = task_active_pid_ns(current);
diff --git a/kernel/pid_sysctl.h b/kernel/pid_sysctl.h
index fe9fb991dc42..18ecaef6be41 100644
--- a/kernel/pid_sysctl.h
+++ b/kernel/pid_sysctl.h
@@ -5,7 +5,7 @@
#include <linux/pid_namespace.h>
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
-static int pid_mfd_noexec_dointvec_minmax(struct ctl_table *table,
+static int pid_mfd_noexec_dointvec_minmax(const struct ctl_table *table,
int write, void *buf, size_t *lenp, loff_t *ppos)
{
struct pid_namespace *ns = task_active_pid_ns(current);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 753b8dd42a59..82b884b67152 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -200,12 +200,11 @@ void free_all_swap_pages(int swap)
while ((node = swsusp_extents.rb_node)) {
struct swsusp_extent *ext;
- unsigned long offset;
ext = rb_entry(node, struct swsusp_extent, node);
rb_erase(node, &swsusp_extents);
- for (offset = ext->start; offset <= ext->end; offset++)
- swap_free(swp_entry(swap, offset));
+ swap_free_nr(swp_entry(swap, ext->start),
+ ext->end - ext->start + 1);
kfree(ext);
}
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 6c2afee5ef62..19dcc5832651 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -8,7 +8,7 @@
#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL)
void __init printk_sysctl_init(void);
-int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
+int devkmsg_sysctl_set_loglvl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
#else
#define printk_sysctl_init() do { } while (0)
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 7d91593f0ecf..054c0e7784fd 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -197,7 +197,7 @@ __setup("printk.devkmsg=", control_devkmsg);
char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit";
#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL)
-int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
+int devkmsg_sysctl_set_loglvl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
char old_str[DEVKMSG_STR_MAX_SIZE];
@@ -4372,15 +4372,15 @@ void kmsg_dump_rewind(struct kmsg_dump_iter *iter)
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
/**
- * console_replay_all - replay kernel log on consoles
+ * console_try_replay_all - try to replay kernel log on consoles
*
* Try to obtain lock on console subsystem and replay all
* available records in printk buffer on the consoles.
* Does nothing if lock is not obtained.
*
- * Context: Any context.
+ * Context: Any, except for NMI.
*/
-void console_replay_all(void)
+void console_try_replay_all(void)
{
if (console_trylock()) {
__console_rewind_all();
diff --git a/kernel/printk/sysctl.c b/kernel/printk/sysctl.c
index 3e47dedce9e5..f5072dc85f7a 100644
--- a/kernel/printk/sysctl.c
+++ b/kernel/printk/sysctl.c
@@ -11,7 +11,7 @@
static const int ten_thousand = 10000;
-static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
+static int proc_dointvec_minmax_sysadmin(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
if (write && !capable(CAP_SYS_ADMIN))
diff --git a/kernel/profile.c b/kernel/profile.c
index 2b775cc5c28f..ff68d3816182 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -47,13 +47,6 @@ static unsigned short int prof_shift;
int prof_on __read_mostly;
EXPORT_SYMBOL_GPL(prof_on);
-static cpumask_var_t prof_cpu_mask;
-#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS)
-static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits);
-static DEFINE_PER_CPU(int, cpu_profile_flip);
-static DEFINE_MUTEX(profile_flip_mutex);
-#endif /* CONFIG_SMP */
-
int profile_setup(char *str)
{
static const char schedstr[] = "schedule";
@@ -114,11 +107,6 @@ int __ref profile_init(void)
buffer_bytes = prof_len*sizeof(atomic_t);
- if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
- return -ENOMEM;
-
- cpumask_copy(prof_cpu_mask, cpu_possible_mask);
-
prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL|__GFP_NOWARN);
if (prof_buffer)
return 0;
@@ -132,195 +120,16 @@ int __ref profile_init(void)
if (prof_buffer)
return 0;
- free_cpumask_var(prof_cpu_mask);
return -ENOMEM;
}
-#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS)
-/*
- * Each cpu has a pair of open-addressed hashtables for pending
- * profile hits. read_profile() IPI's all cpus to request them
- * to flip buffers and flushes their contents to prof_buffer itself.
- * Flip requests are serialized by the profile_flip_mutex. The sole
- * use of having a second hashtable is for avoiding cacheline
- * contention that would otherwise happen during flushes of pending
- * profile hits required for the accuracy of reported profile hits
- * and so resurrect the interrupt livelock issue.
- *
- * The open-addressed hashtables are indexed by profile buffer slot
- * and hold the number of pending hits to that profile buffer slot on
- * a cpu in an entry. When the hashtable overflows, all pending hits
- * are accounted to their corresponding profile buffer slots with
- * atomic_add() and the hashtable emptied. As numerous pending hits
- * may be accounted to a profile buffer slot in a hashtable entry,
- * this amortizes a number of atomic profile buffer increments likely
- * to be far larger than the number of entries in the hashtable,
- * particularly given that the number of distinct profile buffer
- * positions to which hits are accounted during short intervals (e.g.
- * several seconds) is usually very small. Exclusion from buffer
- * flipping is provided by interrupt disablement (note that for
- * SCHED_PROFILING or SLEEP_PROFILING profile_hit() may be called from
- * process context).
- * The hash function is meant to be lightweight as opposed to strong,
- * and was vaguely inspired by ppc64 firmware-supported inverted
- * pagetable hash functions, but uses a full hashtable full of finite
- * collision chains, not just pairs of them.
- *
- * -- nyc
- */
-static void __profile_flip_buffers(void *unused)
-{
- int cpu = smp_processor_id();
-
- per_cpu(cpu_profile_flip, cpu) = !per_cpu(cpu_profile_flip, cpu);
-}
-
-static void profile_flip_buffers(void)
-{
- int i, j, cpu;
-
- mutex_lock(&profile_flip_mutex);
- j = per_cpu(cpu_profile_flip, get_cpu());
- put_cpu();
- on_each_cpu(__profile_flip_buffers, NULL, 1);
- for_each_online_cpu(cpu) {
- struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j];
- for (i = 0; i < NR_PROFILE_HIT; ++i) {
- if (!hits[i].hits) {
- if (hits[i].pc)
- hits[i].pc = 0;
- continue;
- }
- atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
- hits[i].hits = hits[i].pc = 0;
- }
- }
- mutex_unlock(&profile_flip_mutex);
-}
-
-static void profile_discard_flip_buffers(void)
-{
- int i, cpu;
-
- mutex_lock(&profile_flip_mutex);
- i = per_cpu(cpu_profile_flip, get_cpu());
- put_cpu();
- on_each_cpu(__profile_flip_buffers, NULL, 1);
- for_each_online_cpu(cpu) {
- struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i];
- memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit));
- }
- mutex_unlock(&profile_flip_mutex);
-}
-
-static void do_profile_hits(int type, void *__pc, unsigned int nr_hits)
-{
- unsigned long primary, secondary, flags, pc = (unsigned long)__pc;
- int i, j, cpu;
- struct profile_hit *hits;
-
- pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1);
- i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
- secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
- cpu = get_cpu();
- hits = per_cpu(cpu_profile_hits, cpu)[per_cpu(cpu_profile_flip, cpu)];
- if (!hits) {
- put_cpu();
- return;
- }
- /*
- * We buffer the global profiler buffer into a per-CPU
- * queue and thus reduce the number of global (and possibly
- * NUMA-alien) accesses. The write-queue is self-coalescing:
- */
- local_irq_save(flags);
- do {
- for (j = 0; j < PROFILE_GRPSZ; ++j) {
- if (hits[i + j].pc == pc) {
- hits[i + j].hits += nr_hits;
- goto out;
- } else if (!hits[i + j].hits) {
- hits[i + j].pc = pc;
- hits[i + j].hits = nr_hits;
- goto out;
- }
- }
- i = (i + secondary) & (NR_PROFILE_HIT - 1);
- } while (i != primary);
-
- /*
- * Add the current hit(s) and flush the write-queue out
- * to the global buffer:
- */
- atomic_add(nr_hits, &prof_buffer[pc]);
- for (i = 0; i < NR_PROFILE_HIT; ++i) {
- atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
- hits[i].pc = hits[i].hits = 0;
- }
-out:
- local_irq_restore(flags);
- put_cpu();
-}
-
-static int profile_dead_cpu(unsigned int cpu)
-{
- struct page *page;
- int i;
-
- if (cpumask_available(prof_cpu_mask))
- cpumask_clear_cpu(cpu, prof_cpu_mask);
-
- for (i = 0; i < 2; i++) {
- if (per_cpu(cpu_profile_hits, cpu)[i]) {
- page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[i]);
- per_cpu(cpu_profile_hits, cpu)[i] = NULL;
- __free_page(page);
- }
- }
- return 0;
-}
-
-static int profile_prepare_cpu(unsigned int cpu)
-{
- int i, node = cpu_to_mem(cpu);
- struct page *page;
-
- per_cpu(cpu_profile_flip, cpu) = 0;
-
- for (i = 0; i < 2; i++) {
- if (per_cpu(cpu_profile_hits, cpu)[i])
- continue;
-
- page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
- if (!page) {
- profile_dead_cpu(cpu);
- return -ENOMEM;
- }
- per_cpu(cpu_profile_hits, cpu)[i] = page_address(page);
-
- }
- return 0;
-}
-
-static int profile_online_cpu(unsigned int cpu)
-{
- if (cpumask_available(prof_cpu_mask))
- cpumask_set_cpu(cpu, prof_cpu_mask);
-
- return 0;
-}
-
-#else /* !CONFIG_SMP */
-#define profile_flip_buffers() do { } while (0)
-#define profile_discard_flip_buffers() do { } while (0)
-
static void do_profile_hits(int type, void *__pc, unsigned int nr_hits)
{
unsigned long pc;
pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift;
- atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]);
+ if (pc < prof_len)
+ atomic_add(nr_hits, &prof_buffer[pc]);
}
-#endif /* !CONFIG_SMP */
void profile_hits(int type, void *__pc, unsigned int nr_hits)
{
@@ -334,8 +143,8 @@ void profile_tick(int type)
{
struct pt_regs *regs = get_irq_regs();
- if (!user_mode(regs) && cpumask_available(prof_cpu_mask) &&
- cpumask_test_cpu(smp_processor_id(), prof_cpu_mask))
+ /* This is the old kernel-only legacy profiling */
+ if (!user_mode(regs))
profile_hit(type, (void *)profile_pc(regs));
}
@@ -358,7 +167,6 @@ read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
char *pnt;
unsigned long sample_step = 1UL << prof_shift;
- profile_flip_buffers();
if (p >= (prof_len+1)*sizeof(unsigned int))
return 0;
if (count > (prof_len+1)*sizeof(unsigned int) - p)
@@ -404,7 +212,6 @@ static ssize_t write_profile(struct file *file, const char __user *buf,
return -EINVAL;
}
#endif
- profile_discard_flip_buffers();
memset(prof_buffer, 0, prof_len * sizeof(atomic_t));
return count;
}
@@ -418,40 +225,14 @@ static const struct proc_ops profile_proc_ops = {
int __ref create_proc_profile(void)
{
struct proc_dir_entry *entry;
-#ifdef CONFIG_SMP
- enum cpuhp_state online_state;
-#endif
-
int err = 0;
if (!prof_on)
return 0;
-#ifdef CONFIG_SMP
- err = cpuhp_setup_state(CPUHP_PROFILE_PREPARE, "PROFILE_PREPARE",
- profile_prepare_cpu, profile_dead_cpu);
- if (err)
- return err;
-
- err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "AP_PROFILE_ONLINE",
- profile_online_cpu, NULL);
- if (err < 0)
- goto err_state_prep;
- online_state = err;
- err = 0;
-#endif
entry = proc_create("profile", S_IWUSR | S_IRUGO,
NULL, &profile_proc_ops);
- if (!entry)
- goto err_state_onl;
- proc_set_size(entry, (1 + prof_len) * sizeof(atomic_t));
-
- return err;
-err_state_onl:
-#ifdef CONFIG_SMP
- cpuhp_remove_state(online_state);
-err_state_prep:
- cpuhp_remove_state(CPUHP_PROFILE_PREPARE);
-#endif
+ if (entry)
+ proc_set_size(entry, (1 + prof_len) * sizeof(atomic_t));
return err;
}
subsys_initcall(create_proc_profile);
diff --git a/kernel/resource_kunit.c b/kernel/resource_kunit.c
index 58ab9f914602..0e509985a44a 100644
--- a/kernel/resource_kunit.c
+++ b/kernel/resource_kunit.c
@@ -149,4 +149,5 @@ static struct kunit_suite resource_test_suite = {
};
kunit_test_suite(resource_test_suite);
+MODULE_DESCRIPTION("I/O Port & Memory Resource manager unit tests");
MODULE_LICENSE("GPL");
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ae5ef3013a55..a9f655025607 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1806,7 +1806,7 @@ static void uclamp_sync_util_min_rt_default(void)
uclamp_update_util_min_rt_default(p);
}
-static int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
+static int sysctl_sched_uclamp_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
bool update_root_tg = false;
@@ -4392,7 +4392,7 @@ static void reset_memory_tiering(void)
}
}
-static int sysctl_numa_balancing(struct ctl_table *table, int write,
+static int sysctl_numa_balancing(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table t;
@@ -4461,7 +4461,7 @@ out:
__setup("schedstats=", setup_schedstats);
#ifdef CONFIG_PROC_SYSCTL
-static int sysctl_schedstats(struct ctl_table *table, int write, void *buffer,
+static int sysctl_schedstats(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
struct ctl_table t;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 63e49c8ffc4d..310523c1b9e3 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -26,9 +26,9 @@ int sysctl_sched_rt_runtime = 950000;
#ifdef CONFIG_SYSCTL
static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC * RR_TIMESLICE) / HZ;
-static int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
+static int sched_rt_handler(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos);
-static int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
+static int sched_rr_handler(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos);
static struct ctl_table sched_rt_sysctls[] = {
{
@@ -2952,7 +2952,7 @@ static void sched_rt_do_global(void)
raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
}
-static int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
+static int sched_rt_handler(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
int old_period, old_runtime;
@@ -2991,7 +2991,7 @@ undo:
return ret;
}
-static int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
+static int sched_rr_handler(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
int ret;
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 784a0be81e84..76504b776d03 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -285,7 +285,7 @@ void rebuild_sched_domains_energy(void)
}
#ifdef CONFIG_PROC_SYSCTL
-static int sched_energy_aware_handler(struct ctl_table *table, int write,
+static int sched_energy_aware_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret, state;
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index dc51e521bc1d..385d48293a5f 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -2431,7 +2431,7 @@ static void audit_actions_logged(u32 actions_logged, u32 old_actions_logged,
return audit_seccomp_actions_logged(new, old, !ret);
}
-static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
+static int seccomp_actions_logged_handler(const struct ctl_table *ro_table, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index 0f9712584913..39fd620a7db6 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -21,7 +21,7 @@
static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass);
#ifdef CONFIG_SYSCTL
-static int stack_erasing_sysctl(struct ctl_table *table, int write,
+static int stack_erasing_sysctl(const struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret = 0;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e4421594fc25..79e6cb1d5c48 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -256,7 +256,7 @@ static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
*
* Returns 0 on success.
*/
-int proc_dostring(struct ctl_table *table, int write,
+int proc_dostring(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
if (write)
@@ -702,7 +702,7 @@ int do_proc_douintvec(const struct ctl_table *table, int write,
*
* Returns 0 on success.
*/
-int proc_dobool(struct ctl_table *table, int write, void *buffer,
+int proc_dobool(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
struct ctl_table tmp;
@@ -739,7 +739,7 @@ int proc_dobool(struct ctl_table *table, int write, void *buffer,
*
* Returns 0 on success.
*/
-int proc_dointvec(struct ctl_table *table, int write, void *buffer,
+int proc_dointvec(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
@@ -758,7 +758,7 @@ int proc_dointvec(struct ctl_table *table, int write, void *buffer,
*
* Returns 0 on success.
*/
-int proc_douintvec(struct ctl_table *table, int write, void *buffer,
+int proc_douintvec(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
return do_proc_douintvec(table, write, buffer, lenp, ppos,
@@ -769,7 +769,7 @@ int proc_douintvec(struct ctl_table *table, int write, void *buffer,
* Taint values can only be increased
* This means we can safely use a temporary.
*/
-static int proc_taint(struct ctl_table *table, int write,
+static int proc_taint(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table t;
@@ -864,7 +864,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
*
* Returns 0 on success or -EINVAL on write when the range check fails.
*/
-int proc_dointvec_minmax(struct ctl_table *table, int write,
+int proc_dointvec_minmax(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct do_proc_dointvec_minmax_conv_param param = {
@@ -933,7 +933,7 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
*
* Returns 0 on success or -ERANGE on write when the range check fails.
*/
-int proc_douintvec_minmax(struct ctl_table *table, int write,
+int proc_douintvec_minmax(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct do_proc_douintvec_minmax_conv_param param = {
@@ -961,7 +961,7 @@ int proc_douintvec_minmax(struct ctl_table *table, int write,
*
* Returns 0 on success or an error on write when the range check fails.
*/
-int proc_dou8vec_minmax(struct ctl_table *table, int write,
+int proc_dou8vec_minmax(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table tmp;
@@ -998,7 +998,7 @@ int proc_dou8vec_minmax(struct ctl_table *table, int write,
EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
#ifdef CONFIG_MAGIC_SYSRQ
-static int sysrq_sysctl_handler(struct ctl_table *table, int write,
+static int sysrq_sysctl_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int tmp, ret;
@@ -1115,7 +1115,7 @@ static int do_proc_doulongvec_minmax(const struct ctl_table *table, int write,
*
* Returns 0 on success.
*/
-int proc_doulongvec_minmax(struct ctl_table *table, int write,
+int proc_doulongvec_minmax(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
@@ -1138,7 +1138,7 @@ int proc_doulongvec_minmax(struct ctl_table *table, int write,
*
* Returns 0 on success.
*/
-int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
+int proc_doulongvec_ms_jiffies_minmax(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return do_proc_doulongvec_minmax(table, write, buffer,
@@ -1259,14 +1259,14 @@ static int do_proc_dointvec_ms_jiffies_minmax_conv(bool *negp, unsigned long *lv
*
* Returns 0 on success.
*/
-int proc_dointvec_jiffies(struct ctl_table *table, int write,
+int proc_dointvec_jiffies(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return do_proc_dointvec(table,write,buffer,lenp,ppos,
do_proc_dointvec_jiffies_conv,NULL);
}
-int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
+int proc_dointvec_ms_jiffies_minmax(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct do_proc_dointvec_minmax_conv_param param = {
@@ -1292,7 +1292,7 @@ int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
*
* Returns 0 on success.
*/
-int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
+int proc_dointvec_userhz_jiffies(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return do_proc_dointvec(table, write, buffer, lenp, ppos,
@@ -1315,14 +1315,14 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
*
* Returns 0 on success.
*/
-int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer,
+int proc_dointvec_ms_jiffies(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
return do_proc_dointvec(table, write, buffer, lenp, ppos,
do_proc_dointvec_ms_jiffies_conv, NULL);
}
-static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer,
+static int proc_do_cad_pid(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
struct pid *new_pid;
@@ -1361,7 +1361,7 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer,
*
* Returns 0 on success.
*/
-int proc_do_large_bitmap(struct ctl_table *table, int write,
+int proc_do_large_bitmap(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int err = 0;
@@ -1493,85 +1493,85 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
#else /* CONFIG_PROC_SYSCTL */
-int proc_dostring(struct ctl_table *table, int write,
+int proc_dostring(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_dobool(struct ctl_table *table, int write,
+int proc_dobool(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_dointvec(struct ctl_table *table, int write,
+int proc_dointvec(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_douintvec(struct ctl_table *table, int write,
+int proc_douintvec(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_dointvec_minmax(struct ctl_table *table, int write,
+int proc_dointvec_minmax(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_douintvec_minmax(struct ctl_table *table, int write,
+int proc_douintvec_minmax(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_dou8vec_minmax(struct ctl_table *table, int write,
+int proc_dou8vec_minmax(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_dointvec_jiffies(struct ctl_table *table, int write,
+int proc_dointvec_jiffies(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
+int proc_dointvec_ms_jiffies_minmax(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
+int proc_dointvec_userhz_jiffies(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
+int proc_dointvec_ms_jiffies(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_doulongvec_minmax(struct ctl_table *table, int write,
+int proc_doulongvec_minmax(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
+int proc_doulongvec_ms_jiffies_minmax(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
-int proc_do_large_bitmap(struct ctl_table *table, int write,
+int proc_do_large_bitmap(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
@@ -1580,7 +1580,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
#endif /* CONFIG_PROC_SYSCTL */
#if defined(CONFIG_SYSCTL)
-int proc_do_static_key(struct ctl_table *table, int write,
+int proc_do_static_key(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct static_key *key = (struct static_key *)table->data;
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 5c2daa7ad3f9..5d14d639ac71 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -6,12 +6,14 @@
static struct callback_head work_exited; /* all we need is ->next == NULL */
+#ifdef CONFIG_IRQ_WORK
static void task_work_set_notify_irq(struct irq_work *entry)
{
test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
}
static DEFINE_PER_CPU(struct irq_work, irq_work_NMI_resume) =
IRQ_WORK_INIT_HARD(task_work_set_notify_irq);
+#endif
/**
* task_work_add - ask the @task to execute @work->func()
@@ -57,6 +59,8 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
if (notify == TWA_NMI_CURRENT) {
if (WARN_ON_ONCE(task != current))
return -EINVAL;
+ if (!IS_ENABLED(CONFIG_IRQ_WORK))
+ return -EINVAL;
} else {
/* record the work call stack in order to print it in KASAN reports */
kasan_record_aux_stack(work);
@@ -81,9 +85,11 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
case TWA_SIGNAL_NO_IPI:
__set_notify_signal(task);
break;
+#ifdef CONFIG_IRQ_WORK
case TWA_NMI_CURRENT:
irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume));
break;
+#endif
default:
WARN_ON_ONCE(1);
break;
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 48288dd4a102..64b0d8a0aa0f 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -289,7 +289,7 @@ static void timers_update_migration(void)
}
#ifdef CONFIG_SYSCTL
-static int timer_migration_handler(struct ctl_table *table, int write,
+static int timer_migration_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c
index 84413114db5c..8d57f7686bb0 100644
--- a/kernel/time/timer_migration.c
+++ b/kernel/time/timer_migration.c
@@ -475,9 +475,54 @@ static bool tmigr_check_lonely(struct tmigr_group *group)
return bitmap_weight(&active, BIT_CNT) <= 1;
}
-typedef bool (*up_f)(struct tmigr_group *, struct tmigr_group *, void *);
+/**
+ * struct tmigr_walk - data required for walking the hierarchy
+ * @nextexp: Next CPU event expiry information which is handed into
+ * the timer migration code by the timer code
+ * (get_next_timer_interrupt())
+ * @firstexp: Contains the first event expiry information when
+ * hierarchy is completely idle. When CPU itself was the
+ * last going idle, information makes sure, that CPU will
+ * be back in time. When using this value in the remote
+ * expiry case, firstexp is stored in the per CPU tmigr_cpu
+ * struct of CPU which expires remote timers. It is updated
+ * in top level group only. Be aware, there could occur a
+ * new top level of the hierarchy between the 'top level
+ * call' in tmigr_update_events() and the check for the
+ * parent group in walk_groups(). Then @firstexp might
+ * contain a value != KTIME_MAX even if it was not the
+ * final top level. This is not a problem, as the worst
+ * outcome is a CPU which might wake up a little early.
+ * @evt: Pointer to tmigr_event which needs to be queued (of idle
+ * child group)
+ * @childmask: groupmask of child group
+ * @remote: Is set, when the new timer path is executed in
+ * tmigr_handle_remote_cpu()
+ * @basej: timer base in jiffies
+ * @now: timer base monotonic
+ * @check: is set if there is the need to handle remote timers;
+ * required in tmigr_requires_handle_remote() only
+ * @tmc_active: this flag indicates, whether the CPU which triggers
+ * the hierarchy walk is !idle in the timer migration
+ * hierarchy. When the CPU is idle and the whole hierarchy is
+ * idle, only the first event of the top level has to be
+ * considered.
+ */
+struct tmigr_walk {
+ u64 nextexp;
+ u64 firstexp;
+ struct tmigr_event *evt;
+ u8 childmask;
+ bool remote;
+ unsigned long basej;
+ u64 now;
+ bool check;
+ bool tmc_active;
+};
+
+typedef bool (*up_f)(struct tmigr_group *, struct tmigr_group *, struct tmigr_walk *);
-static void __walk_groups(up_f up, void *data,
+static void __walk_groups(up_f up, struct tmigr_walk *data,
struct tmigr_cpu *tmc)
{
struct tmigr_group *child = NULL, *group = tmc->tmgroup;
@@ -490,64 +535,17 @@ static void __walk_groups(up_f up, void *data,
child = group;
group = group->parent;
+ data->childmask = child->groupmask;
} while (group);
}
-static void walk_groups(up_f up, void *data, struct tmigr_cpu *tmc)
+static void walk_groups(up_f up, struct tmigr_walk *data, struct tmigr_cpu *tmc)
{
lockdep_assert_held(&tmc->lock);
__walk_groups(up, data, tmc);
}
-/**
- * struct tmigr_walk - data required for walking the hierarchy
- * @nextexp: Next CPU event expiry information which is handed into
- * the timer migration code by the timer code
- * (get_next_timer_interrupt())
- * @firstexp: Contains the first event expiry information when last
- * active CPU of hierarchy is on the way to idle to make
- * sure CPU will be back in time.
- * @evt: Pointer to tmigr_event which needs to be queued (of idle
- * child group)
- * @childmask: childmask of child group
- * @remote: Is set, when the new timer path is executed in
- * tmigr_handle_remote_cpu()
- */
-struct tmigr_walk {
- u64 nextexp;
- u64 firstexp;
- struct tmigr_event *evt;
- u8 childmask;
- bool remote;
-};
-
-/**
- * struct tmigr_remote_data - data required for remote expiry hierarchy walk
- * @basej: timer base in jiffies
- * @now: timer base monotonic
- * @firstexp: returns expiry of the first timer in the idle timer
- * migration hierarchy to make sure the timer is handled in
- * time; it is stored in the per CPU tmigr_cpu struct of
- * CPU which expires remote timers
- * @childmask: childmask of child group
- * @check: is set if there is the need to handle remote timers;
- * required in tmigr_requires_handle_remote() only
- * @tmc_active: this flag indicates, whether the CPU which triggers
- * the hierarchy walk is !idle in the timer migration
- * hierarchy. When the CPU is idle and the whole hierarchy is
- * idle, only the first event of the top level has to be
- * considered.
- */
-struct tmigr_remote_data {
- unsigned long basej;
- u64 now;
- u64 firstexp;
- u8 childmask;
- bool check;
- bool tmc_active;
-};
-
/*
* Returns the next event of the timerqueue @group->events
*
@@ -618,10 +616,9 @@ static u64 tmigr_next_groupevt_expires(struct tmigr_group *group)
static bool tmigr_active_up(struct tmigr_group *group,
struct tmigr_group *child,
- void *ptr)
+ struct tmigr_walk *data)
{
union tmigr_state curstate, newstate;
- struct tmigr_walk *data = ptr;
bool walk_done;
u8 childmask;
@@ -649,8 +646,7 @@ static bool tmigr_active_up(struct tmigr_group *group,
} while (!atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state));
- if ((walk_done == false) && group->parent)
- data->childmask = group->childmask;
+ trace_tmigr_group_set_cpu_active(group, newstate, childmask);
/*
* The group is active (again). The group event might be still queued
@@ -666,8 +662,6 @@ static bool tmigr_active_up(struct tmigr_group *group,
*/
group->groupevt.ignore = true;
- trace_tmigr_group_set_cpu_active(group, newstate, childmask);
-
return walk_done;
}
@@ -675,7 +669,7 @@ static void __tmigr_cpu_activate(struct tmigr_cpu *tmc)
{
struct tmigr_walk data;
- data.childmask = tmc->childmask;
+ data.childmask = tmc->groupmask;
trace_tmigr_cpu_active(tmc);
@@ -860,10 +854,8 @@ unlock:
static bool tmigr_new_timer_up(struct tmigr_group *group,
struct tmigr_group *child,
- void *ptr)
+ struct tmigr_walk *data)
{
- struct tmigr_walk *data = ptr;
-
return tmigr_update_events(group, child, data);
}
@@ -995,9 +987,8 @@ unlock:
static bool tmigr_handle_remote_up(struct tmigr_group *group,
struct tmigr_group *child,
- void *ptr)
+ struct tmigr_walk *data)
{
- struct tmigr_remote_data *data = ptr;
struct tmigr_event *evt;
unsigned long jif;
u8 childmask;
@@ -1034,12 +1025,10 @@ again:
}
/*
- * Update of childmask for the next level and keep track of the expiry
- * of the first event that needs to be handled (group->next_expiry was
- * updated by tmigr_next_expired_groupevt(), next was set by
- * tmigr_handle_remote_cpu()).
+ * Keep track of the expiry of the first event that needs to be handled
+ * (group->next_expiry was updated by tmigr_next_expired_groupevt(),
+ * next was set by tmigr_handle_remote_cpu()).
*/
- data->childmask = group->childmask;
data->firstexp = group->next_expiry;
raw_spin_unlock_irq(&group->lock);
@@ -1055,12 +1044,12 @@ again:
void tmigr_handle_remote(void)
{
struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
- struct tmigr_remote_data data;
+ struct tmigr_walk data;
if (tmigr_is_not_available(tmc))
return;
- data.childmask = tmc->childmask;
+ data.childmask = tmc->groupmask;
data.firstexp = KTIME_MAX;
/*
@@ -1068,7 +1057,7 @@ void tmigr_handle_remote(void)
* in tmigr_handle_remote_up() anyway. Keep this check to speed up the
* return when nothing has to be done.
*/
- if (!tmigr_check_migrator(tmc->tmgroup, tmc->childmask)) {
+ if (!tmigr_check_migrator(tmc->tmgroup, tmc->groupmask)) {
/*
* If this CPU was an idle migrator, make sure to clear its wakeup
* value so it won't chase timers that have already expired elsewhere.
@@ -1097,9 +1086,8 @@ void tmigr_handle_remote(void)
static bool tmigr_requires_handle_remote_up(struct tmigr_group *group,
struct tmigr_group *child,
- void *ptr)
+ struct tmigr_walk *data)
{
- struct tmigr_remote_data *data = ptr;
u8 childmask;
childmask = data->childmask;
@@ -1118,7 +1106,7 @@ static bool tmigr_requires_handle_remote_up(struct tmigr_group *group,
* group before reading the next_expiry value.
*/
if (group->parent && !data->tmc_active)
- goto out;
+ return false;
/*
* The lock is required on 32bit architectures to read the variable
@@ -1143,9 +1131,6 @@ static bool tmigr_requires_handle_remote_up(struct tmigr_group *group,
raw_spin_unlock(&group->lock);
}
-out:
- /* Update of childmask for the next level */
- data->childmask = group->childmask;
return false;
}
@@ -1157,7 +1142,7 @@ out:
bool tmigr_requires_handle_remote(void)
{
struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
- struct tmigr_remote_data data;
+ struct tmigr_walk data;
unsigned long jif;
bool ret = false;
@@ -1165,7 +1150,7 @@ bool tmigr_requires_handle_remote(void)
return ret;
data.now = get_jiffies_update(&jif);
- data.childmask = tmc->childmask;
+ data.childmask = tmc->groupmask;
data.firstexp = KTIME_MAX;
data.tmc_active = !tmc->idle;
data.check = false;
@@ -1230,14 +1215,13 @@ u64 tmigr_cpu_new_timer(u64 nextexp)
if (nextexp != tmc->cpuevt.nextevt.expires ||
tmc->cpuevt.ignore) {
ret = tmigr_new_timer(tmc, nextexp);
+ /*
+ * Make sure the reevaluation of timers in idle path
+ * will not miss an event.
+ */
+ WRITE_ONCE(tmc->wakeup, ret);
}
}
- /*
- * Make sure the reevaluation of timers in idle path will not miss an
- * event.
- */
- WRITE_ONCE(tmc->wakeup, ret);
-
trace_tmigr_cpu_new_timer_idle(tmc, nextexp);
raw_spin_unlock(&tmc->lock);
return ret;
@@ -1245,10 +1229,9 @@ u64 tmigr_cpu_new_timer(u64 nextexp)
static bool tmigr_inactive_up(struct tmigr_group *group,
struct tmigr_group *child,
- void *ptr)
+ struct tmigr_walk *data)
{
union tmigr_state curstate, newstate, childstate;
- struct tmigr_walk *data = ptr;
bool walk_done;
u8 childmask;
@@ -1299,9 +1282,10 @@ static bool tmigr_inactive_up(struct tmigr_group *group,
WARN_ON_ONCE((newstate.migrator != TMIGR_NONE) && !(newstate.active));
- if (atomic_try_cmpxchg(&group->migr_state, &curstate.state,
- newstate.state))
+ if (atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state)) {
+ trace_tmigr_group_set_cpu_inactive(group, newstate, childmask);
break;
+ }
/*
* The memory barrier is paired with the cmpxchg() in
@@ -1317,22 +1301,6 @@ static bool tmigr_inactive_up(struct tmigr_group *group,
/* Event Handling */
tmigr_update_events(group, child, data);
- if (group->parent && (walk_done == false))
- data->childmask = group->childmask;
-
- /*
- * data->firstexp was set by tmigr_update_events() and contains the
- * expiry of the first global event which needs to be handled. It
- * differs from KTIME_MAX if:
- * - group is the top level group and
- * - group is idle (which means CPU was the last active CPU in the
- * hierarchy) and
- * - there is a pending event in the hierarchy
- */
- WARN_ON_ONCE(data->firstexp != KTIME_MAX && group->parent);
-
- trace_tmigr_group_set_cpu_inactive(group, newstate, childmask);
-
return walk_done;
}
@@ -1341,7 +1309,7 @@ static u64 __tmigr_cpu_deactivate(struct tmigr_cpu *tmc, u64 nextexp)
struct tmigr_walk data = { .nextexp = nextexp,
.firstexp = KTIME_MAX,
.evt = &tmc->cpuevt,
- .childmask = tmc->childmask };
+ .childmask = tmc->groupmask };
/*
* If nextexp is KTIME_MAX, the CPU event will be ignored because the
@@ -1400,7 +1368,7 @@ u64 tmigr_cpu_deactivate(u64 nextexp)
* the only one in the level 0 group; and if it is the
* only one in level 0 group, but there are more than a
* single group active on the way to top level)
- * * nextevt - when CPU is offline and has to handle timer on his own
+ * * nextevt - when CPU is offline and has to handle timer on its own
* or when on the way to top in every group only a single
* child is active but @nextevt is before the lowest
* next_expiry encountered while walking up to top level.
@@ -1419,7 +1387,7 @@ u64 tmigr_quick_check(u64 nextevt)
if (WARN_ON_ONCE(tmc->idle))
return nextevt;
- if (!tmigr_check_migrator_and_lonely(tmc->tmgroup, tmc->childmask))
+ if (!tmigr_check_migrator_and_lonely(tmc->tmgroup, tmc->groupmask))
return KTIME_MAX;
do {
@@ -1442,6 +1410,66 @@ u64 tmigr_quick_check(u64 nextevt)
return KTIME_MAX;
}
+/*
+ * tmigr_trigger_active() - trigger a CPU to become active again
+ *
+ * This function is executed on a CPU which is part of cpu_online_mask, when the
+ * last active CPU in the hierarchy is offlining. With this, it is ensured that
+ * the other CPU is active and takes over the migrator duty.
+ */
+static long tmigr_trigger_active(void *unused)
+{
+ struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
+
+ WARN_ON_ONCE(!tmc->online || tmc->idle);
+
+ return 0;
+}
+
+static int tmigr_cpu_offline(unsigned int cpu)
+{
+ struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
+ int migrator;
+ u64 firstexp;
+
+ raw_spin_lock_irq(&tmc->lock);
+ tmc->online = false;
+ WRITE_ONCE(tmc->wakeup, KTIME_MAX);
+
+ /*
+ * CPU has to handle the local events on his own, when on the way to
+ * offline; Therefore nextevt value is set to KTIME_MAX
+ */
+ firstexp = __tmigr_cpu_deactivate(tmc, KTIME_MAX);
+ trace_tmigr_cpu_offline(tmc);
+ raw_spin_unlock_irq(&tmc->lock);
+
+ if (firstexp != KTIME_MAX) {
+ migrator = cpumask_any_but(cpu_online_mask, cpu);
+ work_on_cpu(migrator, tmigr_trigger_active, NULL);
+ }
+
+ return 0;
+}
+
+static int tmigr_cpu_online(unsigned int cpu)
+{
+ struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
+
+ /* Check whether CPU data was successfully initialized */
+ if (WARN_ON_ONCE(!tmc->tmgroup))
+ return -EINVAL;
+
+ raw_spin_lock_irq(&tmc->lock);
+ trace_tmigr_cpu_online(tmc);
+ tmc->idle = timer_base_is_idle();
+ if (!tmc->idle)
+ __tmigr_cpu_activate(tmc);
+ tmc->online = true;
+ raw_spin_unlock_irq(&tmc->lock);
+ return 0;
+}
+
static void tmigr_init_group(struct tmigr_group *group, unsigned int lvl,
int node)
{
@@ -1514,21 +1542,25 @@ static struct tmigr_group *tmigr_get_group(unsigned int cpu, int node,
}
static void tmigr_connect_child_parent(struct tmigr_group *child,
- struct tmigr_group *parent)
+ struct tmigr_group *parent,
+ bool activate)
{
- union tmigr_state childstate;
+ struct tmigr_walk data;
raw_spin_lock_irq(&child->lock);
raw_spin_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING);
child->parent = parent;
- child->childmask = BIT(parent->num_children++);
+ child->groupmask = BIT(parent->num_children++);
raw_spin_unlock(&parent->lock);
raw_spin_unlock_irq(&child->lock);
trace_tmigr_connect_child_parent(child);
+ if (!activate)
+ return;
+
/*
* To prevent inconsistent states, active children need to be active in
* the new parent as well. Inactive children are already marked inactive
@@ -1544,21 +1576,24 @@ static void tmigr_connect_child_parent(struct tmigr_group *child,
* child to the new parent. So tmigr_connect_child_parent() is
* executed with the formerly top level group (child) and the newly
* created group (parent).
+ *
+ * * It is ensured that the child is active, as this setup path is
+ * executed in hotplug prepare callback. This is exectued by an
+ * already connected and !idle CPU. Even if all other CPUs go idle,
+ * the CPU executing the setup will be responsible up to current top
+ * level group. And the next time it goes inactive, it will release
+ * the new childmask and parent to subsequent walkers through this
+ * @child. Therefore propagate active state unconditionally.
*/
- childstate.state = atomic_read(&child->migr_state);
- if (childstate.migrator != TMIGR_NONE) {
- struct tmigr_walk data;
-
- data.childmask = child->childmask;
+ data.childmask = child->groupmask;
- /*
- * There is only one new level per time. When connecting the
- * child and the parent and set the child active when the parent
- * is inactive, the parent needs to be the uppermost
- * level. Otherwise there went something wrong!
- */
- WARN_ON(!tmigr_active_up(parent, child, &data) && parent->parent);
- }
+ /*
+ * There is only one new level per time (which is protected by
+ * tmigr_mutex). When connecting the child and the parent and set the
+ * child active when the parent is inactive, the parent needs to be the
+ * uppermost level. Otherwise there went something wrong!
+ */
+ WARN_ON(!tmigr_active_up(parent, child, &data) && parent->parent);
}
static int tmigr_setup_groups(unsigned int cpu, unsigned int node)
@@ -1611,12 +1646,12 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node)
* Update tmc -> group / child -> group connection
*/
if (i == 0) {
- struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
+ struct tmigr_cpu *tmc = per_cpu_ptr(&tmigr_cpu, cpu);
raw_spin_lock_irq(&group->lock);
tmc->tmgroup = group;
- tmc->childmask = BIT(group->num_children++);
+ tmc->groupmask = BIT(group->num_children++);
raw_spin_unlock_irq(&group->lock);
@@ -1626,7 +1661,8 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node)
continue;
} else {
child = stack[i - 1];
- tmigr_connect_child_parent(child, group);
+ /* Will be activated at online time */
+ tmigr_connect_child_parent(child, group, false);
}
/* check if uppermost level was newly created */
@@ -1637,12 +1673,21 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node)
lvllist = &tmigr_level_list[top];
if (group->num_children == 1 && list_is_singular(lvllist)) {
+ /*
+ * The target CPU must never do the prepare work, except
+ * on early boot when the boot CPU is the target. Otherwise
+ * it may spuriously activate the old top level group inside
+ * the new one (nevertheless whether old top level group is
+ * active or not) and/or release an uninitialized childmask.
+ */
+ WARN_ON_ONCE(cpu == raw_smp_processor_id());
+
lvllist = &tmigr_level_list[top - 1];
list_for_each_entry(child, lvllist, list) {
if (child->parent)
continue;
- tmigr_connect_child_parent(child, group);
+ tmigr_connect_child_parent(child, group, true);
}
}
}
@@ -1664,80 +1709,31 @@ static int tmigr_add_cpu(unsigned int cpu)
return ret;
}
-static int tmigr_cpu_online(unsigned int cpu)
-{
- struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
- int ret;
-
- /* First online attempt? Initialize CPU data */
- if (!tmc->tmgroup) {
- raw_spin_lock_init(&tmc->lock);
-
- ret = tmigr_add_cpu(cpu);
- if (ret < 0)
- return ret;
-
- if (tmc->childmask == 0)
- return -EINVAL;
-
- timerqueue_init(&tmc->cpuevt.nextevt);
- tmc->cpuevt.nextevt.expires = KTIME_MAX;
- tmc->cpuevt.ignore = true;
- tmc->cpuevt.cpu = cpu;
-
- tmc->remote = false;
- WRITE_ONCE(tmc->wakeup, KTIME_MAX);
- }
- raw_spin_lock_irq(&tmc->lock);
- trace_tmigr_cpu_online(tmc);
- tmc->idle = timer_base_is_idle();
- if (!tmc->idle)
- __tmigr_cpu_activate(tmc);
- tmc->online = true;
- raw_spin_unlock_irq(&tmc->lock);
- return 0;
-}
-
-/*
- * tmigr_trigger_active() - trigger a CPU to become active again
- *
- * This function is executed on a CPU which is part of cpu_online_mask, when the
- * last active CPU in the hierarchy is offlining. With this, it is ensured that
- * the other CPU is active and takes over the migrator duty.
- */
-static long tmigr_trigger_active(void *unused)
+static int tmigr_cpu_prepare(unsigned int cpu)
{
- struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
+ struct tmigr_cpu *tmc = per_cpu_ptr(&tmigr_cpu, cpu);
+ int ret = 0;
- WARN_ON_ONCE(!tmc->online || tmc->idle);
-
- return 0;
-}
-
-static int tmigr_cpu_offline(unsigned int cpu)
-{
- struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
- int migrator;
- u64 firstexp;
+ /* Not first online attempt? */
+ if (tmc->tmgroup)
+ return ret;
- raw_spin_lock_irq(&tmc->lock);
- tmc->online = false;
+ raw_spin_lock_init(&tmc->lock);
+ timerqueue_init(&tmc->cpuevt.nextevt);
+ tmc->cpuevt.nextevt.expires = KTIME_MAX;
+ tmc->cpuevt.ignore = true;
+ tmc->cpuevt.cpu = cpu;
+ tmc->remote = false;
WRITE_ONCE(tmc->wakeup, KTIME_MAX);
- /*
- * CPU has to handle the local events on his own, when on the way to
- * offline; Therefore nextevt value is set to KTIME_MAX
- */
- firstexp = __tmigr_cpu_deactivate(tmc, KTIME_MAX);
- trace_tmigr_cpu_offline(tmc);
- raw_spin_unlock_irq(&tmc->lock);
+ ret = tmigr_add_cpu(cpu);
+ if (ret < 0)
+ return ret;
- if (firstexp != KTIME_MAX) {
- migrator = cpumask_any_but(cpu_online_mask, cpu);
- work_on_cpu(migrator, tmigr_trigger_active, NULL);
- }
+ if (tmc->groupmask == 0)
+ return -EINVAL;
- return 0;
+ return ret;
}
static int __init tmigr_init(void)
@@ -1796,6 +1792,11 @@ static int __init tmigr_init(void)
tmigr_hierarchy_levels, TMIGR_CHILDREN_PER_GROUP,
tmigr_crossnode_level);
+ ret = cpuhp_setup_state(CPUHP_TMIGR_PREPARE, "tmigr:prepare",
+ tmigr_cpu_prepare, NULL);
+ if (ret)
+ goto err;
+
ret = cpuhp_setup_state(CPUHP_AP_TMIGR_ONLINE, "tmigr:online",
tmigr_cpu_online, tmigr_cpu_offline);
if (ret)
@@ -1807,4 +1808,4 @@ err:
pr_err("Timer migration setup failed\n");
return ret;
}
-late_initcall(tmigr_init);
+early_initcall(tmigr_init);
diff --git a/kernel/time/timer_migration.h b/kernel/time/timer_migration.h
index 6c37d94a37d9..154accc7a543 100644
--- a/kernel/time/timer_migration.h
+++ b/kernel/time/timer_migration.h
@@ -22,7 +22,17 @@ struct tmigr_event {
* struct tmigr_group - timer migration hierarchy group
* @lock: Lock protecting the event information and group hierarchy
* information during setup
- * @parent: Pointer to the parent group
+ * @parent: Pointer to the parent group. Pointer is updated when a
+ * new hierarchy level is added because of a CPU coming
+ * online the first time. Once it is set, the pointer will
+ * not be removed or updated. When accessing parent pointer
+ * lock less to decide whether to abort a propagation or
+ * not, it is not a problem. The worst outcome is an
+ * unnecessary/early CPU wake up. But do not access parent
+ * pointer several times in the same 'action' (like
+ * activation, deactivation, check for remote expiry,...)
+ * without holding the lock as it is not ensured that value
+ * will not change.
* @groupevt: Next event of the group which is only used when the
* group is !active. The group event is then queued into
* the parent timer queue.
@@ -41,9 +51,8 @@ struct tmigr_event {
* @num_children: Counter of group children to make sure the group is only
* filled with TMIGR_CHILDREN_PER_GROUP; Required for setup
* only
- * @childmask: childmask of the group in the parent group; is set
- * during setup and will never change; can be read
- * lockless
+ * @groupmask: mask of the group in the parent group; is set during
+ * setup and will never change; can be read lockless
* @list: List head that is added to the per level
* tmigr_level_list; is required during setup when a
* new group needs to be connected to the existing
@@ -59,7 +68,7 @@ struct tmigr_group {
unsigned int level;
int numa_node;
unsigned int num_children;
- u8 childmask;
+ u8 groupmask;
struct list_head list;
};
@@ -79,7 +88,7 @@ struct tmigr_group {
* hierarchy
* @remote: Is set when timers of the CPU are expired remotely
* @tmgroup: Pointer to the parent group
- * @childmask: childmask of tmigr_cpu in the parent group
+ * @groupmask: mask of tmigr_cpu in the parent group
* @wakeup: Stores the first timer when the timer migration
* hierarchy is completely idle and remote expiry was done;
* is returned to timer code in the idle path and is only
@@ -92,7 +101,7 @@ struct tmigr_cpu {
bool idle;
bool remote;
struct tmigr_group *tmgroup;
- u8 childmask;
+ u8 groupmask;
u64 wakeup;
struct tmigr_event cpuevt;
};
@@ -108,8 +117,8 @@ union tmigr_state {
u32 state;
/**
* struct - split state of tmigr_group
- * @active: Contains each childmask bit of the active children
- * @migrator: Contains childmask of the child which is migrator
+ * @active: Contains each mask bit of the active children
+ * @migrator: Contains mask of the child which is migrator
* @seq: Sequence counter needs to be increased when an update
* to the tmigr_state is done. It prevents a race when
* updates in the child groups are propagated in changed
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index e5d6a4ab433b..4c28dd177ca6 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -7920,6 +7920,7 @@ out:
void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs)
{
+ kmsan_unpoison_memory(fregs, sizeof(*fregs));
__ftrace_ops_list_func(ip, parent_ip, NULL, fregs);
}
#else
@@ -8734,7 +8735,7 @@ static bool is_permanent_ops_registered(void)
}
static int
-ftrace_enable_sysctl(struct ctl_table *table, int write,
+ftrace_enable_sysctl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret = -ENODEV;
diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
index cb0871fbdb07..314ffc143039 100644
--- a/kernel/trace/preemptirq_delay_test.c
+++ b/kernel/trace/preemptirq_delay_test.c
@@ -34,8 +34,6 @@ MODULE_PARM_DESC(cpu_affinity, "Cpu num test is running on");
static struct completion done;
-#define MIN(x, y) ((x) < (y) ? (x) : (y))
-
static void busy_wait(ulong time)
{
u64 start, end;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 578a49ff5c32..10cd38bce2f1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2767,7 +2767,7 @@ static void output_printk(struct trace_event_buffer *fbuffer)
raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
}
-int tracepoint_printk_sysctl(struct ctl_table *table, int write,
+int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index 3a2b46847c8b..42b0d998d103 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -2885,7 +2885,7 @@ err:
return -ENODEV;
}
-static int set_max_user_events_sysctl(struct ctl_table *table, int write,
+static int set_max_user_events_sysctl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 5a48dba912ea..7f9572a37333 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -514,7 +514,7 @@ static const struct file_operations stack_trace_filter_fops = {
#endif /* CONFIG_DYNAMIC_FTRACE */
int
-stack_trace_sysctl(struct ctl_table *table, int write, void *buffer,
+stack_trace_sysctl(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
int was_enabled;
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 4252f0645b9e..16b283f9d831 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -76,7 +76,7 @@ void bacct_add_tsk(struct user_namespace *user_ns,
stats->ac_minflt = tsk->min_flt;
stats->ac_majflt = tsk->maj_flt;
- strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm));
+ strscpy_pad(stats->ac_comm, tsk->comm);
}
diff --git a/kernel/umh.c b/kernel/umh.c
index 598b3ffe1522..ff1f13a27d29 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -495,7 +495,7 @@ int call_usermodehelper(const char *path, char **argv, char **envp, int wait)
EXPORT_SYMBOL(call_usermodehelper);
#if defined(CONFIG_SYSCTL)
-static int proc_cap_handler(struct ctl_table *table, int write,
+static int proc_cap_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table t;
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index 04e4513f2985..7282f61a8650 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -30,7 +30,7 @@ static void *get_uts(const struct ctl_table *table)
* Special case of dostring for the UTS structure. This has locks
* to observe. Should this be in kernel/sys.c ????
*/
-static int proc_do_uts_string(struct ctl_table *table, int write,
+static int proc_do_uts_string(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table uts_table;
diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c
index 1d5eadd9dd61..8b4f8cc2e0ec 100644
--- a/kernel/vmcore_info.c
+++ b/kernel/vmcore_info.c
@@ -216,12 +216,8 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_SYMBOL(kallsyms_num_syms);
VMCOREINFO_SYMBOL(kallsyms_token_table);
VMCOREINFO_SYMBOL(kallsyms_token_index);
-#ifdef CONFIG_KALLSYMS_BASE_RELATIVE
VMCOREINFO_SYMBOL(kallsyms_offsets);
VMCOREINFO_SYMBOL(kallsyms_relative_base);
-#else
- VMCOREINFO_SYMBOL(kallsyms_addresses);
-#endif /* CONFIG_KALLSYMS_BASE_RELATIVE */
#endif /* CONFIG_KALLSYMS */
arch_crash_save_vmcoreinfo();
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 51915b44ac73..830a83895493 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -983,7 +983,7 @@ static void proc_watchdog_update(void)
* -------------------|----------------------------------|-------------------------------
* proc_soft_watchdog | watchdog_softlockup_user_enabled | WATCHDOG_SOFTOCKUP_ENABLED
*/
-static int proc_watchdog_common(int which, struct ctl_table *table, int write,
+static int proc_watchdog_common(int which, const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int err, old, *param = table->data;
@@ -1010,7 +1010,7 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write,
/*
* /proc/sys/kernel/watchdog
*/
-static int proc_watchdog(struct ctl_table *table, int write,
+static int proc_watchdog(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return proc_watchdog_common(WATCHDOG_HARDLOCKUP_ENABLED |
@@ -1021,7 +1021,7 @@ static int proc_watchdog(struct ctl_table *table, int write,
/*
* /proc/sys/kernel/nmi_watchdog
*/
-static int proc_nmi_watchdog(struct ctl_table *table, int write,
+static int proc_nmi_watchdog(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
if (!watchdog_hardlockup_available && write)
@@ -1034,7 +1034,7 @@ static int proc_nmi_watchdog(struct ctl_table *table, int write,
/*
* /proc/sys/kernel/soft_watchdog
*/
-static int proc_soft_watchdog(struct ctl_table *table, int write,
+static int proc_soft_watchdog(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return proc_watchdog_common(WATCHDOG_SOFTOCKUP_ENABLED,
@@ -1045,7 +1045,7 @@ static int proc_soft_watchdog(struct ctl_table *table, int write,
/*
* /proc/sys/kernel/watchdog_thresh
*/
-static int proc_watchdog_thresh(struct ctl_table *table, int write,
+static int proc_watchdog_thresh(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int err, old;
@@ -1068,7 +1068,7 @@ static int proc_watchdog_thresh(struct ctl_table *table, int write,
* user to specify a mask that will include cpus that have not yet
* been brought online, if desired.
*/
-static int proc_watchdog_cpumask(struct ctl_table *table, int write,
+static int proc_watchdog_cpumask(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int err;
diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c
index d577c4a8321e..59c1d86a73a2 100644
--- a/kernel/watchdog_perf.c
+++ b/kernel/watchdog_perf.c
@@ -75,11 +75,15 @@ static bool watchdog_check_timestamp(void)
__this_cpu_write(last_timestamp, now);
return true;
}
-#else
-static inline bool watchdog_check_timestamp(void)
+
+static void watchdog_init_timestamp(void)
{
- return true;
+ __this_cpu_write(nmi_rearmed, 0);
+ __this_cpu_write(last_timestamp, ktime_get_mono_fast_ns());
}
+#else
+static inline bool watchdog_check_timestamp(void) { return true; }
+static inline void watchdog_init_timestamp(void) { }
#endif
static struct perf_event_attr wd_hw_attr = {
@@ -161,6 +165,7 @@ void watchdog_hardlockup_enable(unsigned int cpu)
if (!atomic_fetch_inc(&watchdog_cpus))
pr_info("Enabled. Permanently consumes one hw-PMU counter.\n");
+ watchdog_init_timestamp();
perf_event_enable(this_cpu_read(watchdog_ev));
}