author     Benjamin Tissoires <bentiss@kernel.org>    2024-07-16 12:19:28 +0200
committer  Benjamin Tissoires <bentiss@kernel.org>    2024-07-16 12:19:28 +0200
commit     3c69140734a27f8b145f12fa0ae80c1fe36a02ca (patch)
tree       ad36bf7e6a5400212fbf917eceb4c7ad4df5d557 /kernel
parent     5ba28be6be8ac6cd4fa1ac67cd4da237d39917d2 (diff)
parent     8a25418ba65a5d2494b369f6178a284c449bc399 (diff)
Merge branch 'for-6.11/trivial' into for-linus
Couple of trivial fixes:
- extra semicolon (Chen Ni)
- typo (Thorsten Blum)
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/bpf/devmap.c                   |   3
-rw-r--r--   kernel/bpf/syscall.c                  |  11
-rw-r--r--   kernel/bpf/verifier.c                 |  14
-rw-r--r--   kernel/dma/map_benchmark.c            |  25
-rw-r--r--   kernel/events/core.c                  |  13
-rw-r--r--   kernel/exit.c                         |   5
-rwxr-xr-x   kernel/gen_kheaders.sh                |   9
-rw-r--r--   kernel/irq/cpuhotplug.c               |  16
-rw-r--r--   kernel/irq/irqdesc.c                  |   5
-rw-r--r--   kernel/irq/msi.c                      |   2
-rw-r--r--   kernel/locking/mutex-debug.c          |  12
-rw-r--r--   kernel/module/sysfs.c                 |  13
-rw-r--r--   kernel/power/swap.c                   |   9
-rw-r--r--   kernel/printk/Makefile                |   2
-rw-r--r--   kernel/printk/conopt.c                | 146
-rw-r--r--   kernel/printk/console_cmdline.h       |   6
-rw-r--r--   kernel/printk/printk.c                | 100
-rw-r--r--   kernel/sched/topology.c               |   6
-rw-r--r--   kernel/signal.c                       |   4
-rw-r--r--   kernel/sys.c                          |   6
-rw-r--r--   kernel/sys_ni.c                       |   1
-rw-r--r--   kernel/trace/bpf_trace.c              |  12
-rw-r--r--   kernel/trace/bpf_trace.h              |   2
-rw-r--r--   kernel/trace/preemptirq_delay_test.c  |   1
-rw-r--r--   kernel/trace/ring_buffer.c            |  25
-rw-r--r--   kernel/trace/rv/rv.c                  |   2
-rw-r--r--   kernel/trace/trace_probe.c            |   4
-rw-r--r--   kernel/trace/trace_uprobe.c           |  14
-rw-r--r--   kernel/vhost_task.c                   |  53
29 files changed, 385 insertions, 136 deletions
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 4e2cdbb5629f..7f3b34452243 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -760,9 +760,6 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, for (i = 0; i < dtab->n_buckets; i++) { head = dev_map_index_hash(dtab, i); hlist_for_each_entry_safe(dst, next, head, index_hlist) { - if (!dst) - continue; - if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex)) continue; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2222c3ff88e7..f45ed6adc092 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2998,6 +2998,7 @@ static int bpf_obj_get(const union bpf_attr *attr) void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops, struct bpf_prog *prog) { + WARN_ON(ops->dealloc && ops->dealloc_deferred); atomic64_set(&link->refcnt, 1); link->type = type; link->id = 0; @@ -3056,16 +3057,17 @@ static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu) /* bpf_link_free is guaranteed to be called from process context */ static void bpf_link_free(struct bpf_link *link) { + const struct bpf_link_ops *ops = link->ops; bool sleepable = false; bpf_link_free_id(link->id); if (link->prog) { sleepable = link->prog->sleepable; /* detach BPF program, clean up used resources */ - link->ops->release(link); + ops->release(link); bpf_prog_put(link->prog); } - if (link->ops->dealloc_deferred) { + if (ops->dealloc_deferred) { /* schedule BPF link deallocation; if underlying BPF program * is sleepable, we need to first wait for RCU tasks trace * sync, then go through "classic" RCU grace period @@ -3074,9 +3076,8 @@ static void bpf_link_free(struct bpf_link *link) call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp); else call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp); - } - if (link->ops->dealloc) - link->ops->dealloc(link); + } else if (ops->dealloc) + ops->dealloc(link); } static void bpf_link_put_deferred(struct work_struct *work) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 77da1f438bec..36ef8e96787e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8882,7 +8882,8 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id) enum bpf_attach_type eatype = env->prog->expected_attach_type; enum bpf_prog_type type = resolve_prog_type(env->prog); - if (func_id != BPF_FUNC_map_update_elem) + if (func_id != BPF_FUNC_map_update_elem && + func_id != BPF_FUNC_map_delete_elem) return false; /* It's not possible to get access to a locked struct sock in these @@ -8893,6 +8894,11 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id) if (eatype == BPF_TRACE_ITER) return true; break; + case BPF_PROG_TYPE_SOCK_OPS: + /* map_update allowed only via dedicated helpers with event type checks */ + if (func_id == BPF_FUNC_map_delete_elem) + return true; + break; case BPF_PROG_TYPE_SOCKET_FILTER: case BPF_PROG_TYPE_SCHED_CLS: case BPF_PROG_TYPE_SCHED_ACT: @@ -8988,7 +8994,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_SOCKMAP: if (func_id != BPF_FUNC_sk_redirect_map && func_id != BPF_FUNC_sock_map_update && - func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_msg_redirect_map && func_id != BPF_FUNC_sk_select_reuseport && func_id != BPF_FUNC_map_lookup_elem && @@ -8998,7 +9003,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_SOCKHASH: if (func_id != 
BPF_FUNC_sk_redirect_hash && func_id != BPF_FUNC_sock_hash_update && - func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_msg_redirect_hash && func_id != BPF_FUNC_sk_select_reuseport && func_id != BPF_FUNC_map_lookup_elem && @@ -11124,7 +11128,11 @@ BTF_ID(func, bpf_iter_css_task_new) #else BTF_ID_UNUSED #endif +#ifdef CONFIG_BPF_EVENTS BTF_ID(func, bpf_session_cookie) +#else +BTF_ID_UNUSED +#endif static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) { diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c index 02205ab53b7e..4950e0b622b1 100644 --- a/kernel/dma/map_benchmark.c +++ b/kernel/dma/map_benchmark.c @@ -101,7 +101,6 @@ static int do_map_benchmark(struct map_benchmark_data *map) struct task_struct **tsk; int threads = map->bparam.threads; int node = map->bparam.node; - const cpumask_t *cpu_mask = cpumask_of_node(node); u64 loops; int ret = 0; int i; @@ -118,11 +117,13 @@ static int do_map_benchmark(struct map_benchmark_data *map) if (IS_ERR(tsk[i])) { pr_err("create dma_map thread failed\n"); ret = PTR_ERR(tsk[i]); + while (--i >= 0) + kthread_stop(tsk[i]); goto out; } if (node != NUMA_NO_NODE) - kthread_bind_mask(tsk[i], cpu_mask); + kthread_bind_mask(tsk[i], cpumask_of_node(node)); } /* clear the old value in the previous benchmark */ @@ -139,13 +140,17 @@ static int do_map_benchmark(struct map_benchmark_data *map) msleep_interruptible(map->bparam.seconds * 1000); - /* wait for the completion of benchmark threads */ + /* wait for the completion of all started benchmark threads */ for (i = 0; i < threads; i++) { - ret = kthread_stop(tsk[i]); - if (ret) - goto out; + int kthread_ret = kthread_stop_put(tsk[i]); + + if (kthread_ret) + ret = kthread_ret; } + if (ret) + goto out; + loops = atomic64_read(&map->loops); if (likely(loops > 0)) { u64 map_variance, unmap_variance; @@ -170,8 +175,6 @@ static int do_map_benchmark(struct map_benchmark_data *map) } out: - for (i = 0; i < threads; i++) - put_task_struct(tsk[i]); put_device(map->dev); kfree(tsk); return ret; @@ -208,7 +211,8 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd, } if (map->bparam.node != NUMA_NO_NODE && - !node_possible(map->bparam.node)) { + (map->bparam.node < 0 || map->bparam.node >= MAX_NUMNODES || + !node_possible(map->bparam.node))) { pr_err("invalid numa node\n"); return -EINVAL; } @@ -252,6 +256,9 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd, * dma_mask changed by benchmark */ dma_set_mask(map->dev, old_dma_mask); + + if (ret) + return ret; break; default: return -EINVAL; diff --git a/kernel/events/core.c b/kernel/events/core.c index f0128c5ff278..8f908f077935 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5384,6 +5384,7 @@ int perf_event_release_kernel(struct perf_event *event) again: mutex_lock(&event->child_mutex); list_for_each_entry(child, &event->child_list, child_list) { + void *var = NULL; /* * Cannot change, child events are not migrated, see the @@ -5424,11 +5425,23 @@ again: * this can't be the last reference. */ put_event(event); + } else { + var = &ctx->refcount; } mutex_unlock(&event->child_mutex); mutex_unlock(&ctx->mutex); put_ctx(ctx); + + if (var) { + /* + * If perf_event_free_task() has deleted all events from the + * ctx while the child_mutex got released above, make sure to + * notify about the preceding put_ctx(). 
+ */ + smp_mb(); /* pairs with wait_var_event() */ + wake_up_var(var); + } goto again; } mutex_unlock(&event->child_mutex); diff --git a/kernel/exit.c b/kernel/exit.c index cd3aa9042f1a..f95a2c1338a8 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -413,10 +413,7 @@ static void coredump_task_exit(struct task_struct *tsk) tsk->flags |= PF_POSTCOREDUMP; core_state = tsk->signal->core_state; spin_unlock_irq(&tsk->sighand->siglock); - - /* The vhost_worker does not particpate in coredumps */ - if (core_state && - ((tsk->flags & (PF_IO_WORKER | PF_USER_WORKER)) != PF_USER_WORKER)) { + if (core_state) { struct core_thread self; self.task = current; diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index 6d443ea22bb7..383fd43ac612 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -14,7 +14,12 @@ include/ arch/$SRCARCH/include/ " -type cpio > /dev/null +if ! command -v cpio >/dev/null; then + echo >&2 "***" + echo >&2 "*** 'cpio' could not be found." + echo >&2 "***" + exit 1 +fi # Support incremental builds by skipping archive generation # if timestamps of files being archived are not changed. @@ -84,7 +89,7 @@ find $cpio_dir -type f -print0 | # Create archive and try to normalize metadata for reproducibility. tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \ - --owner=0 --group=0 --sort=name --numeric-owner \ + --owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \ -I $XZ -cf $tarfile -C $cpio_dir/ . > /dev/null echo $headers_md5 > kernel/kheaders.md5 diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index 75cadbc3c232..eb8628390156 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -70,6 +70,14 @@ static bool migrate_one_irq(struct irq_desc *desc) } /* + * Complete an eventually pending irq move cleanup. If this + * interrupt was moved in hard irq context, then the vectors need + * to be cleaned up. It can't wait until this interrupt actually + * happens and this CPU was involved. + */ + irq_force_complete_move(desc); + + /* * No move required, if: * - Interrupt is per cpu * - Interrupt is not started @@ -88,14 +96,6 @@ static bool migrate_one_irq(struct irq_desc *desc) } /* - * Complete an eventually pending irq move cleanup. If this - * interrupt was moved in hard irq context, then the vectors need - * to be cleaned up. It can't wait until this interrupt actually - * happens and this CPU was involved. - */ - irq_force_complete_move(desc); - - /* * If there is a setaffinity pending, then try to reuse the pending * mask, so the last change of the affinity does not get lost. If * there is no move pending or the pending mask does not contain diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 88ac3652fcf2..07e99c936ba5 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -160,7 +160,10 @@ static int irq_find_free_area(unsigned int from, unsigned int cnt) static unsigned int irq_find_at_or_after(unsigned int offset) { unsigned long index = offset; - struct irq_desc *desc = mt_find(&sparse_irqs, &index, nr_irqs); + struct irq_desc *desc; + + guard(rcu)(); + desc = mt_find(&sparse_irqs, &index, nr_irqs); return desc ? 
irq_desc_get_irq(desc) : nr_irqs; } diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index f90952ebc494..2024f89baea4 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -1434,6 +1434,7 @@ int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid, msi_unlock_descs(dev); return ret; } +EXPORT_SYMBOL_GPL(msi_domain_alloc_irqs_range); /** * msi_domain_alloc_irqs_all_locked - Allocate all interrupts from a MSI interrupt domain @@ -1680,6 +1681,7 @@ void msi_domain_free_irqs_range(struct device *dev, unsigned int domid, msi_domain_free_irqs_range_locked(dev, domid, first, last); msi_unlock_descs(dev); } +EXPORT_SYMBOL_GPL(msi_domain_free_irqs_all); /** * msi_domain_free_irqs_all_locked - Free all interrupts from a MSI interrupt domain diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index bc8abb8549d2..6e6f6071cfa2 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -12,6 +12,7 @@ */ #include <linux/mutex.h> #include <linux/delay.h> +#include <linux/device.h> #include <linux/export.h> #include <linux/poison.h> #include <linux/sched.h> @@ -89,6 +90,17 @@ void debug_mutex_init(struct mutex *lock, const char *name, lock->magic = lock; } +static void devm_mutex_release(void *res) +{ + mutex_destroy(res); +} + +int __devm_mutex_init(struct device *dev, struct mutex *lock) +{ + return devm_add_action_or_reset(dev, devm_mutex_release, lock); +} +EXPORT_SYMBOL_GPL(__devm_mutex_init); + /*** * mutex_destroy - mark a mutex unusable * @lock: the mutex to be destroyed diff --git a/kernel/module/sysfs.c b/kernel/module/sysfs.c index d964167c6658..26efe1305c12 100644 --- a/kernel/module/sysfs.c +++ b/kernel/module/sysfs.c @@ -146,17 +146,6 @@ struct module_notes_attrs { struct bin_attribute attrs[] __counted_by(notes); }; -static ssize_t module_notes_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t pos, size_t count) -{ - /* - * The caller checked the pos and count against our size. 
- */ - memcpy(buf, bin_attr->private + pos, count); - return count; -} - static void free_notes_attrs(struct module_notes_attrs *notes_attrs, unsigned int i) { @@ -205,7 +194,7 @@ static void add_notes_attrs(struct module *mod, const struct load_info *info) nattr->attr.mode = 0444; nattr->size = info->sechdrs[i].sh_size; nattr->private = (void *)info->sechdrs[i].sh_addr; - nattr->read = module_notes_read; + nattr->read = sysfs_bin_attr_simple_read; ++nattr; } ++loaded; diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 5bc04bfe2db1..753b8dd42a59 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -368,11 +368,7 @@ static int swsusp_swap_check(void) if (IS_ERR(hib_resume_bdev_file)) return PTR_ERR(hib_resume_bdev_file); - res = set_blocksize(file_bdev(hib_resume_bdev_file), PAGE_SIZE); - if (res < 0) - fput(hib_resume_bdev_file); - - return res; + return 0; } /** @@ -1574,7 +1570,6 @@ int swsusp_check(bool exclusive) hib_resume_bdev_file = bdev_file_open_by_dev(swsusp_resume_device, BLK_OPEN_READ, holder, NULL); if (!IS_ERR(hib_resume_bdev_file)) { - set_blocksize(file_bdev(hib_resume_bdev_file), PAGE_SIZE); clear_page(swsusp_header); error = hib_submit_io(REQ_OP_READ, swsusp_resume_block, swsusp_header, NULL); @@ -1600,7 +1595,7 @@ int swsusp_check(bool exclusive) put: if (error) - fput(hib_resume_bdev_file); + bdev_fput(hib_resume_bdev_file); else pr_debug("Image signature found, resuming\n"); } else { diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile index 39a2b61c7232..040fe7d1eda2 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y = printk.o +obj-y = printk.o conopt.o obj-$(CONFIG_PRINTK) += printk_safe.o nbcon.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o obj-$(CONFIG_PRINTK_INDEX) += index.o diff --git a/kernel/printk/conopt.c b/kernel/printk/conopt.c new file mode 100644 index 000000000000..9d507bac3657 --- /dev/null +++ b/kernel/printk/conopt.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Kernel command line console options for hardware based addressing + * + * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/ + * Author: Tony Lindgren <tony@atomide.com> + */ + +#include <linux/console.h> +#include <linux/init.h> +#include <linux/string.h> +#include <linux/types.h> + +#include <asm/errno.h> + +#include "console_cmdline.h" + +/* + * Allow longer DEVNAME:0.0 style console naming such as abcd0000.serial:0.0 + * in addition to the legacy ttyS0 style naming. + */ +#define CONSOLE_NAME_MAX 32 + +#define CONSOLE_OPT_MAX 16 +#define CONSOLE_BRL_OPT_MAX 16 + +struct console_option { + char name[CONSOLE_NAME_MAX]; + char opt[CONSOLE_OPT_MAX]; + char brl_opt[CONSOLE_BRL_OPT_MAX]; + u8 has_brl_opt:1; +}; + +/* Updated only at console_setup() time, no locking needed */ +static struct console_option conopt[MAX_CMDLINECONSOLES]; + +/** + * console_opt_save - Saves kernel command line console option for driver use + * @str: Kernel command line console name and option + * @brl_opt: Braille console options + * + * Saves a kernel command line console option for driver subsystems to use for + * adding a preferred console during init. Called from console_setup() only. + * + * Return: 0 on success, negative error code on failure. 
+ */ +int __init console_opt_save(const char *str, const char *brl_opt) +{ + struct console_option *con; + size_t namelen, optlen; + const char *opt; + int i; + + namelen = strcspn(str, ","); + if (namelen == 0 || namelen >= CONSOLE_NAME_MAX) + return -EINVAL; + + opt = str + namelen; + if (*opt == ',') + opt++; + + optlen = strlen(opt); + if (optlen >= CONSOLE_OPT_MAX) + return -EINVAL; + + for (i = 0; i < MAX_CMDLINECONSOLES; i++) { + con = &conopt[i]; + + if (con->name[0]) { + if (!strncmp(str, con->name, namelen)) + return 0; + continue; + } + + /* + * The name isn't terminated, only opt is. Empty opt is fine, + * but brl_opt can be either empty or NULL. For more info, see + * _braille_console_setup(). + */ + strscpy(con->name, str, namelen + 1); + strscpy(con->opt, opt, CONSOLE_OPT_MAX); + if (brl_opt) { + strscpy(con->brl_opt, brl_opt, CONSOLE_BRL_OPT_MAX); + con->has_brl_opt = 1; + } + + return 0; + } + + return -ENOMEM; +} + +static struct console_option *console_opt_find(const char *name) +{ + struct console_option *con; + int i; + + for (i = 0; i < MAX_CMDLINECONSOLES; i++) { + con = &conopt[i]; + if (!strcmp(name, con->name)) + return con; + } + + return NULL; +} + +/** + * add_preferred_console_match - Adds a preferred console if a match is found + * @match: Expected console on kernel command line, such as console=DEVNAME:0.0 + * @name: Name of the console character device to add such as ttyS + * @idx: Index for the console + * + * Allows driver subsystems to add a console after translating the command + * line name to the character device name used for the console. Options are + * added automatically based on the kernel command line. Duplicate preferred + * consoles are ignored by __add_preferred_console(). + * + * Return: 0 on success, negative error code on failure. + */ +int add_preferred_console_match(const char *match, const char *name, + const short idx) +{ + struct console_option *con; + char *brl_opt = NULL; + + if (!match || !strlen(match) || !name || !strlen(name) || + idx < 0) + return -EINVAL; + + con = console_opt_find(match); + if (!con) + return -ENOENT; + + /* + * See __add_preferred_console(). It checks for NULL brl_options to set + * the preferred_console flag. Empty brl_opt instead of NULL leads into + * the preferred_console flag not set, and CON_CONSDEV not being set, + * and the boot console won't get disabled at the end of console_setup(). 
+ */ + if (con->has_brl_opt) + brl_opt = con->brl_opt; + + console_opt_add_preferred_console(name, idx, con->opt, brl_opt); + + return 0; +} diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h index 3ca74ad391d6..a125e0235589 100644 --- a/kernel/printk/console_cmdline.h +++ b/kernel/printk/console_cmdline.h @@ -2,6 +2,12 @@ #ifndef _CONSOLE_CMDLINE_H #define _CONSOLE_CMDLINE_H +#define MAX_CMDLINECONSOLES 8 + +int console_opt_save(const char *str, const char *brl_opt); +int console_opt_add_preferred_console(const char *name, const short idx, + char *options, char *brl_options); + struct console_cmdline { char name[16]; /* Name of the driver */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 3160d287d4cf..420fd310129d 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -383,9 +383,6 @@ static int console_locked; /* * Array of consoles built from command line options (console=) */ - -#define MAX_CMDLINECONSOLES 8 - static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; static int preferred_console = -1; @@ -2503,6 +2500,17 @@ static int __init console_setup(char *str) if (_braille_console_setup(&str, &brl_options)) return 1; + /* Save the console for driver subsystem use */ + if (console_opt_save(str, brl_options)) + return 1; + + /* Flag register_console() to not call try_enable_default_console() */ + console_set_on_cmdline = 1; + + /* Don't attempt to parse a DEVNAME:0.0 style console */ + if (strchr(str, ':')) + return 1; + /* * Decode str into name, index, options. */ @@ -2533,6 +2541,13 @@ static int __init console_setup(char *str) } __setup("console=", console_setup); +/* Only called from add_preferred_console_match() */ +int console_opt_add_preferred_console(const char *name, const short idx, + char *options, char *brl_options) +{ + return __add_preferred_console(name, idx, options, brl_options, true); +} + /** * add_preferred_console - add a device to the list of preferred consoles. * @name: device name @@ -3146,6 +3161,40 @@ void console_unblank(void) pr_flush(1000, true); } +/* + * Rewind all consoles to the oldest available record. + * + * IMPORTANT: The function is safe only when called under + * console_lock(). It is not enforced because + * it is used as a best effort in panic(). + */ +static void __console_rewind_all(void) +{ + struct console *c; + short flags; + int cookie; + u64 seq; + + seq = prb_first_valid_seq(prb); + + cookie = console_srcu_read_lock(); + for_each_console_srcu(c) { + flags = console_srcu_read_flags(c); + + if (flags & CON_NBCON) { + nbcon_seq_force(c, seq); + } else { + /* + * This assignment is safe only when called under + * console_lock(). On panic, legacy consoles are + * only best effort. + */ + c->seq = seq; + } + } + console_srcu_read_unlock(cookie); +} + /** * console_flush_on_panic - flush console content on panic * @mode: flush all messages in buffer or just the pending ones @@ -3174,30 +3223,8 @@ void console_flush_on_panic(enum con_flush_mode mode) */ console_may_schedule = 0; - if (mode == CONSOLE_REPLAY_ALL) { - struct console *c; - short flags; - int cookie; - u64 seq; - - seq = prb_first_valid_seq(prb); - - cookie = console_srcu_read_lock(); - for_each_console_srcu(c) { - flags = console_srcu_read_flags(c); - - if (flags & CON_NBCON) { - nbcon_seq_force(c, seq); - } else { - /* - * This is an unsynchronized assignment. On - * panic legacy consoles are only best effort. 
- */ - c->seq = seq; - } - } - console_srcu_read_unlock(cookie); - } + if (mode == CONSOLE_REPLAY_ALL) + __console_rewind_all(); console_flush_all(false, &next_seq, &handover); } @@ -3495,7 +3522,7 @@ void register_console(struct console *newcon) * Note that a console with tty binding will have CON_CONSDEV * flag set and will be first in the list. */ - if (preferred_console < 0) { + if (preferred_console < 0 && !console_set_on_cmdline) { if (hlist_empty(&console_list) || !console_first()->device || console_first()->flags & CON_BOOT) { try_enable_default_console(newcon); @@ -4286,6 +4313,23 @@ void kmsg_dump_rewind(struct kmsg_dump_iter *iter) } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); +/** + * console_replay_all - replay kernel log on consoles + * + * Try to obtain lock on console subsystem and replay all + * available records in printk buffer on the consoles. + * Does nothing if lock is not obtained. + * + * Context: Any context. + */ +void console_replay_all(void) +{ + if (console_trylock()) { + __console_rewind_all(); + /* Consoles are flushed as part of console_unlock(). */ + console_unlock(); + } +} #endif #ifdef CONFIG_SMP diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 329c82faca9b..a6994a1fcc90 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -2353,7 +2353,7 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve static bool topology_span_sane(struct sched_domain_topology_level *tl, const struct cpumask *cpu_map, int cpu) { - int i; + int i = cpu + 1; /* NUMA levels are allowed to overlap */ if (tl->flags & SDTL_OVERLAP) @@ -2365,9 +2365,7 @@ static bool topology_span_sane(struct sched_domain_topology_level *tl, * breaking the sched_group lists - i.e. a later get_group() pass * breaks the linking done for an earlier span. 
*/ - for_each_cpu(i, cpu_map) { - if (i == cpu) - continue; + for_each_cpu_from(i, cpu_map) { /* * We should 'and' all those masks with 'cpu_map' to exactly * match the topology we're about to build, but that can only diff --git a/kernel/signal.c b/kernel/signal.c index 01c4c46a51a8..1f9dd41c04be 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1375,9 +1375,7 @@ int zap_other_threads(struct task_struct *p) for_other_threads(p, t) { task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); - /* Don't require de_thread to wait for the vhost_worker */ - if ((t->flags & (PF_IO_WORKER | PF_USER_WORKER)) != PF_USER_WORKER) - count++; + count++; /* Don't bother with already dead threads */ if (t->exit_state) diff --git a/kernel/sys.c b/kernel/sys.c index f9c95410278c..3a2df1bd9f64 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -146,6 +146,9 @@ #ifndef RISCV_V_GET_CONTROL # define RISCV_V_GET_CONTROL() (-EINVAL) #endif +#ifndef RISCV_SET_ICACHE_FLUSH_CTX +# define RISCV_SET_ICACHE_FLUSH_CTX(a, b) (-EINVAL) +#endif #ifndef PPC_GET_DEXCR_ASPECT # define PPC_GET_DEXCR_ASPECT(a, b) (-EINVAL) #endif @@ -2776,6 +2779,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_RISCV_V_GET_CONTROL: error = RISCV_V_GET_CONTROL(); break; + case PR_RISCV_SET_ICACHE_FLUSH_CTX: + error = RISCV_SET_ICACHE_FLUSH_CTX(arg2, arg3); + break; default: error = -EINVAL; break; diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index faad00cce269..d7eee421d4bc 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -196,6 +196,7 @@ COND_SYSCALL(migrate_pages); COND_SYSCALL(move_pages); COND_SYSCALL(set_mempolicy_home_node); COND_SYSCALL(cachestat); +COND_SYSCALL(mseal); COND_SYSCALL(perf_event_open); COND_SYSCALL(accept4); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f5154c051d2c..d1daeab1bbc1 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3295,7 +3295,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe, struct bpf_run_ctx *old_run_ctx; int err = 0; - if (link->task && current != link->task) + if (link->task && current->mm != link->task->mm) return 0; if (sleepable) @@ -3396,8 +3396,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path); uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets); cnt = attr->link_create.uprobe_multi.cnt; + pid = attr->link_create.uprobe_multi.pid; - if (!upath || !uoffsets || !cnt) + if (!upath || !uoffsets || !cnt || pid < 0) return -EINVAL; if (cnt > MAX_UPROBE_MULTI_CNT) return -E2BIG; @@ -3421,11 +3422,8 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr goto error_path_put; } - pid = attr->link_create.uprobe_multi.pid; if (pid) { - rcu_read_lock(); - task = get_pid_task(find_vpid(pid), PIDTYPE_PID); - rcu_read_unlock(); + task = get_pid_task(find_vpid(pid), PIDTYPE_TGID); if (!task) { err = -ESRCH; goto error_path_put; @@ -3519,7 +3517,6 @@ static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx) } #endif /* CONFIG_UPROBES */ -#ifdef CONFIG_FPROBE __bpf_kfunc_start_defs(); __bpf_kfunc bool bpf_session_is_return(void) @@ -3568,4 +3565,3 @@ static int __init bpf_kprobe_multi_kfuncs_init(void) } late_initcall(bpf_kprobe_multi_kfuncs_init); -#endif diff --git a/kernel/trace/bpf_trace.h b/kernel/trace/bpf_trace.h index 9acbc11ac7bb..c4075b56becc 100644 --- a/kernel/trace/bpf_trace.h +++ b/kernel/trace/bpf_trace.h @@ -19,7 +19,7 @@ TRACE_EVENT(bpf_trace_printk, ), 
TP_fast_assign( - __assign_str(bpf_string, bpf_string); + __assign_str(bpf_string); ), TP_printk("%s", __get_str(bpf_string)) diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c index 8c4ffd076162..cb0871fbdb07 100644 --- a/kernel/trace/preemptirq_delay_test.c +++ b/kernel/trace/preemptirq_delay_test.c @@ -215,4 +215,5 @@ static void __exit preemptirq_delay_exit(void) module_init(preemptirq_delay_init) module_exit(preemptirq_delay_exit) +MODULE_DESCRIPTION("Preempt / IRQ disable delay thread to test latency tracers"); MODULE_LICENSE("GPL v2"); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 7345a8b625fb..28853966aa9a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1460,6 +1460,11 @@ static void rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer, * * As a safety measure we check to make sure the data pages have not * been corrupted. + * + * Callers of this function need to guarantee that the list of pages doesn't get + * modified during the check. In particular, if it's possible that the function + * is invoked with concurrent readers which can swap in a new reader page then + * the caller should take cpu_buffer->reader_lock. */ static void rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) { @@ -2210,8 +2215,12 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, */ synchronize_rcu(); for_each_buffer_cpu(buffer, cpu) { + unsigned long flags; + cpu_buffer = buffer->buffers[cpu]; + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); rb_check_pages(cpu_buffer); + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); } atomic_dec(&buffer->record_disabled); } @@ -5046,13 +5055,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_consume); * @flags: gfp flags to use for memory allocation * * This performs the initial preparations necessary to iterate - * through the buffer. Memory is allocated, buffer recording + * through the buffer. Memory is allocated, buffer resizing * is disabled, and the iterator pointer is returned to the caller. * - * Disabling buffer recording prevents the reading from being - * corrupted. This is not a consuming read, so a producer is not - * expected. - * * After a sequence of ring_buffer_read_prepare calls, the user is * expected to make at least one call to ring_buffer_read_prepare_sync. * Afterwards, ring_buffer_read_start is invoked to get things going @@ -5139,8 +5144,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_start); * ring_buffer_read_finish - finish reading the iterator of the buffer * @iter: The iterator retrieved by ring_buffer_start * - * This re-enables the recording to the buffer, and frees the - * iterator. + * This re-enables resizing of the buffer, and frees the iterator. */ void ring_buffer_read_finish(struct ring_buffer_iter *iter) @@ -5148,12 +5152,7 @@ ring_buffer_read_finish(struct ring_buffer_iter *iter) struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; unsigned long flags; - /* - * Ring buffer is disabled from recording, here's a good place - * to check the integrity of the ring buffer. - * Must prevent readers from trying to read, as the check - * clears the HEAD page and readers require it. - */ + /* Use this opportunity to check the integrity of the ring buffer. 
*/ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); rb_check_pages(cpu_buffer); raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c index 2f68e93fff0b..df0745a42a3f 100644 --- a/kernel/trace/rv/rv.c +++ b/kernel/trace/rv/rv.c @@ -245,6 +245,7 @@ static int __rv_disable_monitor(struct rv_monitor_def *mdef, bool sync) /** * rv_disable_monitor - disable a given runtime monitor + * @mdef: Pointer to the monitor definition structure. * * Returns 0 on success. */ @@ -256,6 +257,7 @@ int rv_disable_monitor(struct rv_monitor_def *mdef) /** * rv_enable_monitor - enable a given runtime monitor + * @mdef: Pointer to the monitor definition structure. * * Returns 0 on success, error otherwise. */ diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 5e263c141574..39877c80d6cb 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -554,6 +554,10 @@ static int parse_btf_field(char *fieldname, const struct btf_type *type, anon_offs = 0; field = btf_find_struct_member(ctx->btf, type, fieldname, &anon_offs); + if (IS_ERR(field)) { + trace_probe_log_err(ctx->offset, BAD_BTF_TID); + return PTR_ERR(field); + } if (!field) { trace_probe_log_err(ctx->offset, NO_BTF_FIELD); return -ENOENT; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 8541fa1494ae..c98e3b3386ba 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -970,19 +970,17 @@ static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu, static void __uprobe_trace_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, - struct uprobe_cpu_buffer **ucbp, + struct uprobe_cpu_buffer *ucb, struct trace_event_file *trace_file) { struct uprobe_trace_entry_head *entry; struct trace_event_buffer fbuffer; - struct uprobe_cpu_buffer *ucb; void *data; int size, esize; struct trace_event_call *call = trace_probe_event_call(&tu->tp); WARN_ON(call != trace_file->event_call); - ucb = prepare_uprobe_buffer(tu, regs, ucbp); if (WARN_ON_ONCE(ucb->dsize > PAGE_SIZE)) return; @@ -1014,13 +1012,16 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs, struct uprobe_cpu_buffer **ucbp) { struct event_file_link *link; + struct uprobe_cpu_buffer *ucb; if (is_ret_probe(tu)) return 0; + ucb = prepare_uprobe_buffer(tu, regs, ucbp); + rcu_read_lock(); trace_probe_for_each_link_rcu(link, &tu->tp) - __uprobe_trace_func(tu, 0, regs, ucbp, link->file); + __uprobe_trace_func(tu, 0, regs, ucb, link->file); rcu_read_unlock(); return 0; @@ -1031,10 +1032,13 @@ static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func, struct uprobe_cpu_buffer **ucbp) { struct event_file_link *link; + struct uprobe_cpu_buffer *ucb; + + ucb = prepare_uprobe_buffer(tu, regs, ucbp); rcu_read_lock(); trace_probe_for_each_link_rcu(link, &tu->tp) - __uprobe_trace_func(tu, func, regs, ucbp, link->file); + __uprobe_trace_func(tu, func, regs, ucb, link->file); rcu_read_unlock(); } diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c index da35e5b7f047..8800f5acc007 100644 --- a/kernel/vhost_task.c +++ b/kernel/vhost_task.c @@ -10,38 +10,32 @@ enum vhost_task_flags { VHOST_TASK_FLAGS_STOP, + VHOST_TASK_FLAGS_KILLED, }; struct vhost_task { bool (*fn)(void *data); + void (*handle_sigkill)(void *data); void *data; struct completion exited; unsigned long flags; struct task_struct *task; + /* serialize SIGKILL and vhost_task_stop calls */ + struct mutex exit_mutex; }; static int 
vhost_task_fn(void *data) { struct vhost_task *vtsk = data; - bool dead = false; for (;;) { bool did_work; - if (!dead && signal_pending(current)) { + if (signal_pending(current)) { struct ksignal ksig; - /* - * Calling get_signal will block in SIGSTOP, - * or clear fatal_signal_pending, but remember - * what was set. - * - * This thread won't actually exit until all - * of the file descriptors are closed, and - * the release function is called. - */ - dead = get_signal(&ksig); - if (dead) - clear_thread_flag(TIF_SIGPENDING); + + if (get_signal(&ksig)) + break; } /* mb paired w/ vhost_task_stop */ @@ -57,7 +51,19 @@ static int vhost_task_fn(void *data) schedule(); } + mutex_lock(&vtsk->exit_mutex); + /* + * If a vhost_task_stop and SIGKILL race, we can ignore the SIGKILL. + * When the vhost layer has called vhost_task_stop it's already stopped + * new work and flushed. + */ + if (!test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags)) { + set_bit(VHOST_TASK_FLAGS_KILLED, &vtsk->flags); + vtsk->handle_sigkill(vtsk->data); + } + mutex_unlock(&vtsk->exit_mutex); complete(&vtsk->exited); + do_exit(0); } @@ -78,12 +84,17 @@ EXPORT_SYMBOL_GPL(vhost_task_wake); * @vtsk: vhost_task to stop * * vhost_task_fn ensures the worker thread exits after - * VHOST_TASK_FLAGS_SOP becomes true. + * VHOST_TASK_FLAGS_STOP becomes true. */ void vhost_task_stop(struct vhost_task *vtsk) { - set_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags); - vhost_task_wake(vtsk); + mutex_lock(&vtsk->exit_mutex); + if (!test_bit(VHOST_TASK_FLAGS_KILLED, &vtsk->flags)) { + set_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags); + vhost_task_wake(vtsk); + } + mutex_unlock(&vtsk->exit_mutex); + /* * Make sure vhost_task_fn is no longer accessing the vhost_task before * freeing it below. @@ -96,14 +107,16 @@ EXPORT_SYMBOL_GPL(vhost_task_stop); /** * vhost_task_create - create a copy of a task to be used by the kernel * @fn: vhost worker function - * @arg: data to be passed to fn + * @handle_sigkill: vhost function to handle when we are killed + * @arg: data to be passed to fn and handled_kill * @name: the thread's name * * This returns a specialized task for use by the vhost layer or NULL on * failure. The returned task is inactive, and the caller must fire it up * through vhost_task_start(). */ -struct vhost_task *vhost_task_create(bool (*fn)(void *), void *arg, +struct vhost_task *vhost_task_create(bool (*fn)(void *), + void (*handle_sigkill)(void *), void *arg, const char *name) { struct kernel_clone_args args = { @@ -122,8 +135,10 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *), void *arg, if (!vtsk) return NULL; init_completion(&vtsk->exited); + mutex_init(&vtsk->exit_mutex); vtsk->data = arg; vtsk->fn = fn; + vtsk->handle_sigkill = handle_sigkill; args.fn_arg = vtsk; |
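
The kernel/vhost_task.c hunks above change vhost_task_create() to take an additional handle_sigkill callback, and the kernel-doc in the diff notes that the returned task stays inactive until vhost_task_start() is called. Below is a minimal caller sketch of that updated interface; my_dev, my_worker_fn, my_worker_killed and my_dev_start_worker are illustrative names invented for this example and are not part of the merge, while the vhost_task_* calls follow the signatures shown in the diff.

```c
/*
 * Illustrative sketch only: my_dev, my_worker_fn and my_worker_killed are
 * hypothetical; the vhost_task_* signatures follow kernel/vhost_task.c above.
 */
#include <linux/sched/vhost_task.h>

struct my_dev {
	struct vhost_task *vtsk;
};

/* Worker body; returning false lets vhost_task_fn() sleep until woken. */
static bool my_worker_fn(void *data)
{
	/* process one batch of queued work for the device behind @data */
	return false;
}

/* Invoked once by vhost_task_fn() if the worker task receives SIGKILL. */
static void my_worker_killed(void *data)
{
	/* stop queueing new work; remaining cleanup happens on release */
}

static int my_dev_start_worker(struct my_dev *d)
{
	d->vtsk = vhost_task_create(my_worker_fn, my_worker_killed, d,
				    "vhost-my_dev");
	if (!d->vtsk)
		return -ENOMEM;

	/* The task is created inactive and must be explicitly started. */
	vhost_task_start(d->vtsk);
	return 0;
}
```

On teardown the owner would call vhost_task_stop(), which the diff serializes against the SIGKILL path via the new exit_mutex so that only one of handle_sigkill() or the stop path runs.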