author		Linus Torvalds <torvalds@linux-foundation.org>	2025-05-31 19:12:53 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2025-05-31 19:12:53 -0700
commit		7d4e49a77d9930c69751b9192448fda6ff9100f1 (patch)
tree		a041784a50f35185c77d235ba28037909c53fbd5 /kernel
parent		00c010e130e58301db2ea0cec1eadc931e1cb8cf (diff)
parent		375700bab5b150e876e42d894a9a7470881f8a61 (diff)
Merge tag 'mm-nonmm-stable-2025-05-31-15-28' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull non-MM updates from Andrew Morton:

 - "hung_task: extend blocking task stacktrace dump to semaphore" from
   Lance Yang enhances the hung task detector. The detector presently
   dumps the blocking task's stack when it is blocked on a mutex.
   Lance's series extends this to semaphores

 - "nilfs2: improve sanity checks in dirty state propagation" from
   Wentao Liang addresses a couple of minor flaws in nilfs2

 - "scripts/gdb: Fixes related to lx_per_cpu()" from Illia Ostapyshyn
   fixes a couple of issues in the gdb scripts

 - "Support kdump with LUKS encryption by reusing LUKS volume keys" from
   Coiby Xu addresses a usability problem with kdump. When the dump
   device is LUKS-encrypted, the kdump kernel may not have the keys to
   the encrypted filesystem. A full writeup of this is in the series
   [0/N] cover letter

 - "sysfs: add counters for lockups and stalls" from Max Kellermann adds
   /sys/kernel/hardlockup_count, /sys/kernel/softlockup_count and
   /sys/kernel/rcu_stall_count

 - "fork: Page operation cleanups in the fork code" from Pasha Tatashin
   implements a number of code cleanups in fork.c

 - "scripts/gdb/symbols: determine KASLR offset on s390 during early
   boot" from Ilya Leoshkevich fixes some s390 issues in the gdb scripts

* tag 'mm-nonmm-stable-2025-05-31-15-28' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (67 commits)
  llist: make llist_add_batch() a static inline
  delayacct: remove redundant code and adjust indentation
  squashfs: add optional full compressed block caching
  crash_dump, nvme: select CONFIGFS_FS as built-in
  scripts/gdb/symbols: determine KASLR offset on s390 during early boot
  scripts/gdb/symbols: factor out pagination_off()
  scripts/gdb/symbols: factor out get_vmlinux()
  kernel/panic.c: format kernel-doc comments
  mailmap: update and consolidate Casey Connolly's name and email
  nilfs2: remove wbc->for_reclaim handling
  fork: define a local GFP_VMAP_STACK
  fork: check charging success before zeroing stack
  fork: clean-up naming of vm_stack/vm_struct variables in vmap stacks code
  fork: clean-up ifdef logic around stack allocation
  kernel/rcu/tree_stall: add /sys/kernel/rcu_stall_count
  kernel/watchdog: add /sys/kernel/{hard,soft}lockup_count
  x86/crash: make the page that stores the dm crypt keys inaccessible
  x86/crash: pass dm crypt keys to kdump kernel
  Revert "x86/mm: Remove unused __set_memory_prot()"
  crash_dump: retrieve dm crypt keys in kdump kernel
  ...
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Kconfig.kexec           20
-rw-r--r--  kernel/Makefile                 1
-rw-r--r--  kernel/crash_dump_dm_crypt.c  464
-rw-r--r--  kernel/crash_reserve.c          2
-rw-r--r--  kernel/delayacct.c             51
-rw-r--r--  kernel/exit.c                  68
-rw-r--r--  kernel/hung_task.c             55
-rw-r--r--  kernel/kexec_file.c            81
-rw-r--r--  kernel/locking/mutex.c          5
-rw-r--r--  kernel/locking/semaphore.c     57
-rw-r--r--  kernel/panic.c                  8
-rw-r--r--  kernel/relay.c                111
-rw-r--r--  kernel/vmcore_info.c            4
-rw-r--r--  kernel/watchdog.c              94
14 files changed, 736 insertions(+), 285 deletions(-)
diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec
index 4fa212909d69..e64ce21f9a80 100644
--- a/kernel/Kconfig.kexec
+++ b/kernel/Kconfig.kexec
@@ -38,8 +38,7 @@ config KEXEC
config KEXEC_FILE
bool "Enable kexec file based system call"
depends on ARCH_SUPPORTS_KEXEC_FILE
- select CRYPTO
- select CRYPTO_SHA256
+ select CRYPTO_LIB_SHA256
select KEXEC_CORE
help
This is the new version of the kexec system call. This system call is
@@ -130,6 +129,23 @@ config CRASH_DUMP
For s390, this option also enables zfcpdump.
See also <file:Documentation/arch/s390/zfcpdump.rst>
+config CRASH_DM_CRYPT
+ bool "Support saving crash dump to dm-crypt encrypted volume"
+ depends on KEXEC_FILE
+ depends on CRASH_DUMP
+ depends on DM_CRYPT
+ help
+ With this option enabled, user space can interact with
+ /sys/kernel/config/crash_dm_crypt_keys to make the dm crypt keys
+ persistent for the dump-capture kernel.
+
+config CRASH_DM_CRYPT_CONFIGS
+ def_tristate CRASH_DM_CRYPT
+ select CONFIGFS_FS
+ help
+ CRASH_DM_CRYPT cannot directly select CONFIGFS_FS, because that
+ is required to be built-in.
+
config CRASH_HOTPLUG
bool "Update the crash elfcorehdr on system configuration changes"
default y
diff --git a/kernel/Makefile b/kernel/Makefile
index 97c09847db42..32e80dd626af 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o elfcorehdr.o
obj-$(CONFIG_CRASH_RESERVE) += crash_reserve.o
obj-$(CONFIG_KEXEC_CORE) += kexec_core.o
obj-$(CONFIG_CRASH_DUMP) += crash_core.o
+obj-$(CONFIG_CRASH_DM_CRYPT) += crash_dump_dm_crypt.o
obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o
diff --git a/kernel/crash_dump_dm_crypt.c b/kernel/crash_dump_dm_crypt.c
new file mode 100644
index 000000000000..401423ba477d
--- /dev/null
+++ b/kernel/crash_dump_dm_crypt.c
@@ -0,0 +1,464 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/key.h>
+#include <linux/keyctl.h>
+#include <keys/user-type.h>
+#include <linux/crash_dump.h>
+#include <linux/cc_platform.h>
+#include <linux/configfs.h>
+#include <linux/module.h>
+
+#define KEY_NUM_MAX 128 /* maximum dm crypt keys */
+#define KEY_SIZE_MAX 256 /* maximum dm crypt key size */
+#define KEY_DESC_MAX_LEN 128 /* maximum dm crypt key description size */
+
+static unsigned int key_count;
+
+struct dm_crypt_key {
+ unsigned int key_size;
+ char key_desc[KEY_DESC_MAX_LEN];
+ u8 data[KEY_SIZE_MAX];
+};
+
+static struct keys_header {
+ unsigned int total_keys;
+ struct dm_crypt_key keys[] __counted_by(total_keys);
+} *keys_header;
+
+static size_t get_keys_header_size(size_t total_keys)
+{
+ return struct_size(keys_header, keys, total_keys);
+}
+
+unsigned long long dm_crypt_keys_addr;
+EXPORT_SYMBOL_GPL(dm_crypt_keys_addr);
+
+static int __init setup_dmcryptkeys(char *arg)
+{
+ char *end;
+
+ if (!arg)
+ return -EINVAL;
+ dm_crypt_keys_addr = memparse(arg, &end);
+ if (end > arg)
+ return 0;
+
+ dm_crypt_keys_addr = 0;
+ return -EINVAL;
+}
+
+early_param("dmcryptkeys", setup_dmcryptkeys);
+
+/*
+ * Architectures may override this function to read dm crypt keys
+ */
+ssize_t __weak dm_crypt_keys_read(char *buf, size_t count, u64 *ppos)
+{
+ struct kvec kvec = { .iov_base = buf, .iov_len = count };
+ struct iov_iter iter;
+
+ iov_iter_kvec(&iter, READ, &kvec, 1, count);
+ return read_from_oldmem(&iter, count, ppos, cc_platform_has(CC_ATTR_MEM_ENCRYPT));
+}
+
+static int add_key_to_keyring(struct dm_crypt_key *dm_key,
+ key_ref_t keyring_ref)
+{
+ key_ref_t key_ref;
+ int r;
+
+ /* create or update the requested key and add it to the target keyring */
+ key_ref = key_create_or_update(keyring_ref, "user", dm_key->key_desc,
+ dm_key->data, dm_key->key_size,
+ KEY_USR_ALL, KEY_ALLOC_IN_QUOTA);
+
+ if (!IS_ERR(key_ref)) {
+ r = key_ref_to_ptr(key_ref)->serial;
+ key_ref_put(key_ref);
+ kexec_dprintk("Success adding key %s", dm_key->key_desc);
+ } else {
+ r = PTR_ERR(key_ref);
+ kexec_dprintk("Error when adding key");
+ }
+
+ key_ref_put(keyring_ref);
+ return r;
+}
+
+static void get_keys_from_kdump_reserved_memory(void)
+{
+ struct keys_header *keys_header_loaded;
+
+ arch_kexec_unprotect_crashkres();
+
+ keys_header_loaded = kmap_local_page(pfn_to_page(
+ kexec_crash_image->dm_crypt_keys_addr >> PAGE_SHIFT));
+
+ memcpy(keys_header, keys_header_loaded, get_keys_header_size(key_count));
+ kunmap_local(keys_header_loaded);
+ arch_kexec_protect_crashkres();
+}
+
+static int restore_dm_crypt_keys_to_thread_keyring(void)
+{
+ struct dm_crypt_key *key;
+ size_t keys_header_size;
+ key_ref_t keyring_ref;
+ u64 addr;
+
+ /* find the target keyring (which must be writable) */
+ keyring_ref =
+ lookup_user_key(KEY_SPEC_USER_KEYRING, 0x01, KEY_NEED_WRITE);
+ if (IS_ERR(keyring_ref)) {
+ kexec_dprintk("Failed to get the user keyring\n");
+ return PTR_ERR(keyring_ref);
+ }
+
+ addr = dm_crypt_keys_addr;
+ dm_crypt_keys_read((char *)&key_count, sizeof(key_count), &addr);
+ if (key_count > KEY_NUM_MAX) {
+ kexec_dprintk("Failed to read the number of dm-crypt keys\n");
+ return -1;
+ }
+
+ kexec_dprintk("There are %u keys\n", key_count);
+ addr = dm_crypt_keys_addr;
+
+ keys_header_size = get_keys_header_size(key_count);
+ keys_header = kzalloc(keys_header_size, GFP_KERNEL);
+ if (!keys_header)
+ return -ENOMEM;
+
+ dm_crypt_keys_read((char *)keys_header, keys_header_size, &addr);
+
+ for (int i = 0; i < keys_header->total_keys; i++) {
+ key = &keys_header->keys[i];
+ kexec_dprintk("Get key (size=%u)\n", key->key_size);
+ add_key_to_keyring(key, keyring_ref);
+ }
+
+ return 0;
+}
+
+static int read_key_from_user_keyring(struct dm_crypt_key *dm_key)
+{
+ const struct user_key_payload *ukp;
+ struct key *key;
+
+ kexec_dprintk("Requesting logon key %s", dm_key->key_desc);
+ key = request_key(&key_type_logon, dm_key->key_desc, NULL);
+
+ if (IS_ERR(key)) {
+ pr_warn("No such logon key %s\n", dm_key->key_desc);
+ return PTR_ERR(key);
+ }
+
+ ukp = user_key_payload_locked(key);
+ if (!ukp)
+ return -EKEYREVOKED;
+
+ if (ukp->datalen > KEY_SIZE_MAX) {
+ pr_err("Key size %u exceeds maximum (%u)\n", ukp->datalen, KEY_SIZE_MAX);
+ return -EINVAL;
+ }
+
+ memcpy(dm_key->data, ukp->data, ukp->datalen);
+ dm_key->key_size = ukp->datalen;
+ kexec_dprintk("Get dm crypt key (size=%u) %s: %8ph\n", dm_key->key_size,
+ dm_key->key_desc, dm_key->data);
+ return 0;
+}
+
+struct config_key {
+ struct config_item item;
+ const char *description;
+};
+
+static inline struct config_key *to_config_key(struct config_item *item)
+{
+ return container_of(item, struct config_key, item);
+}
+
+static ssize_t config_key_description_show(struct config_item *item, char *page)
+{
+ return sprintf(page, "%s\n", to_config_key(item)->description);
+}
+
+static ssize_t config_key_description_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct config_key *config_key = to_config_key(item);
+ size_t len;
+ int ret;
+
+ ret = -EINVAL;
+ len = strcspn(page, "\n");
+
+ if (len > KEY_DESC_MAX_LEN) {
+ pr_err("The key description shouldn't exceed %u characters", KEY_DESC_MAX_LEN);
+ return ret;
+ }
+
+ if (!len)
+ return ret;
+
+ kfree(config_key->description);
+ ret = -ENOMEM;
+ config_key->description = kmemdup_nul(page, len, GFP_KERNEL);
+ if (!config_key->description)
+ return ret;
+
+ return count;
+}
+
+CONFIGFS_ATTR(config_key_, description);
+
+static struct configfs_attribute *config_key_attrs[] = {
+ &config_key_attr_description,
+ NULL,
+};
+
+static void config_key_release(struct config_item *item)
+{
+ kfree(to_config_key(item));
+ key_count--;
+}
+
+static struct configfs_item_operations config_key_item_ops = {
+ .release = config_key_release,
+};
+
+static const struct config_item_type config_key_type = {
+ .ct_item_ops = &config_key_item_ops,
+ .ct_attrs = config_key_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct config_item *config_keys_make_item(struct config_group *group,
+ const char *name)
+{
+ struct config_key *config_key;
+
+ if (key_count >= KEY_NUM_MAX) {
+ pr_err("No more than %u keys can be created\n", KEY_NUM_MAX);
+ return ERR_PTR(-EINVAL);
+ }
+
+ config_key = kzalloc(sizeof(struct config_key), GFP_KERNEL);
+ if (!config_key)
+ return ERR_PTR(-ENOMEM);
+
+ config_item_init_type_name(&config_key->item, name, &config_key_type);
+
+ key_count++;
+
+ return &config_key->item;
+}
+
+static ssize_t config_keys_count_show(struct config_item *item, char *page)
+{
+ return sprintf(page, "%d\n", key_count);
+}
+
+CONFIGFS_ATTR_RO(config_keys_, count);
+
+static bool is_dm_key_reused;
+
+static ssize_t config_keys_reuse_show(struct config_item *item, char *page)
+{
+ return sprintf(page, "%d\n", is_dm_key_reused);
+}
+
+static ssize_t config_keys_reuse_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ if (!kexec_crash_image || !kexec_crash_image->dm_crypt_keys_addr) {
+ kexec_dprintk(
+ "dm-crypt keys haven't be saved to crash-reserved memory\n");
+ return -EINVAL;
+ }
+
+ if (kstrtobool(page, &is_dm_key_reused))
+ return -EINVAL;
+
+ if (is_dm_key_reused)
+ get_keys_from_kdump_reserved_memory();
+
+ return count;
+}
+
+CONFIGFS_ATTR(config_keys_, reuse);
+
+static struct configfs_attribute *config_keys_attrs[] = {
+ &config_keys_attr_count,
+ &config_keys_attr_reuse,
+ NULL,
+};
+
+/*
+ * Note that, since no extra work is required on ->drop_item(),
+ * no ->drop_item() is provided.
+ */
+static struct configfs_group_operations config_keys_group_ops = {
+ .make_item = config_keys_make_item,
+};
+
+static const struct config_item_type config_keys_type = {
+ .ct_group_ops = &config_keys_group_ops,
+ .ct_attrs = config_keys_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static bool restore;
+
+static ssize_t config_keys_restore_show(struct config_item *item, char *page)
+{
+ return sprintf(page, "%d\n", restore);
+}
+
+static ssize_t config_keys_restore_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ if (!restore)
+ restore_dm_crypt_keys_to_thread_keyring();
+
+ if (kstrtobool(page, &restore))
+ return -EINVAL;
+
+ return count;
+}
+
+CONFIGFS_ATTR(config_keys_, restore);
+
+static struct configfs_attribute *kdump_config_keys_attrs[] = {
+ &config_keys_attr_restore,
+ NULL,
+};
+
+static const struct config_item_type kdump_config_keys_type = {
+ .ct_attrs = kdump_config_keys_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct configfs_subsystem config_keys_subsys = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "crash_dm_crypt_keys",
+ .ci_type = &config_keys_type,
+ },
+ },
+};
+
+static int build_keys_header(void)
+{
+ struct config_item *item = NULL;
+ struct config_key *key;
+ int i, r;
+
+ kvfree(keys_header);
+
+ keys_header = kzalloc(get_keys_header_size(key_count), GFP_KERNEL);
+ if (!keys_header)
+ return -ENOMEM;
+
+ keys_header->total_keys = key_count;
+
+ i = 0;
+ list_for_each_entry(item, &config_keys_subsys.su_group.cg_children,
+ ci_entry) {
+ if (item->ci_type != &config_key_type)
+ continue;
+
+ key = to_config_key(item);
+
+ if (!key->description) {
+ pr_warn("No key description for key %s\n", item->ci_name);
+ return -EINVAL;
+ }
+
+ strscpy(keys_header->keys[i].key_desc, key->description,
+ KEY_DESC_MAX_LEN);
+ r = read_key_from_user_keyring(&keys_header->keys[i]);
+ if (r != 0) {
+ kexec_dprintk("Failed to read key %s\n",
+ keys_header->keys[i].key_desc);
+ return r;
+ }
+ i++;
+ kexec_dprintk("Found key: %s\n", item->ci_name);
+ }
+
+ return 0;
+}
+
+int crash_load_dm_crypt_keys(struct kimage *image)
+{
+ struct kexec_buf kbuf = {
+ .image = image,
+ .buf_min = 0,
+ .buf_max = ULONG_MAX,
+ .top_down = false,
+ .random = true,
+ };
+ int r;
+
+ if (key_count <= 0) {
+ kexec_dprintk("No dm-crypt keys\n");
+ return -ENOENT;
+ }
+
+ if (!is_dm_key_reused) {
+ image->dm_crypt_keys_addr = 0;
+ r = build_keys_header();
+ if (r)
+ return r;
+ }
+
+ kbuf.buffer = keys_header;
+ kbuf.bufsz = get_keys_header_size(key_count);
+
+ kbuf.memsz = kbuf.bufsz;
+ kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ r = kexec_add_buffer(&kbuf);
+ if (r) {
+ kvfree((void *)kbuf.buffer);
+ return r;
+ }
+ image->dm_crypt_keys_addr = kbuf.mem;
+ image->dm_crypt_keys_sz = kbuf.bufsz;
+ kexec_dprintk(
+ "Loaded dm crypt keys to kexec_buffer bufsz=0x%lx memsz=0x%lx\n",
+ kbuf.bufsz, kbuf.memsz);
+
+ return r;
+}
+
+static int __init configfs_dmcrypt_keys_init(void)
+{
+ int ret;
+
+ if (is_kdump_kernel()) {
+ config_keys_subsys.su_group.cg_item.ci_type =
+ &kdump_config_keys_type;
+ }
+
+ config_group_init(&config_keys_subsys.su_group);
+ mutex_init(&config_keys_subsys.su_mutex);
+ ret = configfs_register_subsystem(&config_keys_subsys);
+ if (ret) {
+ pr_err("Error %d while registering subsystem %s\n", ret,
+ config_keys_subsys.su_group.cg_item.ci_namebuf);
+ goto out_unregister;
+ }
+
+ return 0;
+
+out_unregister:
+ configfs_unregister_subsystem(&config_keys_subsys);
+
+ return ret;
+}
+
+module_init(configfs_dmcrypt_keys_init);
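
The configfs interface above is driven entirely from user space: each mkdir
under /sys/kernel/config/crash_dm_crypt_keys creates one config_key item
(config_keys_make_item), and writing its "description" attribute records
which logon key to snapshot. A minimal user-space sketch, not part of the
patch; the directory name and key description below are hypothetical, and
the description must match the logon key dm-crypt actually holds:

	#include <errno.h>
	#include <stdio.h>
	#include <sys/stat.h>

	#define KEYS_DIR "/sys/kernel/config/crash_dm_crypt_keys"

	int main(void)
	{
		FILE *f;

		/* One mkdir per key; lands in config_keys_make_item(). */
		if (mkdir(KEYS_DIR "/luks-volume0", 0755) && errno != EEXIST) {
			perror("mkdir");
			return 1;
		}

		/* Writing this attribute invokes config_key_description_store(). */
		f = fopen(KEYS_DIR "/luks-volume0/description", "w");
		if (!f) {
			perror("fopen");
			return 1;
		}
		/* Hypothetical logon-key description. */
		fprintf(f, "cryptsetup:12345678-aaaa-bbbb-cccc-0123456789ab-d0\n");
		fclose(f);
		return 0;
	}

Once the descriptions are registered, loading the crash kernel through the
kexec_file syscall reaches crash_load_dm_crypt_keys(), which reads the key
payloads out of the user keyring and places them in crash-reserved memory.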
diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c
index aff7c0fdbefa..acb6bf42e30d 100644
--- a/kernel/crash_reserve.c
+++ b/kernel/crash_reserve.c
@@ -131,7 +131,7 @@ static int __init parse_crashkernel_mem(char *cmdline,
cur++;
*crash_base = memparse(cur, &tmp);
if (cur == tmp) {
- pr_warn("crahskernel: Memory value expected after '@'\n");
+ pr_warn("crashkernel: Memory value expected after '@'\n");
return -EINVAL;
}
}
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index eb63a021ac04..30e7912ebb0d 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -14,6 +14,15 @@
#include <linux/delayacct.h>
#include <linux/module.h>
+#define UPDATE_DELAY(type) \
+do { \
+ d->type##_delay_max = tsk->delays->type##_delay_max; \
+ d->type##_delay_min = tsk->delays->type##_delay_min; \
+ tmp = d->type##_delay_total + tsk->delays->type##_delay; \
+ d->type##_delay_total = (tmp < d->type##_delay_total) ? 0 : tmp; \
+ d->type##_count += tsk->delays->type##_count; \
+} while (0)
+
DEFINE_STATIC_KEY_FALSE(delayacct_key);
int delayacct_on __read_mostly; /* Delay accounting turned on/off */
struct kmem_cache *delayacct_cache;
@@ -173,41 +182,13 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
raw_spin_lock_irqsave(&tsk->delays->lock, flags);
- d->blkio_delay_max = tsk->delays->blkio_delay_max;
- d->blkio_delay_min = tsk->delays->blkio_delay_min;
- tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
- d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
- d->swapin_delay_max = tsk->delays->swapin_delay_max;
- d->swapin_delay_min = tsk->delays->swapin_delay_min;
- tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
- d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
- d->freepages_delay_max = tsk->delays->freepages_delay_max;
- d->freepages_delay_min = tsk->delays->freepages_delay_min;
- tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
- d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
- d->thrashing_delay_max = tsk->delays->thrashing_delay_max;
- d->thrashing_delay_min = tsk->delays->thrashing_delay_min;
- tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay;
- d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp;
- d->compact_delay_max = tsk->delays->compact_delay_max;
- d->compact_delay_min = tsk->delays->compact_delay_min;
- tmp = d->compact_delay_total + tsk->delays->compact_delay;
- d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp;
- d->wpcopy_delay_max = tsk->delays->wpcopy_delay_max;
- d->wpcopy_delay_min = tsk->delays->wpcopy_delay_min;
- tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay;
- d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp;
- d->irq_delay_max = tsk->delays->irq_delay_max;
- d->irq_delay_min = tsk->delays->irq_delay_min;
- tmp = d->irq_delay_total + tsk->delays->irq_delay;
- d->irq_delay_total = (tmp < d->irq_delay_total) ? 0 : tmp;
- d->blkio_count += tsk->delays->blkio_count;
- d->swapin_count += tsk->delays->swapin_count;
- d->freepages_count += tsk->delays->freepages_count;
- d->thrashing_count += tsk->delays->thrashing_count;
- d->compact_count += tsk->delays->compact_count;
- d->wpcopy_count += tsk->delays->wpcopy_count;
- d->irq_count += tsk->delays->irq_count;
+ UPDATE_DELAY(blkio);
+ UPDATE_DELAY(swapin);
+ UPDATE_DELAY(freepages);
+ UPDATE_DELAY(thrashing);
+ UPDATE_DELAY(compact);
+ UPDATE_DELAY(wpcopy);
+ UPDATE_DELAY(irq);
raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
return 0;
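
The UPDATE_DELAY() macro is pure token pasting, so each invocation expands
to exactly the open-coded block it replaces, with the _count accumulation
folded into the same spot. For instance, UPDATE_DELAY(blkio) becomes:

	do {
		d->blkio_delay_max = tsk->delays->blkio_delay_max;
		d->blkio_delay_min = tsk->delays->blkio_delay_min;
		tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
		d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
		d->blkio_count += tsk->delays->blkio_count;
	} while (0);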
diff --git a/kernel/exit.c b/kernel/exit.c
index 38645039dd8f..bd743900354c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -421,44 +421,30 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
}
}
-static void coredump_task_exit(struct task_struct *tsk)
+static void coredump_task_exit(struct task_struct *tsk,
+ struct core_state *core_state)
{
- struct core_state *core_state;
+ struct core_thread self;
+ self.task = tsk;
+ if (self.task->flags & PF_SIGNALED)
+ self.next = xchg(&core_state->dumper.next, &self);
+ else
+ self.task = NULL;
/*
- * Serialize with any possible pending coredump.
- * We must hold siglock around checking core_state
- * and setting PF_POSTCOREDUMP. The core-inducing thread
- * will increment ->nr_threads for each thread in the
- * group without PF_POSTCOREDUMP set.
+ * Implies mb(), the result of xchg() must be visible
+ * to core_state->dumper.
*/
- spin_lock_irq(&tsk->sighand->siglock);
- tsk->flags |= PF_POSTCOREDUMP;
- core_state = tsk->signal->core_state;
- spin_unlock_irq(&tsk->sighand->siglock);
- if (core_state) {
- struct core_thread self;
-
- self.task = current;
- if (self.task->flags & PF_SIGNALED)
- self.next = xchg(&core_state->dumper.next, &self);
- else
- self.task = NULL;
- /*
- * Implies mb(), the result of xchg() must be visible
- * to core_state->dumper.
- */
- if (atomic_dec_and_test(&core_state->nr_threads))
- complete(&core_state->startup);
+ if (atomic_dec_and_test(&core_state->nr_threads))
+ complete(&core_state->startup);
- for (;;) {
- set_current_state(TASK_IDLE|TASK_FREEZABLE);
- if (!self.task) /* see coredump_finish() */
- break;
- schedule();
- }
- __set_current_state(TASK_RUNNING);
+ for (;;) {
+ set_current_state(TASK_IDLE|TASK_FREEZABLE);
+ if (!self.task) /* see coredump_finish() */
+ break;
+ schedule();
}
+ __set_current_state(TASK_RUNNING);
}
#ifdef CONFIG_MEMCG
@@ -882,6 +868,7 @@ static void synchronize_group_exit(struct task_struct *tsk, long code)
{
struct sighand_struct *sighand = tsk->sighand;
struct signal_struct *signal = tsk->signal;
+ struct core_state *core_state;
spin_lock_irq(&sighand->siglock);
signal->quick_threads--;
@@ -891,7 +878,19 @@ static void synchronize_group_exit(struct task_struct *tsk, long code)
signal->group_exit_code = code;
signal->group_stop_count = 0;
}
+ /*
+ * Serialize with any possible pending coredump.
+ * We must hold siglock around checking core_state
+ * and setting PF_POSTCOREDUMP. The core-inducing thread
+ * will increment ->nr_threads for each thread in the
+ * group without PF_POSTCOREDUMP set.
+ */
+ tsk->flags |= PF_POSTCOREDUMP;
+ core_state = signal->core_state;
spin_unlock_irq(&sighand->siglock);
+
+ if (unlikely(core_state))
+ coredump_task_exit(tsk, core_state);
}
void __noreturn do_exit(long code)
@@ -900,15 +899,12 @@ void __noreturn do_exit(long code)
int group_dead;
WARN_ON(irqs_disabled());
-
- synchronize_group_exit(tsk, code);
-
WARN_ON(tsk->plug);
kcov_task_exit(tsk);
kmsan_task_exit(tsk);
- coredump_task_exit(tsk);
+ synchronize_group_exit(tsk, code);
ptrace_event(PTRACE_EVENT_EXIT, code);
user_events_exit(tsk);
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index dc898ec93463..d2432df2b905 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -22,6 +22,7 @@
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <linux/sched/sysctl.h>
+#include <linux/hung_task.h>
#include <trace/events/sched.h>
@@ -98,30 +99,62 @@ static struct notifier_block panic_block = {
static void debug_show_blocker(struct task_struct *task)
{
struct task_struct *g, *t;
- unsigned long owner;
- struct mutex *lock;
+ unsigned long owner, blocker, blocker_type;
RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "No rcu lock held");
- lock = READ_ONCE(task->blocker_mutex);
- if (!lock)
+ blocker = READ_ONCE(task->blocker);
+ if (!blocker)
return;
- owner = mutex_get_owner(lock);
+ blocker_type = hung_task_get_blocker_type(blocker);
+
+ switch (blocker_type) {
+ case BLOCKER_TYPE_MUTEX:
+ owner = mutex_get_owner(
+ (struct mutex *)hung_task_blocker_to_lock(blocker));
+ break;
+ case BLOCKER_TYPE_SEM:
+ owner = sem_last_holder(
+ (struct semaphore *)hung_task_blocker_to_lock(blocker));
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return;
+ }
+
if (unlikely(!owner)) {
- pr_err("INFO: task %s:%d is blocked on a mutex, but the owner is not found.\n",
- task->comm, task->pid);
+ switch (blocker_type) {
+ case BLOCKER_TYPE_MUTEX:
+ pr_err("INFO: task %s:%d is blocked on a mutex, but the owner is not found.\n",
+ task->comm, task->pid);
+ break;
+ case BLOCKER_TYPE_SEM:
+ pr_err("INFO: task %s:%d is blocked on a semaphore, but the last holder is not found.\n",
+ task->comm, task->pid);
+ break;
+ }
return;
}
/* Ensure the owner information is correct. */
for_each_process_thread(g, t) {
- if ((unsigned long)t == owner) {
+ if ((unsigned long)t != owner)
+ continue;
+
+ switch (blocker_type) {
+ case BLOCKER_TYPE_MUTEX:
pr_err("INFO: task %s:%d is blocked on a mutex likely owned by task %s:%d.\n",
- task->comm, task->pid, t->comm, t->pid);
- sched_show_task(t);
- return;
+ task->comm, task->pid, t->comm, t->pid);
+ break;
+ case BLOCKER_TYPE_SEM:
+ pr_err("INFO: task %s:%d blocked on a semaphore likely last held by task %s:%d\n",
+ task->comm, task->pid, t->comm, t->pid);
+ break;
}
+ sched_show_task(t);
+ return;
}
}
#else
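
task->blocker now carries both the lock address and the blocker type, which
debug_show_blocker() unpacks via the helpers from <linux/hung_task.h>. The
exact masks and tag values live in that header; the sketch below is an
assumed but plausible reading of the encoding, a tagged pointer that stores
the type in the low bits left free by the lock structure's alignment:

	/* Assumed tag values and mask, for illustration only. */
	#define BLOCKER_TYPE_MUTEX	0UL
	#define BLOCKER_TYPE_SEM	1UL
	#define BLOCKER_TYPE_MASK	3UL	/* low two pointer bits */

	static inline unsigned long hung_task_blocker_pack(void *lock,
							   unsigned long type)
	{
		/* Lock structures are word-aligned, so the low bits are free. */
		return (unsigned long)lock | type;
	}

	static inline unsigned long hung_task_get_blocker_type(unsigned long b)
	{
		return b & BLOCKER_TYPE_MASK;
	}

	static inline void *hung_task_blocker_to_lock(unsigned long b)
	{
		return (void *)(b & ~BLOCKER_TYPE_MASK);
	}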
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 10a78358257b..69fe76fd9233 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -19,7 +19,6 @@
#include <linux/list.h>
#include <linux/fs.h>
#include <linux/ima.h>
-#include <crypto/hash.h>
#include <crypto/sha2.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
@@ -474,6 +473,7 @@ static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
temp_end = min(end, kbuf->buf_max);
temp_start = temp_end - kbuf->memsz + 1;
+ kexec_random_range_start(temp_start, temp_end, kbuf, &temp_start);
do {
/* align down start */
@@ -518,6 +518,8 @@ static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
temp_start = max(start, kbuf->buf_min);
+ kexec_random_range_start(temp_start, end, kbuf, &temp_start);
+
do {
temp_start = ALIGN(temp_start, kbuf->buf_align);
temp_end = temp_start + kbuf->memsz - 1;
@@ -749,11 +751,10 @@ int kexec_add_buffer(struct kexec_buf *kbuf)
/* Calculate and store the digest of segments */
static int kexec_calculate_store_digests(struct kimage *image)
{
- struct crypto_shash *tfm;
- struct shash_desc *desc;
+ struct sha256_state state;
int ret = 0, i, j, zero_buf_sz, sha_region_sz;
- size_t desc_size, nullsz;
- char *digest;
+ size_t nullsz;
+ u8 digest[SHA256_DIGEST_SIZE];
void *zero_buf;
struct kexec_sha_region *sha_regions;
struct purgatory_info *pi = &image->purgatory_info;
@@ -764,37 +765,12 @@ static int kexec_calculate_store_digests(struct kimage *image)
zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT);
zero_buf_sz = PAGE_SIZE;
- tfm = crypto_alloc_shash("sha256", 0, 0);
- if (IS_ERR(tfm)) {
- ret = PTR_ERR(tfm);
- goto out;
- }
-
- desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
- desc = kzalloc(desc_size, GFP_KERNEL);
- if (!desc) {
- ret = -ENOMEM;
- goto out_free_tfm;
- }
-
sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region);
sha_regions = vzalloc(sha_region_sz);
- if (!sha_regions) {
- ret = -ENOMEM;
- goto out_free_desc;
- }
-
- desc->tfm = tfm;
-
- ret = crypto_shash_init(desc);
- if (ret < 0)
- goto out_free_sha_regions;
+ if (!sha_regions)
+ return -ENOMEM;
- digest = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL);
- if (!digest) {
- ret = -ENOMEM;
- goto out_free_sha_regions;
- }
+ sha256_init(&state);
for (j = i = 0; i < image->nr_segments; i++) {
struct kexec_segment *ksegment;
@@ -820,10 +796,7 @@ static int kexec_calculate_store_digests(struct kimage *image)
if (check_ima_segment_index(image, i))
continue;
- ret = crypto_shash_update(desc, ksegment->kbuf,
- ksegment->bufsz);
- if (ret)
- break;
+ sha256_update(&state, ksegment->kbuf, ksegment->bufsz);
/*
* Assume rest of the buffer is filled with zero and
@@ -835,44 +808,26 @@ static int kexec_calculate_store_digests(struct kimage *image)
if (bytes > zero_buf_sz)
bytes = zero_buf_sz;
- ret = crypto_shash_update(desc, zero_buf, bytes);
- if (ret)
- break;
+ sha256_update(&state, zero_buf, bytes);
nullsz -= bytes;
}
- if (ret)
- break;
-
sha_regions[j].start = ksegment->mem;
sha_regions[j].len = ksegment->memsz;
j++;
}
- if (!ret) {
- ret = crypto_shash_final(desc, digest);
- if (ret)
- goto out_free_digest;
- ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha_regions",
- sha_regions, sha_region_sz, 0);
- if (ret)
- goto out_free_digest;
+ sha256_final(&state, digest);
- ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha256_digest",
- digest, SHA256_DIGEST_SIZE, 0);
- if (ret)
- goto out_free_digest;
- }
+ ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha_regions",
+ sha_regions, sha_region_sz, 0);
+ if (ret)
+ goto out_free_sha_regions;
-out_free_digest:
- kfree(digest);
+ ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha256_digest",
+ digest, SHA256_DIGEST_SIZE, 0);
out_free_sha_regions:
vfree(sha_regions);
-out_free_desc:
- kfree(desc);
-out_free_tfm:
- kfree(tfm);
-out:
return ret;
}
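
The rewrite above trades the crypto_shash API, with its tfm/desc/digest
allocations and per-call error handling, for the SHA-256 library interface
declared in <crypto/sha2.h>. That interface reduces to three infallible
calls on a stack-allocated state, as in this minimal sketch:

	#include <crypto/sha2.h>

	static void sha256_of_two_buffers(const u8 *a, size_t alen,
					  const u8 *b, size_t blen,
					  u8 digest[SHA256_DIGEST_SIZE])
	{
		struct sha256_state state;	/* on the stack, no kmalloc */

		sha256_init(&state);
		sha256_update(&state, a, alen);	/* may be called repeatedly */
		sha256_update(&state, b, blen);
		sha256_final(&state, digest);	/* none of these can fail */
	}

Because none of the library calls can fail, the out_free_tfm/out_free_desc/
out_free_digest unwind paths disappear, which accounts for most of the
deleted lines in this hunk.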
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 555e2b3a665a..61fa97da7989 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -29,6 +29,7 @@
#include <linux/interrupt.h>
#include <linux/debug_locks.h>
#include <linux/osq_lock.h>
+#include <linux/hung_task.h>
#define CREATE_TRACE_POINTS
#include <trace/events/lock.h>
@@ -191,7 +192,7 @@ __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
struct list_head *list)
{
#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
- WRITE_ONCE(current->blocker_mutex, lock);
+ hung_task_set_blocker(lock, BLOCKER_TYPE_MUTEX);
#endif
debug_mutex_add_waiter(lock, waiter, current);
@@ -209,7 +210,7 @@ __mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter)
debug_mutex_remove_waiter(lock, waiter, current);
#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
- WRITE_ONCE(current->blocker_mutex, NULL);
+ hung_task_clear_blocker();
#endif
}
diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
index de9117c0e671..3ef032e22f7e 100644
--- a/kernel/locking/semaphore.c
+++ b/kernel/locking/semaphore.c
@@ -34,6 +34,7 @@
#include <linux/spinlock.h>
#include <linux/ftrace.h>
#include <trace/events/lock.h>
+#include <linux/hung_task.h>
static noinline void __down(struct semaphore *sem);
static noinline int __down_interruptible(struct semaphore *sem);
@@ -41,6 +42,41 @@ static noinline int __down_killable(struct semaphore *sem);
static noinline int __down_timeout(struct semaphore *sem, long timeout);
static noinline void __up(struct semaphore *sem, struct wake_q_head *wake_q);
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+static inline void hung_task_sem_set_holder(struct semaphore *sem)
+{
+ WRITE_ONCE((sem)->last_holder, (unsigned long)current);
+}
+
+static inline void hung_task_sem_clear_if_holder(struct semaphore *sem)
+{
+ if (READ_ONCE((sem)->last_holder) == (unsigned long)current)
+ WRITE_ONCE((sem)->last_holder, 0UL);
+}
+
+unsigned long sem_last_holder(struct semaphore *sem)
+{
+ return READ_ONCE(sem->last_holder);
+}
+#else
+static inline void hung_task_sem_set_holder(struct semaphore *sem)
+{
+}
+static inline void hung_task_sem_clear_if_holder(struct semaphore *sem)
+{
+}
+unsigned long sem_last_holder(struct semaphore *sem)
+{
+ return 0UL;
+}
+#endif
+
+static inline void __sem_acquire(struct semaphore *sem)
+{
+ sem->count--;
+ hung_task_sem_set_holder(sem);
+}
+
/**
* down - acquire the semaphore
* @sem: the semaphore to be acquired
@@ -59,7 +95,7 @@ void __sched down(struct semaphore *sem)
might_sleep();
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
- sem->count--;
+ __sem_acquire(sem);
else
__down(sem);
raw_spin_unlock_irqrestore(&sem->lock, flags);
@@ -83,7 +119,7 @@ int __sched down_interruptible(struct semaphore *sem)
might_sleep();
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
- sem->count--;
+ __sem_acquire(sem);
else
result = __down_interruptible(sem);
raw_spin_unlock_irqrestore(&sem->lock, flags);
@@ -110,7 +146,7 @@ int __sched down_killable(struct semaphore *sem)
might_sleep();
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
- sem->count--;
+ __sem_acquire(sem);
else
result = __down_killable(sem);
raw_spin_unlock_irqrestore(&sem->lock, flags);
@@ -140,7 +176,7 @@ int __sched down_trylock(struct semaphore *sem)
raw_spin_lock_irqsave(&sem->lock, flags);
count = sem->count - 1;
if (likely(count >= 0))
- sem->count = count;
+ __sem_acquire(sem);
raw_spin_unlock_irqrestore(&sem->lock, flags);
return (count < 0);
@@ -165,7 +201,7 @@ int __sched down_timeout(struct semaphore *sem, long timeout)
might_sleep();
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
- sem->count--;
+ __sem_acquire(sem);
else
result = __down_timeout(sem, timeout);
raw_spin_unlock_irqrestore(&sem->lock, flags);
@@ -187,6 +223,9 @@ void __sched up(struct semaphore *sem)
DEFINE_WAKE_Q(wake_q);
raw_spin_lock_irqsave(&sem->lock, flags);
+
+ hung_task_sem_clear_if_holder(sem);
+
if (likely(list_empty(&sem->wait_list)))
sem->count++;
else
@@ -228,8 +267,10 @@ static inline int __sched ___down_common(struct semaphore *sem, long state,
raw_spin_unlock_irq(&sem->lock);
timeout = schedule_timeout(timeout);
raw_spin_lock_irq(&sem->lock);
- if (waiter.up)
+ if (waiter.up) {
+ hung_task_sem_set_holder(sem);
return 0;
+ }
}
timed_out:
@@ -246,10 +287,14 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
{
int ret;
+ hung_task_set_blocker(sem, BLOCKER_TYPE_SEM);
+
trace_contention_begin(sem, 0);
ret = ___down_common(sem, state, timeout);
trace_contention_end(sem, ret);
+ hung_task_clear_blocker();
+
return ret;
}
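
Unlike a mutex, a semaphore has no owner, so last_holder is only a hint:
with a counting semaphore several tasks can hold it simultaneously, and the
field tracks just the most recent acquirer, cleared only on a matching
release. An illustrative interleaving, assuming sema_init(&sem, 2):

	/*
	 * task A: down(&sem);	// count 2 -> 1, last_holder = A
	 * task B: down(&sem);	// count 1 -> 0, last_holder = B
	 *			//   (A's record is silently overwritten)
	 * task B: up(&sem);	// B matches last_holder, field cleared to 0
	 *			// A still holds the semaphore, yet a hung
	 *			// waiter would now get the "last holder is
	 *			// not found" report
	 */

This is why the hung_task output hedges with "likely last held by" rather
than claiming ownership.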
diff --git a/kernel/panic.c b/kernel/panic.c
index 047ea3215312..b0b9a8bf4560 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -307,12 +307,10 @@ static void panic_other_cpus_shutdown(bool crash_kexec)
}
/**
- * panic - halt the system
- * @fmt: The text string to print
+ * panic - halt the system
+ * @fmt: The text string to print
*
- * Display a message, then perform cleanups.
- *
- * This function never returns.
+ * Display a message, then perform cleanups. This function never returns.
*/
void panic(const char *fmt, ...)
{
diff --git a/kernel/relay.c b/kernel/relay.c
index 5ac7e711e4b6..c0c93a04d4ce 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -452,7 +452,7 @@ int relay_prepare_cpu(unsigned int cpu)
/**
* relay_open - create a new relay channel
- * @base_filename: base name of files to create, %NULL for buffering only
+ * @base_filename: base name of files to create
* @parent: dentry of parent directory, %NULL for root directory or buffer
* @subbuf_size: size of sub-buffers
* @n_subbufs: number of sub-buffers
@@ -465,10 +465,6 @@ int relay_prepare_cpu(unsigned int cpu)
* attributes specified. The created channel buffer files
* will be named base_filename0...base_filenameN-1. File
* permissions will be %S_IRUSR.
- *
- * If opening a buffer (@parent = NULL) that you later wish to register
- * in a filesystem, call relay_late_setup_files() once the @parent dentry
- * is available.
*/
struct rchan *relay_open(const char *base_filename,
struct dentry *parent,
@@ -540,111 +536,6 @@ struct rchan_percpu_buf_dispatcher {
struct dentry *dentry;
};
-/* Called in atomic context. */
-static void __relay_set_buf_dentry(void *info)
-{
- struct rchan_percpu_buf_dispatcher *p = info;
-
- relay_set_buf_dentry(p->buf, p->dentry);
-}
-
-/**
- * relay_late_setup_files - triggers file creation
- * @chan: channel to operate on
- * @base_filename: base name of files to create
- * @parent: dentry of parent directory, %NULL for root directory
- *
- * Returns 0 if successful, non-zero otherwise.
- *
- * Use to setup files for a previously buffer-only channel created
- * by relay_open() with a NULL parent dentry.
- *
- * For example, this is useful for perfomring early tracing in kernel,
- * before VFS is up and then exposing the early results once the dentry
- * is available.
- */
-int relay_late_setup_files(struct rchan *chan,
- const char *base_filename,
- struct dentry *parent)
-{
- int err = 0;
- unsigned int i, curr_cpu;
- unsigned long flags;
- struct dentry *dentry;
- struct rchan_buf *buf;
- struct rchan_percpu_buf_dispatcher disp;
-
- if (!chan || !base_filename)
- return -EINVAL;
-
- strscpy(chan->base_filename, base_filename, NAME_MAX);
-
- mutex_lock(&relay_channels_mutex);
- /* Is chan already set up? */
- if (unlikely(chan->has_base_filename)) {
- mutex_unlock(&relay_channels_mutex);
- return -EEXIST;
- }
- chan->has_base_filename = 1;
- chan->parent = parent;
-
- if (chan->is_global) {
- err = -EINVAL;
- buf = *per_cpu_ptr(chan->buf, 0);
- if (!WARN_ON_ONCE(!buf)) {
- dentry = relay_create_buf_file(chan, buf, 0);
- if (dentry && !WARN_ON_ONCE(!chan->is_global)) {
- relay_set_buf_dentry(buf, dentry);
- err = 0;
- }
- }
- mutex_unlock(&relay_channels_mutex);
- return err;
- }
-
- curr_cpu = get_cpu();
- /*
- * The CPU hotplug notifier ran before us and created buffers with
- * no files associated. So it's safe to call relay_setup_buf_file()
- * on all currently online CPUs.
- */
- for_each_online_cpu(i) {
- buf = *per_cpu_ptr(chan->buf, i);
- if (unlikely(!buf)) {
- WARN_ONCE(1, KERN_ERR "CPU has no buffer!\n");
- err = -EINVAL;
- break;
- }
-
- dentry = relay_create_buf_file(chan, buf, i);
- if (unlikely(!dentry)) {
- err = -EINVAL;
- break;
- }
-
- if (curr_cpu == i) {
- local_irq_save(flags);
- relay_set_buf_dentry(buf, dentry);
- local_irq_restore(flags);
- } else {
- disp.buf = buf;
- disp.dentry = dentry;
- smp_mb();
- /* relay_channels_mutex must be held, so wait. */
- err = smp_call_function_single(i,
- __relay_set_buf_dentry,
- &disp, 1);
- }
- if (unlikely(err))
- break;
- }
- put_cpu();
- mutex_unlock(&relay_channels_mutex);
-
- return err;
-}
-EXPORT_SYMBOL_GPL(relay_late_setup_files);
-
/**
* relay_switch_subbuf - switch to a new sub-buffer
* @buf: channel buffer
diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c
index 1fec61603ef3..e066d31d08f8 100644
--- a/kernel/vmcore_info.c
+++ b/kernel/vmcore_info.c
@@ -210,6 +210,10 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE);
#define PAGE_OFFLINE_MAPCOUNT_VALUE (PGTY_offline << 24)
VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE);
+#ifdef CONFIG_UNACCEPTED_MEMORY
+#define PAGE_UNACCEPTED_MAPCOUNT_VALUE (PGTY_unaccepted << 24)
+ VMCOREINFO_NUMBER(PAGE_UNACCEPTED_MAPCOUNT_VALUE);
+#endif
#ifdef CONFIG_KALLSYMS
VMCOREINFO_SYMBOL(kallsyms_names);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 9fa2af9dbf2c..80b56c002c7f 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -47,6 +47,7 @@ int __read_mostly watchdog_user_enabled = 1;
static int __read_mostly watchdog_hardlockup_user_enabled = WATCHDOG_HARDLOCKUP_DEFAULT;
static int __read_mostly watchdog_softlockup_user_enabled = 1;
int __read_mostly watchdog_thresh = 10;
+static int __read_mostly watchdog_thresh_next;
static int __read_mostly watchdog_hardlockup_available;
struct cpumask watchdog_cpumask __read_mostly;
@@ -63,6 +64,29 @@ int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
*/
unsigned int __read_mostly hardlockup_panic =
IS_ENABLED(CONFIG_BOOTPARAM_HARDLOCKUP_PANIC);
+
+#ifdef CONFIG_SYSFS
+
+static unsigned int hardlockup_count;
+
+static ssize_t hardlockup_count_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *page)
+{
+ return sysfs_emit(page, "%u\n", hardlockup_count);
+}
+
+static struct kobj_attribute hardlockup_count_attr = __ATTR_RO(hardlockup_count);
+
+static __init int kernel_hardlockup_sysfs_init(void)
+{
+ sysfs_add_file_to_group(kernel_kobj, &hardlockup_count_attr.attr, NULL);
+ return 0;
+}
+
+late_initcall(kernel_hardlockup_sysfs_init);
+
+#endif // CONFIG_SYSFS
+
/*
* We may not want to enable hard lockup detection by default in all cases,
* for example when running the kernel as a guest on a hypervisor. In these
@@ -169,6 +193,10 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
unsigned int this_cpu = smp_processor_id();
unsigned long flags;
+#ifdef CONFIG_SYSFS
+ ++hardlockup_count;
+#endif
+
/* Only print hardlockups once. */
if (per_cpu(watchdog_hardlockup_warned, cpu))
return;
@@ -311,6 +339,28 @@ unsigned int __read_mostly softlockup_panic =
static bool softlockup_initialized __read_mostly;
static u64 __read_mostly sample_period;
+#ifdef CONFIG_SYSFS
+
+static unsigned int softlockup_count;
+
+static ssize_t softlockup_count_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *page)
+{
+ return sysfs_emit(page, "%u\n", softlockup_count);
+}
+
+static struct kobj_attribute softlockup_count_attr = __ATTR_RO(softlockup_count);
+
+static __init int kernel_softlockup_sysfs_init(void)
+{
+ sysfs_add_file_to_group(kernel_kobj, &softlockup_count_attr.attr, NULL);
+ return 0;
+}
+
+late_initcall(kernel_softlockup_sysfs_init);
+
+#endif // CONFIG_SYSFS
+
/* Timestamp taken after the last successful reschedule. */
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
/* Timestamp of the last softlockup report. */
@@ -742,6 +792,10 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
touch_ts = __this_cpu_read(watchdog_touch_ts);
duration = is_softlockup(touch_ts, period_ts, now);
if (unlikely(duration)) {
+#ifdef CONFIG_SYSFS
+ ++softlockup_count;
+#endif
+
/*
* Prevent multiple soft-lockup reports if one cpu is already
* engaged in dumping all cpu back traces.
@@ -870,12 +924,20 @@ int lockup_detector_offline_cpu(unsigned int cpu)
return 0;
}
-static void __lockup_detector_reconfigure(void)
+static void __lockup_detector_reconfigure(bool thresh_changed)
{
cpus_read_lock();
watchdog_hardlockup_stop();
softlockup_stop_all();
+ /*
+ * To prevent watchdog_timer_fn from using the old interval and
+ * the new watchdog_thresh at the same time, which could lead to
+ * false softlockup reports, it is necessary to update the
+ * watchdog_thresh after the softlockup is completed.
+ */
+ if (thresh_changed)
+ watchdog_thresh = READ_ONCE(watchdog_thresh_next);
set_sample_period();
lockup_detector_update_enable();
if (watchdog_enabled && watchdog_thresh)
@@ -888,7 +950,7 @@ static void __lockup_detector_reconfigure(void)
void lockup_detector_reconfigure(void)
{
mutex_lock(&watchdog_mutex);
- __lockup_detector_reconfigure();
+ __lockup_detector_reconfigure(false);
mutex_unlock(&watchdog_mutex);
}
@@ -908,27 +970,29 @@ static __init void lockup_detector_setup(void)
return;
mutex_lock(&watchdog_mutex);
- __lockup_detector_reconfigure();
+ __lockup_detector_reconfigure(false);
softlockup_initialized = true;
mutex_unlock(&watchdog_mutex);
}
#else /* CONFIG_SOFTLOCKUP_DETECTOR */
-static void __lockup_detector_reconfigure(void)
+static void __lockup_detector_reconfigure(bool thresh_changed)
{
cpus_read_lock();
watchdog_hardlockup_stop();
+ if (thresh_changed)
+ watchdog_thresh = READ_ONCE(watchdog_thresh_next);
lockup_detector_update_enable();
watchdog_hardlockup_start();
cpus_read_unlock();
}
void lockup_detector_reconfigure(void)
{
- __lockup_detector_reconfigure();
+ __lockup_detector_reconfigure(false);
}
static inline void lockup_detector_setup(void)
{
- __lockup_detector_reconfigure();
+ __lockup_detector_reconfigure(false);
}
#endif /* !CONFIG_SOFTLOCKUP_DETECTOR */
@@ -946,11 +1010,11 @@ void lockup_detector_soft_poweroff(void)
#ifdef CONFIG_SYSCTL
/* Propagate any changes to the watchdog infrastructure */
-static void proc_watchdog_update(void)
+static void proc_watchdog_update(bool thresh_changed)
{
/* Remove impossible cpus to keep sysctl output clean. */
cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask);
- __lockup_detector_reconfigure();
+ __lockup_detector_reconfigure(thresh_changed);
}
/*
@@ -984,7 +1048,7 @@ static int proc_watchdog_common(int which, const struct ctl_table *table, int wr
} else {
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!err && old != READ_ONCE(*param))
- proc_watchdog_update();
+ proc_watchdog_update(false);
}
mutex_unlock(&watchdog_mutex);
return err;
@@ -1035,11 +1099,13 @@ static int proc_watchdog_thresh(const struct ctl_table *table, int write,
mutex_lock(&watchdog_mutex);
- old = READ_ONCE(watchdog_thresh);
+ watchdog_thresh_next = READ_ONCE(watchdog_thresh);
+
+ old = watchdog_thresh_next;
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
- if (!err && write && old != READ_ONCE(watchdog_thresh))
- proc_watchdog_update();
+ if (!err && write && old != READ_ONCE(watchdog_thresh_next))
+ proc_watchdog_update(true);
mutex_unlock(&watchdog_mutex);
return err;
@@ -1060,7 +1126,7 @@ static int proc_watchdog_cpumask(const struct ctl_table *table, int write,
err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
if (!err && write)
- proc_watchdog_update();
+ proc_watchdog_update(false);
mutex_unlock(&watchdog_mutex);
return err;
@@ -1080,7 +1146,7 @@ static const struct ctl_table watchdog_sysctls[] = {
},
{
.procname = "watchdog_thresh",
- .data = &watchdog_thresh,
+ .data = &watchdog_thresh_next,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_watchdog_thresh,
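
Both new attributes are plain monotonically increasing integers under
/sys/kernel, so a user-space consumer needs nothing more than a read and a
parse. A small sketch, not part of the patch:

	#include <stdio.h>

	static int read_count(const char *path, unsigned int *out)
	{
		FILE *f = fopen(path, "r");

		if (!f)
			return -1;
		if (fscanf(f, "%u", out) != 1) {
			fclose(f);
			return -1;
		}
		fclose(f);
		return 0;
	}

	int main(void)
	{
		unsigned int hard, soft;

		if (!read_count("/sys/kernel/hardlockup_count", &hard) &&
		    !read_count("/sys/kernel/softlockup_count", &soft))
			printf("hardlockups=%u softlockups=%u\n", hard, soft);
		return 0;
	}

Note the counters are bumped in watchdog_hardlockup_check() and
watchdog_timer_fn() before the once-only warning filters, so they keep
counting even after the first report has been printed.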