diff options
-rw-r--r-- | include/linux/kexec_handover.h | 14 | ||||
-rw-r--r-- | kernel/kexec_handover.c | 233 | ||||
-rw-r--r-- | mm/memblock.c | 1 |
3 files changed, 247 insertions, 1 deletions
diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h index 2e19004776f6..02dcfc8c427e 100644 --- a/include/linux/kexec_handover.h +++ b/include/linux/kexec_handover.h @@ -24,11 +24,15 @@ struct kho_serialization; bool kho_is_enabled(void); int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt); +int kho_retrieve_subtree(const char *name, phys_addr_t *phys); int register_kho_notifier(struct notifier_block *nb); int unregister_kho_notifier(struct notifier_block *nb); void kho_memory_init(void); + +void kho_populate(phys_addr_t fdt_phys, u64 fdt_len, phys_addr_t scratch_phys, + u64 scratch_len); #else static inline bool kho_is_enabled(void) { @@ -41,6 +45,11 @@ static inline int kho_add_subtree(struct kho_serialization *ser, return -EOPNOTSUPP; } +static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys) +{ + return -EOPNOTSUPP; +} + static inline int register_kho_notifier(struct notifier_block *nb) { return -EOPNOTSUPP; @@ -54,6 +63,11 @@ static inline int unregister_kho_notifier(struct notifier_block *nb) static inline void kho_memory_init(void) { } + +static inline void kho_populate(phys_addr_t fdt_phys, u64 fdt_len, + phys_addr_t scratch_phys, u64 scratch_len) +{ +} #endif /* CONFIG_KEXEC_HANDOVER */ #endif /* LINUX_KEXEC_HANDOVER_H */ diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c index e541d3d5003d..59f3cf9557f5 100644 --- a/kernel/kexec_handover.c +++ b/kernel/kexec_handover.c @@ -17,6 +17,9 @@ #include <linux/memblock.h> #include <linux/notifier.h> #include <linux/page-isolation.h> + +#include <asm/early_ioremap.h> + /* * KHO is tightly coupled with mm init and needs access to some of mm * internal APIs. @@ -501,9 +504,112 @@ err_rmdir: return -ENOENT; } +struct kho_in { + struct dentry *dir; + phys_addr_t fdt_phys; + phys_addr_t scratch_phys; + struct list_head fdt_list; +}; + +static struct kho_in kho_in = { + .fdt_list = LIST_HEAD_INIT(kho_in.fdt_list), +}; + +static const void *kho_get_fdt(void) +{ + return kho_in.fdt_phys ? phys_to_virt(kho_in.fdt_phys) : NULL; +} + +/** + * kho_retrieve_subtree - retrieve a preserved sub FDT by its name. + * @name: the name of the sub FDT passed to kho_add_subtree(). + * @phys: if found, the physical address of the sub FDT is stored in @phys. + * + * Retrieve a preserved sub FDT named @name and store its physical + * address in @phys. + * + * Return: 0 on success, error code on failure + */ +int kho_retrieve_subtree(const char *name, phys_addr_t *phys) +{ + const void *fdt = kho_get_fdt(); + const u64 *val; + int offset, len; + + if (!fdt) + return -ENOENT; + + if (!phys) + return -EINVAL; + + offset = fdt_subnode_offset(fdt, 0, name); + if (offset < 0) + return -ENOENT; + + val = fdt_getprop(fdt, offset, PROP_SUB_FDT, &len); + if (!val || len != sizeof(*val)) + return -EINVAL; + + *phys = (phys_addr_t)*val; + + return 0; +} +EXPORT_SYMBOL_GPL(kho_retrieve_subtree); + +/* Handling for debugfs/kho/in */ + +static __init int kho_in_debugfs_init(const void *fdt) +{ + struct dentry *sub_fdt_dir; + int err, child; + + kho_in.dir = debugfs_create_dir("in", debugfs_root); + if (IS_ERR(kho_in.dir)) + return PTR_ERR(kho_in.dir); + + sub_fdt_dir = debugfs_create_dir("sub_fdts", kho_in.dir); + if (IS_ERR(sub_fdt_dir)) { + err = PTR_ERR(sub_fdt_dir); + goto err_rmdir; + } + + err = kho_debugfs_fdt_add(&kho_in.fdt_list, kho_in.dir, "fdt", fdt); + if (err) + goto err_rmdir; + + fdt_for_each_subnode(child, fdt, 0) { + int len = 0; + const char *name = fdt_get_name(fdt, child, NULL); + const u64 *fdt_phys; + + fdt_phys = fdt_getprop(fdt, child, "fdt", &len); + if (!fdt_phys) + continue; + if (len != sizeof(*fdt_phys)) { + pr_warn("node `%s`'s prop `fdt` has invalid length: %d\n", + name, len); + continue; + } + err = kho_debugfs_fdt_add(&kho_in.fdt_list, sub_fdt_dir, name, + phys_to_virt(*fdt_phys)); + if (err) { + pr_warn("failed to add fdt `%s` to debugfs: %d\n", name, + err); + continue; + } + } + + return 0; + +err_rmdir: + debugfs_remove_recursive(kho_in.dir); + return err; +} + static __init int kho_init(void) { int err = 0; + const void *fdt = kho_get_fdt(); if (!kho_enable) return 0; @@ -524,6 +630,20 @@ static __init int kho_init(void) if (err) goto err_free_fdt; + if (fdt) { + err = kho_in_debugfs_init(fdt); + /* + * Failure to create /sys/kernel/debug/kho/in does not prevent + * reviving state from KHO and setting up KHO for the next + * kexec. + */ + if (err) + pr_err("failed exposing handover FDT in debugfs: %d\n", + err); + + return 0; + } + for (int i = 0; i < kho_scratch_cnt; i++) { unsigned long base_pfn = PHYS_PFN(kho_scratch[i].addr); unsigned long count = kho_scratch[i].size >> PAGE_SHIFT; @@ -551,7 +671,118 @@ err_free_scratch: } late_initcall(kho_init); +static void __init kho_release_scratch(void) +{ + phys_addr_t start, end; + u64 i; + + memmap_init_kho_scratch_pages(); + + /* + * Mark scratch mem as CMA before we return it. That way we + * ensure that no kernel allocations happen on it. That means + * we can reuse it as scratch memory again later. + */ + __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, + MEMBLOCK_KHO_SCRATCH, &start, &end, NULL) { + ulong start_pfn = pageblock_start_pfn(PFN_DOWN(start)); + ulong end_pfn = pageblock_align(PFN_UP(end)); + ulong pfn; + + for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) + set_pageblock_migratetype(pfn_to_page(pfn), + MIGRATE_CMA); + } +} + void __init kho_memory_init(void) { - kho_reserve_scratch(); + if (kho_in.scratch_phys) { + kho_scratch = phys_to_virt(kho_in.scratch_phys); + kho_release_scratch(); + } else { + kho_reserve_scratch(); + } +} + +void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len, + phys_addr_t scratch_phys, u64 scratch_len) +{ + void *fdt = NULL; + struct kho_scratch *scratch = NULL; + int err = 0; + unsigned int scratch_cnt = scratch_len / sizeof(*kho_scratch); + + /* Validate the input FDT */ + fdt = early_memremap(fdt_phys, fdt_len); + if (!fdt) { + pr_warn("setup: failed to memremap FDT (0x%llx)\n", fdt_phys); + err = -EFAULT; + goto out; + } + err = fdt_check_header(fdt); + if (err) { + pr_warn("setup: handover FDT (0x%llx) is invalid: %d\n", + fdt_phys, err); + err = -EINVAL; + goto out; + } + err = fdt_node_check_compatible(fdt, 0, KHO_FDT_COMPATIBLE); + if (err) { + pr_warn("setup: handover FDT (0x%llx) is incompatible with '%s': %d\n", + fdt_phys, KHO_FDT_COMPATIBLE, err); + err = -EINVAL; + goto out; + } + + scratch = early_memremap(scratch_phys, scratch_len); + if (!scratch) { + pr_warn("setup: failed to memremap scratch (phys=0x%llx, len=%lld)\n", + scratch_phys, scratch_len); + err = -EFAULT; + goto out; + } + + /* + * We pass a safe contiguous blocks of memory to use for early boot + * purporses from the previous kernel so that we can resize the + * memblock array as needed. + */ + for (int i = 0; i < scratch_cnt; i++) { + struct kho_scratch *area = &scratch[i]; + u64 size = area->size; + + memblock_add(area->addr, size); + err = memblock_mark_kho_scratch(area->addr, size); + if (WARN_ON(err)) { + pr_warn("failed to mark the scratch region 0x%pa+0x%pa: %d", + &area->addr, &size, err); + goto out; + } + pr_debug("Marked 0x%pa+0x%pa as scratch", &area->addr, &size); + } + + memblock_reserve(scratch_phys, scratch_len); + + /* + * Now that we have a viable region of scratch memory, let's tell + * the memblocks allocator to only use that for any allocations. + * That way we ensure that nothing scribbles over in use data while + * we initialize the page tables which we will need to ingest all + * memory reservations from the previous kernel. + */ + memblock_set_kho_scratch_only(); + + kho_in.fdt_phys = fdt_phys; + kho_in.scratch_phys = scratch_phys; + kho_scratch_cnt = scratch_cnt; + pr_info("found kexec handover data. Will skip init for some devices\n"); + +out: + if (fdt) + early_memunmap(fdt, fdt_len); + if (scratch) + early_memunmap(scratch, scratch_len); + if (err) + pr_warn("disabling KHO revival: %d\n", err); } diff --git a/mm/memblock.c b/mm/memblock.c index ec30d850e195..8895b95ffb5b 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2394,6 +2394,7 @@ void __init memblock_free_all(void) free_unused_memmap(); reset_all_zones_managed_pages(); + memblock_clear_kho_scratch_only(); pages = free_low_memory_core_early(); totalram_pages_add(pages); } |