diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/balloon_compaction.c | 6 | ||||
-rw-r--r-- | mm/damon/core.c | 19 | ||||
-rw-r--r-- | mm/damon/sysfs-schemes.c | 2 | ||||
-rw-r--r-- | mm/debug_vm_pgtable.c | 9 | ||||
-rw-r--r-- | mm/kasan/init.c | 12 | ||||
-rw-r--r-- | mm/kasan/kasan_test_c.c | 2 | ||||
-rw-r--r-- | mm/kasan/shadow.c | 22 | ||||
-rw-r--r-- | mm/kmemleak.c | 27 | ||||
-rw-r--r-- | mm/memblock.c | 19 | ||||
-rw-r--r-- | mm/memory-failure.c | 8 | ||||
-rw-r--r-- | mm/migrate.c | 38 | ||||
-rw-r--r-- | mm/mremap.c | 82 | ||||
-rw-r--r-- | mm/numa_emulation.c | 4 | ||||
-rw-r--r-- | mm/numa_memblks.c | 6 | ||||
-rw-r--r-- | mm/percpu.c | 6 | ||||
-rw-r--r-- | mm/slub.c | 37 | ||||
-rw-r--r-- | mm/sparse-vmemmap.c | 11 | ||||
-rw-r--r-- | mm/sparse.c | 15 | ||||
-rw-r--r-- | mm/userfaultfd.c | 9 | ||||
-rw-r--r-- | mm/vmscan.c | 4 | ||||
-rw-r--r-- | mm/zsmalloc.c | 10 |
21 files changed, 235 insertions, 113 deletions
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 2a4a649805c1..03c5dbabb156 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -254,4 +254,10 @@ const struct movable_operations balloon_mops = { .putback_page = balloon_page_putback, }; +static int __init balloon_init(void) +{ + return set_movable_ops(&balloon_mops, PGTY_offline); +} +core_initcall(balloon_init); + #endif /* CONFIG_BALLOON_COMPACTION */ diff --git a/mm/damon/core.c b/mm/damon/core.c index 52a48c9316bc..106ee8b0f2d5 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -845,6 +845,18 @@ static struct damos_filter *damos_nth_filter(int n, struct damos *s) return NULL; } +static struct damos_filter *damos_nth_ops_filter(int n, struct damos *s) +{ + struct damos_filter *filter; + int i = 0; + + damos_for_each_ops_filter(filter, s) { + if (i++ == n) + return filter; + } + return NULL; +} + static void damos_commit_filter_arg( struct damos_filter *dst, struct damos_filter *src) { @@ -871,6 +883,7 @@ static void damos_commit_filter( { dst->type = src->type; dst->matching = src->matching; + dst->allow = src->allow; damos_commit_filter_arg(dst, src); } @@ -908,7 +921,7 @@ static int damos_commit_ops_filters(struct damos *dst, struct damos *src) int i = 0, j = 0; damos_for_each_ops_filter_safe(dst_filter, next, dst) { - src_filter = damos_nth_filter(i++, src); + src_filter = damos_nth_ops_filter(i++, src); if (src_filter) damos_commit_filter(dst_filter, src_filter); else @@ -2060,8 +2073,8 @@ static void damos_set_effective_quota(struct damos_quota *quota) if (quota->ms) { if (quota->total_charged_ns) - throughput = quota->total_charged_sz * 1000000 / - quota->total_charged_ns; + throughput = mult_frac(quota->total_charged_sz, 1000000, + quota->total_charged_ns); else throughput = PAGE_SIZE * 1024; esz = min(throughput * quota->ms, esz); diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 74056bcd6a2c..6536f16006c9 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -2158,8 +2158,8 @@ static void damon_sysfs_scheme_rm_dirs(struct damon_sysfs_scheme *scheme) { damon_sysfs_access_pattern_rm_dirs(scheme->access_pattern); kobject_put(&scheme->access_pattern->kobj); - kobject_put(&scheme->dests->kobj); damos_sysfs_dests_rm_dirs(scheme->dests); + kobject_put(&scheme->dests->kobj); damon_sysfs_quotas_rm_dirs(scheme->quotas); kobject_put(&scheme->quotas->kobj); kobject_put(&scheme->watermarks->kobj); diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index d19031f275a3..830107b6dd08 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -990,29 +990,34 @@ static void __init destroy_args(struct pgtable_debug_args *args) /* Free page table entries */ if (args->start_ptep) { + pmd_clear(args->pmdp); pte_free(args->mm, args->start_ptep); mm_dec_nr_ptes(args->mm); } if (args->start_pmdp) { + pud_clear(args->pudp); pmd_free(args->mm, args->start_pmdp); mm_dec_nr_pmds(args->mm); } if (args->start_pudp) { + p4d_clear(args->p4dp); pud_free(args->mm, args->start_pudp); mm_dec_nr_puds(args->mm); } - if (args->start_p4dp) + if (args->start_p4dp) { + pgd_clear(args->pgdp); p4d_free(args->mm, args->start_p4dp); + } /* Free vma and mm struct */ if (args->vma) vm_area_free(args->vma); if (args->mm) - mmdrop(args->mm); + mmput(args->mm); } static struct page * __init diff --git a/mm/kasan/init.c b/mm/kasan/init.c index ced6b29fcf76..8fce3370c84e 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -13,9 +13,9 @@ #include <linux/mm.h> #include <linux/pfn.h> #include <linux/slab.h> +#include <linux/pgalloc.h> #include <asm/page.h> -#include <asm/pgalloc.h> #include "kasan.h" @@ -191,7 +191,7 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr, pud_t *pud; pmd_t *pmd; - p4d_populate(&init_mm, p4d, + p4d_populate_kernel(addr, p4d, lm_alias(kasan_early_shadow_pud)); pud = pud_offset(p4d, addr); pud_populate(&init_mm, pud, @@ -212,7 +212,7 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr, } else { p = early_alloc(PAGE_SIZE, NUMA_NO_NODE); pud_init(p); - p4d_populate(&init_mm, p4d, p); + p4d_populate_kernel(addr, p4d, p); } } zero_pud_populate(p4d, addr, next); @@ -251,10 +251,10 @@ int __ref kasan_populate_early_shadow(const void *shadow_start, * puds,pmds, so pgd_populate(), pud_populate() * is noops. */ - pgd_populate(&init_mm, pgd, + pgd_populate_kernel(addr, pgd, lm_alias(kasan_early_shadow_p4d)); p4d = p4d_offset(pgd, addr); - p4d_populate(&init_mm, p4d, + p4d_populate_kernel(addr, p4d, lm_alias(kasan_early_shadow_pud)); pud = pud_offset(p4d, addr); pud_populate(&init_mm, pud, @@ -273,7 +273,7 @@ int __ref kasan_populate_early_shadow(const void *shadow_start, if (!p) return -ENOMEM; } else { - pgd_populate(&init_mm, pgd, + pgd_populate_kernel(addr, pgd, early_alloc(PAGE_SIZE, NUMA_NO_NODE)); } } diff --git a/mm/kasan/kasan_test_c.c b/mm/kasan/kasan_test_c.c index e0968acc03aa..f4b17984b627 100644 --- a/mm/kasan/kasan_test_c.c +++ b/mm/kasan/kasan_test_c.c @@ -1578,9 +1578,11 @@ static void kasan_strings(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + OPTIMIZER_HIDE_VAR(ptr); src = kmalloc(KASAN_GRANULE_SIZE, GFP_KERNEL | __GFP_ZERO); strscpy(src, "f0cacc1a0000000", KASAN_GRANULE_SIZE); + OPTIMIZER_HIDE_VAR(src); /* * Make sure that strscpy() does not trigger KASAN if it overreads into diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index d2c70cd2afb1..e2ceebf737ef 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -305,8 +305,7 @@ static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr, pte_t pte; int index; - if (likely(!pte_none(ptep_get(ptep)))) - return 0; + arch_leave_lazy_mmu_mode(); index = PFN_DOWN(addr - data->start); page = data->pages[index]; @@ -320,6 +319,8 @@ static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr, } spin_unlock(&init_mm.page_table_lock); + arch_enter_lazy_mmu_mode(); + return 0; } @@ -461,18 +462,23 @@ int kasan_populate_vmalloc(unsigned long addr, unsigned long size) static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr, void *unused) { - unsigned long page; + pte_t pte; + int none; - page = (unsigned long)__va(pte_pfn(ptep_get(ptep)) << PAGE_SHIFT); + arch_leave_lazy_mmu_mode(); spin_lock(&init_mm.page_table_lock); - - if (likely(!pte_none(ptep_get(ptep)))) { + pte = ptep_get(ptep); + none = pte_none(pte); + if (likely(!none)) pte_clear(&init_mm, addr, ptep); - free_page(page); - } spin_unlock(&init_mm.page_table_lock); + if (likely(!none)) + __free_page(pfn_to_page(pte_pfn(pte))); + + arch_enter_lazy_mmu_mode(); + return 0; } diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 84265983f239..1ac56ceb29b6 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -437,9 +437,15 @@ static struct kmemleak_object *__lookup_object(unsigned long ptr, int alias, else if (untagged_objp == untagged_ptr || alias) return object; else { + /* + * Printk deferring due to the kmemleak_lock held. + * This is done to avoid deadlock. + */ + printk_deferred_enter(); kmemleak_warn("Found object by alias at 0x%08lx\n", ptr); dump_object_info(object); + printk_deferred_exit(); break; } } @@ -736,6 +742,11 @@ static int __link_object(struct kmemleak_object *object, unsigned long ptr, else if (untagged_objp + parent->size <= untagged_ptr) link = &parent->rb_node.rb_right; else { + /* + * Printk deferring due to the kmemleak_lock held. + * This is done to avoid deadlock. + */ + printk_deferred_enter(); kmemleak_stop("Cannot insert 0x%lx into the object search tree (overlaps existing)\n", ptr); /* @@ -743,6 +754,7 @@ static int __link_object(struct kmemleak_object *object, unsigned long ptr, * be freed while the kmemleak_lock is held. */ dump_object_info(parent); + printk_deferred_exit(); return -EEXIST; } } @@ -856,13 +868,8 @@ static void delete_object_part(unsigned long ptr, size_t size, raw_spin_lock_irqsave(&kmemleak_lock, flags); object = __find_and_remove_object(ptr, 1, objflags); - if (!object) { -#ifdef DEBUG - kmemleak_warn("Partially freeing unknown object at 0x%08lx (size %zu)\n", - ptr, size); -#endif + if (!object) goto unlock; - } /* * Create one or two objects that may result from the memory block @@ -882,8 +889,14 @@ static void delete_object_part(unsigned long ptr, size_t size, unlock: raw_spin_unlock_irqrestore(&kmemleak_lock, flags); - if (object) + if (object) { __delete_object(object); + } else { +#ifdef DEBUG + kmemleak_warn("Partially freeing unknown object at 0x%08lx (size %zu)\n", + ptr, size); +#endif + } out: if (object_l) diff --git a/mm/memblock.c b/mm/memblock.c index 154f1d73b61f..117d963e677c 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -780,9 +780,9 @@ bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_byt } if ((nr_pages << PAGE_SHIFT) > threshold_bytes) { - mem_size_mb = memblock_phys_mem_size() >> 20; + mem_size_mb = memblock_phys_mem_size() / SZ_1M; pr_err("NUMA: no nodes coverage for %luMB of %luMB RAM\n", - (nr_pages << PAGE_SHIFT) >> 20, mem_size_mb); + (nr_pages << PAGE_SHIFT) / SZ_1M, mem_size_mb); return false; } @@ -1091,13 +1091,20 @@ int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size) /** * memblock_reserved_mark_noinit - Mark a reserved memory region with flag - * MEMBLOCK_RSRV_NOINIT which results in the struct pages not being initialized - * for this region. + * MEMBLOCK_RSRV_NOINIT + * * @base: the base phys addr of the region * @size: the size of the region * - * struct pages will not be initialized for reserved memory regions marked with - * %MEMBLOCK_RSRV_NOINIT. + * The struct pages for the reserved regions marked %MEMBLOCK_RSRV_NOINIT will + * not be fully initialized to allow the caller optimize their initialization. + * + * When %CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled, setting this flag + * completely bypasses the initialization of struct pages for such region. + * + * When %CONFIG_DEFERRED_STRUCT_PAGE_INIT is disabled, struct pages in this + * region will be initialized with default values but won't be marked as + * reserved. * * Return: 0 on success, -errno on failure. */ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index e2e685b971bb..fc30ca4804bf 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -853,9 +853,17 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask, #define hwpoison_hugetlb_range NULL #endif +static int hwpoison_test_walk(unsigned long start, unsigned long end, + struct mm_walk *walk) +{ + /* We also want to consider pages mapped into VM_PFNMAP. */ + return 0; +} + static const struct mm_walk_ops hwpoison_walk_ops = { .pmd_entry = hwpoison_pte_range, .hugetlb_entry = hwpoison_hugetlb_range, + .test_walk = hwpoison_test_walk, .walk_lock = PGWALK_RDLOCK, }; diff --git a/mm/migrate.c b/mm/migrate.c index 425401b2d4e1..9e5ef39ce73a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -43,8 +43,6 @@ #include <linux/sched/sysctl.h> #include <linux/memory-tiers.h> #include <linux/pagewalk.h> -#include <linux/balloon_compaction.h> -#include <linux/zsmalloc.h> #include <asm/tlbflush.h> @@ -53,6 +51,33 @@ #include "internal.h" #include "swap.h" +static const struct movable_operations *offline_movable_ops; +static const struct movable_operations *zsmalloc_movable_ops; + +int set_movable_ops(const struct movable_operations *ops, enum pagetype type) +{ + /* + * We only allow for selected types and don't handle concurrent + * registration attempts yet. + */ + switch (type) { + case PGTY_offline: + if (offline_movable_ops && ops) + return -EBUSY; + offline_movable_ops = ops; + break; + case PGTY_zsmalloc: + if (zsmalloc_movable_ops && ops) + return -EBUSY; + zsmalloc_movable_ops = ops; + break; + default: + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL_GPL(set_movable_ops); + static const struct movable_operations *page_movable_ops(struct page *page) { VM_WARN_ON_ONCE_PAGE(!page_has_movable_ops(page), page); @@ -62,15 +87,12 @@ static const struct movable_operations *page_movable_ops(struct page *page) * it as movable, the page type must be sticky until the page gets freed * back to the buddy. */ -#ifdef CONFIG_BALLOON_COMPACTION if (PageOffline(page)) /* Only balloon compaction sets PageOffline pages movable. */ - return &balloon_mops; -#endif /* CONFIG_BALLOON_COMPACTION */ -#if defined(CONFIG_ZSMALLOC) && defined(CONFIG_COMPACTION) + return offline_movable_ops; if (PageZsmalloc(page)) - return &zsmalloc_mops; -#endif /* defined(CONFIG_ZSMALLOC) && defined(CONFIG_COMPACTION) */ + return zsmalloc_movable_ops; + return NULL; } diff --git a/mm/mremap.c b/mm/mremap.c index 9afa8cd524f5..e618a706aff5 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -323,6 +323,25 @@ static inline bool arch_supports_page_table_move(void) } #endif +static inline bool uffd_supports_page_table_move(struct pagetable_move_control *pmc) +{ + /* + * If we are moving a VMA that has uffd-wp registered but with + * remap events disabled (new VMA will not be registered with uffd), we + * need to ensure that the uffd-wp state is cleared from all pgtables. + * This means recursing into lower page tables in move_page_tables(). + * + * We might get called with VMAs reversed when recovering from a + * failed page table move. In that case, the + * "old"-but-actually-"originally new" VMA during recovery will not have + * a uffd context. Recursing into lower page tables during the original + * move but not during the recovery move will cause trouble, because we + * run into already-existing page tables. So check both VMAs. + */ + return !vma_has_uffd_without_event_remap(pmc->old) && + !vma_has_uffd_without_event_remap(pmc->new); +} + #ifdef CONFIG_HAVE_MOVE_PMD static bool move_normal_pmd(struct pagetable_move_control *pmc, pmd_t *old_pmd, pmd_t *new_pmd) @@ -335,6 +354,8 @@ static bool move_normal_pmd(struct pagetable_move_control *pmc, if (!arch_supports_page_table_move()) return false; + if (!uffd_supports_page_table_move(pmc)) + return false; /* * The destination pmd shouldn't be established, free_pgtables() * should have released it. @@ -361,15 +382,6 @@ static bool move_normal_pmd(struct pagetable_move_control *pmc, if (WARN_ON_ONCE(!pmd_none(*new_pmd))) return false; - /* If this pmd belongs to a uffd vma with remap events disabled, we need - * to ensure that the uffd-wp state is cleared from all pgtables. This - * means recursing into lower page tables in move_page_tables(), and we - * can reuse the existing code if we simply treat the entry as "not - * moved". - */ - if (vma_has_uffd_without_event_remap(vma)) - return false; - /* * We don't have to worry about the ordering of src and dst * ptlocks because exclusive mmap_lock prevents deadlock. @@ -418,6 +430,8 @@ static bool move_normal_pud(struct pagetable_move_control *pmc, if (!arch_supports_page_table_move()) return false; + if (!uffd_supports_page_table_move(pmc)) + return false; /* * The destination pud shouldn't be established, free_pgtables() * should have released it. @@ -425,15 +439,6 @@ static bool move_normal_pud(struct pagetable_move_control *pmc, if (WARN_ON_ONCE(!pud_none(*new_pud))) return false; - /* If this pud belongs to a uffd vma with remap events disabled, we need - * to ensure that the uffd-wp state is cleared from all pgtables. This - * means recursing into lower page tables in move_page_tables(), and we - * can reuse the existing code if we simply treat the entry as "not - * moved". - */ - if (vma_has_uffd_without_event_remap(vma)) - return false; - /* * We don't have to worry about the ordering of src and dst * ptlocks because exclusive mmap_lock prevents deadlock. @@ -1620,7 +1625,7 @@ static void notify_uffd(struct vma_remap_struct *vrm, bool failed) static bool vma_multi_allowed(struct vm_area_struct *vma) { - struct file *file; + struct file *file = vma->vm_file; /* * We can't support moving multiple uffd VMAs as notify requires @@ -1633,15 +1638,17 @@ static bool vma_multi_allowed(struct vm_area_struct *vma) * Custom get unmapped area might result in MREMAP_FIXED not * being obeyed. */ - file = vma->vm_file; - if (file && !vma_is_shmem(vma) && !is_vm_hugetlb_page(vma)) { - const struct file_operations *fop = file->f_op; - - if (fop->get_unmapped_area) - return false; - } + if (!file || !file->f_op->get_unmapped_area) + return true; + /* Known good. */ + if (vma_is_shmem(vma)) + return true; + if (is_vm_hugetlb_page(vma)) + return true; + if (file->f_op->get_unmapped_area == thp_get_unmapped_area) + return true; - return true; + return false; } static int check_prep_vma(struct vma_remap_struct *vrm) @@ -1818,10 +1825,11 @@ static unsigned long remap_move(struct vma_remap_struct *vrm) unsigned long start = vrm->addr; unsigned long end = vrm->addr + vrm->old_len; unsigned long new_addr = vrm->new_addr; - bool allowed = true, seen_vma = false; unsigned long target_addr = new_addr; unsigned long res = -EFAULT; unsigned long last_end; + bool seen_vma = false; + VMA_ITERATOR(vmi, current->mm, start); /* @@ -1834,9 +1842,7 @@ static unsigned long remap_move(struct vma_remap_struct *vrm) unsigned long addr = max(vma->vm_start, start); unsigned long len = min(end, vma->vm_end) - addr; unsigned long offset, res_vma; - - if (!allowed) - return -EFAULT; + bool multi_allowed; /* No gap permitted at the start of the range. */ if (!seen_vma && start < vma->vm_start) @@ -1865,9 +1871,15 @@ static unsigned long remap_move(struct vma_remap_struct *vrm) vrm->new_addr = target_addr + offset; vrm->old_len = vrm->new_len = len; - allowed = vma_multi_allowed(vma); - if (seen_vma && !allowed) - return -EFAULT; + multi_allowed = vma_multi_allowed(vma); + if (!multi_allowed) { + /* This is not the first VMA, abort immediately. */ + if (seen_vma) + return -EFAULT; + /* This is the first, but there are more, abort. */ + if (vma->vm_end < end) + return -EFAULT; + } res_vma = check_prep_vma(vrm); if (!res_vma) @@ -1876,7 +1888,7 @@ static unsigned long remap_move(struct vma_remap_struct *vrm) return res_vma; if (!seen_vma) { - VM_WARN_ON_ONCE(allowed && res_vma != new_addr); + VM_WARN_ON_ONCE(multi_allowed && res_vma != new_addr); res = res_vma; } diff --git a/mm/numa_emulation.c b/mm/numa_emulation.c index 9d55679d99ce..703c8fa05048 100644 --- a/mm/numa_emulation.c +++ b/mm/numa_emulation.c @@ -73,7 +73,7 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei, } printk(KERN_INFO "Faking node %d at [mem %#018Lx-%#018Lx] (%LuMB)\n", - nid, eb->start, eb->end - 1, (eb->end - eb->start) >> 20); + nid, eb->start, eb->end - 1, (eb->end - eb->start) / SZ_1M); return 0; } @@ -264,7 +264,7 @@ static int __init split_nodes_size_interleave_uniform(struct numa_meminfo *ei, min_size = ALIGN(max(min_size, FAKE_NODE_MIN_SIZE), FAKE_NODE_MIN_SIZE); if (size < min_size) { pr_err("Fake node size %LuMB too small, increasing to %LuMB\n", - size >> 20, min_size >> 20); + size / SZ_1M, min_size / SZ_1M); size = min_size; } size = ALIGN_DOWN(size, FAKE_NODE_MIN_SIZE); diff --git a/mm/numa_memblks.c b/mm/numa_memblks.c index 541a99c4071a..5b009a9cd8b4 100644 --- a/mm/numa_memblks.c +++ b/mm/numa_memblks.c @@ -76,7 +76,7 @@ static int __init numa_alloc_distance(void) for (j = 0; j < cnt; j++) numa_distance[i * cnt + j] = i == j ? LOCAL_DISTANCE : REMOTE_DISTANCE; - printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt); + pr_debug("NUMA: Initialized distance table, cnt=%d\n", cnt); return 0; } @@ -427,9 +427,9 @@ static int __init numa_register_meminfo(struct numa_meminfo *mi) unsigned long pfn_align = node_map_pfn_alignment(); if (pfn_align && pfn_align < PAGES_PER_SECTION) { - unsigned long node_align_mb = PFN_PHYS(pfn_align) >> 20; + unsigned long node_align_mb = PFN_PHYS(pfn_align) / SZ_1M; - unsigned long sect_align_mb = PFN_PHYS(PAGES_PER_SECTION) >> 20; + unsigned long sect_align_mb = PFN_PHYS(PAGES_PER_SECTION) / SZ_1M; pr_warn("Node alignment %luMB < min %luMB, rejecting NUMA config\n", node_align_mb, sect_align_mb); diff --git a/mm/percpu.c b/mm/percpu.c index d9cbaee92b60..a56f35dcc417 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -3108,7 +3108,7 @@ out_free: #endif /* BUILD_EMBED_FIRST_CHUNK */ #ifdef BUILD_PAGE_FIRST_CHUNK -#include <asm/pgalloc.h> +#include <linux/pgalloc.h> #ifndef P4D_TABLE_SIZE #define P4D_TABLE_SIZE PAGE_SIZE @@ -3134,13 +3134,13 @@ void __init __weak pcpu_populate_pte(unsigned long addr) if (pgd_none(*pgd)) { p4d = memblock_alloc_or_panic(P4D_TABLE_SIZE, P4D_TABLE_SIZE); - pgd_populate(&init_mm, pgd, p4d); + pgd_populate_kernel(addr, pgd, p4d); } p4d = p4d_offset(pgd, addr); if (p4d_none(*p4d)) { pud = memblock_alloc_or_panic(PUD_TABLE_SIZE, PUD_TABLE_SIZE); - p4d_populate(&init_mm, p4d, pud); + p4d_populate_kernel(addr, p4d, pud); } pud = pud_offset(p4d, addr); diff --git a/mm/slub.c b/mm/slub.c index 30003763d224..d257141896c9 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -962,19 +962,19 @@ static struct track *get_track(struct kmem_cache *s, void *object, } #ifdef CONFIG_STACKDEPOT -static noinline depot_stack_handle_t set_track_prepare(void) +static noinline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags) { depot_stack_handle_t handle; unsigned long entries[TRACK_ADDRS_COUNT]; unsigned int nr_entries; nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3); - handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT); + handle = stack_depot_save(entries, nr_entries, gfp_flags); return handle; } #else -static inline depot_stack_handle_t set_track_prepare(void) +static inline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags) { return 0; } @@ -996,9 +996,9 @@ static void set_track_update(struct kmem_cache *s, void *object, } static __always_inline void set_track(struct kmem_cache *s, void *object, - enum track_item alloc, unsigned long addr) + enum track_item alloc, unsigned long addr, gfp_t gfp_flags) { - depot_stack_handle_t handle = set_track_prepare(); + depot_stack_handle_t handle = set_track_prepare(gfp_flags); set_track_update(s, object, alloc, addr, handle); } @@ -1140,7 +1140,12 @@ static void object_err(struct kmem_cache *s, struct slab *slab, return; slab_bug(s, reason); - print_trailer(s, slab, object); + if (!object || !check_valid_pointer(s, slab, object)) { + print_slab_info(slab); + pr_err("Invalid pointer 0x%p\n", object); + } else { + print_trailer(s, slab, object); + } add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); WARN_ON(1); @@ -1921,9 +1926,9 @@ static inline bool free_debug_processing(struct kmem_cache *s, static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {} static inline int check_object(struct kmem_cache *s, struct slab *slab, void *object, u8 val) { return 1; } -static inline depot_stack_handle_t set_track_prepare(void) { return 0; } +static inline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags) { return 0; } static inline void set_track(struct kmem_cache *s, void *object, - enum track_item alloc, unsigned long addr) {} + enum track_item alloc, unsigned long addr, gfp_t gfp_flags) {} static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, struct slab *slab) {} static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, @@ -3876,9 +3881,14 @@ new_objects: * For debug caches here we had to go through * alloc_single_from_partial() so just store the * tracking info and return the object. + * + * Due to disabled preemption we need to disallow + * blocking. The flags are further adjusted by + * gfp_nested_mask() in stack_depot itself. */ if (s->flags & SLAB_STORE_USER) - set_track(s, freelist, TRACK_ALLOC, addr); + set_track(s, freelist, TRACK_ALLOC, addr, + gfpflags & ~(__GFP_DIRECT_RECLAIM)); return freelist; } @@ -3910,7 +3920,8 @@ new_objects: goto new_objects; if (s->flags & SLAB_STORE_USER) - set_track(s, freelist, TRACK_ALLOC, addr); + set_track(s, freelist, TRACK_ALLOC, addr, + gfpflags & ~(__GFP_DIRECT_RECLAIM)); return freelist; } @@ -4421,8 +4432,12 @@ static noinline void free_to_partial_list( unsigned long flags; depot_stack_handle_t handle = 0; + /* + * We cannot use GFP_NOWAIT as there are callsites where waking up + * kswapd could deadlock + */ if (s->flags & SLAB_STORE_USER) - handle = set_track_prepare(); + handle = set_track_prepare(__GFP_NOWARN); spin_lock_irqsave(&n->list_lock, flags); diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index fd2ab5118e13..dbd8daccade2 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -27,9 +27,9 @@ #include <linux/spinlock.h> #include <linux/vmalloc.h> #include <linux/sched.h> +#include <linux/pgalloc.h> #include <asm/dma.h> -#include <asm/pgalloc.h> #include <asm/tlbflush.h> #include "hugetlb_vmemmap.h" @@ -229,7 +229,7 @@ p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node) if (!p) return NULL; pud_init(p); - p4d_populate(&init_mm, p4d, p); + p4d_populate_kernel(addr, p4d, p); } return p4d; } @@ -241,7 +241,7 @@ pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node) void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node); if (!p) return NULL; - pgd_populate(&init_mm, pgd, p); + pgd_populate_kernel(addr, pgd, p); } return pgd; } @@ -578,11 +578,6 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn, if (r < 0) return NULL; - if (system_state == SYSTEM_BOOTING) - memmap_boot_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE)); - else - memmap_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE)); - return pfn_to_page(pfn); } diff --git a/mm/sparse.c b/mm/sparse.c index 3c012cf83cc2..e6075b622407 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -454,9 +454,6 @@ static void __init sparse_buffer_init(unsigned long size, int nid) */ sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true); sparsemap_buf_end = sparsemap_buf + size; -#ifndef CONFIG_SPARSEMEM_VMEMMAP - memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); -#endif } static void __init sparse_buffer_fini(void) @@ -567,6 +564,8 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin, sparse_buffer_fini(); goto failed; } + memmap_boot_pages_add(DIV_ROUND_UP(PAGES_PER_SECTION * sizeof(struct page), + PAGE_SIZE)); sparse_init_early_section(nid, map, pnum, 0); } } @@ -680,7 +679,6 @@ static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages, unsigned long start = (unsigned long) pfn_to_page(pfn); unsigned long end = start + nr_pages * sizeof(struct page); - memmap_pages_add(-1L * (DIV_ROUND_UP(end - start, PAGE_SIZE))); vmemmap_free(start, end, altmap); } static void free_map_bootmem(struct page *memmap) @@ -856,10 +854,14 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, * The memmap of early sections is always fully populated. See * section_activate() and pfn_valid() . */ - if (!section_is_early) + if (!section_is_early) { + memmap_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE))); depopulate_section_memmap(pfn, nr_pages, altmap); - else if (memmap) + } else if (memmap) { + memmap_boot_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page), + PAGE_SIZE))); free_map_bootmem(memmap); + } if (empty) ms->section_mem_map = (unsigned long)NULL; @@ -904,6 +906,7 @@ static struct page * __meminit section_activate(int nid, unsigned long pfn, section_deactivate(pfn, nr_pages, altmap); return ERR_PTR(-ENOMEM); } + memmap_pages_add(DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE)); return memmap; } diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 45e6290e2e8b..aefdf3a812a1 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1453,10 +1453,15 @@ out: folio_unlock(src_folio); folio_put(src_folio); } - if (dst_pte) - pte_unmap(dst_pte); + /* + * Unmap in reverse order (LIFO) to maintain proper kmap_local + * index ordering when CONFIG_HIGHPTE is enabled. We mapped dst_pte + * first, then src_pte, so we must unmap src_pte first, then dst_pte. + */ if (src_pte) pte_unmap(src_pte); + if (dst_pte) + pte_unmap(dst_pte); mmu_notifier_invalidate_range_end(&range); if (si) put_swap_device(si); diff --git a/mm/vmscan.c b/mm/vmscan.c index 7de11524a936..a48aec8bfd92 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -5772,9 +5772,9 @@ static int __init init_lru_gen(void) if (sysfs_create_group(mm_kobj, &lru_gen_attr_group)) pr_err("lru_gen: failed to create sysfs group\n"); - debugfs_create_file_aux_num("lru_gen", 0644, NULL, NULL, 1, + debugfs_create_file_aux_num("lru_gen", 0644, NULL, NULL, false, &lru_gen_rw_fops); - debugfs_create_file_aux_num("lru_gen_full", 0444, NULL, NULL, 0, + debugfs_create_file_aux_num("lru_gen_full", 0444, NULL, NULL, true, &lru_gen_ro_fops); return 0; diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 2c5e56a65354..805a10b41266 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -2246,9 +2246,16 @@ EXPORT_SYMBOL_GPL(zs_destroy_pool); static int __init zs_init(void) { + int rc __maybe_unused; + #ifdef CONFIG_ZPOOL zpool_register_driver(&zs_zpool_driver); #endif +#ifdef CONFIG_COMPACTION + rc = set_movable_ops(&zsmalloc_mops, PGTY_zsmalloc); + if (rc) + return rc; +#endif zs_stat_init(); return 0; } @@ -2258,6 +2265,9 @@ static void __exit zs_exit(void) #ifdef CONFIG_ZPOOL zpool_unregister_driver(&zs_zpool_driver); #endif +#ifdef CONFIG_COMPACTION + set_movable_ops(NULL, PGTY_zsmalloc); +#endif zs_stat_exit(); } |