| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-06-02 16:00:26 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-06-02 16:00:26 -0700 |
| commit | fd1f8473503e5bf897bd3e8efe3545c0352954e6 (patch) | |
| tree | bd9f699a23c0093dd55be8cac76d4329837654d0 /mm | |
| parent | fe4281644c62ce9385d3b9165e27d6c86ae0a845 (diff) | |
| parent | 0b43b8bc8ef88bb45b018b2d4853d38bfc5ce2a7 (diff) | |
Merge tag 'mm-stable-2025-06-01-14-06' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull more MM updates from Andrew Morton:
- "zram: support algorithm-specific parameters" from Sergey Senozhatsky
adds infrastructure for passing algorithm-specific parameters into
zram. A single parameter `winbits' is implemented at this time.
- "memcg: nmi-safe kmem charging" from Shakeel Butt makes memcg
charging nmi-safe, which is required by BFP, which can operate in NMI
context.
- "Some random fixes and cleanup to shmem" from Kemeng Shi implements
small fixes and cleanups in the shmem code.
- "Skip mm selftests instead when kernel features are not present" from
Zi Yan fixes some issues in the MM selftest code.
- "mm/damon: build-enable essential DAMON components by default" from
SeongJae Park reworks DAMON Kconfig to make it easier to enable
CONFIG_DAMON.
- "sched/numa: add statistics of numa balance task migration" from Libo
Chen adds more info into sysfs and procfs files to improve visibility
into the NUMA balancer's task migration activity.
- "selftests/mm: cow and gup_longterm cleanups" from Mark Brown
provides various updates to some of the MM selftests to make them
play better with the overall containing framework.
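
For the zram series, the algorithm parameters are meant to be set per device from user space. The sketch below is a minimal illustration only: the attribute name `/sys/block/zram0/algorithm_params` and the `algo=<name> <param>=<value>` syntax are assumptions taken from the series description, not verified API, so check the zram documentation on a kernel that carries these patches.

```c
/*
 * Minimal sketch: write an algorithm-specific parameter to a zram device.
 * The sysfs path and parameter syntax below are assumptions, not verified
 * interface names.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
    const char *attr = "/sys/block/zram0/algorithm_params"; /* assumed path */
    const char *param = "algo=deflate winbits=12";          /* assumed syntax */
    int fd = open(attr, O_WRONLY);

    if (fd < 0) {
        perror("open");
        return 1;
    }
    if (write(fd, param, strlen(param)) < 0)
        perror("write");
    close(fd);
    return 0;
}
```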
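The memcg series handles NMI context by diverting updates into plain atomics that are folded back into the regular statistics at flush time (see `account_kmem_nmi_safe()` and `flush_nmi_stats()` in the mm/memcontrol.c diff further below). The following is a userspace analogue of that pattern, using stand-in names rather than kernel API:

```c
/*
 * Userspace analogue of the nmi-safe charging pattern: the normal path uses
 * the regular (possibly non-reentrant) accounting, while a context that
 * cannot take that path parks the delta in a plain atomic that is folded
 * back in at flush time.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static long normal_counter;     /* stands in for the rstat-backed counter */
static atomic_long nmi_pending; /* fallback accumulator for "NMI" context */

static void charge(long nr, bool in_nmi_ctx)
{
    if (!in_nmi_ctx)
        normal_counter += nr;               /* normal path */
    else
        atomic_fetch_add(&nmi_pending, nr); /* nmi-safe fallback */
}

static void flush(void)
{
    /* fold the deferred updates into the normal counter */
    normal_counter += atomic_exchange(&nmi_pending, 0);
}

int main(void)
{
    charge(4, false);
    charge(2, true); /* pretend we were in NMI context */
    flush();
    printf("counter = %ld\n", normal_counter); /* prints 6 */
    return 0;
}
```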
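The new NUMA-balancing counters (`numa_task_migrated` and `numa_task_swapped`, added to `vmstat_text` in the mm/vmstat.c hunk further below) surface through `/proc/vmstat`. A quick reader, assuming a kernel that carries this series; on older kernels it simply prints nothing:

```c
/* Print the two new NUMA-balancing counters from /proc/vmstat, if present. */
#include <stdio.h>
#include <string.h>

int main(void)
{
    char line[256];
    FILE *fp = fopen("/proc/vmstat", "r");

    if (!fp) {
        perror("fopen");
        return 1;
    }
    while (fgets(line, sizeof(line), fp)) {
        if (!strncmp(line, "numa_task_migrated", 18) ||
            !strncmp(line, "numa_task_swapped", 17))
            fputs(line, stdout);
    }
    fclose(fp);
    return 0;
}
```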
* tag 'mm-stable-2025-06-01-14-06' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (43 commits)
mm/khugepaged: clean up refcount check using folio_expected_ref_count()
selftests/mm: fix test result reporting in gup_longterm
selftests/mm: report unique test names for each cow test
selftests/mm: add helper for logging test start and results
selftests/mm: use standard ksft_finished() in cow and gup_longterm
selftests/damon/_damon_sysfs: skip testcases if CONFIG_DAMON_SYSFS is disabled
sched/numa: add statistics of numa balance task
sched/numa: fix task swap by skipping kernel threads
tools/testing: check correct variable in open_procmap()
tools/testing/vma: add missing function stub
mm/gup: update comment explaining why gup_fast() disables IRQs
selftests/mm: two fixes for the pfnmap test
mm/khugepaged: fix race with folio split/free using temporary reference
mm: add CONFIG_PAGE_BLOCK_ORDER to select page block order
mmu_notifiers: remove leftover stub macros
selftests/mm: deduplicate test names in madv_populate
kcov: rust: add flags for KCOV with Rust
mm: rust: make CONFIG_MMU ifdefs more narrow
mmu_gather: move tlb flush for VM_PFNMAP/VM_MIXEDMAP vmas into free_pgtables()
mm/damon/Kconfig: enable CONFIG_DAMON by default
...
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/Kconfig | 34 |
| -rw-r--r-- | mm/damon/Kconfig | 4 |
| -rw-r--r-- | mm/damon/core.c | 8 |
| -rw-r--r-- | mm/filemap.c | 4 |
| -rw-r--r-- | mm/gup.c | 2 |
| -rw-r--r-- | mm/hugetlb.c | 2 |
| -rw-r--r-- | mm/khugepaged.c | 35 |
| -rw-r--r-- | mm/memcontrol.c | 127 |
| -rw-r--r-- | mm/memory.c | 6 |
| -rw-r--r-- | mm/mm_init.c | 2 |
| -rw-r--r-- | mm/mmu_gather.c | 1 |
| -rw-r--r-- | mm/page-writeback.c | 6 |
| -rw-r--r-- | mm/shmem.c | 23 |
| -rw-r--r-- | mm/truncate.c | 2 |
| -rw-r--r-- | mm/vmstat.c | 2 |
| -rw-r--r-- | mm/zpdesc.h | 4 |
16 files changed, 211 insertions, 51 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index bd08e151fa1b..f8bb8f070d0d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -993,6 +993,40 @@ config CMA_AREAS
 
       If unsure, leave the default value "8" in UMA and "20" in NUMA.
 
+#
+# Select this config option from the architecture Kconfig, if available, to set
+# the max page order for physically contiguous allocations.
+#
+config ARCH_FORCE_MAX_ORDER
+    int
+
+#
+# When ARCH_FORCE_MAX_ORDER is not defined,
+# the default page block order is MAX_PAGE_ORDER (10) as per
+# include/linux/mmzone.h.
+#
+config PAGE_BLOCK_ORDER
+    int "Page Block Order"
+    range 1 10 if ARCH_FORCE_MAX_ORDER = 0
+    default 10 if ARCH_FORCE_MAX_ORDER = 0
+    range 1 ARCH_FORCE_MAX_ORDER if ARCH_FORCE_MAX_ORDER != 0
+    default ARCH_FORCE_MAX_ORDER if ARCH_FORCE_MAX_ORDER != 0
+    help
+      The page block order refers to the power of two number of pages that
+      are physically contiguous and can have a migrate type associated to
+      them. The maximum size of the page block order is limited by
+      ARCH_FORCE_MAX_ORDER.
+
+      This config allows overriding the default page block order when the
+      page block order is required to be smaller than ARCH_FORCE_MAX_ORDER
+      or MAX_PAGE_ORDER.
+
+      Reducing pageblock order can negatively impact THP generation
+      success rate. If your workloads uses THP heavily, please use this
+      option with caution.
+
+      Don't change if unsure.
+
 config MEM_SOFT_DIRTY
     bool "Track memory changes"
     depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig
index c213cf8b5638..551745df011b 100644
--- a/mm/damon/Kconfig
+++ b/mm/damon/Kconfig
@@ -4,6 +4,7 @@ menu "Data Access Monitoring"
 
 config DAMON
     bool "DAMON: Data Access Monitoring Framework"
+    default y
     help
       This builds a framework that allows kernel subsystems to monitor
       access frequency of each memory region. The information can be useful
@@ -28,6 +29,7 @@ config DAMON_VADDR
     bool "Data access monitoring operations for virtual address spaces"
     depends on DAMON && MMU
     select PAGE_IDLE_FLAG
+    default DAMON
     help
       This builds the default data access monitoring operations for DAMON
       that work for virtual address spaces.
@@ -36,6 +38,7 @@ config DAMON_PADDR
     bool "Data access monitoring operations for the physical address space"
     depends on DAMON && MMU
     select PAGE_IDLE_FLAG
+    default DAMON
     help
       This builds the default data access monitoring operations for DAMON
       that works for the physical address space.
@@ -55,6 +58,7 @@ config DAMON_VADDR_KUNIT_TEST
 config DAMON_SYSFS
     bool "DAMON sysfs interface"
     depends on DAMON && SYSFS
+    default DAMON
     help
       This builds the sysfs interface for DAMON. The user space can use
       the interface for arbitrary data access monitoring.
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 0bb71e2ab713..b217e0120e09 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1093,9 +1093,17 @@ static int damon_commit_targets(
             if (err)
                 return err;
         } else {
+            struct damos *s;
+
             if (damon_target_has_pid(dst))
                 put_pid(dst_target->pid);
             damon_destroy_target(dst_target);
+            damon_for_each_scheme(s, dst) {
+                if (s->quota.charge_target_from == dst_target) {
+                    s->quota.charge_target_from = NULL;
+                    s->quota.charge_addr_from = 0;
+                }
+            }
         }
     }
diff --git a/mm/filemap.c b/mm/filemap.c
index 48c944e2c163..bada249b9fb7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -142,7 +142,7 @@ static void page_cache_delete(struct address_space *mapping,
 
     xas_init_marks(&xas);
     folio->mapping = NULL;
-    /* Leave page->index set: truncation lookup relies upon it */
+    /* Leave folio->index set: truncation lookup relies upon it */
     mapping->nrpages -= nr;
 }
 
@@ -949,7 +949,7 @@ unlock:
     return 0;
 error:
     folio->mapping = NULL;
-    /* Leave page->index set: truncation relies upon it */
+    /* Leave folio->index set: truncation relies upon it */
     folio_put_refs(folio, nr);
     return xas_error(&xas);
 }
diff --git a/mm/gup.c b/mm/gup.c
@@ -3299,7 +3299,7 @@ static unsigned long gup_fast(unsigned long start, unsigned long end,
      * include/asm-generic/tlb.h for more details.
      *
      * We do not adopt an rcu_read_lock() here as we also want to block IPIs
-     * that come from THPs splitting.
+     * that come from callers of tlb_remove_table_sync_one().
      */
     local_irq_save(flags);
     gup_fast_pgd_range(start, end, gup_flags, pages, &nr_pinned);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 32ab14aa4074..f0b1d53079f9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3741,7 +3741,7 @@ static void __init report_hugepages(void)
         string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32);
         pr_info("HugeTLB: registered %s page size, pre-allocated %ld pages\n",
-            buf, h->free_huge_pages);
+            buf, h->nr_huge_pages);
         if (nrinvalid)
             pr_info("HugeTLB: %s page size: %lu invalid page%s discarded\n",
                 buf, nrinvalid, nrinvalid > 1 ? "s" : "");
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index cdf5a581368b..15203ea7d007 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -548,19 +548,6 @@ static void release_pte_pages(pte_t *pte, pte_t *_pte,
     }
 }
 
-static bool is_refcount_suitable(struct folio *folio)
-{
-    int expected_refcount = folio_mapcount(folio);
-
-    if (!folio_test_anon(folio) || folio_test_swapcache(folio))
-        expected_refcount += folio_nr_pages(folio);
-
-    if (folio_test_private(folio))
-        expected_refcount++;
-
-    return folio_ref_count(folio) == expected_refcount;
-}
-
 static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
                     unsigned long address,
                     pte_t *pte,
@@ -652,7 +639,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
          * but not from this process. The other process cannot write to
          * the page, only trigger CoW.
          */
-        if (!is_refcount_suitable(folio)) {
+        if (folio_expected_ref_count(folio) != folio_ref_count(folio)) {
             folio_unlock(folio);
             result = SCAN_PAGE_COUNT;
             goto out;
@@ -1402,7 +1389,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
          * has excessive GUP pins (i.e. 512). Anyway the same check
          * will be done again later the risk seems low.
          */
-        if (!is_refcount_suitable(folio)) {
+        if (folio_expected_ref_count(folio) != folio_ref_count(folio)) {
             result = SCAN_PAGE_COUNT;
             goto out_unmap;
         }
@@ -2293,6 +2280,17 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
             continue;
         }
 
+        if (!folio_try_get(folio)) {
+            xas_reset(&xas);
+            continue;
+        }
+
+        if (unlikely(folio != xas_reload(&xas))) {
+            folio_put(folio);
+            xas_reset(&xas);
+            continue;
+        }
+
         if (folio_order(folio) == HPAGE_PMD_ORDER &&
             folio->index == start) {
             /* Maybe PMD-mapped */
@@ -2303,23 +2301,27 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
              * it's safe to skip LRU and refcount checks before
              * returning.
              */
+            folio_put(folio);
             break;
         }
 
         node = folio_nid(folio);
         if (hpage_collapse_scan_abort(node, cc)) {
             result = SCAN_SCAN_ABORT;
+            folio_put(folio);
             break;
         }
         cc->node_load[node]++;
 
         if (!folio_test_lru(folio)) {
             result = SCAN_PAGE_LRU;
+            folio_put(folio);
             break;
         }
 
-        if (!is_refcount_suitable(folio)) {
+        if (folio_expected_ref_count(folio) + 1 != folio_ref_count(folio)) {
             result = SCAN_PAGE_COUNT;
+            folio_put(folio);
             break;
         }
 
@@ -2331,6 +2333,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
          */
 
         present += folio_nr_pages(folio);
+        folio_put(folio);
 
         if (need_resched()) {
             xas_pause(&xas);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b90aa3075950..902da8a9c643 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -474,6 +474,8 @@ static const unsigned int memcg_vm_event_stat[] = {
     NUMA_PAGE_MIGRATE,
     NUMA_PTE_UPDATES,
     NUMA_HINT_FAULTS,
+    NUMA_TASK_MIGRATE,
+    NUMA_TASK_SWAP,
 #endif
 };
 
@@ -531,7 +533,7 @@ struct memcg_vmstats {
     unsigned long events_pending[NR_MEMCG_EVENTS];
 
     /* Stats updates since the last flush */
-    atomic64_t stats_updates;
+    atomic_t stats_updates;
 };
 
 /*
@@ -557,7 +559,7 @@ static u64 flush_last_time;
 
 static bool memcg_vmstats_needs_flush(struct memcg_vmstats *vmstats)
 {
-    return atomic64_read(&vmstats->stats_updates) >
+    return atomic_read(&vmstats->stats_updates) >
         MEMCG_CHARGE_BATCH * num_online_cpus();
 }
 
@@ -571,7 +573,9 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val,
     if (!val)
         return;
 
-    css_rstat_updated(&memcg->css, cpu);
+    /* TODO: add to cgroup update tree once it is nmi-safe. */
+    if (!in_nmi())
+        css_rstat_updated(&memcg->css, cpu);
     statc_pcpu = memcg->vmstats_percpu;
     for (; statc_pcpu; statc_pcpu = statc->parent_pcpu) {
         statc = this_cpu_ptr(statc_pcpu);
@@ -589,7 +593,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val,
             continue;
 
         stats_updates = this_cpu_xchg(statc_pcpu->stats_updates, 0);
-        atomic64_add(stats_updates, &statc->vmstats->stats_updates);
+        atomic_add(stats_updates, &statc->vmstats->stats_updates);
     }
 }
 
@@ -597,7 +601,7 @@ static void __mem_cgroup_flush_stats(struct mem_cgroup *memcg, bool force)
 {
     bool needs_flush = memcg_vmstats_needs_flush(memcg->vmstats);
 
-    trace_memcg_flush_stats(memcg, atomic64_read(&memcg->vmstats->stats_updates),
+    trace_memcg_flush_stats(memcg, atomic_read(&memcg->vmstats->stats_updates),
         force, needs_flush);
 
     if (!force && !needs_flush)
@@ -2513,17 +2517,47 @@ static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
     folio->memcg_data = (unsigned long)memcg;
 }
 
+#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
+static inline void account_slab_nmi_safe(struct mem_cgroup *memcg,
+                     struct pglist_data *pgdat,
+                     enum node_stat_item idx, int nr)
+{
+    struct lruvec *lruvec;
+
+    if (likely(!in_nmi())) {
+        lruvec = mem_cgroup_lruvec(memcg, pgdat);
+        mod_memcg_lruvec_state(lruvec, idx, nr);
+    } else {
+        struct mem_cgroup_per_node *pn = memcg->nodeinfo[pgdat->node_id];
+
+        /* TODO: add to cgroup update tree once it is nmi-safe. */
+        if (idx == NR_SLAB_RECLAIMABLE_B)
+            atomic_add(nr, &pn->slab_reclaimable);
+        else
+            atomic_add(nr, &pn->slab_unreclaimable);
+    }
+}
+#else
+static inline void account_slab_nmi_safe(struct mem_cgroup *memcg,
+                     struct pglist_data *pgdat,
+                     enum node_stat_item idx, int nr)
+{
+    struct lruvec *lruvec;
+
+    lruvec = mem_cgroup_lruvec(memcg, pgdat);
+    mod_memcg_lruvec_state(lruvec, idx, nr);
+}
+#endif
+
 static inline void mod_objcg_mlstate(struct obj_cgroup *objcg,
                      struct pglist_data *pgdat,
                      enum node_stat_item idx, int nr)
 {
     struct mem_cgroup *memcg;
-    struct lruvec *lruvec;
 
     rcu_read_lock();
     memcg = obj_cgroup_memcg(objcg);
-    lruvec = mem_cgroup_lruvec(memcg, pgdat);
-    mod_memcg_lruvec_state(lruvec, idx, nr);
+    account_slab_nmi_safe(memcg, pgdat, idx, nr);
     rcu_read_unlock();
 }
 
@@ -2648,6 +2682,9 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
     struct mem_cgroup *memcg;
     struct obj_cgroup *objcg;
 
+    if (IS_ENABLED(CONFIG_MEMCG_NMI_UNSAFE) && in_nmi())
+        return NULL;
+
     if (in_task()) {
         memcg = current->active_memcg;
         if (unlikely(memcg))
@@ -2710,6 +2747,23 @@ struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
     return objcg;
 }
 
+#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
+static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int val)
+{
+    if (likely(!in_nmi())) {
+        mod_memcg_state(memcg, MEMCG_KMEM, val);
+    } else {
+        /* TODO: add to cgroup update tree once it is nmi-safe. */
+        atomic_add(val, &memcg->kmem_stat);
+    }
+}
+#else
+static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int val)
+{
+    mod_memcg_state(memcg, MEMCG_KMEM, val);
+}
+#endif
+
 /*
  * obj_cgroup_uncharge_pages: uncharge a number of kernel pages from a objcg
  * @objcg: object cgroup to uncharge
@@ -2722,7 +2776,7 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
 
     memcg = get_mem_cgroup_from_objcg(objcg);
 
-    mod_memcg_state(memcg, MEMCG_KMEM, -nr_pages);
+    account_kmem_nmi_safe(memcg, -nr_pages);
     memcg1_account_kmem(memcg, -nr_pages);
     if (!mem_cgroup_is_root(memcg))
         refill_stock(memcg, nr_pages);
@@ -2750,7 +2804,7 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp,
     if (ret)
         goto out;
 
-    mod_memcg_state(memcg, MEMCG_KMEM, nr_pages);
+    account_kmem_nmi_safe(memcg, nr_pages);
     memcg1_account_kmem(memcg, nr_pages);
 out:
     css_put(&memcg->css);
@@ -3961,6 +4015,53 @@ static void mem_cgroup_stat_aggregate(struct aggregate_control *ac)
     }
 }
 
+#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
+static void flush_nmi_stats(struct mem_cgroup *memcg, struct mem_cgroup *parent,
+                int cpu)
+{
+    int nid;
+
+    if (atomic_read(&memcg->kmem_stat)) {
+        int kmem = atomic_xchg(&memcg->kmem_stat, 0);
+        int index = memcg_stats_index(MEMCG_KMEM);
+
+        memcg->vmstats->state[index] += kmem;
+        if (parent)
+            parent->vmstats->state_pending[index] += kmem;
+    }
+
+    for_each_node_state(nid, N_MEMORY) {
+        struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
+        struct lruvec_stats *lstats = pn->lruvec_stats;
+        struct lruvec_stats *plstats = NULL;
+
+        if (parent)
+            plstats = parent->nodeinfo[nid]->lruvec_stats;
+
+        if (atomic_read(&pn->slab_reclaimable)) {
+            int slab = atomic_xchg(&pn->slab_reclaimable, 0);
+            int index = memcg_stats_index(NR_SLAB_RECLAIMABLE_B);
+
+            lstats->state[index] += slab;
+            if (plstats)
+                plstats->state_pending[index] += slab;
+        }
+        if (atomic_read(&pn->slab_unreclaimable)) {
+            int slab = atomic_xchg(&pn->slab_unreclaimable, 0);
+            int index = memcg_stats_index(NR_SLAB_UNRECLAIMABLE_B);
+
+            lstats->state[index] += slab;
+            if (plstats)
+                plstats->state_pending[index] += slab;
+        }
+    }
+}
+#else
+static void flush_nmi_stats(struct mem_cgroup *memcg, struct mem_cgroup *parent,
+                int cpu)
+{}
+#endif
+
 static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 {
     struct mem_cgroup *memcg = mem_cgroup_from_css(css);
@@ -3969,6 +4070,8 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
     struct aggregate_control ac;
     int nid;
 
+    flush_nmi_stats(memcg, parent, cpu);
+
     statc = per_cpu_ptr(memcg->vmstats_percpu, cpu);
 
     ac = (struct aggregate_control) {
@@ -4018,8 +4121,8 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
     }
     WRITE_ONCE(statc->stats_updates, 0);
     /* We are in a per-cpu loop here, only do the atomic write once */
-    if (atomic64_read(&memcg->vmstats->stats_updates))
-        atomic64_set(&memcg->vmstats->stats_updates, 0);
+    if (atomic_read(&memcg->vmstats->stats_updates))
+        atomic_set(&memcg->vmstats->stats_updates, 0);
 }
 
 static void mem_cgroup_fork(struct task_struct *task)
diff --git a/mm/memory.c b/mm/memory.c
index 5cb48f262ab0..8eba595056fe 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -358,6 +358,8 @@ void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
 {
     struct unlink_vma_file_batch vb;
 
+    tlb_free_vmas(tlb);
+
     do {
         unsigned long addr = vma->vm_start;
         struct vm_area_struct *next;
@@ -4668,8 +4670,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
         /*
          * KSM sometimes has to copy on read faults, for example, if
-         * page->index of !PageKSM() pages would be nonlinear inside the
-         * anon VMA -- PageKSM() is lost on actual swapout.
+         * folio->index of non-ksm folios would be nonlinear inside the
+         * anon VMA -- the ksm flag is lost on actual swapout.
          */
         folio = ksm_might_need_to_copy(folio, vma, vmf->address);
         if (unlikely(!folio)) {
diff --git a/mm/mm_init.c b/mm/mm_init.c
index f0bd0830daad..f2944748f526 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1509,7 +1509,7 @@ static inline void setup_usemap(struct zone *zone) {}
 /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
 void __init set_pageblock_order(void)
 {
-    unsigned int order = MAX_PAGE_ORDER;
+    unsigned int order = PAGE_BLOCK_ORDER;
 
     /* Check that pageblock_nr_pages has not already been setup */
     if (pageblock_order)
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index db7ba4a725d6..b49cc6385f1f 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -424,6 +424,7 @@ static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 #ifdef CONFIG_MMU_GATHER_PAGE_SIZE
     tlb->page_size = 0;
 #endif
+    tlb->vma_pfn = 0;
 
     __tlb_reset_range(tlb);
     inc_tlb_flush_pending(tlb->mm);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b603a59cf8f7..b8eea5b3c064 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2565,11 +2565,11 @@ struct folio *writeback_iter(struct address_space *mapping,
     if (!folio) {
         /*
          * To avoid deadlocks between range_cyclic writeback and callers
-         * that hold pages in PageWriteback to aggregate I/O until
+         * that hold folios in writeback to aggregate I/O until
          * the writeback iteration finishes, we do not loop back to the
-         * start of the file.  Doing so causes a page lock/page
+         * start of the file.  Doing so causes a folio lock/folio
          * writeback access order inversion - we should only ever lock
-         * multiple pages in ascending page->index order, and looping
+         * multiple folios in ascending folio->index order, and looping
          * back to the start of the file violates that rule and causes
          * deadlocks.
          */
diff --git a/mm/shmem.c b/mm/shmem.c
index 858cee02ca49..0c5fb4ffa03a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1446,8 +1446,6 @@ static int shmem_unuse_swap_entries(struct inode *inode,
     for (i = 0; i < folio_batch_count(fbatch); i++) {
         struct folio *folio = fbatch->folios[i];
 
-        if (!xa_is_value(folio))
-            continue;
         error = shmem_swapin_folio(inode, indices[i], &folio, SGP_CACHE,
                     mapping_gfp_mask(mapping), NULL, NULL);
         if (error == 0) {
@@ -1505,6 +1503,7 @@ int shmem_unuse(unsigned int type)
         return 0;
 
     mutex_lock(&shmem_swaplist_mutex);
+start_over:
     list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) {
         if (!info->swapped) {
             list_del_init(&info->swaplist);
@@ -1523,13 +1522,15 @@ int shmem_unuse(unsigned int type)
         cond_resched();
 
         mutex_lock(&shmem_swaplist_mutex);
-        next = list_next_entry(info, swaplist);
-        if (!info->swapped)
-            list_del_init(&info->swaplist);
         if (atomic_dec_and_test(&info->stop_eviction))
             wake_up_var(&info->stop_eviction);
         if (error)
             break;
+        if (list_empty(&info->swaplist))
+            goto start_over;
+        next = list_next_entry(info, swaplist);
+        if (!info->swapped)
+            list_del_init(&info->swaplist);
     }
     mutex_unlock(&shmem_swaplist_mutex);
 
@@ -1643,8 +1644,8 @@ try_split:
             BUG_ON(folio_mapped(folio));
             return swap_writeout(folio, wbc);
         }
-
-        list_del_init(&info->swaplist);
+        if (!info->swapped)
+            list_del_init(&info->swaplist);
         mutex_unlock(&shmem_swaplist_mutex);
         if (nr_pages > 1)
             goto try_split;
@@ -2331,6 +2332,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
          */
         split_order = shmem_split_large_entry(inode, index, swap, gfp);
         if (split_order < 0) {
+            folio_put(folio);
+            folio = NULL;
             error = split_order;
             goto failed;
         }
@@ -5805,12 +5808,12 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name,
     if (size < 0 || size > MAX_LFS_FILESIZE)
         return ERR_PTR(-EINVAL);
 
-    if (shmem_acct_size(flags, size))
-        return ERR_PTR(-ENOMEM);
-
     if (is_idmapped_mnt(mnt))
         return ERR_PTR(-EINVAL);
 
+    if (shmem_acct_size(flags, size))
+        return ERR_PTR(-ENOMEM);
+
     inode = shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL,
                 S_IFREG | S_IRWXUGO, 0, flags);
     if (IS_ERR(inode)) {
diff --git a/mm/truncate.c b/mm/truncate.c
index f2aaf99f2990..91eb92a5ce4f 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -425,7 +425,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
         for (i = 0; i < folio_batch_count(&fbatch); i++) {
             struct folio *folio = fbatch.folios[i];
 
-            /* We rely upon deletion not changing page->index */
+            /* We rely upon deletion not changing folio->index */
             if (xa_is_value(folio))
                 continue;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index d888c248d99f..6f740f070b3d 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1347,6 +1347,8 @@ const char * const vmstat_text[] = {
     "numa_hint_faults",
     "numa_hint_faults_local",
     "numa_pages_migrated",
+    "numa_task_migrated",
+    "numa_task_swapped",
 #endif
 #ifdef CONFIG_MIGRATION
     "pgmigrate_success",
diff --git a/mm/zpdesc.h b/mm/zpdesc.h
index 57e7a4d6c6ca..d3df316e5bb7 100644
--- a/mm/zpdesc.h
+++ b/mm/zpdesc.h
@@ -54,8 +54,8 @@ struct zpdesc {
 ZPDESC_MATCH(flags, flags);
 ZPDESC_MATCH(lru, lru);
 ZPDESC_MATCH(mapping, movable_ops);
-ZPDESC_MATCH(index, next);
-ZPDESC_MATCH(index, handle);
+ZPDESC_MATCH(__folio_index, next);
+ZPDESC_MATCH(__folio_index, handle);
 ZPDESC_MATCH(private, zspage);
 ZPDESC_MATCH(page_type, first_obj_offset);
 ZPDESC_MATCH(_refcount, _refcount);
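
The khugepaged hunks above replace the local `is_refcount_suitable()` helper with `folio_expected_ref_count()` and, in the file-scan path, pin the folio with `folio_try_get()` before inspecting it, so the check has to allow for that one extra reference. Below is a userspace analogue of the speculative-reference pattern, using illustrative stand-in types rather than kernel API:

```c
/*
 * Sketch of the "temporary reference" pattern: take a speculative reference
 * with a try-get, then compare the observed reference count against what is
 * expected plus one for our own pin.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct object {
    atomic_int refcount;
    int expected_refs; /* references others are known to hold */
};

/* fails if the object is already on its way to being freed */
static bool object_try_get(struct object *obj)
{
    int ref = atomic_load(&obj->refcount);

    while (ref > 0) {
        if (atomic_compare_exchange_weak(&obj->refcount, &ref, ref + 1))
            return true;
    }
    return false;
}

static void object_put(struct object *obj)
{
    atomic_fetch_sub(&obj->refcount, 1);
}

int main(void)
{
    struct object obj = { .refcount = 2, .expected_refs = 2 };

    if (!object_try_get(&obj))
        return 1;

    /* our temporary reference accounts for the "+ 1" in the check */
    if (obj.expected_refs + 1 != atomic_load(&obj.refcount))
        printf("unexpected extra references, skip this object\n");
    else
        printf("refcount is as expected, safe to proceed\n");

    object_put(&obj);
    return 0;
}
```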