59 files changed, 909 insertions, 408 deletions
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index bd98ea3175ec..0cc35a14afbe 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1732,6 +1732,12 @@ The following nested keys are defined.
 	  numa_hint_faults (npn)
 		Number of NUMA hinting faults.
 
+	  numa_task_migrated (npn)
+		Number of task migrations by NUMA balancing.
+
+	  numa_task_swapped (npn)
+		Number of task swaps by NUMA balancing.
+
 	  pgdemote_kswapd
 		Number of pages demoted by kswapd.
 
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 0749cf8a6637..5219158d54cf 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -227,9 +227,9 @@ void __flush_dcache_folio(struct address_space *mapping, struct folio *folio)
 	}
 
 	/*
-	 * If this is a page cache page, and we have an aliasing VIPT cache,
+	 * If this is a page cache folio, and we have an aliasing VIPT cache,
 	 * we only need to do one flush - which would be at the relevant
-	 * userspace colour, which is congruent with page->index.
+	 * userspace colour, which is congruent with folio->index.
 	 */
 	if (mapping && cache_is_vipt_aliasing())
 		flush_pfn_alias(folio_pfn(folio), folio_pos(folio));
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 6ab3ef39ba7a..745bd575dcfa 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -105,7 +105,8 @@ static struct list_head ptable_list[3] = {
 
 #define PD_PTABLE(page) ((ptable_desc *)&(virt_to_page((void *)(page))->lru))
 #define PD_PAGE(ptable) (list_entry(ptable, struct page, lru))
-#define PD_MARKBITS(dp) (*(unsigned int *)&PD_PAGE(dp)->index)
+#define PD_PTDESC(ptable) (list_entry(ptable, struct ptdesc, pt_list))
+#define PD_MARKBITS(dp) (*(unsigned int *)&PD_PTDESC(dp)->pt_index)
 
 static const int ptable_shift[3] = {
 	7+2, /* PGD */
diff --git a/drivers/block/zram/backend_deflate.c b/drivers/block/zram/backend_deflate.c
index 0f7f252c12f4..b75016e0e654 100644
--- a/drivers/block/zram/backend_deflate.c
+++ b/drivers/block/zram/backend_deflate.c
@@ -8,7 +8,7 @@
 #include "backend_deflate.h"
 
 /* Use the same value as crypto API */
-#define DEFLATE_DEF_WINBITS		11
+#define DEFLATE_DEF_WINBITS		(-11)
 #define DEFLATE_DEF_MEMLEVEL		MAX_MEM_LEVEL
 
 struct deflate_ctx {
@@ -22,8 +22,10 @@ static void deflate_release_params(struct zcomp_params *params)
 
 static int deflate_setup_params(struct zcomp_params *params)
 {
-	if (params->level == ZCOMP_PARAM_NO_LEVEL)
+	if (params->level == ZCOMP_PARAM_NOT_SET)
 		params->level = Z_DEFAULT_COMPRESSION;
+	if (params->deflate.winbits == ZCOMP_PARAM_NOT_SET)
+		params->deflate.winbits = DEFLATE_DEF_WINBITS;
 
 	return 0;
 }
@@ -57,13 +59,13 @@ static int deflate_create(struct zcomp_params *params, struct zcomp_ctx *ctx)
 		return -ENOMEM;
 
 	ctx->context = zctx;
-	sz = zlib_deflate_workspacesize(-DEFLATE_DEF_WINBITS, MAX_MEM_LEVEL);
+	sz = zlib_deflate_workspacesize(params->deflate.winbits, MAX_MEM_LEVEL);
 	zctx->cctx.workspace = vzalloc(sz);
 	if (!zctx->cctx.workspace)
 		goto error;
 
 	ret = zlib_deflateInit2(&zctx->cctx, params->level, Z_DEFLATED,
-				-DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL,
+				params->deflate.winbits, DEFLATE_DEF_MEMLEVEL,
 				Z_DEFAULT_STRATEGY);
 	if (ret != Z_OK)
 		goto error;
@@ -73,7 +75,7 @@ static int deflate_create(struct zcomp_params *params, struct zcomp_ctx *ctx)
 	if (!zctx->dctx.workspace)
 		goto error;
 
-	ret = zlib_inflateInit2(&zctx->dctx, -DEFLATE_DEF_WINBITS);
+	ret = zlib_inflateInit2(&zctx->dctx, params->deflate.winbits);
 	if (ret != Z_OK)
 		goto error;
 
diff --git a/drivers/block/zram/backend_lz4.c
b/drivers/block/zram/backend_lz4.c index 847f3334eb38..daccd60857eb 100644 --- a/drivers/block/zram/backend_lz4.c +++ b/drivers/block/zram/backend_lz4.c @@ -18,7 +18,7 @@ static void lz4_release_params(struct zcomp_params *params) static int lz4_setup_params(struct zcomp_params *params) { - if (params->level == ZCOMP_PARAM_NO_LEVEL) + if (params->level == ZCOMP_PARAM_NOT_SET) params->level = LZ4_ACCELERATION_DEFAULT; return 0; diff --git a/drivers/block/zram/backend_lz4hc.c b/drivers/block/zram/backend_lz4hc.c index 5f37d5abcaeb..9e8a35dfa56d 100644 --- a/drivers/block/zram/backend_lz4hc.c +++ b/drivers/block/zram/backend_lz4hc.c @@ -18,7 +18,7 @@ static void lz4hc_release_params(struct zcomp_params *params) static int lz4hc_setup_params(struct zcomp_params *params) { - if (params->level == ZCOMP_PARAM_NO_LEVEL) + if (params->level == ZCOMP_PARAM_NOT_SET) params->level = LZ4HC_DEFAULT_CLEVEL; return 0; diff --git a/drivers/block/zram/backend_zstd.c b/drivers/block/zram/backend_zstd.c index 22c8067536f3..81defb98ed09 100644 --- a/drivers/block/zram/backend_zstd.c +++ b/drivers/block/zram/backend_zstd.c @@ -58,7 +58,7 @@ static int zstd_setup_params(struct zcomp_params *params) return -ENOMEM; params->drv_data = zp; - if (params->level == ZCOMP_PARAM_NO_LEVEL) + if (params->level == ZCOMP_PARAM_NOT_SET) params->level = zstd_default_clevel(); zp->cprm = zstd_get_params(params->level, PAGE_SIZE); diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index 25339ed1e07e..4acffe671a5e 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -5,7 +5,11 @@ #include <linux/mutex.h> -#define ZCOMP_PARAM_NO_LEVEL INT_MIN +#define ZCOMP_PARAM_NOT_SET INT_MIN + +struct deflate_params { + s32 winbits; +}; /* * Immutable driver (backend) parameters. 
The driver may attach private @@ -17,6 +21,9 @@ struct zcomp_params { void *dict; size_t dict_sz; s32 level; + union { + struct deflate_params deflate; + }; void *drv_data; }; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 94e6e9b80bf0..54c57103715f 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1276,13 +1276,15 @@ static void comp_params_reset(struct zram *zram, u32 prio) struct zcomp_params *params = &zram->params[prio]; vfree(params->dict); - params->level = ZCOMP_PARAM_NO_LEVEL; + params->level = ZCOMP_PARAM_NOT_SET; + params->deflate.winbits = ZCOMP_PARAM_NOT_SET; params->dict_sz = 0; params->dict = NULL; } static int comp_params_store(struct zram *zram, u32 prio, s32 level, - const char *dict_path) + const char *dict_path, + struct deflate_params *deflate_params) { ssize_t sz = 0; @@ -1300,6 +1302,7 @@ static int comp_params_store(struct zram *zram, u32 prio, s32 level, zram->params[prio].dict_sz = sz; zram->params[prio].level = level; + zram->params[prio].deflate.winbits = deflate_params->winbits; return 0; } @@ -1308,11 +1311,14 @@ static ssize_t algorithm_params_store(struct device *dev, const char *buf, size_t len) { - s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NO_LEVEL; + s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NOT_SET; char *args, *param, *val, *algo = NULL, *dict_path = NULL; + struct deflate_params deflate_params; struct zram *zram = dev_to_zram(dev); int ret; + deflate_params.winbits = ZCOMP_PARAM_NOT_SET; + args = skip_spaces(buf); while (*args) { args = next_arg(args, ¶m, &val); @@ -1343,6 +1349,13 @@ static ssize_t algorithm_params_store(struct device *dev, dict_path = val; continue; } + + if (!strcmp(param, "deflate.winbits")) { + ret = kstrtoint(val, 10, &deflate_params.winbits); + if (ret) + return ret; + continue; + } } /* Lookup priority by algorithm name */ @@ -1364,7 +1377,7 @@ static ssize_t algorithm_params_store(struct device *dev, if (prio < ZRAM_PRIMARY_COMP || prio >= ZRAM_MAX_COMPS) return -EINVAL; - ret = comp_params_store(zram, prio, level, dict_path); + ret = comp_params_store(zram, prio, level, dict_path, &deflate_params); return ret ? 
ret : len; } diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 34ed242e1063..1e99a35691cd 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -913,7 +913,8 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) struct ntfs_inode *ni = ntfs_i(inode); u64 valid = ni->i_valid; struct ntfs_sb_info *sbi = ni->mi.sbi; - struct page *page, **pages = NULL; + struct page **pages = NULL; + struct folio *folio; size_t written = 0; u8 frame_bits = NTFS_LZNT_CUNIT + sbi->cluster_bits; u32 frame_size = 1u << frame_bits; @@ -923,7 +924,6 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) u64 frame_vbo; pgoff_t index; bool frame_uptodate; - struct folio *folio; if (frame_size < PAGE_SIZE) { /* @@ -977,8 +977,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) pages_per_frame); if (err) { for (ip = 0; ip < pages_per_frame; ip++) { - page = pages[ip]; - folio = page_folio(page); + folio = page_folio(pages[ip]); folio_unlock(folio); folio_put(folio); } @@ -989,10 +988,9 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) ip = off >> PAGE_SHIFT; off = offset_in_page(valid); for (; ip < pages_per_frame; ip++, off = 0) { - page = pages[ip]; - folio = page_folio(page); - zero_user_segment(page, off, PAGE_SIZE); - flush_dcache_page(page); + folio = page_folio(pages[ip]); + folio_zero_segment(folio, off, PAGE_SIZE); + flush_dcache_folio(folio); folio_mark_uptodate(folio); } @@ -1001,8 +999,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) ni_unlock(ni); for (ip = 0; ip < pages_per_frame; ip++) { - page = pages[ip]; - folio = page_folio(page); + folio = page_folio(pages[ip]); folio_mark_uptodate(folio); folio_unlock(folio); folio_put(folio); @@ -1046,8 +1043,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) if (err) { for (ip = 0; ip < pages_per_frame; ip++) { - page = pages[ip]; - folio = page_folio(page); + folio = page_folio(pages[ip]); folio_unlock(folio); folio_put(folio); } @@ -1065,10 +1061,10 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) for (;;) { size_t cp, tail = PAGE_SIZE - off; - page = pages[ip]; - cp = copy_page_from_iter_atomic(page, off, + folio = page_folio(pages[ip]); + cp = copy_folio_from_iter_atomic(folio, off, min(tail, bytes), from); - flush_dcache_page(page); + flush_dcache_folio(folio); copied += cp; bytes -= cp; @@ -1088,9 +1084,8 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) ni_unlock(ni); for (ip = 0; ip < pages_per_frame; ip++) { - page = pages[ip]; - ClearPageDirty(page); - folio = page_folio(page); + folio = page_folio(pages[ip]); + folio_clear_dirty(folio); folio_mark_uptodate(folio); folio_unlock(folio); folio_put(folio); diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 88a42973fa47..1fff717cae51 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -58,6 +58,11 @@ * Defaults to flushing at tlb_end_vma() to reset the range; helps when * there's large holes between the VMAs. * + * - tlb_free_vmas() + * + * tlb_free_vmas() marks the start of unlinking of one or more vmas + * and freeing page-tables. 
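
A note on the zram changes above: deflate.winbits is handed straight to zlib_deflateInit2()/zlib_inflateInit2(), and zlib interprets a negative windowBits as a request for a raw deflate stream (no zlib header or checksum), which is why the default moves from 11 to (-11). A runnable userspace analogue of the same convention, using regular zlib rather than the kernel's zlib_* wrappers (build with -lz; the -11 mirrors DEFLATE_DEF_WINBITS):

#include <stdio.h>
#include <zlib.h>

int main(void)
{
	static const unsigned char msg[] = "zram zram zram zram zram zram";
	unsigned char out[256];
	z_stream zs = { 0 };

	/*
	 * Negative windowBits selects a raw deflate stream: no zlib
	 * header, no adler32 trailer. -11 means a 2^11 = 2 KiB window.
	 */
	if (deflateInit2(&zs, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -11,
			 8, Z_DEFAULT_STRATEGY) != Z_OK)
		return 1;

	zs.next_in = (unsigned char *)msg;
	zs.avail_in = sizeof(msg);
	zs.next_out = out;
	zs.avail_out = sizeof(out);
	if (deflate(&zs, Z_FINISH) != Z_STREAM_END)
		return 1;

	printf("%zu -> %lu bytes (raw deflate)\n", sizeof(msg), zs.total_out);
	return deflateEnd(&zs) == Z_OK ? 0 : 1;
}

With the patch, a different window should be selectable at runtime through the algorithm_params attribute handled in zram_drv.c, presumably along the lines of algo=deflate deflate.winbits=-12; a smaller window trades compression ratio for a smaller decompression workspace.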
+ *
  * - tlb_remove_table()
  *
  *	tlb_remove_table() is the basic primitive to free page-table directories
@@ -464,7 +469,12 @@ tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma)
 	 */
 	tlb->vma_huge = is_vm_hugetlb_page(vma);
 	tlb->vma_exec = !!(vma->vm_flags & VM_EXEC);
-	tlb->vma_pfn  = !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP));
+
+	/*
+	 * Track if there's at least one VM_PFNMAP/VM_MIXEDMAP vma
+	 * in the tracked range, see tlb_free_vmas().
+	 */
+	tlb->vma_pfn |= !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP));
 }
 
 static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
@@ -548,22 +558,38 @@ static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
 
 static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
 {
+	if (tlb->fullmm || IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS))
+		return;
+
+	/*
+	 * Do a TLB flush and reset the range at VMA boundaries; this avoids
+	 * the ranges growing with the unused space between consecutive VMAs,
+	 * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on
+	 * this.
+	 */
+	tlb_flush_mmu_tlbonly(tlb);
+}
+
+static inline void tlb_free_vmas(struct mmu_gather *tlb)
+{
 	if (tlb->fullmm)
 		return;
 
 	/*
 	 * VM_PFNMAP is more fragile because the core mm will not track the
-	 * page mapcount -- there might not be page-frames for these PFNs after
-	 * all. Force flush TLBs for such ranges to avoid munmap() vs
-	 * unmap_mapping_range() races.
+	 * page mapcount -- there might not be page-frames for these PFNs
+	 * after all.
+	 *
+	 * Specifically, there is a race between munmap() and
+	 * unmap_mapping_range(), where munmap() will unlink the VMA, such
+	 * that unmap_mapping_range() will no longer observe the VMA and
+	 * no-op, without observing the TLBI, returning prematurely.
+	 *
+	 * So if we're about to unlink such a VMA, and we have pending
+	 * TLBI for such a vma, flush things now.
 	 */
-	if (tlb->vma_pfn || !IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS)) {
-		/*
-		 * Do a TLB flush and reset the range at VMA boundaries; this avoids
-		 * the ranges growing with the unused space between consecutive VMAs.
-		 */
+	if (tlb->vma_pfn)
 		tlb_flush_mmu_tlbonly(tlb);
-	}
 }
 
 /*
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f7848f73f41c..87b6688f124a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -113,6 +113,12 @@ struct mem_cgroup_per_node {
 	CACHELINE_PADDING(_pad2_);
 	unsigned long		lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
 	struct mem_cgroup_reclaim_iter	iter;
+
+#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
+	/* slab stats for nmi context */
+	atomic_t		slab_reclaimable;
+	atomic_t		slab_unreclaimable;
+#endif
 };
 
 struct mem_cgroup_threshold {
@@ -236,6 +242,10 @@ struct mem_cgroup {
 	atomic_long_t		memory_events[MEMCG_NR_MEMORY_EVENTS];
 	atomic_long_t		memory_events_local[MEMCG_NR_MEMORY_EVENTS];
 
+#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
+	/* MEMCG_KMEM for nmi context */
+	atomic_t		kmem_stat;
+#endif
 	/*
 	 * Hint of reclaim pressure for socket memroy management. Note
 	 * that this indicator should NOT be used in legacy cgroup mode
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9e221ffcb868..0ef2ba0c667a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1276,9 +1276,9 @@ vm_fault_t finish_fault(struct vm_fault *vmf);
 * the page's disk buffers. PG_private must be set to tell the VM to call
 * into the filesystem to release these pages.
 *
- * A page may belong to an inode's memory mapping.
In this case, page->mapping - * is the pointer to the inode, and page->index is the file offset of the page, - * in units of PAGE_SIZE. + * A folio may belong to an inode's memory mapping. In this case, + * folio->mapping points to the inode, and folio->index is the file + * offset of the folio, in units of PAGE_SIZE. * * If pagecache pages are not associated with an inode, they are said to be * anonymous pages. These may become associated with the swapcache, and in that diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d3cffd8828c9..d6b91e8a66d6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -108,7 +108,7 @@ struct page { /* See page-flags.h for PAGE_MAPPING_FLAGS */ struct address_space *mapping; union { - pgoff_t index; /* Our offset within mapping. */ + pgoff_t __folio_index; /* Our offset within mapping. */ unsigned long share; /* share count for fsdax */ }; /** @@ -489,7 +489,7 @@ FOLIO_MATCH(flags, flags); FOLIO_MATCH(lru, lru); FOLIO_MATCH(mapping, mapping); FOLIO_MATCH(compound_head, lru); -FOLIO_MATCH(index, index); +FOLIO_MATCH(__folio_index, index); FOLIO_MATCH(private, private); FOLIO_MATCH(_mapcount, _mapcount); FOLIO_MATCH(_refcount, _refcount); @@ -590,7 +590,7 @@ TABLE_MATCH(flags, __page_flags); TABLE_MATCH(compound_head, pt_list); TABLE_MATCH(compound_head, _pt_pad_1); TABLE_MATCH(mapping, __page_mapping); -TABLE_MATCH(index, pt_index); +TABLE_MATCH(__folio_index, pt_index); TABLE_MATCH(rcu_head, pt_rcu_head); TABLE_MATCH(page_type, __page_type); TABLE_MATCH(_refcount, __page_refcount); diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index bc2402a45741..d1094c2d5fb6 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -654,9 +654,6 @@ static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm) #define pmdp_clear_flush_young_notify pmdp_clear_flush_young #define ptep_clear_young_notify ptep_test_and_clear_young #define pmdp_clear_young_notify pmdp_test_and_clear_young -#define ptep_clear_flush_notify ptep_clear_flush -#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush -#define pudp_huge_clear_flush_notify pudp_huge_clear_flush static inline void mmu_notifier_synchronize(void) { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 28066b4ced81..283913d42d7b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -37,6 +37,22 @@ #define NR_PAGE_ORDERS (MAX_PAGE_ORDER + 1) +/* Defines the order for the number of pages that have a migrate type. */ +#ifndef CONFIG_PAGE_BLOCK_ORDER +#define PAGE_BLOCK_ORDER MAX_PAGE_ORDER +#else +#define PAGE_BLOCK_ORDER CONFIG_PAGE_BLOCK_ORDER +#endif /* CONFIG_PAGE_BLOCK_ORDER */ + +/* + * The MAX_PAGE_ORDER, which defines the max order of pages to be allocated + * by the buddy allocator, has to be larger or equal to the PAGE_BLOCK_ORDER, + * which defines the order for the number of pages that can have a migrate type + */ +#if (PAGE_BLOCK_ORDER > MAX_PAGE_ORDER) +#error MAX_PAGE_ORDER must be >= PAGE_BLOCK_ORDER +#endif + /* * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed * costly to service. That is between allocation orders which should diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index fc6b9c87cb0a..e73a4292ef02 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -41,18 +41,18 @@ extern unsigned int pageblock_order; * Huge pages are a constant size, but don't exceed the maximum allocation * granularity. 
*/ -#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, MAX_PAGE_ORDER) +#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, PAGE_BLOCK_ORDER) #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ #elif defined(CONFIG_TRANSPARENT_HUGEPAGE) -#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, MAX_PAGE_ORDER) +#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, PAGE_BLOCK_ORDER) #else /* CONFIG_TRANSPARENT_HUGEPAGE */ -/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */ -#define pageblock_order MAX_PAGE_ORDER +/* If huge pages are not used, group by PAGE_BLOCK_ORDER */ +#define pageblock_order PAGE_BLOCK_ORDER #endif /* CONFIG_HUGETLB_PAGE */ diff --git a/include/linux/sched.h b/include/linux/sched.h index aa9c5be7a632..4f78a64beb52 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -548,6 +548,10 @@ struct sched_statistics { u64 nr_failed_migrations_running; u64 nr_failed_migrations_hot; u64 nr_forced_migrations; +#ifdef CONFIG_NUMA_BALANCING + u64 numa_task_migrated; + u64 numa_task_swapped; +#endif u64 nr_wakeups; u64 nr_wakeups_sync; diff --git a/include/linux/uio.h b/include/linux/uio.h index 393d0622cc28..2e86c653186c 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -182,8 +182,6 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs) return ret; } -size_t copy_page_from_iter_atomic(struct page *page, size_t offset, - size_t bytes, struct iov_iter *i); void iov_iter_advance(struct iov_iter *i, size_t bytes); void iov_iter_revert(struct iov_iter *i, size_t bytes); size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes); @@ -193,6 +191,8 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); +size_t copy_folio_from_iter_atomic(struct folio *folio, size_t offset, + size_t bytes, struct iov_iter *i); size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i); size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); @@ -210,12 +210,6 @@ static inline size_t copy_folio_from_iter(struct folio *folio, size_t offset, return copy_page_from_iter(&folio->page, offset, bytes, i); } -static inline size_t copy_folio_from_iter_atomic(struct folio *folio, - size_t offset, size_t bytes, struct iov_iter *i) -{ - return copy_page_from_iter_atomic(&folio->page, offset, bytes, i); -} - size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, size_t bytes, struct iov_iter *i); diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 9e15a088ba38..91a3ce9a2687 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -66,6 +66,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, NUMA_HINT_FAULTS, NUMA_HINT_FAULTS_LOCAL, NUMA_PAGE_MIGRATE, + NUMA_TASK_MIGRATE, + NUMA_TASK_SWAP, #endif #ifdef CONFIG_MIGRATION PGMIGRATE_SUCCESS, PGMIGRATE_FAIL, diff --git a/init/Kconfig b/init/Kconfig index 20716189fe68..ab83abe0fd9d 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -992,6 +992,20 @@ config MEMCG help Provides control over the memory footprint of tasks in a cgroup. 
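
The sched.h and vm_event_item.h additions above give NUMA balancing's task migrations and task swaps both a per-task schedstat and a global vmstat event; the string names are wired up in the mm/vmstat.c and mm/memcontrol.c hunks further down. A small reader for the global counters, assuming a patched kernel with CONFIG_NUMA_BALANCING (the two names are taken from the vmstat_text additions):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/vmstat", "r");

	if (!f) {
		perror("/proc/vmstat");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		/* Only present with CONFIG_NUMA_BALANCING and this patch. */
		if (!strncmp(line, "numa_task_migrated", 18) ||
		    !strncmp(line, "numa_task_swapped", 17))
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}

The per-task view lands in /proc/<pid>/sched via the kernel/sched/debug.c hunk below.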
+config MEMCG_NMI_UNSAFE + bool + depends on MEMCG + depends on HAVE_NMI + depends on !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !ARCH_HAVE_NMI_SAFE_CMPXCHG + default y + +config MEMCG_NMI_SAFETY_REQUIRES_ATOMIC + bool + depends on MEMCG + depends on HAVE_NMI + depends on !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && ARCH_HAVE_NMI_SAFE_CMPXCHG + default y + config MEMCG_V1 bool "Legacy cgroup v1 memory controller" depends on MEMCG diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 19a2c65f3d37..565f9717c6ca 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -531,7 +531,7 @@ static u64 get_inode_sequence_number(struct inode *inode) * * For shared mappings (when @fshared), the key is: * - * ( inode->i_sequence, page->index, offset_within_page ) + * ( inode->i_sequence, page offset within mapping, offset_within_page ) * * [ also see get_inode_sequence_number() ] * diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 62b3416f5e43..dce50fa57471 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3362,6 +3362,10 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) #ifdef CONFIG_NUMA_BALANCING static void __migrate_swap_task(struct task_struct *p, int cpu) { + __schedstat_inc(p->stats.numa_task_swapped); + count_vm_numa_event(NUMA_TASK_SWAP); + count_memcg_event_mm(p->mm, NUMA_TASK_SWAP); + if (task_on_rq_queued(p)) { struct rq *src_rq, *dst_rq; struct rq_flags srf, drf; @@ -7930,8 +7934,9 @@ int migrate_task_to(struct task_struct *p, int target_cpu) if (!cpumask_test_cpu(target_cpu, p->cpus_ptr)) return -EINVAL; - /* TODO: This is not properly updating schedstats */ - + __schedstat_inc(p->stats.numa_task_migrated); + count_vm_numa_event(NUMA_TASK_MIGRATE); + count_memcg_event_mm(p->mm, NUMA_TASK_MIGRATE); trace_sched_move_numa(p, curr_cpu, target_cpu); return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg); } diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 557246880a7e..9d71baf08075 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -1210,6 +1210,10 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, P_SCHEDSTAT(nr_failed_migrations_running); P_SCHEDSTAT(nr_failed_migrations_hot); P_SCHEDSTAT(nr_forced_migrations); +#ifdef CONFIG_NUMA_BALANCING + P_SCHEDSTAT(numa_task_migrated); + P_SCHEDSTAT(numa_task_swapped); +#endif P_SCHEDSTAT(nr_wakeups); P_SCHEDSTAT(nr_wakeups_sync); P_SCHEDSTAT(nr_wakeups_migrate); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b4326827e326..7a14da5396fb 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2273,7 +2273,8 @@ static bool task_numa_compare(struct task_numa_env *env, rcu_read_lock(); cur = rcu_dereference(dst_rq->curr); - if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur))) + if (cur && ((cur->flags & (PF_EXITING | PF_KTHREAD)) || + !cur->mm)) cur = NULL; /* diff --git a/lib/iov_iter.c b/lib/iov_iter.c index d9e19fb2dcf3..969d4ad510df 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -457,38 +457,35 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i) } EXPORT_SYMBOL(iov_iter_zero); -size_t copy_page_from_iter_atomic(struct page *page, size_t offset, +size_t copy_folio_from_iter_atomic(struct folio *folio, size_t offset, size_t bytes, struct iov_iter *i) { size_t n, copied = 0; - bool uses_kmap = IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) || - PageHighMem(page); - if (!page_copy_sane(page, offset, bytes)) + if (!page_copy_sane(&folio->page, offset, bytes)) return 0; if (WARN_ON_ONCE(!i->data_source)) return 0; do { - char *p; + 
char *to = kmap_local_folio(folio, offset);
 
 		n = bytes - copied;
-		if (uses_kmap) {
-			page += offset / PAGE_SIZE;
-			offset %= PAGE_SIZE;
-			n = min_t(size_t, n, PAGE_SIZE - offset);
-		}
-
-		p = kmap_atomic(page) + offset;
-		n = __copy_from_iter(p, n, i);
-		kunmap_atomic(p);
+		if (folio_test_partial_kmap(folio) &&
+		    n > PAGE_SIZE - offset_in_page(offset))
+			n = PAGE_SIZE - offset_in_page(offset);
+
+		pagefault_disable();
+		n = __copy_from_iter(to, n, i);
+		pagefault_enable();
+		kunmap_local(to);
 		copied += n;
 		offset += n;
-	} while (uses_kmap && copied != bytes && n > 0);
+	} while (copied != bytes && n > 0);
 
 	return copied;
 }
-EXPORT_SYMBOL(copy_page_from_iter_atomic);
+EXPORT_SYMBOL(copy_folio_from_iter_atomic);
 
 static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
 {
diff --git a/mm/Kconfig b/mm/Kconfig
index bd08e151fa1b..f8bb8f070d0d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -993,6 +993,40 @@ config CMA_AREAS
 
 	  If unsure, leave the default value "8" in UMA and "20" in NUMA.
 
+#
+# Select this config option from the architecture Kconfig, if available, to set
+# the max page order for physically contiguous allocations.
+#
+config ARCH_FORCE_MAX_ORDER
+	int
+
+#
+# When ARCH_FORCE_MAX_ORDER is not defined,
+# the default page block order is MAX_PAGE_ORDER (10) as per
+# include/linux/mmzone.h.
+#
+config PAGE_BLOCK_ORDER
+	int "Page Block Order"
+	range 1 10 if ARCH_FORCE_MAX_ORDER = 0
+	default 10 if ARCH_FORCE_MAX_ORDER = 0
+	range 1 ARCH_FORCE_MAX_ORDER if ARCH_FORCE_MAX_ORDER != 0
+	default ARCH_FORCE_MAX_ORDER if ARCH_FORCE_MAX_ORDER != 0
+	help
+	  The page block order refers to the power of two number of pages that
+	  are physically contiguous and can have a migrate type associated with
+	  them. The maximum size of the page block order is limited by
+	  ARCH_FORCE_MAX_ORDER.
+
+	  This config allows overriding the default page block order when the
+	  page block order is required to be smaller than ARCH_FORCE_MAX_ORDER
+	  or MAX_PAGE_ORDER.
+
+	  Reducing pageblock order can negatively impact THP generation
+	  success rate. If your workloads use THP heavily, please use this
+	  option with caution.
+
+	  Don't change if unsure.
+
 config MEM_SOFT_DIRTY
 	bool "Track memory changes"
 	depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig
index c213cf8b5638..551745df011b 100644
--- a/mm/damon/Kconfig
+++ b/mm/damon/Kconfig
@@ -4,6 +4,7 @@ menu "Data Access Monitoring"
 
 config DAMON
 	bool "DAMON: Data Access Monitoring Framework"
+	default y
 	help
 	  This builds a framework that allows kernel subsystems to monitor
 	  access frequency of each memory region. The information can be useful
@@ -28,6 +29,7 @@ config DAMON_VADDR
 	bool "Data access monitoring operations for virtual address spaces"
 	depends on DAMON && MMU
 	select PAGE_IDLE_FLAG
+	default DAMON
 	help
 	  This builds the default data access monitoring operations for DAMON
 	  that work for virtual address spaces.
@@ -36,6 +38,7 @@ config DAMON_PADDR
 	bool "Data access monitoring operations for the physical address space"
 	depends on DAMON && MMU
 	select PAGE_IDLE_FLAG
+	default DAMON
 	help
 	  This builds the default data access monitoring operations for DAMON
 	  that works for the physical address space.
@@ -55,6 +58,7 @@ config DAMON_VADDR_KUNIT_TEST
 
 config DAMON_SYSFS
 	bool "DAMON sysfs interface"
 	depends on DAMON && SYSFS
+	default DAMON
 	help
 	  This builds the sysfs interface for DAMON. The user space can use
 	  the interface for arbitrary data access monitoring.
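
To make the new mm/Kconfig knob concrete: a pageblock is 2^PAGE_BLOCK_ORDER pages, so with 4 KiB pages the default order 10 gives 4 MiB pageblocks, while order 5 would shrink the migratetype granularity to 128 KiB. A standalone sketch of the arithmetic (PAGE_SHIFT is assumed to be 12 here; only the names mirror the kernel's):

#include <stdio.h>

#define PAGE_SHIFT	12	/* 4 KiB pages, as on most configs */
#define MAX_PAGE_ORDER	10	/* kernel default upper bound */

int main(void)
{
	for (unsigned int order = 1; order <= MAX_PAGE_ORDER; order++) {
		unsigned long pages = 1UL << order;	/* pageblock_nr_pages */
		unsigned long bytes = pages << PAGE_SHIFT;

		printf("PAGE_BLOCK_ORDER=%2u -> %4lu pages, %8lu KiB per pageblock\n",
		       order, pages, bytes >> 10);
	}
	return 0;
}

That is also why the help text warns about THP: pageblocks are the unit at which movable and unmovable allocations are kept apart, and a PMD-sized THP on x86-64 needs a 2 MiB (order 9) contiguous run.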
diff --git a/mm/damon/core.c b/mm/damon/core.c index 0bb71e2ab713..b217e0120e09 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1093,9 +1093,17 @@ static int damon_commit_targets( if (err) return err; } else { + struct damos *s; + if (damon_target_has_pid(dst)) put_pid(dst_target->pid); damon_destroy_target(dst_target); + damon_for_each_scheme(s, dst) { + if (s->quota.charge_target_from == dst_target) { + s->quota.charge_target_from = NULL; + s->quota.charge_addr_from = 0; + } + } } } diff --git a/mm/filemap.c b/mm/filemap.c index 48c944e2c163..bada249b9fb7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -142,7 +142,7 @@ static void page_cache_delete(struct address_space *mapping, xas_init_marks(&xas); folio->mapping = NULL; - /* Leave page->index set: truncation lookup relies upon it */ + /* Leave folio->index set: truncation lookup relies upon it */ mapping->nrpages -= nr; } @@ -949,7 +949,7 @@ unlock: return 0; error: folio->mapping = NULL; - /* Leave page->index set: truncation relies upon it */ + /* Leave folio->index set: truncation relies upon it */ folio_put_refs(folio, nr); return xas_error(&xas); } @@ -3299,7 +3299,7 @@ static unsigned long gup_fast(unsigned long start, unsigned long end, * include/asm-generic/tlb.h for more details. * * We do not adopt an rcu_read_lock() here as we also want to block IPIs - * that come from THPs splitting. + * that come from callers of tlb_remove_table_sync_one(). */ local_irq_save(flags); gup_fast_pgd_range(start, end, gup_flags, pages, &nr_pinned); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 32ab14aa4074..f0b1d53079f9 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3741,7 +3741,7 @@ static void __init report_hugepages(void) string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32); pr_info("HugeTLB: registered %s page size, pre-allocated %ld pages\n", - buf, h->free_huge_pages); + buf, h->nr_huge_pages); if (nrinvalid) pr_info("HugeTLB: %s page size: %lu invalid page%s discarded\n", buf, nrinvalid, nrinvalid > 1 ? "s" : ""); diff --git a/mm/khugepaged.c b/mm/khugepaged.c index cdf5a581368b..15203ea7d007 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -548,19 +548,6 @@ static void release_pte_pages(pte_t *pte, pte_t *_pte, } } -static bool is_refcount_suitable(struct folio *folio) -{ - int expected_refcount = folio_mapcount(folio); - - if (!folio_test_anon(folio) || folio_test_swapcache(folio)) - expected_refcount += folio_nr_pages(folio); - - if (folio_test_private(folio)) - expected_refcount++; - - return folio_ref_count(folio) == expected_refcount; -} - static int __collapse_huge_page_isolate(struct vm_area_struct *vma, unsigned long address, pte_t *pte, @@ -652,7 +639,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, * but not from this process. The other process cannot write to * the page, only trigger CoW. */ - if (!is_refcount_suitable(folio)) { + if (folio_expected_ref_count(folio) != folio_ref_count(folio)) { folio_unlock(folio); result = SCAN_PAGE_COUNT; goto out; @@ -1402,7 +1389,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm, * has excessive GUP pins (i.e. 512). Anyway the same check * will be done again later the risk seems low. 
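
The mm/damon/core.c hunk above closes a dangling-pointer window: a scheme's quota may still reference a target (quota.charge_target_from) at the moment damon_commit_targets() destroys that target, so the fix clears those back-references when the target goes away. A generic, compilable sketch of that invalidate-before-free discipline (the struct names are invented for illustration, not DAMON's):

#include <stdlib.h>

struct target { int id; };

struct scheme {
	struct scheme *next;
	struct target *charge_from;	/* may point at a target */
	unsigned long charge_addr;
};

/*
 * Drop every back-reference to @t before freeing it, so no scheme is
 * left holding a pointer into freed memory.
 */
static void destroy_target(struct target *t, struct scheme *schemes)
{
	for (struct scheme *s = schemes; s; s = s->next) {
		if (s->charge_from == t) {
			s->charge_from = NULL;
			s->charge_addr = 0;
		}
	}
	free(t);
}

int main(void)
{
	struct target *t = malloc(sizeof(*t));
	struct scheme s = { .next = NULL, .charge_from = t, .charge_addr = 42 };

	destroy_target(t, &s);
	return s.charge_from == NULL ? 0 : 1;
}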
*/ - if (!is_refcount_suitable(folio)) { + if (folio_expected_ref_count(folio) != folio_ref_count(folio)) { result = SCAN_PAGE_COUNT; goto out_unmap; } @@ -2293,6 +2280,17 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, continue; } + if (!folio_try_get(folio)) { + xas_reset(&xas); + continue; + } + + if (unlikely(folio != xas_reload(&xas))) { + folio_put(folio); + xas_reset(&xas); + continue; + } + if (folio_order(folio) == HPAGE_PMD_ORDER && folio->index == start) { /* Maybe PMD-mapped */ @@ -2303,23 +2301,27 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, * it's safe to skip LRU and refcount checks before * returning. */ + folio_put(folio); break; } node = folio_nid(folio); if (hpage_collapse_scan_abort(node, cc)) { result = SCAN_SCAN_ABORT; + folio_put(folio); break; } cc->node_load[node]++; if (!folio_test_lru(folio)) { result = SCAN_PAGE_LRU; + folio_put(folio); break; } - if (!is_refcount_suitable(folio)) { + if (folio_expected_ref_count(folio) + 1 != folio_ref_count(folio)) { result = SCAN_PAGE_COUNT; + folio_put(folio); break; } @@ -2331,6 +2333,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, */ present += folio_nr_pages(folio); + folio_put(folio); if (need_resched()) { xas_pause(&xas); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b90aa3075950..902da8a9c643 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -474,6 +474,8 @@ static const unsigned int memcg_vm_event_stat[] = { NUMA_PAGE_MIGRATE, NUMA_PTE_UPDATES, NUMA_HINT_FAULTS, + NUMA_TASK_MIGRATE, + NUMA_TASK_SWAP, #endif }; @@ -531,7 +533,7 @@ struct memcg_vmstats { unsigned long events_pending[NR_MEMCG_EVENTS]; /* Stats updates since the last flush */ - atomic64_t stats_updates; + atomic_t stats_updates; }; /* @@ -557,7 +559,7 @@ static u64 flush_last_time; static bool memcg_vmstats_needs_flush(struct memcg_vmstats *vmstats) { - return atomic64_read(&vmstats->stats_updates) > + return atomic_read(&vmstats->stats_updates) > MEMCG_CHARGE_BATCH * num_online_cpus(); } @@ -571,7 +573,9 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val, if (!val) return; - css_rstat_updated(&memcg->css, cpu); + /* TODO: add to cgroup update tree once it is nmi-safe. 
*/ + if (!in_nmi()) + css_rstat_updated(&memcg->css, cpu); statc_pcpu = memcg->vmstats_percpu; for (; statc_pcpu; statc_pcpu = statc->parent_pcpu) { statc = this_cpu_ptr(statc_pcpu); @@ -589,7 +593,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val, continue; stats_updates = this_cpu_xchg(statc_pcpu->stats_updates, 0); - atomic64_add(stats_updates, &statc->vmstats->stats_updates); + atomic_add(stats_updates, &statc->vmstats->stats_updates); } } @@ -597,7 +601,7 @@ static void __mem_cgroup_flush_stats(struct mem_cgroup *memcg, bool force) { bool needs_flush = memcg_vmstats_needs_flush(memcg->vmstats); - trace_memcg_flush_stats(memcg, atomic64_read(&memcg->vmstats->stats_updates), + trace_memcg_flush_stats(memcg, atomic_read(&memcg->vmstats->stats_updates), force, needs_flush); if (!force && !needs_flush) @@ -2513,17 +2517,47 @@ static void commit_charge(struct folio *folio, struct mem_cgroup *memcg) folio->memcg_data = (unsigned long)memcg; } +#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC +static inline void account_slab_nmi_safe(struct mem_cgroup *memcg, + struct pglist_data *pgdat, + enum node_stat_item idx, int nr) +{ + struct lruvec *lruvec; + + if (likely(!in_nmi())) { + lruvec = mem_cgroup_lruvec(memcg, pgdat); + mod_memcg_lruvec_state(lruvec, idx, nr); + } else { + struct mem_cgroup_per_node *pn = memcg->nodeinfo[pgdat->node_id]; + + /* TODO: add to cgroup update tree once it is nmi-safe. */ + if (idx == NR_SLAB_RECLAIMABLE_B) + atomic_add(nr, &pn->slab_reclaimable); + else + atomic_add(nr, &pn->slab_unreclaimable); + } +} +#else +static inline void account_slab_nmi_safe(struct mem_cgroup *memcg, + struct pglist_data *pgdat, + enum node_stat_item idx, int nr) +{ + struct lruvec *lruvec; + + lruvec = mem_cgroup_lruvec(memcg, pgdat); + mod_memcg_lruvec_state(lruvec, idx, nr); +} +#endif + static inline void mod_objcg_mlstate(struct obj_cgroup *objcg, struct pglist_data *pgdat, enum node_stat_item idx, int nr) { struct mem_cgroup *memcg; - struct lruvec *lruvec; rcu_read_lock(); memcg = obj_cgroup_memcg(objcg); - lruvec = mem_cgroup_lruvec(memcg, pgdat); - mod_memcg_lruvec_state(lruvec, idx, nr); + account_slab_nmi_safe(memcg, pgdat, idx, nr); rcu_read_unlock(); } @@ -2648,6 +2682,9 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void) struct mem_cgroup *memcg; struct obj_cgroup *objcg; + if (IS_ENABLED(CONFIG_MEMCG_NMI_UNSAFE) && in_nmi()) + return NULL; + if (in_task()) { memcg = current->active_memcg; if (unlikely(memcg)) @@ -2710,6 +2747,23 @@ struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio) return objcg; } +#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC +static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int val) +{ + if (likely(!in_nmi())) { + mod_memcg_state(memcg, MEMCG_KMEM, val); + } else { + /* TODO: add to cgroup update tree once it is nmi-safe. 
*/ + atomic_add(val, &memcg->kmem_stat); + } +} +#else +static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int val) +{ + mod_memcg_state(memcg, MEMCG_KMEM, val); +} +#endif + /* * obj_cgroup_uncharge_pages: uncharge a number of kernel pages from a objcg * @objcg: object cgroup to uncharge @@ -2722,7 +2776,7 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg, memcg = get_mem_cgroup_from_objcg(objcg); - mod_memcg_state(memcg, MEMCG_KMEM, -nr_pages); + account_kmem_nmi_safe(memcg, -nr_pages); memcg1_account_kmem(memcg, -nr_pages); if (!mem_cgroup_is_root(memcg)) refill_stock(memcg, nr_pages); @@ -2750,7 +2804,7 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp, if (ret) goto out; - mod_memcg_state(memcg, MEMCG_KMEM, nr_pages); + account_kmem_nmi_safe(memcg, nr_pages); memcg1_account_kmem(memcg, nr_pages); out: css_put(&memcg->css); @@ -3961,6 +4015,53 @@ static void mem_cgroup_stat_aggregate(struct aggregate_control *ac) } } +#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC +static void flush_nmi_stats(struct mem_cgroup *memcg, struct mem_cgroup *parent, + int cpu) +{ + int nid; + + if (atomic_read(&memcg->kmem_stat)) { + int kmem = atomic_xchg(&memcg->kmem_stat, 0); + int index = memcg_stats_index(MEMCG_KMEM); + + memcg->vmstats->state[index] += kmem; + if (parent) + parent->vmstats->state_pending[index] += kmem; + } + + for_each_node_state(nid, N_MEMORY) { + struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid]; + struct lruvec_stats *lstats = pn->lruvec_stats; + struct lruvec_stats *plstats = NULL; + + if (parent) + plstats = parent->nodeinfo[nid]->lruvec_stats; + + if (atomic_read(&pn->slab_reclaimable)) { + int slab = atomic_xchg(&pn->slab_reclaimable, 0); + int index = memcg_stats_index(NR_SLAB_RECLAIMABLE_B); + + lstats->state[index] += slab; + if (plstats) + plstats->state_pending[index] += slab; + } + if (atomic_read(&pn->slab_unreclaimable)) { + int slab = atomic_xchg(&pn->slab_unreclaimable, 0); + int index = memcg_stats_index(NR_SLAB_UNRECLAIMABLE_B); + + lstats->state[index] += slab; + if (plstats) + plstats->state_pending[index] += slab; + } + } +} +#else +static void flush_nmi_stats(struct mem_cgroup *memcg, struct mem_cgroup *parent, + int cpu) +{} +#endif + static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); @@ -3969,6 +4070,8 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu) struct aggregate_control ac; int nid; + flush_nmi_stats(memcg, parent, cpu); + statc = per_cpu_ptr(memcg->vmstats_percpu, cpu); ac = (struct aggregate_control) { @@ -4018,8 +4121,8 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu) } WRITE_ONCE(statc->stats_updates, 0); /* We are in a per-cpu loop here, only do the atomic write once */ - if (atomic64_read(&memcg->vmstats->stats_updates)) - atomic64_set(&memcg->vmstats->stats_updates, 0); + if (atomic_read(&memcg->vmstats->stats_updates)) + atomic_set(&memcg->vmstats->stats_updates, 0); } static void mem_cgroup_fork(struct task_struct *task) diff --git a/mm/memory.c b/mm/memory.c index 5cb48f262ab0..8eba595056fe 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -358,6 +358,8 @@ void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas, { struct unlink_vma_file_batch vb; + tlb_free_vmas(tlb); + do { unsigned long addr = vma->vm_start; struct vm_area_struct *next; @@ -4668,8 +4670,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) /* * KSM sometimes has 
to copy on read faults, for example, if - * page->index of !PageKSM() pages would be nonlinear inside the - * anon VMA -- PageKSM() is lost on actual swapout. + * folio->index of non-ksm folios would be nonlinear inside the + * anon VMA -- the ksm flag is lost on actual swapout. */ folio = ksm_might_need_to_copy(folio, vma, vmf->address); if (unlikely(!folio)) { diff --git a/mm/mm_init.c b/mm/mm_init.c index f0bd0830daad..f2944748f526 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1509,7 +1509,7 @@ static inline void setup_usemap(struct zone *zone) {} /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */ void __init set_pageblock_order(void) { - unsigned int order = MAX_PAGE_ORDER; + unsigned int order = PAGE_BLOCK_ORDER; /* Check that pageblock_nr_pages has not already been setup */ if (pageblock_order) diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index db7ba4a725d6..b49cc6385f1f 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -424,6 +424,7 @@ static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, #ifdef CONFIG_MMU_GATHER_PAGE_SIZE tlb->page_size = 0; #endif + tlb->vma_pfn = 0; __tlb_reset_range(tlb); inc_tlb_flush_pending(tlb->mm); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index b603a59cf8f7..b8eea5b3c064 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2565,11 +2565,11 @@ struct folio *writeback_iter(struct address_space *mapping, if (!folio) { /* * To avoid deadlocks between range_cyclic writeback and callers - * that hold pages in PageWriteback to aggregate I/O until + * that hold folios in writeback to aggregate I/O until * the writeback iteration finishes, we do not loop back to the - * start of the file. Doing so causes a page lock/page + * start of the file. Doing so causes a folio lock/folio * writeback access order inversion - we should only ever lock - * multiple pages in ascending page->index order, and looping + * multiple folios in ascending folio->index order, and looping * back to the start of the file violates that rule and causes * deadlocks. 
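
Looking back at the memcontrol.c changes, they share one shape: on architectures where the normal per-CPU stats path is not NMI-safe (CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC), updates made from NMI context go to a plain atomic counter, and flush_nmi_stats() later folds that remainder back into the real statistics. A userspace-flavored sketch of the same split, with in_nmi() reduced to a boolean stand-in (names borrowed from the kernel, logic simplified):

#include <stdatomic.h>
#include <stdbool.h>

/* Stand-in for in_nmi(): is the fast, batched path unusable right now? */
static _Thread_local bool in_nmi_ctx;

static long normal_counter;		/* fast path, not NMI-safe */
static atomic_long nmi_counter;		/* slow path, NMI-safe accumulator */

static void account(long delta)
{
	if (!in_nmi_ctx)
		normal_counter += delta;	/* kernel: mod_memcg_state() */
	else
		atomic_fetch_add(&nmi_counter, delta);
}

/* Flush time: fold the NMI-side remainder in, as flush_nmi_stats() does. */
static long read_counter(void)
{
	normal_counter += atomic_exchange(&nmi_counter, 0);
	return normal_counter;
}

int main(void)
{
	account(5);
	in_nmi_ctx = true;
	account(3);
	in_nmi_ctx = false;
	return read_counter() == 8 ? 0 : 1;
}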
*/ diff --git a/mm/shmem.c b/mm/shmem.c index 858cee02ca49..0c5fb4ffa03a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1446,8 +1446,6 @@ static int shmem_unuse_swap_entries(struct inode *inode, for (i = 0; i < folio_batch_count(fbatch); i++) { struct folio *folio = fbatch->folios[i]; - if (!xa_is_value(folio)) - continue; error = shmem_swapin_folio(inode, indices[i], &folio, SGP_CACHE, mapping_gfp_mask(mapping), NULL, NULL); if (error == 0) { @@ -1505,6 +1503,7 @@ int shmem_unuse(unsigned int type) return 0; mutex_lock(&shmem_swaplist_mutex); +start_over: list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) { if (!info->swapped) { list_del_init(&info->swaplist); @@ -1523,13 +1522,15 @@ int shmem_unuse(unsigned int type) cond_resched(); mutex_lock(&shmem_swaplist_mutex); - next = list_next_entry(info, swaplist); - if (!info->swapped) - list_del_init(&info->swaplist); if (atomic_dec_and_test(&info->stop_eviction)) wake_up_var(&info->stop_eviction); if (error) break; + if (list_empty(&info->swaplist)) + goto start_over; + next = list_next_entry(info, swaplist); + if (!info->swapped) + list_del_init(&info->swaplist); } mutex_unlock(&shmem_swaplist_mutex); @@ -1643,8 +1644,8 @@ try_split: BUG_ON(folio_mapped(folio)); return swap_writeout(folio, wbc); } - - list_del_init(&info->swaplist); + if (!info->swapped) + list_del_init(&info->swaplist); mutex_unlock(&shmem_swaplist_mutex); if (nr_pages > 1) goto try_split; @@ -2331,6 +2332,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, */ split_order = shmem_split_large_entry(inode, index, swap, gfp); if (split_order < 0) { + folio_put(folio); + folio = NULL; error = split_order; goto failed; } @@ -5805,12 +5808,12 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, if (size < 0 || size > MAX_LFS_FILESIZE) return ERR_PTR(-EINVAL); - if (shmem_acct_size(flags, size)) - return ERR_PTR(-ENOMEM); - if (is_idmapped_mnt(mnt)) return ERR_PTR(-EINVAL); + if (shmem_acct_size(flags, size)) + return ERR_PTR(-ENOMEM); + inode = shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0, flags); if (IS_ERR(inode)) { diff --git a/mm/truncate.c b/mm/truncate.c index f2aaf99f2990..91eb92a5ce4f 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -425,7 +425,7 @@ void truncate_inode_pages_range(struct address_space *mapping, for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i]; - /* We rely upon deletion not changing page->index */ + /* We rely upon deletion not changing folio->index */ if (xa_is_value(folio)) continue; diff --git a/mm/vmstat.c b/mm/vmstat.c index d888c248d99f..6f740f070b3d 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1347,6 +1347,8 @@ const char * const vmstat_text[] = { "numa_hint_faults", "numa_hint_faults_local", "numa_pages_migrated", + "numa_task_migrated", + "numa_task_swapped", #endif #ifdef CONFIG_MIGRATION "pgmigrate_success", diff --git a/mm/zpdesc.h b/mm/zpdesc.h index 57e7a4d6c6ca..d3df316e5bb7 100644 --- a/mm/zpdesc.h +++ b/mm/zpdesc.h @@ -54,8 +54,8 @@ struct zpdesc { ZPDESC_MATCH(flags, flags); ZPDESC_MATCH(lru, lru); ZPDESC_MATCH(mapping, movable_ops); -ZPDESC_MATCH(index, next); -ZPDESC_MATCH(index, handle); +ZPDESC_MATCH(__folio_index, next); +ZPDESC_MATCH(__folio_index, handle); ZPDESC_MATCH(private, zspage); ZPDESC_MATCH(page_type, first_obj_offset); ZPDESC_MATCH(_refcount, _refcount); diff --git a/rust/Makefile b/rust/Makefile index 3aca903a7d08..80c84749d734 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -492,6 
+492,7 @@ $(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs \ ifneq ($(or $(CONFIG_X86_64),$(CONFIG_X86_32)),) $(obj)/core.o: scripts/target.json endif +KCOV_INSTRUMENT_core.o := n $(obj)/compiler_builtins.o: private skip_gendwarfksyms = 1 $(obj)/compiler_builtins.o: private rustc_objcopy = -w -W '__*' diff --git a/rust/kernel/mm.rs b/rust/kernel/mm.rs index 615907a0f3b4..43f525c0d16c 100644 --- a/rust/kernel/mm.rs +++ b/rust/kernel/mm.rs @@ -10,7 +10,6 @@ //! control what happens when userspace reads or writes to that region of memory. //! //! C header: [`include/linux/mm.h`](srctree/include/linux/mm.h) -#![cfg(CONFIG_MMU)] use crate::{ bindings, @@ -21,6 +20,10 @@ use core::{ops::Deref, ptr::NonNull}; pub mod virt; use virt::VmaRef; +#[cfg(CONFIG_MMU)] +pub use mmput_async::MmWithUserAsync; +mod mmput_async; + /// A wrapper for the kernel's `struct mm_struct`. /// /// This represents the address space of a userspace process, so each process has one `Mm` @@ -111,50 +114,6 @@ impl Deref for MmWithUser { } } -/// A wrapper for the kernel's `struct mm_struct`. -/// -/// This type is identical to `MmWithUser` except that it uses `mmput_async` when dropping a -/// refcount. This means that the destructor of `ARef<MmWithUserAsync>` is safe to call in atomic -/// context. -/// -/// # Invariants -/// -/// Values of this type are always refcounted using `mmget`. The value of `mm_users` is non-zero. -#[repr(transparent)] -pub struct MmWithUserAsync { - mm: MmWithUser, -} - -// SAFETY: It is safe to call `mmput_async` on another thread than where `mmget` was called. -unsafe impl Send for MmWithUserAsync {} -// SAFETY: All methods on `MmWithUserAsync` can be called in parallel from several threads. -unsafe impl Sync for MmWithUserAsync {} - -// SAFETY: By the type invariants, this type is always refcounted. -unsafe impl AlwaysRefCounted for MmWithUserAsync { - #[inline] - fn inc_ref(&self) { - // SAFETY: The pointer is valid since self is a reference. - unsafe { bindings::mmget(self.as_raw()) }; - } - - #[inline] - unsafe fn dec_ref(obj: NonNull<Self>) { - // SAFETY: The caller is giving up their refcount. - unsafe { bindings::mmput_async(obj.cast().as_ptr()) }; - } -} - -// Make all `MmWithUser` methods available on `MmWithUserAsync`. -impl Deref for MmWithUserAsync { - type Target = MmWithUser; - - #[inline] - fn deref(&self) -> &MmWithUser { - &self.mm - } -} - // These methods are safe to call even if `mm_users` is zero. impl Mm { /// Returns a raw pointer to the inner `mm_struct`. @@ -206,13 +165,6 @@ impl MmWithUser { unsafe { &*ptr.cast() } } - /// Use `mmput_async` when dropping this refcount. - #[inline] - pub fn into_mmput_async(me: ARef<MmWithUser>) -> ARef<MmWithUserAsync> { - // SAFETY: The layouts and invariants are compatible. - unsafe { ARef::from_raw(ARef::into_raw(me).cast()) } - } - /// Attempt to access a vma using the vma read lock. /// /// This is an optimistic trylock operation, so it may fail if there is contention. In that diff --git a/rust/kernel/mm/mmput_async.rs b/rust/kernel/mm/mmput_async.rs new file mode 100644 index 000000000000..9289e05f7a67 --- /dev/null +++ b/rust/kernel/mm/mmput_async.rs @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Version of `MmWithUser` using `mmput_async`. +//! +//! This is a separate file from `mm.rs` due to the dependency on `CONFIG_MMU=y`. 
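
On the Rust side, moving MmWithUserAsync into its own mmput_async.rs module (below) is organizational, but the reason the async flavor exists is worth spelling out: the final mmput() may sleep while tearing down the address space, so callers in atomic context use mmput_async(), which defers the teardown to a workqueue. A generic C sketch of such a put-inline versus put-deferred split, with a detached thread standing in for the workqueue (compile with -pthread; the obj type is invented):

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct obj {
	atomic_int refs;
	/* ... payload whose teardown may sleep ... */
};

static void obj_free(struct obj *o) { free(o); }	/* may sleep */

static void *free_worker(void *p) { obj_free(p); return NULL; }

/* Synchronous put: may run the sleeping destructor inline. */
static void obj_put(struct obj *o)
{
	if (atomic_fetch_sub(&o->refs, 1) == 1)
		obj_free(o);
}

/*
 * Async put: safe from contexts that cannot sleep, because the
 * destructor is deferred to another thread (the kernel defers to a
 * workqueue in mmput_async()).
 */
static void obj_put_async(struct obj *o)
{
	pthread_t t;

	if (atomic_fetch_sub(&o->refs, 1) == 1) {
		pthread_create(&t, NULL, free_worker, o);
		pthread_detach(t);
	}
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));

	atomic_init(&o->refs, 2);
	obj_put_async(o);	/* drops one ref, nothing freed yet */
	obj_put(o);		/* last ref: freed inline */
	pthread_exit(NULL);	/* let any detached worker finish */
}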
+#![cfg(CONFIG_MMU)] + +use crate::{ + bindings, + mm::MmWithUser, + types::{ARef, AlwaysRefCounted}, +}; +use core::{ops::Deref, ptr::NonNull}; + +/// A wrapper for the kernel's `struct mm_struct`. +/// +/// This type is identical to `MmWithUser` except that it uses `mmput_async` when dropping a +/// refcount. This means that the destructor of `ARef<MmWithUserAsync>` is safe to call in atomic +/// context. +/// +/// # Invariants +/// +/// Values of this type are always refcounted using `mmget`. The value of `mm_users` is non-zero. +#[repr(transparent)] +pub struct MmWithUserAsync { + mm: MmWithUser, +} + +// SAFETY: It is safe to call `mmput_async` on another thread than where `mmget` was called. +unsafe impl Send for MmWithUserAsync {} +// SAFETY: All methods on `MmWithUserAsync` can be called in parallel from several threads. +unsafe impl Sync for MmWithUserAsync {} + +// SAFETY: By the type invariants, this type is always refcounted. +unsafe impl AlwaysRefCounted for MmWithUserAsync { + #[inline] + fn inc_ref(&self) { + // SAFETY: The pointer is valid since self is a reference. + unsafe { bindings::mmget(self.as_raw()) }; + } + + #[inline] + unsafe fn dec_ref(obj: NonNull<Self>) { + // SAFETY: The caller is giving up their refcount. + unsafe { bindings::mmput_async(obj.cast().as_ptr()) }; + } +} + +// Make all `MmWithUser` methods available on `MmWithUserAsync`. +impl Deref for MmWithUserAsync { + type Target = MmWithUser; + + #[inline] + fn deref(&self) -> &MmWithUser { + &self.mm + } +} + +impl MmWithUser { + /// Use `mmput_async` when dropping this refcount. + #[inline] + pub fn into_mmput_async(me: ARef<MmWithUser>) -> ARef<MmWithUserAsync> { + // SAFETY: The layouts and invariants are compatible. + unsafe { ARef::from_raw(ARef::into_raw(me).cast()) } + } +} diff --git a/scripts/Makefile.kcov b/scripts/Makefile.kcov index 01616472f43e..78305a84ba9d 100644 --- a/scripts/Makefile.kcov +++ b/scripts/Makefile.kcov @@ -2,4 +2,10 @@ kcov-flags-y += -fsanitize-coverage=trace-pc kcov-flags-$(CONFIG_KCOV_ENABLE_COMPARISONS) += -fsanitize-coverage=trace-cmp +kcov-rflags-y += -Cpasses=sancov-module +kcov-rflags-y += -Cllvm-args=-sanitizer-coverage-level=3 +kcov-rflags-y += -Cllvm-args=-sanitizer-coverage-trace-pc +kcov-rflags-$(CONFIG_KCOV_ENABLE_COMPARISONS) += -Cllvm-args=-sanitizer-coverage-trace-compares + export CFLAGS_KCOV := $(kcov-flags-y) +export RUSTFLAGS_KCOV := $(kcov-rflags-y) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 6fc2a82ee3bb..2b332645e0c2 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -169,6 +169,9 @@ ifeq ($(CONFIG_KCOV),y) _c_flags += $(if $(patsubst n%,, \ $(KCOV_INSTRUMENT_$(target-stem).o)$(KCOV_INSTRUMENT)$(if $(is-kernel-object),$(CONFIG_KCOV_INSTRUMENT_ALL))), \ $(CFLAGS_KCOV)) +_rust_flags += $(if $(patsubst n%,, \ + $(KCOV_INSTRUMENT_$(target-stem).o)$(KCOV_INSTRUMENT)$(if $(is-kernel-object),$(CONFIG_KCOV_INSTRUMENT_ALL))), \ + $(RUSTFLAGS_KCOV)) endif # diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index 1e587e0b1a39..5b1cb6b3ce4e 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -15,6 +15,10 @@ if sysfs_root is None: print('Seems sysfs not mounted?') exit(ksft_skip) +if not os.path.exists(sysfs_root): + print('Seems DAMON disabled?') + exit(ksft_skip) + def write_file(path, string): "Returns error string if failed, or None otherwise" string = '%s' % string diff --git a/tools/testing/selftests/mm/cow.c 
b/tools/testing/selftests/mm/cow.c index b6cfe0a4b7df..dbbcc5eb3dce 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -112,9 +112,12 @@ struct comm_pipes { static int setup_comm_pipes(struct comm_pipes *comm_pipes) { - if (pipe(comm_pipes->child_ready) < 0) + if (pipe(comm_pipes->child_ready) < 0) { + ksft_perror("pipe()"); return -errno; + } if (pipe(comm_pipes->parent_ready) < 0) { + ksft_perror("pipe()"); close(comm_pipes->child_ready[0]); close(comm_pipes->child_ready[1]); return -errno; @@ -207,13 +210,14 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); return; } ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } else if (!ret) { exit(fn(mem, size, &comm_pipes)); @@ -228,9 +232,18 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, * write-faults by directly mapping pages writable. */ ret = mprotect(mem, size, PROT_READ); - ret |= mprotect(mem, size, PROT_READ|PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); + write(comm_pipes.parent_ready[1], "0", 1); + wait(&ret); + goto close_comm_pipes; + } + + ret = mprotect(mem, size, PROT_READ|PROT_WRITE); + if (ret) { + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); write(comm_pipes.parent_ready[1], "0", 1); wait(&ret); goto close_comm_pipes; @@ -248,16 +261,16 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, ret = -EINVAL; if (!ret) { - ksft_test_result_pass("No leak from parent into child\n"); + log_test_result(KSFT_PASS); } else if (xfail) { /* * With hugetlb, some vmsplice() tests are currently expected to * fail because (a) harder to fix and (b) nobody really cares. * Flag them as expected failure for now. 
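
The cow.c conversion above separates diagnosis from verdict: ksft_perror() (a kselftest.h helper) prints the errno detail at the failure site, and a single log_test_result() call records exactly one PASS/FAIL/SKIP/XFAIL per test. log_test_result() itself is not defined in these hunks, so its signature is assumed here; the KSFT_* codes and ksft_perror() come from kselftest.h. A condensed sketch of the resulting pattern:

#include <unistd.h>
#include "../kselftest.h"	/* ksft_perror(), KSFT_* codes */

/* Assumed helper from elsewhere in this series: records one result. */
void log_test_result(int result);

static void do_test(void)
{
	int fds[2];

	if (pipe(fds) < 0) {
		ksft_perror("pipe()");		/* errno detail only */
		log_test_result(KSFT_FAIL);	/* exactly one verdict */
		return;
	}
	close(fds[0]);
	close(fds[1]);
	log_test_result(KSFT_PASS);
}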
*/ - ksft_test_result_xfail("Leak from parent into child\n"); + log_test_result(KSFT_XFAIL); } else { - ksft_test_result_fail("Leak from parent into child\n"); + log_test_result(KSFT_FAIL); } close_comm_pipes: close_comm_pipes(&comm_pipes); @@ -306,26 +319,29 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); goto free; } if (pipe(fds) < 0) { - ksft_test_result_fail("pipe() failed\n"); + ksft_perror("pipe() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } if (before_fork) { transferred = vmsplice(fds[1], &iov, 1, 0); if (transferred <= 0) { - ksft_test_result_fail("vmsplice() failed\n"); + ksft_print_msg("vmsplice() failed\n"); + log_test_result(KSFT_FAIL); goto close_pipe; } } ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed\n"); + log_test_result(KSFT_FAIL); goto close_pipe; } else if (!ret) { write(comm_pipes.child_ready[1], "0", 1); @@ -339,7 +355,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, if (!before_fork) { transferred = vmsplice(fds[1], &iov, 1, 0); if (transferred <= 0) { - ksft_test_result_fail("vmsplice() failed\n"); + ksft_perror("vmsplice() failed"); + log_test_result(KSFT_FAIL); wait(&ret); goto close_pipe; } @@ -348,7 +365,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, while (read(comm_pipes.child_ready[0], &buf, 1) != 1) ; if (munmap(mem, size) < 0) { - ksft_test_result_fail("munmap() failed\n"); + ksft_perror("munmap() failed"); + log_test_result(KSFT_FAIL); goto close_pipe; } write(comm_pipes.parent_ready[1], "0", 1); @@ -356,7 +374,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, /* Wait until the child is done writing. */ wait(&ret); if (!WIFEXITED(ret)) { - ksft_test_result_fail("wait() failed\n"); + ksft_perror("wait() failed"); + log_test_result(KSFT_FAIL); goto close_pipe; } @@ -364,22 +383,23 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, for (total = 0; total < transferred; total += cur) { cur = read(fds[0], new + total, transferred - total); if (cur < 0) { - ksft_test_result_fail("read() failed\n"); + ksft_perror("read() failed"); + log_test_result(KSFT_FAIL); goto close_pipe; } } if (!memcmp(old, new, transferred)) { - ksft_test_result_pass("No leak from child into parent\n"); + log_test_result(KSFT_PASS); } else if (xfail) { /* * With hugetlb, some vmsplice() tests are currently expected to * fail because (a) harder to fix and (b) nobody really cares. * Flag them as expected failure for now. 
*/ - ksft_test_result_xfail("Leak from child into parent\n"); + log_test_result(KSFT_XFAIL); } else { - ksft_test_result_fail("Leak from child into parent\n"); + log_test_result(KSFT_FAIL); } close_pipe: close(fds[0]); @@ -416,13 +436,14 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); return; } file = tmpfile(); if (!file) { - ksft_test_result_fail("tmpfile() failed\n"); + ksft_perror("tmpfile() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } fd = fileno(file); @@ -430,14 +451,16 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) tmp = malloc(size); if (!tmp) { - ksft_test_result_fail("malloc() failed\n"); + ksft_print_msg("malloc() failed\n"); + log_test_result(KSFT_FAIL); goto close_file; } /* Skip on errors, as we might just lack kernel support. */ ret = io_uring_queue_init(1, &ring, 0); if (ret < 0) { - ksft_test_result_skip("io_uring_queue_init() failed\n"); + ksft_print_msg("io_uring_queue_init() failed\n"); + log_test_result(KSFT_SKIP); goto free_tmp; } @@ -452,7 +475,8 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) iov.iov_len = size; ret = io_uring_register_buffers(&ring, &iov, 1); if (ret) { - ksft_test_result_skip("io_uring_register_buffers() failed\n"); + ksft_print_msg("io_uring_register_buffers() failed\n"); + log_test_result(KSFT_SKIP); goto queue_exit; } @@ -463,7 +487,8 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) */ ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto unregister_buffers; } else if (!ret) { write(comm_pipes.child_ready[1], "0", 1); @@ -483,10 +508,17 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) * if the page is mapped R/O vs. R/W). 
*/ ret = mprotect(mem, size, PROT_READ); + if (ret) { + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); + goto unregister_buffers; + } + clear_softdirty(); - ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); + ret = mprotect(mem, size, PROT_READ | PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto unregister_buffers; } } @@ -498,25 +530,29 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) memset(mem, 0xff, size); sqe = io_uring_get_sqe(&ring); if (!sqe) { - ksft_test_result_fail("io_uring_get_sqe() failed\n"); + ksft_print_msg("io_uring_get_sqe() failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0); ret = io_uring_submit(&ring); if (ret < 0) { - ksft_test_result_fail("io_uring_submit() failed\n"); + ksft_print_msg("io_uring_submit() failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { - ksft_test_result_fail("io_uring_wait_cqe() failed\n"); + ksft_print_msg("io_uring_wait_cqe() failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } if (cqe->res != size) { - ksft_test_result_fail("write_fixed failed\n"); + ksft_print_msg("write_fixed failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } io_uring_cqe_seen(&ring, cqe); @@ -526,15 +562,18 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) while (total < size) { cur = pread(fd, tmp + total, size - total, total); if (cur < 0) { - ksft_test_result_fail("pread() failed\n"); + ksft_print_msg("pread() failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } total += cur; } /* Finally, check if we read what we expected. */ - ksft_test_result(!memcmp(mem, tmp, size), - "Longterm R/W pin is reliable\n"); + if (!memcmp(mem, tmp, size)) + log_test_result(KSFT_PASS); + else + log_test_result(KSFT_FAIL); quit_child: if (use_fork) { @@ -582,19 +621,21 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, int ret; if (gup_fd < 0) { - ksft_test_result_skip("gup_test not available\n"); + ksft_print_msg("gup_test not available\n"); + log_test_result(KSFT_SKIP); return; } tmp = malloc(size); if (!tmp) { - ksft_test_result_fail("malloc() failed\n"); + ksft_print_msg("malloc() failed\n"); + log_test_result(KSFT_FAIL); return; } ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); goto free_tmp; } @@ -609,7 +650,8 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, */ ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } else if (!ret) { write(comm_pipes.child_ready[1], "0", 1); @@ -646,7 +688,8 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, clear_softdirty(); ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } break; @@ -661,9 +704,11 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); if (ret) { if (errno == EINVAL) - ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n"); + ret = KSFT_SKIP; else - ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n"); + ret = KSFT_FAIL; + 
ksft_perror("PIN_LONGTERM_TEST_START failed"); + log_test_result(ret); goto wait; } @@ -676,22 +721,26 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, */ tmp_val = (__u64)(uintptr_t)tmp; ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val); - if (ret) - ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n"); - else - ksft_test_result(!memcmp(mem, tmp, size), - "Longterm R/O pin is reliable\n"); + if (ret) { + ksft_perror("PIN_LONGTERM_TEST_READ failed"); + log_test_result(KSFT_FAIL); + } else { + if (!memcmp(mem, tmp, size)) + log_test_result(KSFT_PASS); + else + log_test_result(KSFT_FAIL); + } ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP); if (ret) - ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n"); + ksft_perror("PIN_LONGTERM_TEST_STOP failed"); wait: switch (test) { case RO_PIN_TEST_SHARED: write(comm_pipes.parent_ready[1], "0", 1); wait(&ret); if (!WIFEXITED(ret)) - ksft_print_msg("[INFO] wait() failed\n"); + ksft_perror("wait() failed"); break; default: break; @@ -746,14 +795,16 @@ static void do_run_with_base_page(test_fn fn, bool swapout) mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); return; } ret = madvise(mem, pagesize, MADV_NOHUGEPAGE); /* Ignore if not around on a kernel. */ if (ret && errno != EINVAL) { - ksft_test_result_fail("MADV_NOHUGEPAGE failed\n"); + ksft_perror("MADV_NOHUGEPAGE failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -763,7 +814,8 @@ static void do_run_with_base_page(test_fn fn, bool swapout) if (swapout) { madvise(mem, pagesize, MADV_PAGEOUT); if (!pagemap_is_swapped(pagemap_fd, mem)) { - ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); + ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n"); + log_test_result(KSFT_SKIP); goto munmap; } } @@ -775,13 +827,13 @@ munmap: static void run_with_base_page(test_fn fn, const char *desc) { - ksft_print_msg("[RUN] %s ... with base page\n", desc); + log_test_start("%s ... with base page", desc); do_run_with_base_page(fn, false); } static void run_with_base_page_swap(test_fn fn, const char *desc) { - ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc); + log_test_start("%s ... 
with swapped out base page", desc); do_run_with_base_page(fn, true); } @@ -807,7 +859,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mmap_mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); return; } @@ -816,7 +869,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) ret = madvise(mem, thpsize, MADV_HUGEPAGE); if (ret) { - ksft_test_result_fail("MADV_HUGEPAGE failed\n"); + ksft_perror("MADV_HUGEPAGE failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -826,7 +880,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) */ mem[0] = 1; if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) { - ksft_test_result_skip("Did not get a THP populated\n"); + ksft_print_msg("Did not get a THP populated\n"); + log_test_result(KSFT_SKIP); goto munmap; } memset(mem, 1, thpsize); @@ -846,12 +901,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) */ ret = mprotect(mem + pagesize, pagesize, PROT_READ); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto munmap; } ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto munmap; } break; @@ -863,7 +920,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) */ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED); if (ret) { - ksft_test_result_fail("MADV_DONTNEED failed\n"); + ksft_perror("MADV_DONTNEED failed"); + log_test_result(KSFT_FAIL); goto munmap; } size = pagesize; @@ -877,13 +935,15 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) mremap_mem = mmap(NULL, mremap_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mremap_mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } tmp = mremap(mem + mremap_size, mremap_size, mremap_size, MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem); if (tmp != mremap_mem) { - ksft_test_result_fail("mremap() failed\n"); + ksft_perror("mremap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } size = mremap_size; @@ -896,12 +956,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) */ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK); if (ret) { - ksft_test_result_fail("MADV_DONTFORK failed\n"); + ksft_perror("MADV_DONTFORK failed"); + log_test_result(KSFT_FAIL); goto munmap; } ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto munmap; } else if (!ret) { exit(0); @@ -910,7 +972,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) /* Allow for sharing all pages again. 
*/ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK); if (ret) { - ksft_test_result_fail("MADV_DOFORK failed\n"); + ksft_perror("MADV_DOFORK failed"); + log_test_result(KSFT_FAIL); goto munmap; } break; @@ -924,7 +987,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) case THP_RUN_SINGLE_PTE_SWAPOUT: madvise(mem, size, MADV_PAGEOUT); if (!range_is_swapped(mem, size)) { - ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); + ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n"); + log_test_result(KSFT_SKIP); goto munmap; } break; @@ -941,56 +1005,56 @@ munmap: static void run_with_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with THP (%zu kB)\n", + log_test_start("%s ... with THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PMD, size); } static void run_with_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with swapped-out THP (%zu kB)\n", + log_test_start("%s ... with swapped-out THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size); } static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with PTE-mapped THP (%zu kB)\n", + log_test_start("%s ... with PTE-mapped THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PTE, size); } static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP (%zu kB)\n", + log_test_start("%s ... with swapped-out, PTE-mapped THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size); } static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with single PTE of THP (%zu kB)\n", + log_test_start("%s ... with single PTE of THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size); } static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP (%zu kB)\n", + log_test_start("%s ... with single PTE of swapped-out THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size); } static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP (%zu kB)\n", + log_test_start("%s ... with partially mremap()'ed THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size); } static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with partially shared THP (%zu kB)\n", + log_test_start("%s ... with partially shared THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size); } @@ -1000,14 +1064,15 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; char *mem, *dummy; - ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc, + log_test_start("%s ... 
with hugetlb (%zu kB)", desc, hugetlbsize / 1024); flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT; mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); if (mem == MAP_FAILED) { - ksft_test_result_skip("need more free huge pages\n"); + ksft_perror("need more free huge pages"); + log_test_result(KSFT_SKIP); return; } @@ -1020,7 +1085,8 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) */ dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); if (dummy == MAP_FAILED) { - ksft_test_result_skip("need more free huge pages\n"); + ksft_perror("need more free huge pages"); + log_test_result(KSFT_SKIP); goto munmap; } munmap(dummy, hugetlbsize); @@ -1226,7 +1292,7 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); return; } @@ -1236,12 +1302,14 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, */ ret = mprotect(mem + pagesize, pagesize, PROT_READ); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } @@ -1250,8 +1318,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, /* Collapse before actually COW-sharing the page. */ ret = madvise(mem, size, MADV_COLLAPSE); if (ret) { - ksft_test_result_skip("MADV_COLLAPSE failed: %s\n", - strerror(errno)); + ksft_perror("MADV_COLLAPSE failed"); + log_test_result(KSFT_SKIP); goto close_comm_pipes; } break; @@ -1262,7 +1330,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, /* Don't COW-share the upper part of the THP. */ ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK); if (ret) { - ksft_test_result_fail("MADV_DONTFORK failed\n"); + ksft_perror("MADV_DONTFORK failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } break; @@ -1270,7 +1339,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, /* Don't COW-share the lower part of the THP. */ ret = madvise(mem, size / 2, MADV_DONTFORK); if (ret) { - ksft_test_result_fail("MADV_DONTFORK failed\n"); + ksft_perror("MADV_DONTFORK failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } break; @@ -1280,7 +1350,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } else if (!ret) { switch (test) { @@ -1314,7 +1385,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, */ ret = madvise(mem, size, MADV_DOFORK); if (ret) { - ksft_test_result_fail("MADV_DOFORK failed\n"); + ksft_perror("MADV_DOFORK failed"); + log_test_result(KSFT_FAIL); write(comm_pipes.parent_ready[1], "0", 1); wait(&ret); goto close_comm_pipes; @@ -1324,8 +1396,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, /* Collapse before anyone modified the COW-shared page. 
*/ ret = madvise(mem, size, MADV_COLLAPSE); if (ret) { - ksft_test_result_skip("MADV_COLLAPSE failed: %s\n", - strerror(errno)); + ksft_perror("MADV_COLLAPSE failed"); + log_test_result(KSFT_SKIP); write(comm_pipes.parent_ready[1], "0", 1); wait(&ret); goto close_comm_pipes; @@ -1345,7 +1417,10 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, else ret = -EINVAL; - ksft_test_result(!ret, "No leak from parent into child\n"); + if (!ret) + log_test_result(KSFT_PASS); + else + log_test_result(KSFT_FAIL); close_comm_pipes: close_comm_pipes(&comm_pipes); } @@ -1430,7 +1505,7 @@ static void run_anon_thp_test_cases(void) for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) { struct test_case const *test_case = &anon_thp_test_cases[i]; - ksft_print_msg("[RUN] %s\n", test_case->desc); + log_test_start("%s", test_case->desc); do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize); } } @@ -1453,8 +1528,10 @@ static void test_cow(char *mem, const char *smem, size_t size) memset(mem, 0xff, size); /* See if we still read the old values via the other mapping. */ - ksft_test_result(!memcmp(smem, old, size), - "Other mapping not modified\n"); + if (!memcmp(smem, old, size)) + log_test_result(KSFT_PASS); + else + log_test_result(KSFT_FAIL); free(old); } @@ -1472,18 +1549,20 @@ static void run_with_zeropage(non_anon_test_fn fn, const char *desc) { char *mem, *smem, tmp; - ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc); + log_test_start("%s ... with shared zeropage", desc); mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); return; } smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); if (smem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1504,10 +1583,11 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) size_t mmap_size; int ret; - ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc); + log_test_start("%s ... 
with huge zeropage", desc); if (!has_huge_zeropage) { - ksft_test_result_skip("Huge zeropage not enabled\n"); + ksft_print_msg("Huge zeropage not enabled\n"); + log_test_result(KSFT_SKIP); return; } @@ -1516,13 +1596,15 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mmap_mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); return; } mmap_smem = mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mmap_smem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1531,9 +1613,15 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1)); ret = madvise(mem, pmdsize, MADV_HUGEPAGE); + if (ret != 0) { + ksft_perror("madvise()"); + log_test_result(KSFT_FAIL); + goto munmap; + } ret |= madvise(smem, pmdsize, MADV_HUGEPAGE); - if (ret) { - ksft_test_result_fail("MADV_HUGEPAGE failed\n"); + if (ret != 0) { + ksft_perror("madvise()"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1562,29 +1650,33 @@ static void run_with_memfd(non_anon_test_fn fn, const char *desc) char *mem, *smem, tmp; int fd; - ksft_print_msg("[RUN] %s ... with memfd\n", desc); + log_test_start("%s ... with memfd", desc); fd = memfd_create("test", 0); if (fd < 0) { - ksft_test_result_fail("memfd_create() failed\n"); + ksft_perror("memfd_create() failed"); + log_test_result(KSFT_FAIL); return; } /* File consists of a single page filled with zeroes. */ if (fallocate(fd, 0, 0, pagesize)) { - ksft_test_result_fail("fallocate() failed\n"); + ksft_perror("fallocate() failed"); + log_test_result(KSFT_FAIL); goto close; } /* Create a private mapping of the memfd. */ mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto close; } smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); if (smem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1607,35 +1699,40 @@ static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) FILE *file; int fd; - ksft_print_msg("[RUN] %s ... with tmpfile\n", desc); + log_test_start("%s ... with tmpfile", desc); file = tmpfile(); if (!file) { - ksft_test_result_fail("tmpfile() failed\n"); + ksft_perror("tmpfile() failed"); + log_test_result(KSFT_FAIL); return; } fd = fileno(file); if (fd < 0) { - ksft_test_result_skip("fileno() failed\n"); + ksft_perror("fileno() failed"); + log_test_result(KSFT_SKIP); return; } /* File consists of a single page filled with zeroes. */ if (fallocate(fd, 0, 0, pagesize)) { - ksft_test_result_fail("fallocate() failed\n"); + ksft_perror("fallocate() failed"); + log_test_result(KSFT_FAIL); goto close; } /* Create a private mapping of the memfd. 
*/ mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto close; } smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); if (smem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1659,20 +1756,22 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, char *mem, *smem, tmp; int fd; - ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc, + log_test_start("%s ... with memfd hugetlb (%zu kB)", desc, hugetlbsize / 1024); flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; fd = memfd_create("test", flags); if (fd < 0) { - ksft_test_result_skip("memfd_create() failed\n"); + ksft_perror("memfd_create() failed"); + log_test_result(KSFT_SKIP); return; } /* File consists of a single page filled with zeroes. */ if (fallocate(fd, 0, 0, hugetlbsize)) { - ksft_test_result_skip("need more free huge pages\n"); + ksft_perror("need more free huge pages"); + log_test_result(KSFT_SKIP); goto close; } @@ -1680,12 +1779,14 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { - ksft_test_result_skip("need more free huge pages\n"); + ksft_perror("need more free huge pages"); + log_test_result(KSFT_SKIP); goto close; } smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0); if (smem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1771,7 +1872,6 @@ static int tests_per_non_anon_test_case(void) int main(int argc, char **argv) { - int err; struct thp_settings default_settings; ksft_print_header(); @@ -1811,9 +1911,5 @@ int main(int argc, char **argv) thp_restore_settings(); } - err = ksft_get_fail_cnt(); - if (err) - ksft_exit_fail_msg("%d out of %d tests failed\n", - err, ksft_test_num()); - ksft_exit_pass(); + ksft_finished(); } diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c index 0cd9d236649d..93af3d3760f9 100644 --- a/tools/testing/selftests/mm/guard-regions.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -1453,8 +1453,21 @@ TEST_F(guard_regions, uffd) /* Set up uffd. 
*/ uffd = userfaultfd(0); - if (uffd == -1 && errno == EPERM) - ksft_exit_skip("No userfaultfd permissions, try running as root.\n"); + if (uffd == -1) { + switch (errno) { + case EPERM: + SKIP(return, "No userfaultfd permissions, try running as root."); + break; + case ENOSYS: + SKIP(return, "userfaultfd is not supported/not enabled."); + break; + default: + ksft_exit_fail_msg("userfaultfd failed with %s\n", + strerror(errno)); + break; + } + } + ASSERT_NE(uffd, -1); ASSERT_EQ(ioctl(uffd, UFFDIO_API, &api), 0); diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index d50ada0c7dbf..8a97ac5176a4 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -93,33 +93,48 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) __fsword_t fs_type = get_fs_type(fd); bool should_work; char *mem; + int result = KSFT_PASS; int ret; + if (fd < 0) { + result = KSFT_FAIL; + goto report; + } + if (ftruncate(fd, size)) { if (errno == ENOENT) { skip_test_dodgy_fs("ftruncate()"); } else { - ksft_test_result_fail("ftruncate() failed (%s)\n", strerror(errno)); + ksft_print_msg("ftruncate() failed (%s)\n", + strerror(errno)); + result = KSFT_FAIL; + goto report; } return; } if (fallocate(fd, 0, 0, size)) { - if (size == pagesize) - ksft_test_result_fail("fallocate() failed (%s)\n", strerror(errno)); - else - ksft_test_result_skip("need more free huge pages\n"); - return; + if (size == pagesize) { + ksft_print_msg("fallocate() failed (%s)\n", strerror(errno)); + result = KSFT_FAIL; + } else { + ksft_print_msg("need more free huge pages\n"); + result = KSFT_SKIP; + } + goto report; } mem = mmap(NULL, size, PROT_READ | PROT_WRITE, shared ? MAP_SHARED : MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { - if (size == pagesize || shared) - ksft_test_result_fail("mmap() failed (%s)\n", strerror(errno)); - else - ksft_test_result_skip("need more free huge pages\n"); - return; + if (size == pagesize || shared) { + ksft_print_msg("mmap() failed (%s)\n", strerror(errno)); + result = KSFT_FAIL; + } else { + ksft_print_msg("need more free huge pages\n"); + result = KSFT_SKIP; + } + goto report; } /* Fault in the page such that GUP-fast can pin it directly. */ @@ -134,7 +149,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) */ ret = mprotect(mem, size, PROT_READ); if (ret) { - ksft_test_result_fail("mprotect() failed (%s)\n", strerror(errno)); + ksft_print_msg("mprotect() failed (%s)\n", strerror(errno)); + result = KSFT_FAIL; goto munmap; } /* FALLTHROUGH */ @@ -147,12 +163,14 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) type == TEST_TYPE_RW_FAST; if (gup_fd < 0) { - ksft_test_result_skip("gup_test not available\n"); + ksft_print_msg("gup_test not available\n"); + result = KSFT_SKIP; break; } if (rw && shared && fs_is_unknown(fs_type)) { - ksft_test_result_skip("Unknown filesystem\n"); + ksft_print_msg("Unknown filesystem\n"); + result = KSFT_SKIP; return; } /* @@ -169,14 +187,19 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) args.flags |= rw ? 
PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0; ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); if (ret && errno == EINVAL) { - ksft_test_result_skip("PIN_LONGTERM_TEST_START failed (EINVAL)n"); + ksft_print_msg("PIN_LONGTERM_TEST_START failed (EINVAL)\n"); + result = KSFT_SKIP; break; } else if (ret && errno == EFAULT) { - ksft_test_result(!should_work, "Should have failed\n"); + if (should_work) + result = KSFT_FAIL; + else + result = KSFT_PASS; break; } else if (ret) { - ksft_test_result_fail("PIN_LONGTERM_TEST_START failed (%s)\n", - strerror(errno)); + ksft_print_msg("PIN_LONGTERM_TEST_START failed (%s)\n", + strerror(errno)); + result = KSFT_FAIL; break; } @@ -189,7 +212,10 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) * some previously unsupported filesystems, we might want to * perform some additional tests for possible data corruptions. */ - ksft_test_result(should_work, "Should have worked\n"); + if (should_work) + result = KSFT_PASS; + else + result = KSFT_FAIL; break; } #ifdef LOCAL_CONFIG_HAVE_LIBURING @@ -199,8 +225,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* io_uring always pins pages writable. */ if (shared && fs_is_unknown(fs_type)) { - ksft_test_result_skip("Unknown filesystem\n"); - return; + ksft_print_msg("Unknown filesystem\n"); + result = KSFT_SKIP; + goto report; } should_work = !shared || fs_supports_writable_longterm_pinning(fs_type); @@ -208,8 +235,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* Skip on errors, as we might just lack kernel support. */ ret = io_uring_queue_init(1, &ring, 0); if (ret < 0) { - ksft_test_result_skip("io_uring_queue_init() failed (%s)\n", - strerror(-ret)); + ksft_print_msg("io_uring_queue_init() failed (%s)\n", + strerror(-ret)); + result = KSFT_SKIP; break; } /* @@ -222,17 +250,28 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* Only new kernels return EFAULT. */ if (ret && (errno == ENOSPC || errno == EOPNOTSUPP || errno == EFAULT)) { - ksft_test_result(!should_work, "Should have failed (%s)\n", - strerror(errno)); + if (should_work) { + ksft_print_msg("Should have failed (%s)\n", + strerror(errno)); + result = KSFT_FAIL; + } else { + result = KSFT_PASS; + } } else if (ret) { /* * We might just lack support or have insufficient * MEMLOCK limits. */ - ksft_test_result_skip("io_uring_register_buffers() failed (%s)\n", - strerror(-ret)); + ksft_print_msg("io_uring_register_buffers() failed (%s)\n", + strerror(-ret)); + result = KSFT_SKIP; } else { - ksft_test_result(should_work, "Should have worked\n"); + if (should_work) { + result = KSFT_PASS; + } else { + ksft_print_msg("Should have worked\n"); + result = KSFT_FAIL; + } io_uring_unregister_buffers(&ring); } @@ -246,6 +285,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) munmap: munmap(mem, size); +report: + log_test_result(result); } typedef void (*test_fn)(int fd, size_t size); @@ -254,13 +295,11 @@ static void run_with_memfd(test_fn fn, const char *desc) { int fd; - ksft_print_msg("[RUN] %s ... with memfd\n", desc); + log_test_start("%s ...
with memfd", desc); fd = memfd_create("test", 0); - if (fd < 0) { - ksft_test_result_fail("memfd_create() failed (%s)\n", strerror(errno)); - return; - } + if (fd < 0) + ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); fn(fd, pagesize); close(fd); @@ -271,23 +310,23 @@ static void run_with_tmpfile(test_fn fn, const char *desc) FILE *file; int fd; - ksft_print_msg("[RUN] %s ... with tmpfile\n", desc); + log_test_start("%s ... with tmpfile", desc); file = tmpfile(); if (!file) { - ksft_test_result_fail("tmpfile() failed (%s)\n", strerror(errno)); - return; - } - - fd = fileno(file); - if (fd < 0) { - ksft_test_result_fail("fileno() failed (%s)\n", strerror(errno)); - goto close; + ksft_print_msg("tmpfile() failed (%s)\n", strerror(errno)); + fd = -1; + } else { + fd = fileno(file); + if (fd < 0) { + ksft_print_msg("fileno() failed (%s)\n", strerror(errno)); + } } fn(fd, pagesize); -close: - fclose(file); + + if (file) + fclose(file); } static void run_with_local_tmpfile(test_fn fn, const char *desc) @@ -295,22 +334,22 @@ static void run_with_local_tmpfile(test_fn fn, const char *desc) char filename[] = __FILE__"_tmpfile_XXXXXX"; int fd; - ksft_print_msg("[RUN] %s ... with local tmpfile\n", desc); + log_test_start("%s ... with local tmpfile", desc); fd = mkstemp(filename); - if (fd < 0) { - ksft_test_result_fail("mkstemp() failed (%s)\n", strerror(errno)); - return; - } + if (fd < 0) + ksft_print_msg("mkstemp() failed (%s)\n", strerror(errno)); if (unlink(filename)) { - ksft_test_result_fail("unlink() failed (%s)\n", strerror(errno)); - goto close; + ksft_print_msg("unlink() failed (%s)\n", strerror(errno)); + close(fd); + fd = -1; } fn(fd, pagesize); -close: - close(fd); + + if (fd >= 0) + close(fd); } static void run_with_memfd_hugetlb(test_fn fn, const char *desc, @@ -319,15 +358,14 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc, int flags = MFD_HUGETLB; int fd; - ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc, + log_test_start("%s ... 
with memfd hugetlb (%zu kB)", desc, hugetlbsize / 1024); flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; fd = memfd_create("test", flags); if (fd < 0) { - ksft_test_result_skip("memfd_create() failed (%s)\n", strerror(errno)); - return; + ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); } fn(fd, hugetlbsize); @@ -455,7 +493,7 @@ static int tests_per_test_case(void) int main(int argc, char **argv) { - int i, err; + int i; pagesize = getpagesize(); nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, @@ -469,9 +507,5 @@ int main(int argc, char **argv) for (i = 0; i < ARRAY_SIZE(test_cases); i++) run_test_case(&test_cases[i]); - err = ksft_get_fail_cnt(); - if (err) - ksft_exit_fail_msg("%d out of %d tests failed\n", - err, ksft_test_num()); - ksft_exit_pass(); + ksft_finished(); } diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c index ef7d911da13e..b6fabd5c27ed 100644 --- a/tools/testing/selftests/mm/madv_populate.c +++ b/tools/testing/selftests/mm/madv_populate.c @@ -172,12 +172,12 @@ static void test_populate_read(void) if (addr == MAP_FAILED) ksft_exit_fail_msg("mmap failed\n"); ksft_test_result(range_is_not_populated(addr, SIZE), - "range initially not populated\n"); + "read range initially not populated\n"); ret = madvise(addr, SIZE, MADV_POPULATE_READ); ksft_test_result(!ret, "MADV_POPULATE_READ\n"); ksft_test_result(range_is_populated(addr, SIZE), - "range is populated\n"); + "read range is populated\n"); munmap(addr, SIZE); } @@ -194,12 +194,12 @@ static void test_populate_write(void) if (addr == MAP_FAILED) ksft_exit_fail_msg("mmap failed\n"); ksft_test_result(range_is_not_populated(addr, SIZE), - "range initially not populated\n"); + "write range initially not populated\n"); ret = madvise(addr, SIZE, MADV_POPULATE_WRITE); ksft_test_result(!ret, "MADV_POPULATE_WRITE\n"); ksft_test_result(range_is_populated(addr, SIZE), - "range is populated\n"); + "write range is populated\n"); munmap(addr, SIZE); } @@ -247,19 +247,19 @@ static void test_softdirty(void) /* Clear any softdirty bits. */ clear_softdirty(); ksft_test_result(range_is_not_softdirty(addr, SIZE), - "range is not softdirty\n"); + "cleared range is not softdirty\n"); /* Populating READ should set softdirty. */ ret = madvise(addr, SIZE, MADV_POPULATE_READ); - ksft_test_result(!ret, "MADV_POPULATE_READ\n"); + ksft_test_result(!ret, "softdirty MADV_POPULATE_READ\n"); ksft_test_result(range_is_not_softdirty(addr, SIZE), - "range is not softdirty\n"); + "range is not softdirty after MADV_POPULATE_READ\n"); /* Populating WRITE should set softdirty. 
*/ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE); - ksft_test_result(!ret, "MADV_POPULATE_WRITE\n"); + ksft_test_result(!ret, "softdirty MADV_POPULATE_WRITE\n"); ksft_test_result(range_is_softdirty(addr, SIZE), - "range is softdirty\n"); + "range is softdirty after MADV_POPULATE_WRITE\n"); munmap(addr, SIZE); } diff --git a/tools/testing/selftests/mm/mlock2-tests.c b/tools/testing/selftests/mm/mlock2-tests.c index 7f0d50fa361d..3e90ff37e336 100644 --- a/tools/testing/selftests/mm/mlock2-tests.c +++ b/tools/testing/selftests/mm/mlock2-tests.c @@ -196,7 +196,7 @@ static void test_mlock_lock(void) ksft_exit_fail_msg("munlock(): %s\n", strerror(errno)); } - ksft_test_result(!unlock_lock_check(map), "%s: Locked\n", __func__); + ksft_test_result(!unlock_lock_check(map), "%s: Unlocked\n", __func__); munmap(map, 2 * page_size); } diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c index 8a9d19b6020c..866ac023baf5 100644 --- a/tools/testing/selftests/mm/pfnmap.c +++ b/tools/testing/selftests/mm/pfnmap.c @@ -12,6 +12,8 @@ #include <stdint.h> #include <unistd.h> #include <errno.h> +#include <stdio.h> +#include <ctype.h> #include <fcntl.h> #include <signal.h> #include <setjmp.h> @@ -43,14 +45,62 @@ static int test_read_access(char *addr, size_t size, size_t pagesize) /* Force a read that the compiler cannot optimize out. */ *((volatile char *)(addr + offs)); } - if (signal(SIGSEGV, signal_handler) == SIG_ERR) + if (signal(SIGSEGV, SIG_DFL) == SIG_ERR) return -EINVAL; return ret; } +static int find_ram_target(off_t *phys_addr, + unsigned long long pagesize) +{ + unsigned long long start, end; + char line[80], *end_ptr; + FILE *file; + + /* Search /proc/iomem for the first suitable "System RAM" range. */ + file = fopen("/proc/iomem", "r"); + if (!file) + return -errno; + + while (fgets(line, sizeof(line), file)) { + /* Ignore any child nodes. */ + if (!isalnum(line[0])) + continue; + + if (!strstr(line, "System RAM\n")) + continue; + + start = strtoull(line, &end_ptr, 16); + /* Skip over the "-" */ + end_ptr++; + /* Make end "exclusive". */ + end = strtoull(end_ptr, NULL, 16) + 1; + + /* Actual addresses are not exported */ + if (!start && !end) + break; + + /* We need full pages. */ + start = (start + pagesize - 1) & ~(pagesize - 1); + end &= ~(pagesize - 1); + + if (start != (off_t)start) + break; + + /* We need two pages. */ + if (end > start + 2 * pagesize) { + fclose(file); + *phys_addr = start; + return 0; + } + } + return -ENOENT; +} + FIXTURE(pfnmap) { + off_t phys_addr; size_t pagesize; int dev_mem_fd; char *addr1; @@ -63,14 +113,17 @@ FIXTURE_SETUP(pfnmap) { self->pagesize = getpagesize(); + /* We'll require two physical pages throughout our tests ... */ + if (find_ram_target(&self->phys_addr, self->pagesize)) + SKIP(return, "Cannot find ram target in '/proc/iomem'\n"); + self->dev_mem_fd = open("/dev/mem", O_RDONLY); if (self->dev_mem_fd < 0) SKIP(return, "Cannot open '/dev/mem'\n"); - /* We'll require the first two pages throughout our tests ...
*/ self->size1 = self->pagesize * 2; self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED, - self->dev_mem_fd, 0); + self->dev_mem_fd, self->phys_addr); if (self->addr1 == MAP_FAILED) SKIP(return, "Cannot mmap '/dev/mem'\n"); @@ -129,7 +182,7 @@ TEST_F(pfnmap, munmap_split) */ self->size2 = self->pagesize; self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED, - self->dev_mem_fd, 0); + self->dev_mem_fd, self->phys_addr); ASSERT_NE(self->addr2, MAP_FAILED); } diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index cd5174d735be..a41bc1234b37 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -127,7 +127,7 @@ void test_mmap(unsigned long size, unsigned flags) show(size); ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES, - "%s mmap %lu\n", __func__, size); + "%s mmap %lu %x\n", __func__, size, flags); if (munmap(map, size * NUM_PAGES)) ksft_exit_fail_msg("%s: unmap %s\n", __func__, strerror(errno)); @@ -165,7 +165,7 @@ void test_shmget(unsigned long size, unsigned flags) show(size); ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES, - "%s: mmap %lu\n", __func__, size); + "%s: mmap %lu %x\n", __func__, size, flags); if (shmdt(map)) ksft_exit_fail_msg("%s: shmdt: %s\n", __func__, strerror(errno)); } diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh index 1f92e8caceac..325de53966b6 100755 --- a/tools/testing/selftests/mm/va_high_addr_switch.sh +++ b/tools/testing/selftests/mm/va_high_addr_switch.sh @@ -7,23 +7,20 @@ # real test to check that the kernel is configured to support at least 5 # pagetable levels. -# 1 means the test failed -exitcode=1 - # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -fail() +skip() { echo "$1" - exit $exitcode + exit $ksft_skip } check_supported_x86_64() { local config="/proc/config.gz" [[ -f "${config}" ]] || config="/boot/config-$(uname -r)" - [[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot" + [[ -f "${config}" ]] || skip "Cannot find kernel config in /proc or /boot" # gzip -dcfq automatically handles both compressed and plaintext input. # See man 1 gzip under '-f'. 
@@ -33,11 +30,9 @@ check_supported_x86_64() else {print 1}; exit}' /proc/cpuinfo 2>/dev/null) if [[ "${pg_table_levels}" -lt 5 ]]; then - echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" - exit $ksft_skip + skip "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then - echo "$0: CPU does not have the necessary la57 flag to support page table level 5" - exit $ksft_skip + skip "$0: CPU does not have the necessary la57 flag to support page table level 5" fi } @@ -45,24 +40,21 @@ check_supported_ppc64() { local config="/proc/config.gz" [[ -f "${config}" ]] || config="/boot/config-$(uname -r)" - [[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot" + [[ -f "${config}" ]] || skip "Cannot find kernel config in /proc or /boot" local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2) if [[ "${pg_table_levels}" -lt 5 ]]; then - echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" - exit $ksft_skip + skip "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" fi local mmu_support=$(grep -m1 "mmu" /proc/cpuinfo | awk '{print $3}') if [[ "$mmu_support" != "radix" ]]; then - echo "$0: System does not use Radix MMU, required for 5-level paging" - exit $ksft_skip + skip "$0: System does not use Radix MMU, required for 5-level paging" fi local hugepages_total=$(awk '/HugePages_Total/ {print $2}' /proc/meminfo) if [[ "${hugepages_total}" -eq 0 ]]; then - echo "$0: HugePages are not enabled, required for some tests" - exit $ksft_skip + skip "$0: HugePages are not enabled, required for some tests" fi } diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 1357e2d6a7b6..61d7bf1f8c62 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -439,7 +439,7 @@ int open_procmap(pid_t pid, struct procmap_fd *procmap_out) sprintf(path, "/proc/%d/maps", pid); procmap_out->query.size = sizeof(procmap_out->query); procmap_out->fd = open(path, O_RDONLY); - if (procmap_out < 0) + if (procmap_out->fd < 0) ret = -errno; return ret; diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index 9211ba640d9c..adb5d294a220 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -3,6 +3,7 @@ #include <stdbool.h> #include <sys/mman.h> #include <err.h> +#include <stdarg.h> #include <strings.h> /* ffsl() */ #include <unistd.h> /* _SC_PAGESIZE */ #include "../kselftest.h" @@ -95,6 +96,25 @@ static inline int open_self_procmap(struct procmap_fd *procmap_out) return open_procmap(pid, procmap_out); } +/* These helpers need to be inline to match the kselftest.h idiom. */ +static char test_name[1024]; + +static inline void log_test_start(const char *name, ...) 
+{ + va_list args; + va_start(args, name); + + vsnprintf(test_name, sizeof(test_name), name, args); + ksft_print_msg("[RUN] %s\n", test_name); + + va_end(args); +} + +static inline void log_test_result(int result) +{ + ksft_test_result_report(result, "%s\n", test_name); +} + /* * On ppc64 this will only work with radix 2M hugepage size */ diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index f6e45e62da3a..441feb21aa5a 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -1461,4 +1461,9 @@ static inline int __call_mmap_prepare(struct file *file, return file->f_op->mmap_prepare(desc); } +static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) +{ + (void)vma; +} + #endif /* __MM_VMA_INTERNAL_H */
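
For context, a minimal sketch of how a converted selftest drives the new vm_util.h logging helpers end to end. It assumes only what kselftest.h already provides (ksft_set_plan(), ksft_perror(), ksft_finished()); the test name and body below are illustrative, not part of the series:

	#include "vm_util.h"

	/* Illustrative test body: returns a KSFT_* code instead of reporting. */
	static int do_dummy_test(size_t size)
	{
		char *mem;

		mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
			   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (mem == MAP_FAILED) {
			ksft_perror("mmap() failed");
			return KSFT_FAIL;
		}
		munmap(mem, size);
		return KSFT_PASS;
	}

	int main(void)
	{
		ksft_print_header();
		ksft_set_plan(1);

		/* Record the name once; report against it exactly once. */
		log_test_start("dummy test ... with base page");
		log_test_result(do_dummy_test(getpagesize()));

		/* Print the summary and exit PASS/FAIL/SKIP from the counters. */
		ksft_finished();
	}

Splitting the announcement (log_test_start()) from the report (log_test_result()) is what lets the deeply nested helpers in cow.c and gup_longterm.c bail out at any failure point with ksft_perror() plus a single KSFT_FAIL or KSFT_SKIP report against the recorded name.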
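
A note on the /proc/iomem format that the new find_ram_target() in pfnmap.c relies on (the line below is illustrative, not taken from a specific machine). Top-level resources are printed unindented, e.g.

	00001000-0009ffff : System RAM

so isalnum(line[0]) filters out the indented child nodes, strtoull() parses the inclusive start, end_ptr is stepped past the '-' to parse the inclusive end, and the +1 makes the end exclusive before both bounds are page-aligned. Without CAP_SYS_ADMIN the kernel zeroes the addresses, which the !start && !end check treats as "not exported", so the fixture falls back to SKIP.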