diff options
author | Christoph Hellwig <hch@lst.de> | 2020-09-25 06:19:19 +0200 |
---|---|---|
committer | Christoph Hellwig <hch@lst.de> | 2020-09-25 06:19:19 +0200 |
commit | 8c1c6c7588b2faf9dd45fb3cfb7497fd09bbfe7c (patch) | |
tree | 50a7ddcfd7e7b1a7e4fb656bb88a036dbcff4bd6 /mm/memory.c | |
parent | 38225f2ef2f4263fc65acae63fdd4e8489c9ffcf (diff) | |
parent | 171d4ff79f965c1f164705ef0aaea102a6ad238b (diff) |
Merge branch 'master' of https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux into dma-mapping-for-next
Pull in the latest 5.9 tree for the commit to revert the
V4L2_FLAG_MEMORY_NON_CONSISTENT uapi addition.
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 251 |
1 files changed, 125 insertions, 126 deletions
diff --git a/mm/memory.c b/mm/memory.c index 602f4283122f7..f3eb559759023 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -73,6 +73,7 @@ #include <linux/numa.h> #include <linux/perf_event.h> #include <linux/ptrace.h> +#include <linux/vmalloc.h> #include <trace/events/kmem.h> @@ -83,6 +84,7 @@ #include <asm/tlb.h> #include <asm/tlbflush.h> +#include "pgalloc-track.h" #include "internal.h" #if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST) @@ -693,85 +695,92 @@ out: * covered by this vma. */ -static inline unsigned long -copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, +static unsigned long +copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, unsigned long addr, int *rss) { unsigned long vm_flags = vma->vm_flags; pte_t pte = *src_pte; struct page *page; + swp_entry_t entry = pte_to_swp_entry(pte); + + if (likely(!non_swap_entry(entry))) { + if (swap_duplicate(entry) < 0) + return entry.val; + + /* make sure dst_mm is on swapoff's mmlist. */ + if (unlikely(list_empty(&dst_mm->mmlist))) { + spin_lock(&mmlist_lock); + if (list_empty(&dst_mm->mmlist)) + list_add(&dst_mm->mmlist, + &src_mm->mmlist); + spin_unlock(&mmlist_lock); + } + rss[MM_SWAPENTS]++; + } else if (is_migration_entry(entry)) { + page = migration_entry_to_page(entry); - /* pte contains position in swap or file, so copy. */ - if (unlikely(!pte_present(pte))) { - swp_entry_t entry = pte_to_swp_entry(pte); - - if (likely(!non_swap_entry(entry))) { - if (swap_duplicate(entry) < 0) - return entry.val; - - /* make sure dst_mm is on swapoff's mmlist. */ - if (unlikely(list_empty(&dst_mm->mmlist))) { - spin_lock(&mmlist_lock); - if (list_empty(&dst_mm->mmlist)) - list_add(&dst_mm->mmlist, - &src_mm->mmlist); - spin_unlock(&mmlist_lock); - } - rss[MM_SWAPENTS]++; - } else if (is_migration_entry(entry)) { - page = migration_entry_to_page(entry); - - rss[mm_counter(page)]++; - - if (is_write_migration_entry(entry) && - is_cow_mapping(vm_flags)) { - /* - * COW mappings require pages in both - * parent and child to be set to read. - */ - make_migration_entry_read(&entry); - pte = swp_entry_to_pte(entry); - if (pte_swp_soft_dirty(*src_pte)) - pte = pte_swp_mksoft_dirty(pte); - if (pte_swp_uffd_wp(*src_pte)) - pte = pte_swp_mkuffd_wp(pte); - set_pte_at(src_mm, addr, src_pte, pte); - } - } else if (is_device_private_entry(entry)) { - page = device_private_entry_to_page(entry); + rss[mm_counter(page)]++; + if (is_write_migration_entry(entry) && + is_cow_mapping(vm_flags)) { /* - * Update rss count even for unaddressable pages, as - * they should treated just like normal pages in this - * respect. - * - * We will likely want to have some new rss counters - * for unaddressable pages, at some point. But for now - * keep things as they are. + * COW mappings require pages in both + * parent and child to be set to read. */ - get_page(page); - rss[mm_counter(page)]++; - page_dup_rmap(page, false); + make_migration_entry_read(&entry); + pte = swp_entry_to_pte(entry); + if (pte_swp_soft_dirty(*src_pte)) + pte = pte_swp_mksoft_dirty(pte); + if (pte_swp_uffd_wp(*src_pte)) + pte = pte_swp_mkuffd_wp(pte); + set_pte_at(src_mm, addr, src_pte, pte); + } + } else if (is_device_private_entry(entry)) { + page = device_private_entry_to_page(entry); - /* - * We do not preserve soft-dirty information, because so - * far, checkpoint/restore is the only feature that - * requires that. And checkpoint/restore does not work - * when a device driver is involved (you cannot easily - * save and restore device driver state). - */ - if (is_write_device_private_entry(entry) && - is_cow_mapping(vm_flags)) { - make_device_private_entry_read(&entry); - pte = swp_entry_to_pte(entry); - if (pte_swp_uffd_wp(*src_pte)) - pte = pte_swp_mkuffd_wp(pte); - set_pte_at(src_mm, addr, src_pte, pte); - } + /* + * Update rss count even for unaddressable pages, as + * they should treated just like normal pages in this + * respect. + * + * We will likely want to have some new rss counters + * for unaddressable pages, at some point. But for now + * keep things as they are. + */ + get_page(page); + rss[mm_counter(page)]++; + page_dup_rmap(page, false); + + /* + * We do not preserve soft-dirty information, because so + * far, checkpoint/restore is the only feature that + * requires that. And checkpoint/restore does not work + * when a device driver is involved (you cannot easily + * save and restore device driver state). + */ + if (is_write_device_private_entry(entry) && + is_cow_mapping(vm_flags)) { + make_device_private_entry_read(&entry); + pte = swp_entry_to_pte(entry); + if (pte_swp_uffd_wp(*src_pte)) + pte = pte_swp_mkuffd_wp(pte); + set_pte_at(src_mm, addr, src_pte, pte); } - goto out_set_pte; } + set_pte_at(dst_mm, addr, dst_pte, pte); + return 0; +} + +static inline void +copy_present_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, + unsigned long addr, int *rss) +{ + unsigned long vm_flags = vma->vm_flags; + pte_t pte = *src_pte; + struct page *page; /* * If it's a COW mapping, write protect it both @@ -805,9 +814,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, rss[mm_counter(page)]++; } -out_set_pte: set_pte_at(dst_mm, addr, dst_pte, pte); - return 0; } static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, @@ -849,10 +856,17 @@ again: progress++; continue; } - entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, + if (unlikely(!pte_present(*src_pte))) { + entry.val = copy_nonpresent_pte(dst_mm, src_mm, + dst_pte, src_pte, vma, addr, rss); - if (entry.val) - break; + if (entry.val) + break; + progress += 8; + continue; + } + copy_present_pte(dst_mm, src_mm, dst_pte, src_pte, + vma, addr, rss); progress += 8; } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); @@ -2206,7 +2220,8 @@ EXPORT_SYMBOL(vm_iomap_memory); static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data, bool create) + pte_fn_t fn, void *data, bool create, + pgtbl_mod_mask *mask) { pte_t *pte; int err = 0; @@ -2214,7 +2229,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, if (create) { pte = (mm == &init_mm) ? - pte_alloc_kernel(pmd, addr) : + pte_alloc_kernel_track(pmd, addr, mask) : pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; @@ -2235,6 +2250,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, break; } } while (addr += PAGE_SIZE, addr != end); + *mask |= PGTBL_PTE_MODIFIED; arch_leave_lazy_mmu_mode(); @@ -2245,7 +2261,8 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data, bool create) + pte_fn_t fn, void *data, bool create, + pgtbl_mod_mask *mask) { pmd_t *pmd; unsigned long next; @@ -2254,7 +2271,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, BUG_ON(pud_huge(*pud)); if (create) { - pmd = pmd_alloc(mm, pud, addr); + pmd = pmd_alloc_track(mm, pud, addr, mask); if (!pmd) return -ENOMEM; } else { @@ -2264,7 +2281,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, next = pmd_addr_end(addr, end); if (create || !pmd_none_or_clear_bad(pmd)) { err = apply_to_pte_range(mm, pmd, addr, next, fn, data, - create); + create, mask); if (err) break; } @@ -2274,14 +2291,15 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data, bool create) + pte_fn_t fn, void *data, bool create, + pgtbl_mod_mask *mask) { pud_t *pud; unsigned long next; int err = 0; if (create) { - pud = pud_alloc(mm, p4d, addr); + pud = pud_alloc_track(mm, p4d, addr, mask); if (!pud) return -ENOMEM; } else { @@ -2291,7 +2309,7 @@ static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, next = pud_addr_end(addr, end); if (create || !pud_none_or_clear_bad(pud)) { err = apply_to_pmd_range(mm, pud, addr, next, fn, data, - create); + create, mask); if (err) break; } @@ -2301,14 +2319,15 @@ static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data, bool create) + pte_fn_t fn, void *data, bool create, + pgtbl_mod_mask *mask) { p4d_t *p4d; unsigned long next; int err = 0; if (create) { - p4d = p4d_alloc(mm, pgd, addr); + p4d = p4d_alloc_track(mm, pgd, addr, mask); if (!p4d) return -ENOMEM; } else { @@ -2318,7 +2337,7 @@ static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, next = p4d_addr_end(addr, end); if (create || !p4d_none_or_clear_bad(p4d)) { err = apply_to_pud_range(mm, p4d, addr, next, fn, data, - create); + create, mask); if (err) break; } @@ -2331,8 +2350,9 @@ static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr, void *data, bool create) { pgd_t *pgd; - unsigned long next; + unsigned long start = addr, next; unsigned long end = addr + size; + pgtbl_mod_mask mask = 0; int err = 0; if (WARN_ON(addr >= end)) @@ -2343,11 +2363,14 @@ static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr, next = pgd_addr_end(addr, end); if (!create && pgd_none_or_clear_bad(pgd)) continue; - err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create); + err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create, &mask); if (err) break; } while (pgd++, addr = next, addr != end); + if (mask & ARCH_PAGE_TABLE_SYNC_MASK) + arch_sync_kernel_mappings(start, start + size); + return err; } @@ -2622,6 +2645,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf) if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1)) update_mmu_cache(vma, vmf->address, vmf->pte); pte_unmap_unlock(vmf->pte, vmf->ptl); + count_vm_event(PGREUSE); } /* @@ -2927,50 +2951,25 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) * not dirty accountable. */ if (PageAnon(vmf->page)) { - int total_map_swapcount; - if (PageKsm(vmf->page) && (PageSwapCache(vmf->page) || - page_count(vmf->page) != 1)) + struct page *page = vmf->page; + + /* PageKsm() doesn't necessarily raise the page refcount */ + if (PageKsm(page) || page_count(page) != 1) + goto copy; + if (!trylock_page(page)) + goto copy; + if (PageKsm(page) || page_mapcount(page) != 1 || page_count(page) != 1) { + unlock_page(page); goto copy; - if (!trylock_page(vmf->page)) { - get_page(vmf->page); - pte_unmap_unlock(vmf->pte, vmf->ptl); - lock_page(vmf->page); - vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, - vmf->address, &vmf->ptl); - if (!pte_same(*vmf->pte, vmf->orig_pte)) { - update_mmu_tlb(vma, vmf->address, vmf->pte); - unlock_page(vmf->page); - pte_unmap_unlock(vmf->pte, vmf->ptl); - put_page(vmf->page); - return 0; - } - put_page(vmf->page); - } - if (PageKsm(vmf->page)) { - bool reused = reuse_ksm_page(vmf->page, vmf->vma, - vmf->address); - unlock_page(vmf->page); - if (!reused) - goto copy; - wp_page_reuse(vmf); - return VM_FAULT_WRITE; - } - if (reuse_swap_page(vmf->page, &total_map_swapcount)) { - if (total_map_swapcount == 1) { - /* - * The page is all ours. Move it to - * our anon_vma so the rmap code will - * not search our parent or siblings. - * Protected against the rmap code by - * the page lock. - */ - page_move_anon_rmap(vmf->page, vma); - } - unlock_page(vmf->page); - wp_page_reuse(vmf); - return VM_FAULT_WRITE; } - unlock_page(vmf->page); + /* + * Ok, we've got the only map reference, and the only + * page count reference, and the page is locked, + * it's dark out, and we're wearing sunglasses. Hit it. + */ + unlock_page(page); + wp_page_reuse(vmf); + return VM_FAULT_WRITE; } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED))) { return wp_page_shared(vmf); |