-rw-r--r--	mm/internal.h	|  43
-rw-r--r--	mm/mmap.c	|   5
-rw-r--r--	mm/mremap.c	| 172
3 files changed, 168 insertions, 52 deletions
diff --git a/mm/internal.h b/mm/internal.h
index aa30282a774ae..06f816cdb43bd 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -25,6 +25,44 @@ struct folio_batch;
 
 /*
+ * Maintains state across a page table move. The operation assumes both source
+ * and destination VMAs already exist and are specified by the user.
+ *
+ * Partial moves are permitted, but the old and new ranges must both reside
+ * within a VMA.
+ *
+ * mmap lock must be held in write and VMA write locks must be held on any VMA
+ * that is visible.
+ *
+ * Use the PAGETABLE_MOVE() macro to initialise this struct.
+ *
+ * NOTE: The page table move is affected by reading from [old_addr, old_end),
+ * and old_addr may be updated for better page table alignment, so len_in
+ * represents the length of the range being copied as specified by the user.
+ */
+struct pagetable_move_control {
+	struct vm_area_struct *old; /* Source VMA. */
+	struct vm_area_struct *new; /* Destination VMA. */
+	unsigned long old_addr; /* Address from which the move begins. */
+	unsigned long old_end; /* Exclusive address at which old range ends. */
+	unsigned long new_addr; /* Address to move page tables to. */
+	unsigned long len_in; /* Bytes to remap specified by user. */
+
+	bool need_rmap_locks; /* Do rmap locks need to be taken? */
+	bool for_stack; /* Is this an early temp stack being moved? */
+};
+
+#define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_)	\
+	struct pagetable_move_control name = {				\
+		.old = old_,						\
+		.new = new_,						\
+		.old_addr = old_addr_,					\
+		.old_end = (old_addr_) + (len_),			\
+		.new_addr = new_addr_,					\
+		.len_in = len_,						\
+	}
+
+/*
  * The set of flags that only affect watermark checking and reclaim
  * behaviour. This is used by the MM to obey the caller constraints
  * about IO, FS and watermark checking while ignoring placement
@@ -1527,10 +1565,7 @@ extern struct list_lru shadow_nodes;
 	} while (0)
 
 /* mremap.c */
-unsigned long move_page_tables(struct vm_area_struct *vma,
-	unsigned long old_addr, struct vm_area_struct *new_vma,
-	unsigned long new_addr, unsigned long len,
-	bool need_rmap_locks, bool for_stack);
+unsigned long move_page_tables(struct pagetable_move_control *pmc);
 
 #ifdef CONFIG_UNACCEPTED_MEMORY
 void accept_page(struct page *page);
diff --git a/mm/mmap.c b/mm/mmap.c
index 15d6cd7cc845f..efcc4ca7500d8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1694,6 +1694,7 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift)
 	VMG_STATE(vmg, mm, &vmi, new_start, old_end, 0, vma->vm_pgoff);
 	struct vm_area_struct *next;
 	struct mmu_gather tlb;
+	PAGETABLE_MOVE(pmc, vma, vma, old_start, new_start, length);
 
 	BUG_ON(new_start > new_end);
 
@@ -1716,8 +1717,8 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift)
 	 * move the page tables downwards, on failure we rely on
 	 * process cleanup to remove whatever mess we made.
 	 */
-	if (length != move_page_tables(vma, old_start,
-				       vma, new_start, length, false, true))
+	pmc.for_stack = true;
+	if (length != move_page_tables(&pmc))
 		return -ENOMEM;
 
 	tlb_gather_mmu(&tlb, mm);
diff --git a/mm/mremap.c b/mm/mremap.c
index 7dc058d5d5e2c..3a2ac167e876d 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -580,8 +580,9 @@ static bool move_pgt_entry(enum pgt_entry entry, struct vm_area_struct *vma,
  * the VMA that is created to span the source and destination of the move,
  * so we make an exception for it.
  */
-static bool can_align_down(struct vm_area_struct *vma, unsigned long addr_to_align,
-			    unsigned long mask, bool for_stack)
+static bool can_align_down(struct pagetable_move_control *pmc,
+			   struct vm_area_struct *vma, unsigned long addr_to_align,
+			   unsigned long mask)
 {
 	unsigned long addr_masked = addr_to_align & mask;
 
@@ -590,11 +591,11 @@ static bool can_align_down(struct vm_area_struct *vma, unsigned long addr_to_ali
 	 * of the corresponding VMA, we can't align down or we will destroy part
 	 * of the current mapping.
 	 */
-	if (!for_stack && vma->vm_start != addr_to_align)
+	if (!pmc->for_stack && vma->vm_start != addr_to_align)
 		return false;
 
 	/* In the stack case we explicitly permit in-VMA alignment. */
-	if (for_stack && addr_masked >= vma->vm_start)
+	if (pmc->for_stack && addr_masked >= vma->vm_start)
 		return true;
 
 	/*
@@ -604,54 +605,131 @@ static bool can_align_down(struct vm_area_struct *vma, unsigned long addr_to_ali
 	return find_vma_intersection(vma->vm_mm, addr_masked, vma->vm_start) == NULL;
 }
 
-/* Opportunistically realign to specified boundary for faster copy. */
-static void try_realign_addr(unsigned long *old_addr, struct vm_area_struct *old_vma,
-			     unsigned long *new_addr, struct vm_area_struct *new_vma,
-			     unsigned long mask, bool for_stack)
+/*
+ * Determine if are in fact able to realign for efficiency to a higher page
+ * table boundary.
+ */
+static bool can_realign_addr(struct pagetable_move_control *pmc,
+			     unsigned long pagetable_mask)
 {
+	unsigned long align_mask = ~pagetable_mask;
+	unsigned long old_align = pmc->old_addr & align_mask;
+	unsigned long new_align = pmc->new_addr & align_mask;
+	unsigned long pagetable_size = align_mask + 1;
+	unsigned long old_align_next = pagetable_size - old_align;
+
+	/*
+	 * We don't want to have to go hunting for VMAs from the end of the old
+	 * VMA to the next page table boundary, also we want to make sure the
+	 * operation is wortwhile.
+	 *
+	 * So ensure that we only perform this realignment if the end of the
+	 * range being copied reaches or crosses the page table boundary.
+	 *
+	 * boundary                        boundary
+	 *    .<- old_align ->             .
+	 *    .  |----------------.-----------|
+	 *    .  |          vma   .           |
+	 *    .  |----------------.-----------|
+	 *    .  <----------------.----------->
+	 *    .          len_in   .
+	 *    <------------------------------->
+	 *    .    pagetable_size .           .
+	 *    .  <---------------->           .
+	 *    .    old_align_next .           .
+	 */
+	if (pmc->len_in < old_align_next)
+		return false;
+
 	/* Skip if the addresses are already aligned. */
-	if ((*old_addr & ~mask) == 0)
-		return;
+	if (old_align == 0)
+		return false;
 
 	/* Only realign if the new and old addresses are mutually aligned. */
-	if ((*old_addr & ~mask) != (*new_addr & ~mask))
-		return;
+	if (old_align != new_align)
+		return false;
 
 	/* Ensure realignment doesn't cause overlap with existing mappings. */
-	if (!can_align_down(old_vma, *old_addr, mask, for_stack) ||
-	    !can_align_down(new_vma, *new_addr, mask, for_stack))
+	if (!can_align_down(pmc, pmc->old, pmc->old_addr, pagetable_mask) ||
+	    !can_align_down(pmc, pmc->new, pmc->new_addr, pagetable_mask))
+		return false;
+
+	return true;
+}
+
+/*
+ * Opportunistically realign to specified boundary for faster copy.
+ *
+ * Consider an mremap() of a VMA with page table boundaries as below, and no
+ * preceding VMAs from the lower page table boundary to the start of the VMA,
+ * with the end of the range reaching or crossing the page table boundary.
+ *
+ *   boundary                        boundary
+ * .          |----------------.-----------|
+ * .          |          vma   .           |
+ * .          |----------------.-----------|
+ * .          pmc->old_addr    .           pmc->old_end
+ * .          <---------------------------->
+ * .                  move these page tables
+ *
+ * If we proceed with moving page tables in this scenario, we will have a lot of
+ * work to do traversing old page tables and establishing new ones in the
+ * destination across multiple lower level page tables.
+ *
+ * The idea here is simply to align pmc->old_addr, pmc->new_addr down to the
+ * page table boundary, so we can simply copy a single page table entry for the
+ * aligned portion of the VMA instead:
+ *
+ *   boundary                        boundary
+ * .          |----------------.-----------|
+ * .          |          vma   .           |
+ * .          |----------------.-----------|
+ * pmc->old_addr               .           pmc->old_end
+ * <------------------------------------------->
+ * .          move these page tables
+ */
+static void try_realign_addr(struct pagetable_move_control *pmc,
+			     unsigned long pagetable_mask)
+{
+
+	if (!can_realign_addr(pmc, pagetable_mask))
 		return;
 
-	*old_addr = *old_addr & mask;
-	*new_addr = *new_addr & mask;
+	/*
+	 * Simply align to page table boundaries. Note that we do NOT update the
+	 * pmc->old_end value, and since the move_page_tables() operation spans
+	 * from [old_addr, old_end) (offsetting new_addr as it is performed),
+	 * this simply changes the start of the copy, not the end.
+	 */
+	pmc->old_addr &= pagetable_mask;
+	pmc->new_addr &= pagetable_mask;
 }
 
-unsigned long move_page_tables(struct vm_area_struct *vma,
-	unsigned long old_addr, struct vm_area_struct *new_vma,
-	unsigned long new_addr, unsigned long len,
-	bool need_rmap_locks, bool for_stack)
+unsigned long move_page_tables(struct pagetable_move_control *pmc)
 {
 	unsigned long extent, old_end;
 	struct mmu_notifier_range range;
 	pmd_t *old_pmd, *new_pmd;
 	pud_t *old_pud, *new_pud;
+	unsigned long old_addr, new_addr;
+	struct vm_area_struct *vma = pmc->old;
 
-	if (!len)
+	if (!pmc->len_in)
 		return 0;
 
-	old_end = old_addr + len;
-
 	if (is_vm_hugetlb_page(vma))
-		return move_hugetlb_page_tables(vma, new_vma, old_addr,
-						new_addr, len);
+		return move_hugetlb_page_tables(pmc->old, pmc->new, pmc->old_addr,
						pmc->new_addr, pmc->len_in);
 
+	old_end = pmc->old_end;
 	/*
 	 * If possible, realign addresses to PMD boundary for faster copy.
 	 * Only realign if the mremap copying hits a PMD boundary.
 	 */
-	if (len >= PMD_SIZE - (old_addr & ~PMD_MASK))
-		try_realign_addr(&old_addr, vma, &new_addr, new_vma, PMD_MASK,
-				 for_stack);
+	try_realign_addr(pmc, PMD_MASK);
+	/* These may have been changed. */
+	old_addr = pmc->old_addr;
+	new_addr = pmc->new_addr;
 
 	flush_cache_range(vma, old_addr, old_end);
 	mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm,
@@ -675,12 +753,11 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 		if (pud_trans_huge(*old_pud) || pud_devmap(*old_pud)) {
 			if (extent == HPAGE_PUD_SIZE) {
 				move_pgt_entry(HPAGE_PUD, vma, old_addr, new_addr,
-					       old_pud, new_pud, need_rmap_locks);
+					       old_pud, new_pud, pmc->need_rmap_locks);
 				/* We ignore and continue on error? */
 				continue;
 			}
 		} else if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) {
-
 			if (move_pgt_entry(NORMAL_PUD, vma, old_addr, new_addr,
 					   old_pud, new_pud, true))
 				continue;
@@ -698,7 +775,7 @@ again:
 			   pmd_devmap(*old_pmd)) {
 			if (extent == HPAGE_PMD_SIZE &&
 			    move_pgt_entry(HPAGE_PMD, vma, old_addr, new_addr,
-					   old_pmd, new_pmd, need_rmap_locks))
+					   old_pmd, new_pmd, pmc->need_rmap_locks))
 				continue;
 			split_huge_pmd(vma, old_pmd, old_addr);
 		} else if (IS_ENABLED(CONFIG_HAVE_MOVE_PMD) &&
@@ -713,10 +790,10 @@ again:
 		}
 		if (pmd_none(*old_pmd))
 			continue;
-		if (pte_alloc(new_vma->vm_mm, new_pmd))
+		if (pte_alloc(pmc->new->vm_mm, new_pmd))
 			break;
 		if (move_ptes(vma, old_pmd, old_addr, old_addr + extent,
-			      new_vma, new_pmd, new_addr, need_rmap_locks) < 0)
+			      pmc->new, new_pmd, new_addr, pmc->need_rmap_locks) < 0)
 			goto again;
 	}
 
@@ -726,10 +803,10 @@ again:
 	 * Prevent negative return values when {old,new}_addr was realigned
 	 * but we broke out of the above loop for the first PMD itself.
 	 */
-	if (old_addr < old_end - len)
+	if (old_addr < old_end - pmc->len_in)
 		return 0;
 
-	return len + old_addr - old_end;	/* how much done */
+	return pmc->len_in + old_addr - old_end;	/* how much done */
 }
 
 /* Set vrm->delta to the difference in VMA size specified by user. */
@@ -1040,37 +1117,40 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm,
 	unsigned long internal_pgoff = internal_offset >> PAGE_SHIFT;
 	unsigned long new_pgoff = vrm->vma->vm_pgoff + internal_pgoff;
 	unsigned long moved_len;
-	bool need_rmap_locks;
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma = vrm->vma;
 	struct vm_area_struct *new_vma;
 	int err = 0;
+	PAGETABLE_MOVE(pmc, NULL, NULL, vrm->addr, vrm->new_addr, vrm->old_len);
 
-	new_vma = copy_vma(&vrm->vma, vrm->new_addr, vrm->new_len, new_pgoff,
-			   &need_rmap_locks);
+	new_vma = copy_vma(&vma, vrm->new_addr, vrm->new_len, new_pgoff,
+			   &pmc.need_rmap_locks);
 	if (!new_vma) {
 		vrm_uncharge(vrm);
 		*new_vma_ptr = NULL;
 		return -ENOMEM;
 	}
-	vma = vrm->vma;
+	vrm->vma = vma;
+	pmc.old = vma;
+	pmc.new = new_vma;
 
-	moved_len = move_page_tables(vma, vrm->addr, new_vma,
-				     vrm->new_addr, vrm->old_len,
-				     need_rmap_locks, /* for_stack= */false);
+	moved_len = move_page_tables(&pmc);
 	if (moved_len < vrm->old_len)
 		err = -ENOMEM;
 	else if (vma->vm_ops && vma->vm_ops->mremap)
 		err = vma->vm_ops->mremap(new_vma);
 
 	if (unlikely(err)) {
+		PAGETABLE_MOVE(pmc_revert, new_vma, vma, vrm->new_addr,
+			       vrm->addr, moved_len);
+
 		/*
 		 * On error, move entries back from new area to old,
 		 * which will succeed since page tables still there,
 		 * and then proceed to unmap new area instead of old.
		 */
-		move_page_tables(new_vma, vrm->new_addr, vma, vrm->addr,
-				 moved_len, /* need_rmap_locks = */true,
-				 /* for_stack= */false);
+		pmc_revert.need_rmap_locks = true;
+		move_page_tables(&pmc_revert);
+
 		vrm->vma = new_vma;
 		vrm->old_len = vrm->new_len;
 		vrm->addr = vrm->new_addr;
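
For context, the following is a minimal, standalone userspace sketch -- not kernel code and not part of the commit above -- of how the pagetable_move_control / PAGETABLE_MOVE() shape and the realignment arithmetic of can_realign_addr() / try_realign_addr() fit together. PMD_SIZE_SKETCH and PMD_MASK_SKETCH are hypothetical stand-ins for the architecture constants, the VMA fields and the can_align_down() overlap checks are omitted because they need a real mm, and main() stands in for a caller such as relocate_vma_down() or copy_vma_and_data().

/*
 * Sketch only: models the alignment decision, not the page table walk.
 * PMD_SIZE_SKETCH is a hypothetical 2 MiB span (x86-64 with 4K pages).
 */
#include <stdbool.h>
#include <stdio.h>

#define PMD_SIZE_SKETCH	(1UL << 21)
#define PMD_MASK_SKETCH	(~(PMD_SIZE_SKETCH - 1))

/* Mirrors the address/length fields of struct pagetable_move_control. */
struct pagetable_move_control {
	unsigned long old_addr;	/* Address from which the move begins. */
	unsigned long old_end;	/* Exclusive address at which old range ends. */
	unsigned long new_addr;	/* Address to move page tables to. */
	unsigned long len_in;	/* Bytes to remap specified by user. */
};

/* Same shape as the kernel macro, minus the VMA fields. */
#define PAGETABLE_MOVE(name, old_addr_, new_addr_, len_)	\
	struct pagetable_move_control name = {			\
		.old_addr = old_addr_,				\
		.old_end = (old_addr_) + (len_),		\
		.new_addr = new_addr_,				\
		.len_in = len_,					\
	}

/*
 * The arithmetic half of can_realign_addr(): realign only if the copy
 * reaches the next page table boundary, the start is not already aligned,
 * and old/new addresses are mutually aligned within a page table. The
 * can_align_down() VMA-overlap checks are deliberately left out here.
 */
static bool can_realign_addr(const struct pagetable_move_control *pmc,
			     unsigned long pagetable_mask)
{
	unsigned long align_mask = ~pagetable_mask;
	unsigned long old_align = pmc->old_addr & align_mask;
	unsigned long new_align = pmc->new_addr & align_mask;
	unsigned long pagetable_size = align_mask + 1;
	unsigned long old_align_next = pagetable_size - old_align;

	if (pmc->len_in < old_align_next)
		return false;
	if (old_align == 0)
		return false;
	return old_align == new_align;
}

static void try_realign_addr(struct pagetable_move_control *pmc,
			     unsigned long pagetable_mask)
{
	if (!can_realign_addr(pmc, pagetable_mask))
		return;
	/* old_end is deliberately untouched; only the start moves down. */
	pmc->old_addr &= pagetable_mask;
	pmc->new_addr &= pagetable_mask;
}

int main(void)
{
	/* Both addresses sit 1 MiB into their PMDs; the copy spans 4 PMDs. */
	PAGETABLE_MOVE(pmc, 0x100000UL, 0x900000UL, 4 * PMD_SIZE_SKETCH);

	try_realign_addr(&pmc, PMD_MASK_SKETCH);
	printf("old_addr=%#lx new_addr=%#lx old_end=%#lx len_in=%#lx\n",
	       pmc.old_addr, pmc.new_addr, pmc.old_end, pmc.len_in);
	return 0;
}

Running the sketch prints old_addr and new_addr aligned down to the PMD boundary while old_end and len_in stay as the user specified them, which is exactly why move_page_tables() above returns pmc->len_in + old_addr - old_end rather than recomputing the length.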