author	Matthew Wilcox (Oracle) <willy@infradead.org>	2025-04-02 19:16:55 +0100
committer	Andrew Morton <akpm@linux-foundation.org>	2025-05-11 17:48:02 -0700
commit	4e92030c05dc351e62955ac1aba7233157f49b78 (patch)
tree	19da519d98c001f2fbfd28533d8a191c15a5193f
parent	f83f362d40ccceb647f7d80eb92206733d76a36b (diff)
mm: set the pte dirty if the folio is already dirty
Patch series "Add folio_mk_pte()", v2.

Today if you have a folio and want to create a PTE that points to the first page in it, you have to convert from a folio to a page. That's zero-cost today but will be more expensive in the future.

I didn't want to add folio_mk_pte() to each architecture, and I didn't want to lose any optimisations that architectures have from their own implementation of mk_pte(). Fortunately, most architectures have by now turned their mk_pte() into a fairly bland variant of pfn_pte(), but s390 has a special optimisation that needs to be moved into generic code in the first patch.

At the end of this patch set, we have mk_pte() and folio_mk_pte() in mm.h and each architecture only has to implement pfn_pte(). We've also eliminated mk_huge_pte(), mk_huge_pmd() and mk_pmd().

This patch (of 11):

If the first access to a folio is a read that is then followed by a write, we can save a page fault. s390 implemented this in their mk_pte() in commit abf09bed3cce ("s390/mm: implement software dirty bits"), but other architectures can also benefit from this.

Link: https://lkml.kernel.org/r/20250402181709.2386022-1-willy@infradead.org
Link: https://lkml.kernel.org/r/20250402181709.2386022-2-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>	# for s390
Cc: Zi Yan <ziy@nvidia.com>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Richard Weinberger <richard@nod.at>
Cc: <x86@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
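For illustration only, a minimal sketch of the end state the cover letter describes (a generic mk_pte()/folio_mk_pte() in mm.h built solely on pfn_pte()); the bodies below are an approximation of that description, not necessarily the series' final code:

/* Sketch: generic helpers layered on the per-architecture pfn_pte(). */
static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
{
	return pfn_pte(page_to_pfn(page), pgprot);
}

static inline pte_t folio_mk_pte(struct folio *folio, pgprot_t pgprot)
{
	return pfn_pte(folio_pfn(folio), pgprot);
}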
-rw-r--r--	arch/s390/include/asm/pgtable.h	7
-rw-r--r--	mm/memory.c	2
2 files changed, 3 insertions(+), 6 deletions(-)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index f8a6b54986ec..49833002232b 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1450,12 +1450,7 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
 
 static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
 {
-	unsigned long physpage = page_to_phys(page);
-	pte_t __pte = mk_pte_phys(physpage, pgprot);
-
-	if (pte_write(__pte) && PageDirty(page))
-		__pte = pte_mkdirty(__pte);
-	return __pte;
+	return mk_pte_phys(page_to_phys(page), pgprot);
 }
 
 #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
diff --git a/mm/memory.c b/mm/memory.c
index 49199410805c..da4778fb3a38 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5245,6 +5245,8 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio,
 
 	if (write)
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+	else if (pte_write(entry) && folio_test_dirty(folio))
+		entry = pte_mkdirty(entry);
 	if (unlikely(vmf_orig_pte_uffd_wp(vmf)))
 		entry = pte_mkuffd_wp(entry);
 	/* copy-on-write page */
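For context, an illustrative userspace sequence that benefits from the change, assuming a MAP_SHARED mapping whose PTEs can be installed writable on a read fault (for example shmem/tmpfs) and an architecture that takes a fault to set a software dirty bit; the file path and sizes here are hypothetical:

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical tmpfs-backed file; the write() leaves its folio dirty. */
	int fd = open("/dev/shm/example", O_RDWR | O_CREAT, 0600);
	ftruncate(fd, 4096);
	write(fd, "x", 1);

	char *map = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	char c = map[0];	/* read fault: PTE installed writable and, with
				 * this patch, already dirty because the folio
				 * is dirty */
	map[0] = c;		/* on architectures that fault to set a software
				 * dirty bit, this write no longer needs a second
				 * fault (s390 already avoided it in its mk_pte()) */

	munmap(map, 4096);
	close(fd);
	return 0;
}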