mm: add batched versions of ptep_modify_prot_start/commit

Batch ptep_modify_prot_start/commit in preparation for optimizing
mprotect, implementing them as a simple loop over the corresponding single
pte helpers.  Architectures may override these helpers.

Link: https://lkml.kernel.org/r/20250718090244.21092-4-dev.jain@arm.com
Signed-off-by: Dev Jain <dev.jain@arm.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jann Horn <jannh@google.com>
Cc: Joey Gouly <joey.gouly@arm.com>
Cc: Kevin Brodsky <kevin.brodsky@arm.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Shi <yang@os.amperecomputing.com>
Cc: Yicong Yang <yangyicong@hisilicon.com>
Cc: Zhenhua Huang <quic_zhenhuah@quicinc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Dev Jain
2025-07-18 14:32:40 +05:30
committed by Andrew Morton
parent 1d40f4e3d9
commit 0aa3657df3
2 changed files with 85 additions and 3 deletions

View File

@@ -1331,7 +1331,9 @@ static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
/*
* Commit an update to a pte, leaving any hardware-controlled bits in
* the PTE unmodified.
* the PTE unmodified. The pte returned from ptep_modify_prot_start() may
* additionally have young and/or dirty bits set where previously they were not,
* so the updated pte may have these additional changes.
*/
static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
unsigned long addr,
@@ -1340,6 +1342,86 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
__ptep_modify_prot_commit(vma, addr, ptep, pte);
}
#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
/**
* modify_prot_start_ptes - Start a pte protection read-modify-write transaction
* over a batch of ptes, which protects against asynchronous hardware
* modifications to the ptes. The intention is not to prevent the hardware from
* making pte updates, but to prevent any updates it may make from being lost.
* Please see the comment above ptep_modify_prot_start() for full description.
*
* @vma: The virtual memory area the pages are mapped into.
* @addr: Address the first page is mapped at.
* @ptep: Page table pointer for the first entry.
* @nr: Number of entries.
*
* May be overridden by the architecture; otherwise, implemented as a simple
* loop over ptep_modify_prot_start(), collecting the a/d bits from each pte
* in the batch.
*
* Note that, besides the PFN, only the young and dirty bits may differ between
* the PTEs in the batch.
*
* Context: The caller holds the page table lock. The PTEs map consecutive
* pages that belong to the same folio. All other PTE bits must be identical for
* all PTEs in the batch except for young and dirty bits. The PTEs are all in
* the same PMD.
*/
#ifndef modify_prot_start_ptes
static inline pte_t modify_prot_start_ptes(struct vm_area_struct *vma,
		unsigned long addr, pte_t *ptep, unsigned int nr)
{
	/* The first entry seeds the value that is handed back to the caller. */
	pte_t batch_pte = ptep_modify_prot_start(vma, addr, ptep);

	/* Visit the remaining entries, stepping one page and one pte at a time. */
	for (--nr; nr != 0; --nr) {
		pte_t cur_pte;

		ptep++;
		addr += PAGE_SIZE;
		cur_pte = ptep_modify_prot_start(vma, addr, ptep);

		/* Fold this entry's dirty/young state into the batch result. */
		if (pte_dirty(cur_pte))
			batch_pte = pte_mkdirty(batch_pte);
		if (pte_young(cur_pte))
			batch_pte = pte_mkyoung(batch_pte);
	}

	return batch_pte;
}
#endif
/**
* modify_prot_commit_ptes - Commit an update to a batch of ptes, leaving any
* hardware-controlled bits in the PTE unmodified.
*
* @vma: The virtual memory area the pages are mapped into.
* @addr: Address the first page is mapped at.
* @ptep: Page table pointer for the first entry.
* @old_pte: Old page table entry (for the first entry) which is now cleared.
* @pte: New page table entry to be set.
* @nr: Number of entries.
*
* May be overridden by the architecture; otherwise, implemented as a simple
* loop over ptep_modify_prot_commit().
*
* Context: The caller holds the page table lock. The PTEs are all in the same
* PMD. On exit, the set ptes in the batch map the same folio. The ptes set by
* ptep_modify_prot_start() may additionally have young and/or dirty bits set
* where previously they were not, so the updated ptes may have these
* additional changes.
*/
#ifndef modify_prot_commit_ptes
static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long addr,
		pte_t *ptep, pte_t old_pte, pte_t pte, unsigned int nr)
{
	/*
	 * Index type matches @nr: comparing a signed int against an unsigned
	 * count is a sign-compare hazard and would overflow for large @nr.
	 */
	unsigned int i;

	/* Commit one entry per iteration, moving to the next pte and page. */
	for (i = 0; i < nr; ++i, ++ptep, addr += PAGE_SIZE) {
		ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte);

		/* Advance PFN only, set same prot */
		old_pte = pte_next_pfn(old_pte);
		pte = pte_next_pfn(pte);
	}
}
#endif
#endif /* CONFIG_MMU */
/*

View File

@@ -204,7 +204,7 @@ static long change_pte_range(struct mmu_gather *tlb,
}
}
oldpte = ptep_modify_prot_start(vma, addr, pte);
oldpte = modify_prot_start_ptes(vma, addr, pte, nr_ptes);
ptent = pte_modify(oldpte, newprot);
if (uffd_wp)
@@ -230,7 +230,7 @@ static long change_pte_range(struct mmu_gather *tlb,
can_change_pte_writable(vma, addr, ptent))
ptent = pte_mkwrite(ptent, vma);
ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
modify_prot_commit_ptes(vma, addr, pte, oldpte, ptent, nr_ptes);
if (pte_needs_flush(oldpte, ptent))
tlb_flush_pte_range(tlb, addr, PAGE_SIZE);
pages++;