diff options
Diffstat (limited to 'include/linux/pgtable.h')
-rw-r--r-- | include/linux/pgtable.h | 119 |
1 files changed, 110 insertions, 9 deletions
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 85fc7554cd52..18019f037bae 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -50,6 +50,8 @@ #define pmd_pgtable(pmd) pmd_page(pmd) #endif +#define pmd_folio(pmd) page_folio(pmd_page(pmd)) + /* * A page table page can be thought of an array like this: pXd_t[PTRS_PER_PxD] * @@ -149,9 +151,7 @@ static inline pgd_t *pgd_offset_pgd(pgd_t *pgd, unsigned long address) * a shortcut which implies the use of the kernel's pgd, instead * of a process's */ -#ifndef pgd_offset_k #define pgd_offset_k(address) pgd_offset(&init_mm, (address)) -#endif /* * In many cases it is known that a virtual address is mapped at PMD or PTE @@ -459,6 +459,50 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, } #endif +#ifndef clear_young_dirty_ptes +/** + * clear_young_dirty_ptes - Mark PTEs that map consecutive pages of the + * same folio as old/clean. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to mark old/clean. + * @flags: Flags to modify the PTE batch semantics. + * + * May be overridden by the architecture; otherwise, implemented by + * get_and_clear/modify/set for each pte in the range. + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline void clear_young_dirty_ptes(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + unsigned int nr, cydp_t flags) +{ + pte_t pte; + + for (;;) { + if (flags == CYDP_CLEAR_YOUNG) + ptep_test_and_clear_young(vma, addr, ptep); + else { + pte = ptep_get_and_clear(vma->vm_mm, addr, ptep); + if (flags & CYDP_CLEAR_YOUNG) + pte = pte_mkold(pte); + if (flags & CYDP_CLEAR_DIRTY) + pte = pte_mkclean(pte); + set_pte_at(vma->vm_mm, addr, ptep, pte); + } + if (--nr == 0) + break; + ptep++; + addr += PAGE_SIZE; + } +} +#endif + static inline void ptep_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { @@ -708,6 +752,35 @@ static inline void pte_clear_not_present_full(struct mm_struct *mm, } #endif +#ifndef clear_not_present_full_ptes +/** + * clear_not_present_full_ptes - Clear multiple not present PTEs which are + * consecutive in the pgtable. + * @mm: Address space the ptes represent. + * @addr: Address of the first pte. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * @full: Whether we are clearing a full mm. + * + * May be overridden by the architecture; otherwise, implemented as a simple + * loop over pte_clear_not_present_full(). + * + * Context: The caller holds the page table lock. The PTEs are all not present. + * The PTEs are all in the same PMD. + */ +static inline void clear_not_present_full_ptes(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, unsigned int nr, int full) +{ + for (;;) { + pte_clear_not_present_full(mm, addr, ptep, full); + if (--nr == 0) + break; + ptep++; + addr += PAGE_SIZE; + } +} +#endif + #ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH extern pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, @@ -1052,7 +1125,7 @@ static inline int arch_unmap_one(struct mm_struct *mm, * prototypes must be defined in the arch-specific asm/pgtable.h file. */ #ifndef __HAVE_ARCH_PREPARE_TO_SWAP -static inline int arch_prepare_to_swap(struct page *page) +static inline int arch_prepare_to_swap(struct folio *folio) { return 0; } @@ -1079,7 +1152,7 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) #endif #ifndef __HAVE_ARCH_MOVE_PTE -#define move_pte(pte, prot, old_addr, new_addr) (pte) +#define move_pte(pte, old_addr, new_addr) (pte) #endif #ifndef pte_accessible @@ -1770,11 +1843,25 @@ typedef unsigned int pgtbl_mod_mask; #endif /* - * p?d_leaf() - true if this entry is a final mapping to a physical address. - * This differs from p?d_huge() by the fact that they are always available (if - * the architecture supports large pages at the appropriate level) even - * if CONFIG_HUGETLB_PAGE is not defined. - * Only meaningful when called on a valid entry. + * pXd_leaf() is the API to check whether a pgtable entry is a huge page + * mapping. It should work globally across all archs, without any + * dependency on CONFIG_* options. For architectures that do not support + * huge mappings on specific levels, below fallbacks will be used. + * + * A leaf pgtable entry should always imply the following: + * + * - It is a "present" entry. IOW, before using this API, please check it + * with pXd_present() first. NOTE: it may not always mean the "present + * bit" is set. For example, PROT_NONE entries are always "present". + * + * - It should _never_ be a swap entry of any type. Above "present" check + * should have guarded this, but let's be crystal clear on this. + * + * - It should contain a huge PFN, which points to a huge page larger than + * PAGE_SIZE of the platform. The PFN format isn't important here. + * + * - It should cover all kinds of huge mappings (e.g., pXd_trans_huge(), + * pXd_devmap(), or hugetlb mappings). */ #ifndef pgd_leaf #define pgd_leaf(x) false @@ -1806,6 +1893,20 @@ typedef unsigned int pgtbl_mod_mask; #endif /* + * We always define pmd_pfn for all archs as it's used in lots of generic + * code. Now it happens too for pud_pfn (and can happen for larger + * mappings too in the future; we're not there yet). Instead of defining + * it for all archs (like pmd_pfn), provide a fallback. + * + * Note that returning 0 here means any arch that didn't define this can + * get severely wrong when it hits a real pud leaf. It's arch's + * responsibility to properly define it when a huge pud is possible. + */ +#ifndef pud_pfn +#define pud_pfn(x) 0 +#endif + +/* * Some architectures have MMUs that are configurable or selectable at boot * time. These lead to variable PTRS_PER_x. For statically allocated arrays it * helps to have a static maximum value. |