Diffstat (limited to 'mm/rmap.c')
 -rw-r--r--  mm/rmap.c | 186
 1 file changed, 186 insertions(+), 0 deletions(-)
diff --git a/mm/rmap.c b/mm/rmap.c
index b922c7074cdd..37c24672125c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -2028,6 +2028,192 @@ void page_mlock(struct page *page)
rmap_walk(page, &rwc);
}
+#ifdef CONFIG_DEVICE_PRIVATE
+struct make_exclusive_args {
+ struct mm_struct *mm;
+ unsigned long address;
+ void *owner;
+ bool valid;
+};
+
+static bool page_make_device_exclusive_one(struct page *page,
+ struct vm_area_struct *vma, unsigned long address, void *priv)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ struct page_vma_mapped_walk pvmw = {
+ .page = page,
+ .vma = vma,
+ .address = address,
+ };
+ struct make_exclusive_args *args = priv;
+ pte_t pteval;
+ struct page *subpage;
+ bool ret = true;
+ struct mmu_notifier_range range;
+ swp_entry_t entry;
+ pte_t swp_pte;
+
+ mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0, vma,
+ vma->vm_mm, address, min(vma->vm_end,
+ address + page_size(page)), args->owner);
+ mmu_notifier_invalidate_range_start(&range);
+
+ while (page_vma_mapped_walk(&pvmw)) {
+ /* Unexpected PMD-mapped THP? */
+ VM_BUG_ON_PAGE(!pvmw.pte, page);
+
+ if (!pte_present(*pvmw.pte)) {
+ ret = false;
+ page_vma_mapped_walk_done(&pvmw);
+ break;
+ }
+
+ subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+ address = pvmw.address;
+
+ /* Nuke the page table entry. */
+ flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+ pteval = ptep_clear_flush(vma, address, pvmw.pte);
+
+ /* Move the dirty bit to the page. Now the pte is gone. */
+ if (pte_dirty(pteval))
+ set_page_dirty(page);
+
+ /*
+ * Check that our target page is still mapped at the expected
+ * address.
+ */
+ if (args->mm == mm && args->address == address &&
+ pte_write(pteval))
+ args->valid = true;
+
+ /*
+ * Store the pfn of the page in a special device exclusive swap
+ * entry. On fault, do_swap_page() will restore the original mapping
+ * after calling the MMU notifiers to revoke the device's access.
+ */
+ if (pte_write(pteval))
+ entry = make_writable_device_exclusive_entry(
+ page_to_pfn(subpage));
+ else
+ entry = make_readable_device_exclusive_entry(
+ page_to_pfn(subpage));
+ swp_pte = swp_entry_to_pte(entry);
+ if (pte_soft_dirty(pteval))
+ swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ if (pte_uffd_wp(pteval))
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
+
+ set_pte_at(mm, address, pvmw.pte, swp_pte);
+
+ /*
+ * The reference the pte held on the page is carried over to the
+ * device exclusive swap entry, so don't take another one here.
+ */
+ page_remove_rmap(subpage, false);
+ }
+
+ mmu_notifier_invalidate_range_end(&range);
+
+ return ret;
+}
+
+/**
+ * page_make_device_exclusive - mark the page exclusively owned by a device
+ * @page: the page to replace page table entries for
+ * @mm: the mm_struct where the page is expected to be mapped
+ * @address: address where the page is expected to be mapped
+ * @owner: passed to MMU_NOTIFY_EXCLUSIVE range notifier callbacks
+ *
+ * Tries to remove all the page table entries which are mapping this page and
+ * replace them with special device exclusive swap entries to grant a device
+ * exclusive access to the page. Caller must hold the page lock.
+ *
+ * Returns false if the page is still mapped, or if it could not be unmapped
+ * from the expected address. Otherwise returns true (success).
+ */
+static bool page_make_device_exclusive(struct page *page, struct mm_struct *mm,
+ unsigned long address, void *owner)
+{
+ struct make_exclusive_args args = {
+ .mm = mm,
+ .address = address,
+ .owner = owner,
+ .valid = false,
+ };
+ struct rmap_walk_control rwc = {
+ .rmap_one = page_make_device_exclusive_one,
+ .done = page_not_mapped,
+ .anon_lock = page_lock_anon_vma_read,
+ .arg = &args,
+ };
+
+ /*
+ * Restrict to anonymous pages for now to avoid potential writeback
+ * issues. Also tail pages shouldn't be passed to rmap_walk so skip
+ * those.
+ */
+ if (!PageAnon(page) || PageTail(page))
+ return false;
+
+ rmap_walk(page, &rwc);
+
+ return args.valid && !page_mapcount(page);
+}
+
+/**
+ * make_device_exclusive_range() - Mark a range for exclusive use by a device
+ * @mm: mm_struct of the associated target process
+ * @start: start of the region to mark for exclusive device access
+ * @end: end address of region
+ * @pages: returns the pages which were successfully marked for exclusive access
+ * @owner: passed to MMU_NOTIFY_EXCLUSIVE range notifier to allow filtering
+ *
+ * Returns: number of pages found in the range by GUP. A page is marked for
+ * exclusive access only if its entry in @pages is non-NULL.
+ *
+ * This function finds the ptes mapping page(s) in the given address range,
+ * locks the pages and replaces the mappings with special device exclusive swap
+ * entries that prevent userspace CPU access. On fault these entries are
+ * replaced with the original mapping after the MMU notifiers have been called.
+ *
+ * A driver using this to program access from a device must use an mmu notifier
+ * critical section to hold a device-specific lock during programming. Once
+ * programming is complete it should drop the page lock and reference, after
+ * which point CPU access to the page will revoke the exclusive access.
+ */
+int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, struct page **pages,
+ void *owner)
+{
+ long npages = (end - start) >> PAGE_SHIFT;
+ long i;
+
+ npages = get_user_pages_remote(mm, start, npages,
+ FOLL_GET | FOLL_WRITE | FOLL_SPLIT_PMD,
+ pages, NULL, NULL);
+ if (npages < 0)
+ return npages;
+
+ for (i = 0; i < npages; i++, start += PAGE_SIZE) {
+ if (!trylock_page(pages[i])) {
+ put_page(pages[i]);
+ pages[i] = NULL;
+ continue;
+ }
+
+ if (!page_make_device_exclusive(pages[i], mm, start, owner)) {
+ unlock_page(pages[i]);
+ put_page(pages[i]);
+ pages[i] = NULL;
+ }
+ }
+
+ return npages;
+}
+EXPORT_SYMBOL_GPL(make_device_exclusive_range);
+#endif
+
void __put_anon_vma(struct anon_vma *anon_vma)
{
struct anon_vma *root = anon_vma->root;
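
To make the calling convention concrete, below is a minimal sketch of how a driver might use make_device_exclusive_range() to grant its device exclusive access to one page, assuming the mmu_interval_notifier discipline the kernel-doc above describes. The struct my_dev and my_dev_prog_atomic_pte() names are hypothetical stand-ins for driver state and device page-table programming; make_device_exclusive_range(), the mmu_interval_* helpers, unlock_page() and put_page() are the real kernel interfaces.

/*
 * Sketch of a hypothetical consumer. Everything prefixed my_dev_ is
 * made up for illustration.
 */
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>

struct my_dev {
	struct mmu_interval_notifier notifier;	/* inserted elsewhere */
	/* ... device page-table state ... */
};

/* Placeholder for programming the device's page tables. */
int my_dev_prog_atomic_pte(struct my_dev *dev, unsigned long addr,
			   unsigned long pfn);

static int my_dev_make_atomic(struct my_dev *dev, struct mm_struct *mm,
			      unsigned long addr)
{
	unsigned long start = addr & PAGE_MASK;
	struct page *page = NULL;
	unsigned long seq;
	long npages;
	int ret;

	seq = mmu_interval_read_begin(&dev->notifier);

	/* Replace the CPU pte with a device exclusive entry. */
	npages = make_device_exclusive_range(mm, start, start + PAGE_SIZE,
					     &page, dev);
	if (npages < 0)
		return npages;
	if (!page)
		return -EBUSY;	/* found by GUP but not made exclusive */

	/*
	 * If the range was invalidated while we were working, give up.
	 * A real driver holds its device lock across this check and the
	 * page-table update so its invalidate callback can serialise
	 * against it.
	 */
	if (mmu_interval_read_retry(&dev->notifier, seq)) {
		ret = -EBUSY;
		goto out;
	}

	/* Program the device while the page is still locked. */
	ret = my_dev_prog_atomic_pte(dev, addr, page_to_pfn(page));
out:
	/*
	 * Dropping the lock and reference is what allows a later CPU
	 * fault on the exclusive entry to revoke the device's access.
	 */
	unlock_page(page);
	put_page(page);
	return ret;
}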
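
The @owner argument is what lets a driver's own invalidation callback tell these requests apart from genuine invalidations: the MMU_NOTIFY_EXCLUSIVE event carries the owner pointer back to the notifier, and skipping it (without bumping the notifier sequence) keeps the driver's own grant from tripping its mmu_interval_read_retry() check. A sketch of such a callback, reusing the hypothetical struct my_dev and naming from the sketch above; my_dev_unmap_range() is again a placeholder:

/* Placeholder for tearing down device mappings over a range. */
void my_dev_unmap_range(struct my_dev *dev, unsigned long start,
			unsigned long end);

static bool my_dev_invalidate(struct mmu_interval_notifier *mni,
			      const struct mmu_notifier_range *range,
			      unsigned long cur_seq)
{
	struct my_dev *dev = container_of(mni, struct my_dev, notifier);

	/*
	 * Our own make_device_exclusive_range() call arrives here as an
	 * MMU_NOTIFY_EXCLUSIVE event carrying the owner we passed in.
	 * That is the grant being installed, not a reason to tear the
	 * mapping down, so leave the sequence count alone and return.
	 */
	if (range->event == MMU_NOTIFY_EXCLUSIVE && range->owner == dev)
		return true;

	if (!mmu_notifier_range_blockable(range))
		return false;

	/*
	 * Any other invalidation revokes the device's access. A real
	 * driver would take its device lock around the seq update and
	 * the teardown.
	 */
	mmu_interval_set_seq(mni, cur_seq);
	my_dev_unmap_range(dev, range->start, range->end);
	return true;
}

static const struct mmu_interval_notifier_ops my_dev_mni_ops = {
	.invalidate = my_dev_invalidate,
};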