summaryrefslogtreecommitdiffstats
path: root/mm/memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory.c')
-rw-r--r--mm/memory.c55
1 files changed, 51 insertions, 4 deletions
diff --git a/mm/memory.c b/mm/memory.c
index a75040a47fcc..bca60092b4d5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -792,6 +792,11 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
&src_mm->mmlist);
spin_unlock(&mmlist_lock);
}
+ /* Mark the swap entry as shared. */
+ if (pte_swp_exclusive(*src_pte)) {
+ pte = pte_swp_clear_exclusive(*src_pte);
+ set_pte_at(src_mm, addr, src_pte, pte);
+ }
rss[MM_SWAPENTS]++;
} else if (is_migration_entry(entry)) {
page = pfn_swap_entry_to_page(entry);
@@ -3563,6 +3568,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
struct page *page = NULL, *swapcache;
struct swap_info_struct *si = NULL;
rmap_t rmap_flags = RMAP_NONE;
+ bool exclusive = false;
swp_entry_t entry;
pte_t pte;
int locked;
@@ -3729,6 +3735,46 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
BUG_ON(PageAnon(page) && PageAnonExclusive(page));
/*
+ * Check under PT lock (to protect against concurrent fork() sharing
+ * the swap entry concurrently) for certainly exclusive pages.
+ */
+ if (!PageKsm(page)) {
+ /*
+ * Note that pte_swp_exclusive() == false for architectures
+ * without __HAVE_ARCH_PTE_SWP_EXCLUSIVE.
+ */
+ exclusive = pte_swp_exclusive(vmf->orig_pte);
+ if (page != swapcache) {
+ /*
+ * We have a fresh page that is not exposed to the
+ * swapcache -> certainly exclusive.
+ */
+ exclusive = true;
+ } else if (exclusive && PageWriteback(page) &&
+ (swp_swap_info(entry)->flags & SWP_STABLE_WRITES)) {
+ /*
+ * This is tricky: not all swap backends support
+ * concurrent page modifications while under writeback.
+ *
+ * So if we stumble over such a page in the swapcache
+ * we must not set the page exclusive, otherwise we can
+ * map it writable without further checks and modify it
+ * while still under writeback.
+ *
+ * For these problematic swap backends, simply drop the
+ * exclusive marker: this is perfectly fine as we start
+ * writeback only if we fully unmapped the page and
+ * there are no unexpected references on the page after
+ * unmapping succeeded. After fully unmapped, no
+ * further GUP references (FOLL_GET and FOLL_PIN) can
+ * appear, so dropping the exclusive marker and mapping
+ * it only R/O is fine.
+ */
+ exclusive = false;
+ }
+ }
+
+ /*
* Remove the swap entry and conditionally try to free up the swapcache.
* We're already holding a reference on the page but haven't mapped it
* yet.
@@ -3742,11 +3788,12 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
pte = mk_pte(page, vma->vm_page_prot);
/*
- * Same logic as in do_wp_page(); however, optimize for fresh pages
- * that are certainly not shared because we just allocated them without
- * exposing them to the swapcache.
+ * Same logic as in do_wp_page(); however, optimize for pages that are
+ * certainly not shared either because we just allocated them without
+ * exposing them to the swapcache or because the swap entry indicates
+ * exclusivity.
*/
- if (!PageKsm(page) && (page != swapcache || page_count(page) == 1)) {
+ if (!PageKsm(page) && (exclusive || page_count(page) == 1)) {
if (vmf->flags & FAULT_FLAG_WRITE) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
vmf->flags &= ~FAULT_FLAG_WRITE;