-rw-r--r-- | arch/s390/include/asm/tlb.h | 10
-rw-r--r-- | include/asm-generic/tlb.h   | 39
-rw-r--r-- | mm/memory.c                 |  5
-rw-r--r-- | mm/mmu_gather.c             | 42
-rw-r--r-- | mm/rmap.c                   |  4
5 files changed, 81 insertions, 19 deletions
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 3a5c8fb590e5..0d2c6c0168a3 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -25,7 +25,8 @@
 void __tlb_remove_table(void *_table);
 static inline void tlb_flush(struct mmu_gather *tlb);
 static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
-					  struct page *page, int page_size);
+					  struct page *page, int page_size,
+					  unsigned int flags);
 
 #define tlb_flush tlb_flush
 #define pte_free_tlb pte_free_tlb
@@ -36,14 +37,19 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
 #include <asm/tlbflush.h>
 #include <asm-generic/tlb.h>
 
+void page_zap_pte_rmap(struct page *);
+
 /*
  * Release the page cache reference for a pte removed by
  * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
  * has already been freed, so just do free_page_and_swap_cache.
  */
 static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
-					  struct page *page, int page_size)
+					  struct page *page, int page_size,
+					  unsigned int flags)
 {
+	if (flags & TLB_ZAP_RMAP)
+		page_zap_pte_rmap(page);
 	free_page_and_swap_cache(page);
 	return false;
 }
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 492dce43236e..b6045149d0b9 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -73,7 +73,8 @@
  * __tlb_remove_page_size() is the basic primitive that queues a page for
  * freeing. __tlb_remove_page() assumes PAGE_SIZE. Both will return a
  * boolean indicating if the queue is (now) full and a call to
- * tlb_flush_mmu() is required.
+ * tlb_flush_mmu() is required. They take a 'flags' parameter that
+ * states whether the rmap of the page should be removed after TLB flush.
  *
  * tlb_remove_page() and tlb_remove_page_size() imply the call to
  * tlb_flush_mmu() when required and has no return value.
@@ -187,6 +188,7 @@
  * This is useful if your architecture already flushes TLB entries in the
  * various ptep_get_and_clear() functions.
  */
+#define TLB_ZAP_RMAP 1ul
 
 #ifdef CONFIG_MMU_GATHER_TABLE_FREE
 
@@ -238,11 +240,36 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
  */
 #define MMU_GATHER_BUNDLE	8
 
+/*
+ * Fake type for an encoded page with flag bits in the low bits.
+ *
+ * Right now just one bit, but we could have more depending on the
+ * alignment of 'struct page'.
+ */
+struct encoded_page;
+#define ENCODE_PAGE_BITS (TLB_ZAP_RMAP)
+
+static inline struct encoded_page *encode_page(struct page *page, unsigned long flags)
+{
+	flags &= ENCODE_PAGE_BITS;
+	return (struct encoded_page *)(flags | (unsigned long)page);
+}
+
+static inline bool encoded_page_flags(struct encoded_page *page)
+{
+	return ENCODE_PAGE_BITS & (unsigned long)page;
+}
+
+static inline struct page *encoded_page_ptr(struct encoded_page *page)
+{
+	return (struct page *)(~ENCODE_PAGE_BITS & (unsigned long)page);
+}
+
 struct mmu_gather_batch {
 	struct mmu_gather_batch	*next;
 	unsigned int		nr;
 	unsigned int		max;
-	struct page		*pages[];
+	struct encoded_page	*encoded_pages[];
 };
 
 #define MAX_GATHER_BATCH	\
@@ -257,7 +284,7 @@ struct mmu_gather_batch {
 #define MAX_GATHER_BATCH_COUNT	(10000UL/MAX_GATHER_BATCH)
 
 extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
-				   int page_size);
+				   int page_size, unsigned int flags);
 #endif
 
 /*
@@ -431,13 +458,13 @@ static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 static inline void tlb_remove_page_size(struct mmu_gather *tlb,
 					struct page *page, int page_size)
 {
-	if (__tlb_remove_page_size(tlb, page, page_size))
+	if (__tlb_remove_page_size(tlb, page, page_size, 0))
 		tlb_flush_mmu(tlb);
 }
 
-static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page, unsigned int flags)
 {
-	return __tlb_remove_page_size(tlb, page, PAGE_SIZE);
+	return __tlb_remove_page_size(tlb, page, PAGE_SIZE, flags);
 }
 
 /* tlb_remove_page
diff --git a/mm/memory.c b/mm/memory.c
index c893f5ffc5a8..cc24f38a8a7d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1452,12 +1452,9 @@ again:
 				    likely(!(vma->vm_flags & VM_SEQ_READ)))
 					mark_page_accessed(page);
 			}
-			page_zap_pte_rmap(page);
 			munlock_vma_page(page, vma, false);
 			rss[mm_counter(page)]--;
-			if (unlikely(page_mapcount(page) < 0))
-				print_bad_pte(vma, addr, ptent, page);
-			if (unlikely(__tlb_remove_page(tlb, page))) {
+			if (unlikely(__tlb_remove_page(tlb, page, TLB_ZAP_RMAP))) {
 				force_flush = 1;
 				addr += PAGE_SIZE;
 				break;
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index add4244e5790..34f6a07fd711 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -9,6 +9,7 @@
 #include <linux/rcupdate.h>
 #include <linux/smp.h>
 #include <linux/swap.h>
+#include <linux/rmap.h>
 
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
@@ -43,12 +44,44 @@ static bool tlb_next_batch(struct mmu_gather *tlb)
 	return true;
 }
 
+/*
+ * We get an 'encoded page' array, which has page pointers with
+ * encoded flags in the low bits of the array.
+ *
+ * The TLB has been flushed, now we need to react to the flag bits in
+ * the 'struct page' pointers, clean the array in-place, and then free
+ * the pages and their swap cache.
+ */
+static void clean_and_free_pages_and_swap_cache(struct encoded_page **pages, unsigned int nr)
+{
+	for (unsigned int i = 0; i < nr; i++) {
+		struct encoded_page *encoded = pages[i];
+		unsigned int flags = encoded_page_flags(encoded);
+		if (flags) {
+			/* Clean the flagged pointer in-place */
+			struct page *page = encoded_page_ptr(encoded);
+			pages[i] = encode_page(page, 0);
+
+			/* The flag bit being set means that we should zap the rmap */
+			page_zap_pte_rmap(page);
+			VM_WARN_ON_ONCE_PAGE(page_mapcount(page) < 0, page);
+		}
+	}
+
+	/*
+	 * Now all entries have been un-encoded, and changed to plain
+	 * page pointers, so we can cast the 'encoded_page' array to
+	 * a plain page array and free them
+	 */
+	free_pages_and_swap_cache((struct page **)pages, nr);
+}
+
 static void tlb_batch_pages_flush(struct mmu_gather *tlb)
 {
 	struct mmu_gather_batch *batch;
 
 	for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
-		struct page **pages = batch->pages;
+		struct encoded_page **pages = batch->encoded_pages;
 
 		do {
 			/*
@@ -56,7 +89,7 @@ static void tlb_batch_pages_flush(struct mmu_gather *tlb)
 			 */
 			unsigned int nr = min(512U, batch->nr);
 
-			free_pages_and_swap_cache(pages, nr);
+			clean_and_free_pages_and_swap_cache(pages, nr);
 			pages += nr;
 			batch->nr -= nr;
 
@@ -77,11 +110,12 @@ static void tlb_batch_list_free(struct mmu_gather *tlb)
 	tlb->local.next = NULL;
 }
 
-bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size)
+bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size, unsigned int flags)
 {
 	struct mmu_gather_batch *batch;
 
 	VM_BUG_ON(!tlb->end);
+	VM_BUG_ON(flags & ~ENCODE_PAGE_BITS);
 
 #ifdef CONFIG_MMU_GATHER_PAGE_SIZE
 	VM_WARN_ON(tlb->page_size != page_size);
@@ -92,7 +126,7 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
 	 * Add the page and check if we are full. If so
 	 * force a flush.
 	 */
-	batch->pages[batch->nr++] = page;
+	batch->encoded_pages[batch->nr++] = encode_page(page, flags);
 	if (batch->nr == batch->max) {
 		if (!tlb_next_batch(tlb))
 			return true;
diff --git a/mm/rmap.c b/mm/rmap.c
index 28b51a31ebb0..416b7078b75f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1422,8 +1422,6 @@ static void page_remove_anon_compound_rmap(struct page *page)
  * separately.
  *
  * This allows for a much simpler calling convention and code.
- *
- * The caller holds the pte lock.
  */
 void page_zap_pte_rmap(struct page *page)
 {
@@ -1431,7 +1429,7 @@ void page_zap_pte_rmap(struct page *page)
 		return;
 
 	lock_page_memcg(page);
-	__dec_lruvec_page_state(page,
+	dec_lruvec_page_state(page,
 		PageAnon(page) ? NR_ANON_MAPPED : NR_FILE_MAPPED);
 	unlock_page_memcg(page);
 }
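
A note on the pointer-tagging trick used by encode_page()/encoded_page_ptr() above: because 'struct page' is always at least word-aligned, the low bits of its address are guaranteed to be zero and can carry a small flag such as TLB_ZAP_RMAP without growing the gather batch. The following standalone userspace sketch (not kernel code; 'fake_page' and main() are illustrative stand-ins, and the helper names merely mirror the patch) shows the encode/decode round trip under that alignment assumption.

/*
 * Minimal userspace sketch of low-bit pointer tagging.
 * Assumes the tagged object is at least 2-byte aligned.
 */
#include <assert.h>
#include <stdio.h>

#define TLB_ZAP_RMAP		1ul
#define ENCODE_PAGE_BITS	(TLB_ZAP_RMAP)

struct fake_page { unsigned long dummy; };	/* stand-in for struct page */
struct encoded_page;				/* opaque tagged pointer type */

static struct encoded_page *encode_page(struct fake_page *page, unsigned long flags)
{
	flags &= ENCODE_PAGE_BITS;		/* only low, always-zero address bits */
	return (struct encoded_page *)(flags | (unsigned long)page);
}

static unsigned long encoded_page_flags(struct encoded_page *page)
{
	return ENCODE_PAGE_BITS & (unsigned long)page;
}

static struct fake_page *encoded_page_ptr(struct encoded_page *page)
{
	return (struct fake_page *)(~ENCODE_PAGE_BITS & (unsigned long)page);
}

int main(void)
{
	struct fake_page page;
	struct encoded_page *enc = encode_page(&page, TLB_ZAP_RMAP);

	/* The flag rides in the low bit; the pointer is recovered by masking it off. */
	assert(encoded_page_flags(enc) == TLB_ZAP_RMAP);
	assert(encoded_page_ptr(enc) == &page);

	/* Re-encoding with no flags yields the plain pointer again. */
	assert((void *)encode_page(&page, 0) == (void *)&page);

	printf("encoded %p -> ptr %p flags %lu\n",
	       (void *)enc, (void *)encoded_page_ptr(enc), encoded_page_flags(enc));
	return 0;
}

The same masking is what lets clean_and_free_pages_and_swap_cache() strip the flag in place after the TLB flush and hand the array straight to free_pages_and_swap_cache() as plain 'struct page' pointers.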