summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNaoya Horiguchi <n-horiguchi@ah.jp.nec.com>2015-02-11 15:28:06 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2015-02-11 17:06:06 -0800
commit48684a65b4e3ff544d62532c1b78962c9677b632 (patch)
tree884da0dc303b41042c9928b40ef7ae651d34e22d
parent6f4576e3687b1f93145b89fce49d6a8fec9e7dc2 (diff)
downloadlinux-48684a65b4e3ff544d62532c1b78962c9677b632.tar.gz
linux-48684a65b4e3ff544d62532c1b78962c9677b632.tar.bz2
linux-48684a65b4e3ff544d62532c1b78962c9677b632.zip
mm: pagewalk: fix misbehavior of walk_page_range for vma(VM_PFNMAP)
walk_page_range() silently skips vma having VM_PFNMAP set, which leads to undesirable behaviour at client end (who called walk_page_range). For example for pagemap_read(), when no callbacks are called against VM_PFNMAP vma, pagemap_read() may prepare pagemap data for next virtual address range at wrong index. That could confuse and/or break userspace applications. This patch avoid this misbehavior caused by vma(VM_PFNMAP) like follows: - for pagemap_read() which has its own ->pte_hole(), call the ->pte_hole() over vma(VM_PFNMAP), - for clear_refs and queue_pages which have their own ->tests_walk, just return 1 and skip vma(VM_PFNMAP). This is no problem because these are not interested in hole regions, - for other callers, just skip the vma(VM_PFNMAP) as a default behavior. Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Signed-off-by: Shiraz Hashim <shashim@codeaurora.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--fs/proc/task_mmu.c3
-rw-r--r--mm/mempolicy.c3
-rw-r--r--mm/pagewalk.c21
3 files changed, 19 insertions, 8 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index a36db4ad140b..f5ca96524f5f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -806,6 +806,9 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end,
struct clear_refs_private *cp = walk->private;
struct vm_area_struct *vma = walk->vma;
+ if (vma->vm_flags & VM_PFNMAP)
+ return 1;
+
/*
* Writing 1 to /proc/pid/clear_refs affects all pages.
* Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index b1dcd11d867a..f1bd23803576 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -591,6 +591,9 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
unsigned long endvma = vma->vm_end;
unsigned long flags = qp->flags;
+ if (vma->vm_flags & VM_PFNMAP)
+ return 1;
+
if (endvma > end)
endvma = end;
if (vma->vm_start > start)
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 4c9a653ba563..75c1f2878519 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -35,7 +35,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
do {
again:
next = pmd_addr_end(addr, end);
- if (pmd_none(*pmd)) {
+ if (pmd_none(*pmd) || !walk->vma) {
if (walk->pte_hole)
err = walk->pte_hole(addr, next, walk);
if (err)
@@ -165,9 +165,6 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
* or skip it via the returned value. Return 0 if we do walk over the
* current vma, and return 1 if we skip the vma. Negative values means
* error, where we abort the current walk.
- *
- * Default check (only VM_PFNMAP check for now) is used when the caller
- * doesn't define test_walk() callback.
*/
static int walk_page_test(unsigned long start, unsigned long end,
struct mm_walk *walk)
@@ -178,11 +175,19 @@ static int walk_page_test(unsigned long start, unsigned long end,
return walk->test_walk(start, end, walk);
/*
- * Do not walk over vma(VM_PFNMAP), because we have no valid struct
- * page backing a VM_PFNMAP range. See also commit a9ff785e4437.
+ * vma(VM_PFNMAP) doesn't have any valid struct pages behind VM_PFNMAP
+ * range, so we don't walk over it as we do for normal vmas. However,
+ * Some callers are interested in handling hole range and they don't
+ * want to just ignore any single address range. Such users certainly
+ * define their ->pte_hole() callbacks, so let's delegate them to handle
+ * vma(VM_PFNMAP).
*/
- if (vma->vm_flags & VM_PFNMAP)
- return 1;
+ if (vma->vm_flags & VM_PFNMAP) {
+ int err = 1;
+ if (walk->pte_hole)
+ err = walk->pte_hole(start, end, walk);
+ return err ? err : 1;
+ }
return 0;
}