diff options
author | Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> | 2015-02-11 15:28:06 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-11 17:06:06 -0800 |
commit | 48684a65b4e3ff544d62532c1b78962c9677b632 (patch) | |
tree | 884da0dc303b41042c9928b40ef7ae651d34e22d | |
parent | 6f4576e3687b1f93145b89fce49d6a8fec9e7dc2 (diff) | |
download | linux-48684a65b4e3ff544d62532c1b78962c9677b632.tar.gz linux-48684a65b4e3ff544d62532c1b78962c9677b632.tar.bz2 linux-48684a65b4e3ff544d62532c1b78962c9677b632.zip |
mm: pagewalk: fix misbehavior of walk_page_range for vma(VM_PFNMAP)
walk_page_range() silently skips any vma that has VM_PFNMAP set, which leads
to undesirable behaviour in the caller of walk_page_range(). For
example, in pagemap_read(), when no callbacks are called for a VM_PFNMAP
vma, pagemap_read() may prepare pagemap data for the next virtual address
range at the wrong index. That could confuse and/or break userspace
applications.
This patch avoids this misbehavior caused by vma(VM_PFNMAP) as follows:
- for pagemap_read() which has its own ->pte_hole(), call the ->pte_hole()
over vma(VM_PFNMAP),
- for clear_refs and queue_pages which have their own ->test_walk(),
just return 1 and skip vma(VM_PFNMAP). This is no problem because
these are not interested in hole regions,
- for other callers, just skip the vma(VM_PFNMAP) as a default behavior.
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Shiraz Hashim <shashim@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/proc/task_mmu.c | 3 | ||||
-rw-r--r-- | mm/mempolicy.c | 3 | ||||
-rw-r--r-- | mm/pagewalk.c | 21 |
3 files changed, 19 insertions, 8 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index a36db4ad140b..f5ca96524f5f 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -806,6 +806,9 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end, struct clear_refs_private *cp = walk->private; struct vm_area_struct *vma = walk->vma; + if (vma->vm_flags & VM_PFNMAP) + return 1; + /* * Writing 1 to /proc/pid/clear_refs affects all pages. * Writing 2 to /proc/pid/clear_refs only affects anonymous pages. diff --git a/mm/mempolicy.c b/mm/mempolicy.c index b1dcd11d867a..f1bd23803576 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -591,6 +591,9 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, unsigned long endvma = vma->vm_end; unsigned long flags = qp->flags; + if (vma->vm_flags & VM_PFNMAP) + return 1; + if (endvma > end) endvma = end; if (vma->vm_start > start) diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 4c9a653ba563..75c1f2878519 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -35,7 +35,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, do { again: next = pmd_addr_end(addr, end); - if (pmd_none(*pmd)) { + if (pmd_none(*pmd) || !walk->vma) { if (walk->pte_hole) err = walk->pte_hole(addr, next, walk); if (err) @@ -165,9 +165,6 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end, * or skip it via the returned value. Return 0 if we do walk over the * current vma, and return 1 if we skip the vma. Negative values means * error, where we abort the current walk. - * - * Default check (only VM_PFNMAP check for now) is used when the caller - * doesn't define test_walk() callback. */ static int walk_page_test(unsigned long start, unsigned long end, struct mm_walk *walk) @@ -178,11 +175,19 @@ static int walk_page_test(unsigned long start, unsigned long end, return walk->test_walk(start, end, walk); /* - * Do not walk over vma(VM_PFNMAP), because we have no valid struct - * page backing a VM_PFNMAP range. 
See also commit a9ff785e4437. + * vma(VM_PFNMAP) doesn't have any valid struct pages behind VM_PFNMAP + * range, so we don't walk over it as we do for normal vmas. However, + * Some callers are interested in handling hole range and they don't + * want to just ignore any single address range. Such users certainly + * define their ->pte_hole() callbacks, so let's delegate them to handle + * vma(VM_PFNMAP). */ - if (vma->vm_flags & VM_PFNMAP) - return 1; + if (vma->vm_flags & VM_PFNMAP) { + int err = 1; + if (walk->pte_hole) + err = walk->pte_hole(start, end, walk); + return err ? err : 1; + } return 0; } |