summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJiri Kosina <jkosina@suse.cz>2019-05-14 15:41:38 -0700
committerBen Hutchings <ben@decadent.org.uk>2019-06-20 18:11:26 +0100
commitb96659f18c61120dbf8b4cc36fbc05589bf9dc02 (patch)
tree02e18445fe8e6e7e8bc7e41cc38ff2b505b12ab0
parent762d8ea0c73165fc9c99a9bc63b82706cbb56062 (diff)
downloadlinux-stable-b96659f18c61120dbf8b4cc36fbc05589bf9dc02.tar.gz
linux-stable-b96659f18c61120dbf8b4cc36fbc05589bf9dc02.tar.bz2
linux-stable-b96659f18c61120dbf8b4cc36fbc05589bf9dc02.zip
mm/mincore.c: make mincore() more conservative
commit 134fca9063ad4851de767d1768180e5dede9a881 upstream. The semantics of what mincore() considers to be resident is not completely clear, but Linux has always (since 2.3.52, which is when mincore() was initially done) treated it as "page is available in page cache". That's potentially a problem, as that [in]directly exposes meta-information about pagecache / memory mapping state even about memory not strictly belonging to the process executing the syscall, opening possibilities for sidechannel attacks. Change the semantics of mincore() so that it only reveals pagecache information for non-anonymous mappings that belog to files that the calling process could (if it tried to) successfully open for writing; otherwise we'd be including shared non-exclusive mappings, which - is the sidechannel - is not the usecase for mincore(), as that's primarily used for data, not (shared) text [jkosina@suse.cz: v2] Link: http://lkml.kernel.org/r/20190312141708.6652-2-vbabka@suse.cz [mhocko@suse.com: restructure can_do_mincore() conditions] Link: http://lkml.kernel.org/r/nycvar.YFH.7.76.1903062342020.19912@cbobk.fhfr.pm Signed-off-by: Jiri Kosina <jkosina@suse.cz> Signed-off-by: Vlastimil Babka <vbabka@suse.cz> Acked-by: Josh Snyder <joshs@netflix.com> Acked-by: Michal Hocko <mhocko@suse.com> Originally-by: Linus Torvalds <torvalds@linux-foundation.org> Originally-by: Dominique Martinet <asmadeus@codewreck.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Dave Chinner <david@fromorbit.com> Cc: Kevin Easton <kevin@guarana.org> Cc: Matthew Wilcox <willy@infradead.org> Cc: Cyril Hrubis <chrubis@suse.cz> Cc: Tejun Heo <tj@kernel.org> Cc: Kirill A. Shutemov <kirill@shutemov.name> Cc: Daniel Gruss <daniel@gruss.cc> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> [bwh: Backported to 3.16: adjust context] Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
-rw-r--r--mm/mincore.c21
1 files changed, 21 insertions, 0 deletions
diff --git a/mm/mincore.c b/mm/mincore.c
index 3c4c8f6ab57e..1d802326abab 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -212,6 +212,22 @@ static void mincore_page_range(struct vm_area_struct *vma,
} while (pgd++, addr = next, addr != end);
}
+static inline bool can_do_mincore(struct vm_area_struct *vma)
+{
+ if (vma_is_anonymous(vma))
+ return true;
+ if (!vma->vm_file)
+ return false;
+ /*
+ * Reveal pagecache information only for non-anonymous mappings that
+ * correspond to the files the calling process could (if tried) open
+ * for writing; otherwise we'd be including shared non-exclusive
+ * mappings, which opens a side channel.
+ */
+ return inode_owner_or_capable(file_inode(vma->vm_file)) ||
+ inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
+}
+
/*
* Do a chunk of "sys_mincore()". We've already checked
* all the arguments, we hold the mmap semaphore: we should
@@ -227,6 +243,11 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v
return -ENOMEM;
end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
+ if (!can_do_mincore(vma)) {
+ unsigned long pages = DIV_ROUND_UP(end - addr, PAGE_SIZE);
+ memset(vec, 1, pages);
+ return pages;
+ }
if (is_vm_hugetlb_page(vma))
mincore_hugetlb_page_range(vma, addr, end, vec);