diff options
author | Shawn Bohrer <sbohrer@rgmadvisors.com> | 2011-06-30 11:21:32 -0500 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@suse.de> | 2011-08-29 14:08:07 -0700 |
commit | d64ec7bb4e2e4915e698e2b0dee3bc115f9ff273 (patch) | |
tree | cf5edd153ea3188a3e1b26a4a8371e60d4675b92 | |
parent | 3d854ed83c020d229df63d9247523ac3610694e5 (diff) | |
download | linux-stable-d64ec7bb4e2e4915e698e2b0dee3bc115f9ff273.tar.gz linux-stable-d64ec7bb4e2e4915e698e2b0dee3bc115f9ff273.tar.bz2 linux-stable-d64ec7bb4e2e4915e698e2b0dee3bc115f9ff273.zip |
futex: Fix regression with read only mappings
commit 9ea71503a8ed9184d2d0b8ccc4d269d05f7940ae upstream.
commit 7485d0d3758e8e6491a5c9468114e74dc050785d (futexes: Remove rw
parameter from get_futex_key()) in 2.6.33 fixed two problems: First, It
prevented a loop when encountering a ZERO_PAGE. Second, it fixed RW
MAP_PRIVATE futex operations by forcing the COW to occur by
unconditionally performing a write access get_user_pages_fast() to get
the page. The commit also introduced a user-mode regression in that it
broke futex operations on read-only memory maps. For example, this
breaks workloads that have one or more reader processes doing a
FUTEX_WAIT on a futex within a read only shared file mapping, and a
writer processes that has a writable mapping issuing the FUTEX_WAKE.
This fixes the regression for valid futex operations on RO mappings by
trying a RO get_user_pages_fast() when the RW get_user_pages_fast()
fails. This change makes it necessary to also check for invalid use
cases, such as anonymous RO mappings (which can never change) and the
ZERO_PAGE which the commit referenced above was written to address.
This patch does restore the original behavior with RO MAP_PRIVATE
mappings, which have inherent user-mode usage problems and don't really
make sense. With this patch performing a FUTEX_WAIT within a RO
MAP_PRIVATE mapping will be successfully woken provided another process
updates the region of the underlying mapped file. However, the mmap()
man page states that for a MAP_PRIVATE mapping:
It is unspecified whether changes made to the file after
the mmap() call are visible in the mapped region.
So user-mode users attempting to use futex operations on RO MAP_PRIVATE
mappings are depending on unspecified behavior. Additionally a
RO MAP_PRIVATE mapping could fail to wake up in the following case.
Thread-A: call futex(FUTEX_WAIT, memory-region-A).
get_futex_key() return inode based key.
sleep on the key
Thread-B: call mprotect(PROT_READ|PROT_WRITE, memory-region-A)
Thread-B: write memory-region-A.
COW happen. This process's memory-region-A become related
to new COWed private (ie PageAnon=1) page.
Thread-B: call futex(FUETX_WAKE, memory-region-A).
get_futex_key() return mm based key.
IOW, we fail to wake up Thread-A.
Once again doing something like this is just silly and users who do
something like this get what they deserve.
While RO MAP_PRIVATE mappings are nonsensical, checking for a private
mapping requires walking the vmas and was deemed too costly to avoid a
userspace hang.
This Patch is based on Peter Zijlstra's initial patch with modifications to
only allow RO mappings for futex operations that need VERIFY_READ access.
Reported-by: David Oliver <david@rgmadvisors.com>
Signed-off-by: Shawn Bohrer <sbohrer@rgmadvisors.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Darren Hart <dvhart@linux.intel.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: peterz@infradead.org
Cc: eric.dumazet@gmail.com
Cc: zvonler@rgmadvisors.com
Cc: hughd@google.com
Link: http://lkml.kernel.org/r/1309450892-30676-1-git-send-email-sbohrer@rgmadvisors.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
-rw-r--r-- | kernel/futex.c | 54 |
1 files changed, 42 insertions, 12 deletions
diff --git a/kernel/futex.c b/kernel/futex.c index 09dbee2ea3e8..fb98c9f3ecbc 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -203,6 +203,8 @@ static void drop_futex_key_refs(union futex_key *key) * @uaddr: virtual address of the futex * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED * @key: address where result is stored. + * @rw: mapping needs to be read/write (values: VERIFY_READ, + * VERIFY_WRITE) * * Returns a negative error code or 0 * The key words are stored in *key on success. @@ -214,12 +216,12 @@ static void drop_futex_key_refs(union futex_key *key) * lock_page() might sleep, the caller should not hold a spinlock. */ static int -get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) +get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) { unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; struct page *page; - int err; + int err, ro = 0; /* * The futex address must be "naturally" aligned. @@ -247,14 +249,31 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) again: err = get_user_pages_fast(address, 1, 1, &page); + /* + * If write access is not required (eg. FUTEX_WAIT), try + * and get read-only access. + */ + if (err == -EFAULT && rw == VERIFY_READ) { + err = get_user_pages_fast(address, 1, 0, &page); + ro = 1; + } if (err < 0) return err; + else + err = 0; page = compound_head(page); lock_page(page); if (!page->mapping) { unlock_page(page); put_page(page); + /* + * ZERO_PAGE pages don't have a mapping. Avoid a busy loop + * trying to find one. RW mapping would have COW'd (and thus + * have a mapping) so this page is RO and won't ever change. + */ + if ((page == ZERO_PAGE(address))) + return -EFAULT; goto again; } @@ -266,6 +285,15 @@ again: * the object not the particular process. */ if (PageAnon(page)) { + /* + * A RO anonymous page will never change and thus doesn't make + * sense for futex operations. + */ + if (ro) { + err = -EFAULT; + goto out; + } + key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ key->private.mm = mm; key->private.address = address; @@ -277,9 +305,10 @@ again: get_futex_key_refs(key); +out: unlock_page(page); put_page(page); - return 0; + return err; } static inline @@ -880,7 +909,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) if (!bitset) return -EINVAL; - ret = get_futex_key(uaddr, fshared, &key); + ret = get_futex_key(uaddr, fshared, &key, VERIFY_READ); if (unlikely(ret != 0)) goto out; @@ -926,10 +955,10 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, int ret, op_ret; retry: - ret = get_futex_key(uaddr1, fshared, &key1); + ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, fshared, &key2); + ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); if (unlikely(ret != 0)) goto out_put_key1; @@ -1188,10 +1217,11 @@ retry: pi_state = NULL; } - ret = get_futex_key(uaddr1, fshared, &key1); + ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, fshared, &key2); + ret = get_futex_key(uaddr2, fshared, &key2, + requeue_pi ? VERIFY_WRITE : VERIFY_READ); if (unlikely(ret != 0)) goto out_put_key1; @@ -1746,7 +1776,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared, */ retry: q->key = FUTEX_KEY_INIT; - ret = get_futex_key(uaddr, fshared, &q->key); + ret = get_futex_key(uaddr, fshared, &q->key, VERIFY_READ); if (unlikely(ret != 0)) return ret; @@ -1912,7 +1942,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, q.requeue_pi_key = NULL; retry: q.key = FUTEX_KEY_INIT; - ret = get_futex_key(uaddr, fshared, &q.key); + ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE); if (unlikely(ret != 0)) goto out; @@ -2031,7 +2061,7 @@ retry: if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) return -EPERM; - ret = get_futex_key(uaddr, fshared, &key); + ret = get_futex_key(uaddr, fshared, &key, VERIFY_WRITE); if (unlikely(ret != 0)) goto out; @@ -2223,7 +2253,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, rt_waiter.task = NULL; key2 = FUTEX_KEY_INIT; - ret = get_futex_key(uaddr2, fshared, &key2); + ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); if (unlikely(ret != 0)) goto out; |