summaryrefslogtreecommitdiffstats
path: root/fs/aio.c
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2012-06-15 12:32:04 -0700
committerSage Weil <sage@inktank.com>2012-06-15 12:32:04 -0700
commit9a64e8e0ace51b309fdcff4b4754b3649250382a (patch)
tree1f0d75c196c5ab0408c55ed6cf3a152f1f921e15 /fs/aio.c
parentf3dea7edd3d449fe7a6d402c1ce56a294b985261 (diff)
parentf8f5701bdaf9134b1f90e5044a82c66324d2073f (diff)
downloadlinux-9a64e8e0ace51b309fdcff4b4754b3649250382a.tar.gz
linux-9a64e8e0ace51b309fdcff4b4754b3649250382a.tar.bz2
linux-9a64e8e0ace51b309fdcff4b4754b3649250382a.zip
Merge tag 'v3.5-rc1'
Linux 3.5-rc1 Conflicts: net/ceph/messenger.c
Diffstat (limited to 'fs/aio.c')
-rw-r--r--fs/aio.c185
1 files changed, 81 insertions, 104 deletions
diff --git a/fs/aio.c b/fs/aio.c
index b9d64d89a043..55c4c7656053 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -13,7 +13,7 @@
#include <linux/errno.h>
#include <linux/time.h>
#include <linux/aio_abi.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/syscalls.h>
#include <linux/backing-dev.h>
#include <linux/uio.h>
@@ -93,9 +93,8 @@ static void aio_free_ring(struct kioctx *ctx)
put_page(info->ring_pages[i]);
if (info->mmap_size) {
- down_write(&ctx->mm->mmap_sem);
- do_munmap(ctx->mm, info->mmap_base, info->mmap_size);
- up_write(&ctx->mm->mmap_sem);
+ BUG_ON(ctx->mm != current->mm);
+ vm_munmap(info->mmap_base, info->mmap_size);
}
if (info->ring_pages && info->ring_pages != info->internal_pages)
@@ -135,9 +134,9 @@ static int aio_setup_ring(struct kioctx *ctx)
info->mmap_size = nr_pages * PAGE_SIZE;
dprintk("attempting mmap of %lu bytes\n", info->mmap_size);
down_write(&ctx->mm->mmap_sem);
- info->mmap_base = do_mmap(NULL, 0, info->mmap_size,
- PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE,
- 0);
+ info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size,
+ PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, 0);
if (IS_ERR((void *)info->mmap_base)) {
up_write(&ctx->mm->mmap_sem);
info->mmap_size = 0;
@@ -160,7 +159,7 @@ static int aio_setup_ring(struct kioctx *ctx)
info->nr = nr_events; /* trusted copy */
- ring = kmap_atomic(info->ring_pages[0], KM_USER0);
+ ring = kmap_atomic(info->ring_pages[0]);
ring->nr = nr_events; /* user copy */
ring->id = ctx->user_id;
ring->head = ring->tail = 0;
@@ -168,47 +167,38 @@ static int aio_setup_ring(struct kioctx *ctx)
ring->compat_features = AIO_RING_COMPAT_FEATURES;
ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
ring->header_length = sizeof(struct aio_ring);
- kunmap_atomic(ring, KM_USER0);
+ kunmap_atomic(ring);
return 0;
}
/* aio_ring_event: returns a pointer to the event at the given index from
- * kmap_atomic(, km). Release the pointer with put_aio_ring_event();
+ * kmap_atomic(). Release the pointer with put_aio_ring_event();
*/
#define AIO_EVENTS_PER_PAGE (PAGE_SIZE / sizeof(struct io_event))
#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
-#define aio_ring_event(info, nr, km) ({ \
+#define aio_ring_event(info, nr) ({ \
unsigned pos = (nr) + AIO_EVENTS_OFFSET; \
struct io_event *__event; \
__event = kmap_atomic( \
- (info)->ring_pages[pos / AIO_EVENTS_PER_PAGE], km); \
+ (info)->ring_pages[pos / AIO_EVENTS_PER_PAGE]); \
__event += pos % AIO_EVENTS_PER_PAGE; \
__event; \
})
-#define put_aio_ring_event(event, km) do { \
+#define put_aio_ring_event(event) do { \
struct io_event *__event = (event); \
(void)__event; \
- kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \
+ kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK)); \
} while(0)
static void ctx_rcu_free(struct rcu_head *head)
{
struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
- unsigned nr_events = ctx->max_reqs;
-
kmem_cache_free(kioctx_cachep, ctx);
-
- if (nr_events) {
- spin_lock(&aio_nr_lock);
- BUG_ON(aio_nr - nr_events > aio_nr);
- aio_nr -= nr_events;
- spin_unlock(&aio_nr_lock);
- }
}
/* __put_ioctx
@@ -217,13 +207,19 @@ static void ctx_rcu_free(struct rcu_head *head)
*/
static void __put_ioctx(struct kioctx *ctx)
{
+ unsigned nr_events = ctx->max_reqs;
BUG_ON(ctx->reqs_active);
- cancel_delayed_work(&ctx->wq);
- cancel_work_sync(&ctx->wq.work);
+ cancel_delayed_work_sync(&ctx->wq);
aio_free_ring(ctx);
mmdrop(ctx->mm);
ctx->mm = NULL;
+ if (nr_events) {
+ spin_lock(&aio_nr_lock);
+ BUG_ON(aio_nr - nr_events > aio_nr);
+ aio_nr -= nr_events;
+ spin_unlock(&aio_nr_lock);
+ }
pr_debug("__put_ioctx: freeing %p\n", ctx);
call_rcu(&ctx->rcu_head, ctx_rcu_free);
}
@@ -247,7 +243,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
{
struct mm_struct *mm;
struct kioctx *ctx;
- int did_sync = 0;
+ int err = -ENOMEM;
/* Prevent overflows */
if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
@@ -256,7 +252,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
return ERR_PTR(-EINVAL);
}
- if ((unsigned long)nr_events > aio_max_nr)
+ if (!nr_events || (unsigned long)nr_events > aio_max_nr)
return ERR_PTR(-EAGAIN);
ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
@@ -280,25 +276,14 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
goto out_freectx;
/* limit the number of system wide aios */
- do {
- spin_lock_bh(&aio_nr_lock);
- if (aio_nr + nr_events > aio_max_nr ||
- aio_nr + nr_events < aio_nr)
- ctx->max_reqs = 0;
- else
- aio_nr += ctx->max_reqs;
- spin_unlock_bh(&aio_nr_lock);
- if (ctx->max_reqs || did_sync)
- break;
-
- /* wait for rcu callbacks to have completed before giving up */
- synchronize_rcu();
- did_sync = 1;
- ctx->max_reqs = nr_events;
- } while (1);
-
- if (ctx->max_reqs == 0)
+ spin_lock(&aio_nr_lock);
+ if (aio_nr + nr_events > aio_max_nr ||
+ aio_nr + nr_events < aio_nr) {
+ spin_unlock(&aio_nr_lock);
goto out_cleanup;
+ }
+ aio_nr += ctx->max_reqs;
+ spin_unlock(&aio_nr_lock);
/* now link into global list. */
spin_lock(&mm->ioctx_lock);
@@ -310,27 +295,27 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
return ctx;
out_cleanup:
- __put_ioctx(ctx);
- return ERR_PTR(-EAGAIN);
-
+ err = -EAGAIN;
+ aio_free_ring(ctx);
out_freectx:
mmdrop(mm);
kmem_cache_free(kioctx_cachep, ctx);
- ctx = ERR_PTR(-ENOMEM);
-
- dprintk("aio: error allocating ioctx %p\n", ctx);
- return ctx;
+ dprintk("aio: error allocating ioctx %d\n", err);
+ return ERR_PTR(err);
}
-/* aio_cancel_all
+/* kill_ctx
* Cancels all outstanding aio requests on an aio context. Used
* when the processes owning a context have all exited to encourage
* the rapid destruction of the kioctx.
*/
-static void aio_cancel_all(struct kioctx *ctx)
+static void kill_ctx(struct kioctx *ctx)
{
int (*cancel)(struct kiocb *, struct io_event *);
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
struct io_event res;
+
spin_lock_irq(&ctx->ctx_lock);
ctx->dead = 1;
while (!list_empty(&ctx->active_reqs)) {
@@ -346,15 +331,7 @@ static void aio_cancel_all(struct kioctx *ctx)
spin_lock_irq(&ctx->ctx_lock);
}
}
- spin_unlock_irq(&ctx->ctx_lock);
-}
-
-static void wait_for_all_aios(struct kioctx *ctx)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- spin_lock_irq(&ctx->ctx_lock);
if (!ctx->reqs_active)
goto out;
@@ -404,19 +381,24 @@ void exit_aio(struct mm_struct *mm)
ctx = hlist_entry(mm->ioctx_list.first, struct kioctx, list);
hlist_del_rcu(&ctx->list);
- aio_cancel_all(ctx);
-
- wait_for_all_aios(ctx);
- /*
- * Ensure we don't leave the ctx on the aio_wq
- */
- cancel_work_sync(&ctx->wq.work);
+ kill_ctx(ctx);
if (1 != atomic_read(&ctx->users))
printk(KERN_DEBUG
"exit_aio:ioctx still alive: %d %d %d\n",
atomic_read(&ctx->users), ctx->dead,
ctx->reqs_active);
+ /*
+ * We don't need to bother with munmap() here -
+ * exit_mmap(mm) is coming and it'll unmap everything.
+ * Since aio_free_ring() uses non-zero ->mmap_size
+ * as indicator that it needs to unmap the area,
+ * just set it to 0; aio_free_ring() is the only
+ * place that uses ->mmap_size, so it's safe.
+ * That way we get all munmap done to current->mm -
+ * all other callers have ctx->mm == current->mm.
+ */
+ ctx->ring_info.mmap_size = 0;
put_ioctx(ctx);
}
}
@@ -920,7 +902,7 @@ static void aio_kick_handler(struct work_struct *work)
unuse_mm(mm);
set_fs(oldfs);
/*
- * we're in a worker thread already, don't use queue_delayed_work,
+ * we're in a worker thread already; no point using non-zero delay
*/
if (requeue)
queue_delayed_work(aio_wq, &ctx->wq, 0);
@@ -1019,10 +1001,10 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
if (kiocbIsCancelled(iocb))
goto put_rq;
- ring = kmap_atomic(info->ring_pages[0], KM_IRQ1);
+ ring = kmap_atomic(info->ring_pages[0]);
tail = info->tail;
- event = aio_ring_event(info, tail, KM_IRQ0);
+ event = aio_ring_event(info, tail);
if (++tail >= info->nr)
tail = 0;
@@ -1043,8 +1025,8 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
info->tail = tail;
ring->tail = tail;
- put_aio_ring_event(event, KM_IRQ0);
- kunmap_atomic(ring, KM_IRQ1);
+ put_aio_ring_event(event);
+ kunmap_atomic(ring);
pr_debug("added to ring %p at [%lu]\n", iocb, tail);
@@ -1089,7 +1071,7 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
unsigned long head;
int ret = 0;
- ring = kmap_atomic(info->ring_pages[0], KM_USER0);
+ ring = kmap_atomic(info->ring_pages[0]);
dprintk("in aio_read_evt h%lu t%lu m%lu\n",
(unsigned long)ring->head, (unsigned long)ring->tail,
(unsigned long)ring->nr);
@@ -1101,18 +1083,18 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
head = ring->head % info->nr;
if (head != ring->tail) {
- struct io_event *evp = aio_ring_event(info, head, KM_USER1);
+ struct io_event *evp = aio_ring_event(info, head);
*ent = *evp;
head = (head + 1) % info->nr;
smp_mb(); /* finish reading the event before updatng the head */
ring->head = head;
ret = 1;
- put_aio_ring_event(evp, KM_USER1);
+ put_aio_ring_event(evp);
}
spin_unlock(&info->ring_lock);
out:
- kunmap_atomic(ring, KM_USER0);
+ kunmap_atomic(ring);
dprintk("leaving aio_read_evt: %d h%lu t%lu\n", ret,
(unsigned long)ring->head, (unsigned long)ring->tail);
return ret;
@@ -1290,8 +1272,7 @@ static void io_destroy(struct kioctx *ioctx)
if (likely(!was_dead))
put_ioctx(ioctx); /* twice for the list */
- aio_cancel_all(ioctx);
- wait_for_all_aios(ioctx);
+ kill_ctx(ioctx);
/*
* Wake up any waiters. The setting of ctx->dead must be seen
@@ -1299,7 +1280,6 @@ static void io_destroy(struct kioctx *ioctx)
* locking done by the above calls to ensure this consistency.
*/
wake_up_all(&ioctx->wait);
- put_ioctx(ioctx); /* once for the lookup */
}
/* sys_io_setup:
@@ -1336,11 +1316,9 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
ret = PTR_ERR(ioctx);
if (!IS_ERR(ioctx)) {
ret = put_user(ioctx->user_id, ctxp);
- if (!ret) {
- put_ioctx(ioctx);
- return 0;
- }
- io_destroy(ioctx);
+ if (ret)
+ io_destroy(ioctx);
+ put_ioctx(ioctx);
}
out:
@@ -1358,6 +1336,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
struct kioctx *ioctx = lookup_ioctx(ctx);
if (likely(NULL != ioctx)) {
io_destroy(ioctx);
+ put_ioctx(ioctx);
return 0;
}
pr_debug("EINVAL: io_destroy: invalid context id\n");
@@ -1467,13 +1446,17 @@ static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb, bool compat)
ret = compat_rw_copy_check_uvector(type,
(struct compat_iovec __user *)kiocb->ki_buf,
kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec,
- &kiocb->ki_iovec, 1);
+ &kiocb->ki_iovec);
else
#endif
ret = rw_copy_check_uvector(type,
(struct iovec __user *)kiocb->ki_buf,
kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec,
- &kiocb->ki_iovec, 1);
+ &kiocb->ki_iovec);
+ if (ret < 0)
+ goto out;
+
+ ret = rw_verify_area(type, kiocb->ki_filp, &kiocb->ki_pos, ret);
if (ret < 0)
goto out;
@@ -1488,11 +1471,17 @@ out:
return ret;
}
-static ssize_t aio_setup_single_vector(struct kiocb *kiocb)
+static ssize_t aio_setup_single_vector(int type, struct file * file, struct kiocb *kiocb)
{
+ int bytes;
+
+ bytes = rw_verify_area(type, file, &kiocb->ki_pos, kiocb->ki_left);
+ if (bytes < 0)
+ return bytes;
+
kiocb->ki_iovec = &kiocb->ki_inline_vec;
kiocb->ki_iovec->iov_base = kiocb->ki_buf;
- kiocb->ki_iovec->iov_len = kiocb->ki_left;
+ kiocb->ki_iovec->iov_len = bytes;
kiocb->ki_nr_segs = 1;
kiocb->ki_cur_seg = 0;
return 0;
@@ -1517,10 +1506,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
if (unlikely(!access_ok(VERIFY_WRITE, kiocb->ki_buf,
kiocb->ki_left)))
break;
- ret = security_file_permission(file, MAY_READ);
- if (unlikely(ret))
- break;
- ret = aio_setup_single_vector(kiocb);
+ ret = aio_setup_single_vector(READ, file, kiocb);
if (ret)
break;
ret = -EINVAL;
@@ -1535,10 +1521,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
if (unlikely(!access_ok(VERIFY_READ, kiocb->ki_buf,
kiocb->ki_left)))
break;
- ret = security_file_permission(file, MAY_WRITE);
- if (unlikely(ret))
- break;
- ret = aio_setup_single_vector(kiocb);
+ ret = aio_setup_single_vector(WRITE, file, kiocb);
if (ret)
break;
ret = -EINVAL;
@@ -1549,9 +1532,6 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_READ)))
break;
- ret = security_file_permission(file, MAY_READ);
- if (unlikely(ret))
- break;
ret = aio_setup_vectored_rw(READ, kiocb, compat);
if (ret)
break;
@@ -1563,9 +1543,6 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_WRITE)))
break;
- ret = security_file_permission(file, MAY_WRITE);
- if (unlikely(ret))
- break;
ret = aio_setup_vectored_rw(WRITE, kiocb, compat);
if (ret)
break;