diff options
Diffstat (limited to 'fs/aio.c')
-rw-r--r-- | fs/aio.c | 115 |
1 files changed, 94 insertions, 21 deletions
@@ -36,10 +36,10 @@ #include <linux/eventfd.h> #include <linux/blkdev.h> #include <linux/compat.h> -#include <linux/anon_inodes.h> #include <linux/migrate.h> #include <linux/ramfs.h> #include <linux/percpu-refcount.h> +#include <linux/mount.h> #include <asm/kmap_types.h> #include <asm/uaccess.h> @@ -152,12 +152,67 @@ unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio request static struct kmem_cache *kiocb_cachep; static struct kmem_cache *kioctx_cachep; +static struct vfsmount *aio_mnt; + +static const struct file_operations aio_ring_fops; +static const struct address_space_operations aio_ctx_aops; + +static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) +{ + struct qstr this = QSTR_INIT("[aio]", 5); + struct file *file; + struct path path; + struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb); + if (IS_ERR(inode)) + return ERR_CAST(inode); + + inode->i_mapping->a_ops = &aio_ctx_aops; + inode->i_mapping->private_data = ctx; + inode->i_size = PAGE_SIZE * nr_pages; + + path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this); + if (!path.dentry) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + path.mnt = mntget(aio_mnt); + + d_instantiate(path.dentry, inode); + file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops); + if (IS_ERR(file)) { + path_put(&path); + return file; + } + + file->f_flags = O_RDWR; + file->private_data = ctx; + return file; +} + +static struct dentry *aio_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + static const struct dentry_operations ops = { + .d_dname = simple_dname, + }; + return mount_pseudo(fs_type, "aio:", NULL, &ops, 0xa10a10a1); +} + /* aio_setup * Creates the slab caches used by the aio routines, panic on * failure as this is done early during the boot sequence. */ static int __init aio_setup(void) { + static struct file_system_type aio_fs = { + .name = "aio", + .mount = aio_mount, + .kill_sb = kill_anon_super, + }; + aio_mnt = kern_mount(&aio_fs); + if (IS_ERR(aio_mnt)) + panic("Failed to create aio fs mount."); + kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); @@ -167,10 +222,25 @@ static int __init aio_setup(void) } __initcall(aio_setup); +static void put_aio_ring_file(struct kioctx *ctx) +{ + struct file *aio_ring_file = ctx->aio_ring_file; + if (aio_ring_file) { + truncate_setsize(aio_ring_file->f_inode, 0); + + /* Prevent further access to the kioctx from migratepages */ + spin_lock(&aio_ring_file->f_inode->i_mapping->private_lock); + aio_ring_file->f_inode->i_mapping->private_data = NULL; + ctx->aio_ring_file = NULL; + spin_unlock(&aio_ring_file->f_inode->i_mapping->private_lock); + + fput(aio_ring_file); + } +} + static void aio_free_ring(struct kioctx *ctx) { int i; - struct file *aio_ring_file = ctx->aio_ring_file; for (i = 0; i < ctx->nr_pages; i++) { pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i, @@ -178,14 +248,10 @@ static void aio_free_ring(struct kioctx *ctx) put_page(ctx->ring_pages[i]); } + put_aio_ring_file(ctx); + if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) kfree(ctx->ring_pages); - - if (aio_ring_file) { - truncate_setsize(aio_ring_file->f_inode, 0); - fput(aio_ring_file); - ctx->aio_ring_file = NULL; - } } static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) @@ -207,9 +273,8 @@ static int aio_set_page_dirty(struct page *page) static int aio_migratepage(struct address_space *mapping, struct page *new, struct page *old, enum migrate_mode mode) { - struct kioctx *ctx = mapping->private_data; + struct kioctx *ctx; unsigned long flags; - unsigned idx = old->index; int rc; /* Writeback must be complete */ @@ -224,10 +289,23 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, get_page(new); - spin_lock_irqsave(&ctx->completion_lock, flags); - migrate_page_copy(new, old); - ctx->ring_pages[idx] = new; - spin_unlock_irqrestore(&ctx->completion_lock, flags); + /* We can potentially race against kioctx teardown here. Use the + * address_space's private data lock to protect the mapping's + * private_data. + */ + spin_lock(&mapping->private_lock); + ctx = mapping->private_data; + if (ctx) { + pgoff_t idx; + spin_lock_irqsave(&ctx->completion_lock, flags); + migrate_page_copy(new, old); + idx = old->index; + if (idx < (pgoff_t)ctx->nr_pages) + ctx->ring_pages[idx] = new; + spin_unlock_irqrestore(&ctx->completion_lock, flags); + } else + rc = -EBUSY; + spin_unlock(&mapping->private_lock); return rc; } @@ -260,16 +338,12 @@ static int aio_setup_ring(struct kioctx *ctx) if (nr_pages < 0) return -EINVAL; - file = anon_inode_getfile_private("[aio]", &aio_ring_fops, ctx, O_RDWR); + file = aio_private_file(ctx, nr_pages); if (IS_ERR(file)) { ctx->aio_ring_file = NULL; return -EAGAIN; } - file->f_inode->i_mapping->a_ops = &aio_ctx_aops; - file->f_inode->i_mapping->private_data = ctx; - file->f_inode->i_size = PAGE_SIZE * (loff_t)nr_pages; - for (i = 0; i < nr_pages; i++) { struct page *page; page = find_or_create_page(file->f_inode->i_mapping, @@ -617,8 +691,7 @@ out_freepcpu: out_freeref: free_percpu(ctx->users.pcpu_count); out_freectx: - if (ctx->aio_ring_file) - fput(ctx->aio_ring_file); + put_aio_ring_file(ctx); kmem_cache_free(kioctx_cachep, ctx); pr_debug("error allocating ioctx %d\n", err); return ERR_PTR(err); |