From 55708698c5f153f4e390175cdfc395333b2eafbd Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Tue, 16 Jul 2013 17:56:12 +0800 Subject: fs/anon_inode: Introduce a new lib function anon_inode_getfile_private() Introduce a new lib function anon_inode_getfile_private(), it creates a new file instance by hooking it up to an anonymous inode, and a dentry that describe the "class" of the file, similar to anon_inode_getfile(), but each file holds a single inode. Furthermore, anyone who wants to create a private anon file will benefit from this change. Signed-off-by: Gu Zheng Signed-off-by: Benjamin LaHaise --- include/linux/anon_inodes.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h index 8013a45242fe..cf573c22b81e 100644 --- a/include/linux/anon_inodes.h +++ b/include/linux/anon_inodes.h @@ -13,6 +13,9 @@ struct file_operations; struct file *anon_inode_getfile(const char *name, const struct file_operations *fops, void *priv, int flags); +struct file *anon_inode_getfile_private(const char *name, + const struct file_operations *fops, + void *priv, int flags); int anon_inode_getfd(const char *name, const struct file_operations *fops, void *priv, int flags); -- cgit v1.2.3 From 36bc08cc01709b4a9bb563b35aa530241ddc63e3 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Tue, 16 Jul 2013 17:56:16 +0800 Subject: fs/aio: Add support to aio ring pages migration As the aio job will pin the ring pages, that will lead to mem migrated failed. In order to fix this problem we use an anon inode to manage the aio ring pages, and setup the migratepage callback in the anon inode's address space, so that when mem migrating the aio ring pages will be moved to other mem node safely. Signed-off-by: Gu Zheng Signed-off-by: Benjamin LaHaise --- include/linux/migrate.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index a405d3dc0f61..c407d88f5979 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -55,6 +55,9 @@ extern int migrate_vmas(struct mm_struct *mm, extern void migrate_page_copy(struct page *newpage, struct page *page); extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); +extern int migrate_page_move_mapping(struct address_space *mapping, + struct page *newpage, struct page *page, + struct buffer_head *head, enum migrate_mode mode); #else static inline void putback_lru_pages(struct list_head *l) {} -- cgit v1.2.3 From bec68faaf3ba74ed0dcd5dc3a881b30aec542973 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 13 May 2013 14:45:08 -0700 Subject: aio: io_cancel() no longer returns the io_event Originally, io_event() was documented to return the io_event if cancellation succeeded - the io_event wouldn't be delivered via the ring buffer like it normally would. But this isn't what the implementation was actually doing; the only driver implementing cancellation, the usb gadget code, never returned an io_event in its cancel function. And aio_complete() was recently changed to no longer suppress event delivery if the kiocb had been cancelled. This gets rid of the unused io_event argument to kiocb_cancel() and kiocb->ki_cancel(), and changes io_cancel() to return -EINPROGRESS if kiocb->ki_cancel() returned success. Also tweak the refcounting in kiocb_cancel() to make more sense. Signed-off-by: Kent Overstreet Cc: Zach Brown Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: Mark Fasheh Cc: Joel Becker Cc: Rusty Russell Cc: Jens Axboe Cc: Asai Thambi S P Cc: Selvan Mani Cc: Sam Bradshaw Cc: Jeff Moyer Cc: Al Viro Cc: Benjamin LaHaise Signed-off-by: Benjamin LaHaise --- include/linux/aio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/aio.h b/include/linux/aio.h index 1bdf965339f9..8c8dd1d84d2e 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -27,7 +27,7 @@ struct kiocb; */ #define KIOCB_CANCELLED ((void *) (~0ULL)) -typedef int (kiocb_cancel_fn)(struct kiocb *, struct io_event *); +typedef int (kiocb_cancel_fn)(struct kiocb *); struct kiocb { atomic_t ki_users; -- cgit v1.2.3 From 73a7075e3f6ec63dc359064eea6fd84f406cf2a5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 9 May 2013 15:03:42 -0700 Subject: aio: Kill aio_rw_vect_retry() This code doesn't serve any purpose anymore, since the aio retry infrastructure has been removed. This change should be safe because aio_read/write are also used for synchronous IO, and called from do_sync_read()/do_sync_write() - and there's no looping done in the sync case (the read and write syscalls). Signed-off-by: Kent Overstreet Cc: Zach Brown Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: Mark Fasheh Cc: Joel Becker Cc: Rusty Russell Cc: Jens Axboe Cc: Asai Thambi S P Cc: Selvan Mani Cc: Sam Bradshaw Cc: Jeff Moyer Cc: Al Viro Cc: Benjamin LaHaise Signed-off-by: Benjamin LaHaise --- include/linux/aio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/aio.h b/include/linux/aio.h index 8c8dd1d84d2e..7bb766e73968 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -50,11 +50,9 @@ struct kiocb { unsigned short ki_opcode; size_t ki_nbytes; /* copy of iocb->aio_nbytes */ char __user *ki_buf; /* remaining iocb->aio_buf */ - size_t ki_left; /* remaining bytes */ struct iovec ki_inline_vec; /* inline vector */ struct iovec *ki_iovec; unsigned long ki_nr_segs; - unsigned long ki_cur_seg; struct list_head ki_list; /* the aio core uses this * for cancellation */ -- cgit v1.2.3 From 8bc92afcf7f5c598001dd04e62d88f57f6e89e51 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 25 Feb 2013 16:36:27 -0800 Subject: aio: Kill unneeded kiocb members The old aio retry infrastucture needed to save the various arguments to to aio operations. But with the retry infrastructure gone, we can trim struct kiocb quite a bit. Signed-off-by: Kent Overstreet Cc: Zach Brown Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: Mark Fasheh Cc: Joel Becker Cc: Rusty Russell Cc: Jens Axboe Cc: Asai Thambi S P Cc: Selvan Mani Cc: Sam Bradshaw Cc: Jeff Moyer Cc: Al Viro Cc: Benjamin LaHaise Cc: Theodore Ts'o Signed-off-by: Benjamin LaHaise --- include/linux/aio.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/aio.h b/include/linux/aio.h index 7bb766e73968..b570472355d1 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -36,6 +36,7 @@ struct kiocb { struct kioctx *ki_ctx; /* NULL for sync ops */ kiocb_cancel_fn *ki_cancel; void (*ki_dtor)(struct kiocb *); + void *private; union { void __user *user; @@ -44,15 +45,7 @@ struct kiocb { __u64 ki_user_data; /* user's data for completion */ loff_t ki_pos; - - void *private; - /* State that we remember to be able to restart/retry */ - unsigned short ki_opcode; - size_t ki_nbytes; /* copy of iocb->aio_nbytes */ - char __user *ki_buf; /* remaining iocb->aio_buf */ - struct iovec ki_inline_vec; /* inline vector */ - struct iovec *ki_iovec; - unsigned long ki_nr_segs; + size_t ki_nbytes; /* copy of iocb->aio_nbytes */ struct list_head ki_list; /* the aio core uses this * for cancellation */ -- cgit v1.2.3 From 57282d8fd744072d6d6f18fa6ebe3cc1149015bf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 13 May 2013 13:42:52 -0700 Subject: aio: Kill ki_users The kiocb refcount is only needed for cancellation - to ensure a kiocb isn't freed while a ki_cancel callback is running. But if we restrict ki_cancel callbacks to not block (which they currently don't), we can simply drop the refcount. Signed-off-by: Kent Overstreet Cc: Zach Brown Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: Mark Fasheh Cc: Joel Becker Cc: Rusty Russell Cc: Jens Axboe Cc: Asai Thambi S P Cc: Selvan Mani Cc: Sam Bradshaw Cc: Jeff Moyer Cc: Al Viro Cc: Benjamin LaHaise Cc: Theodore Ts'o Signed-off-by: Benjamin LaHaise --- include/linux/aio.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/aio.h b/include/linux/aio.h index b570472355d1..c4f07ffa1cbb 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -30,8 +30,6 @@ struct kiocb; typedef int (kiocb_cancel_fn)(struct kiocb *); struct kiocb { - atomic_t ki_users; - struct file *ki_filp; struct kioctx *ki_ctx; /* NULL for sync ops */ kiocb_cancel_fn *ki_cancel; @@ -65,7 +63,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb) static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) { *kiocb = (struct kiocb) { - .ki_users = ATOMIC_INIT(1), .ki_ctx = NULL, .ki_filp = filp, .ki_obj.tsk = current, @@ -75,7 +72,6 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) /* prototypes */ #ifdef CONFIG_AIO extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb); -extern void aio_put_req(struct kiocb *iocb); extern void aio_complete(struct kiocb *iocb, long res, long res2); struct mm_struct; extern void exit_aio(struct mm_struct *mm); @@ -84,7 +80,6 @@ extern long do_io_submit(aio_context_t ctx_id, long nr, void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel); #else static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; } -static inline void aio_put_req(struct kiocb *iocb) { } static inline void aio_complete(struct kiocb *iocb, long res, long res2) { } struct mm_struct; static inline void exit_aio(struct mm_struct *mm) { } -- cgit v1.2.3 From d29c445b635b3a03cf683cafcbae58a4ec1e1125 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 18 Mar 2013 11:09:26 -0700 Subject: aio: Kill ki_dtor sock_aio_dtor() is dead code - and stuff that does need to do cleanup can simply do it before calling aio_complete(). Signed-off-by: Kent Overstreet Cc: Zach Brown Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: Mark Fasheh Cc: Joel Becker Cc: Rusty Russell Cc: Jens Axboe Cc: Asai Thambi S P Cc: Selvan Mani Cc: Sam Bradshaw Cc: Jeff Moyer Cc: Al Viro Cc: Benjamin LaHaise Cc: Theodore Ts'o Signed-off-by: Benjamin LaHaise --- include/linux/aio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/aio.h b/include/linux/aio.h index c4f07ffa1cbb..d9c92daa3944 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -33,7 +33,6 @@ struct kiocb { struct file *ki_filp; struct kioctx *ki_ctx; /* NULL for sync ops */ kiocb_cancel_fn *ki_cancel; - void (*ki_dtor)(struct kiocb *); void *private; union { -- cgit v1.2.3 From db446a08c23d5475e6b08c87acca79ebb20f283c Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Tue, 30 Jul 2013 12:54:40 -0400 Subject: aio: convert the ioctx list to table lookup v3 On Wed, Jun 12, 2013 at 11:14:40AM -0700, Kent Overstreet wrote: > On Mon, Apr 15, 2013 at 02:40:55PM +0300, Octavian Purdila wrote: > > When using a large number of threads performing AIO operations the > > IOCTX list may get a significant number of entries which will cause > > significant overhead. For example, when running this fio script: > > > > rw=randrw; size=256k ;directory=/mnt/fio; ioengine=libaio; iodepth=1 > > blocksize=1024; numjobs=512; thread; loops=100 > > > > on an EXT2 filesystem mounted on top of a ramdisk we can observe up to > > 30% CPU time spent by lookup_ioctx: > > > > 32.51% [guest.kernel] [g] lookup_ioctx > > 9.19% [guest.kernel] [g] __lock_acquire.isra.28 > > 4.40% [guest.kernel] [g] lock_release > > 4.19% [guest.kernel] [g] sched_clock_local > > 3.86% [guest.kernel] [g] local_clock > > 3.68% [guest.kernel] [g] native_sched_clock > > 3.08% [guest.kernel] [g] sched_clock_cpu > > 2.64% [guest.kernel] [g] lock_release_holdtime.part.11 > > 2.60% [guest.kernel] [g] memcpy > > 2.33% [guest.kernel] [g] lock_acquired > > 2.25% [guest.kernel] [g] lock_acquire > > 1.84% [guest.kernel] [g] do_io_submit > > > > This patchs converts the ioctx list to a radix tree. For a performance > > comparison the above FIO script was run on a 2 sockets 8 core > > machine. This are the results (average and %rsd of 10 runs) for the > > original list based implementation and for the radix tree based > > implementation: > > > > cores 1 2 4 8 16 32 > > list 109376 ms 69119 ms 35682 ms 22671 ms 19724 ms 16408 ms > > %rsd 0.69% 1.15% 1.17% 1.21% 1.71% 1.43% > > radix 73651 ms 41748 ms 23028 ms 16766 ms 15232 ms 13787 ms > > %rsd 1.19% 0.98% 0.69% 1.13% 0.72% 0.75% > > % of radix > > relative 66.12% 65.59% 66.63% 72.31% 77.26% 83.66% > > to list > > > > To consider the impact of the patch on the typical case of having > > only one ctx per process the following FIO script was run: > > > > rw=randrw; size=100m ;directory=/mnt/fio; ioengine=libaio; iodepth=1 > > blocksize=1024; numjobs=1; thread; loops=100 > > > > on the same system and the results are the following: > > > > list 58892 ms > > %rsd 0.91% > > radix 59404 ms > > %rsd 0.81% > > % of radix > > relative 100.87% > > to list > > So, I was just doing some benchmarking/profiling to get ready to send > out the aio patches I've got for 3.11 - and it looks like your patch is > causing a ~1.5% throughput regression in my testing :/ ... I've got an alternate approach for fixing this wart in lookup_ioctx()... Instead of using an rbtree, just use the reserved id in the ring buffer header to index an array pointing the ioctx. It's not finished yet, and it needs to be tidied up, but is most of the way there. -ben -- "Thought is the essence of where you are now." -- kmo> And, a rework of Ben's code, but this was entirely his idea kmo> -Kent bcrl> And fix the code to use the right mm_struct in kill_ioctx(), actually free memory. Signed-off-by: Benjamin LaHaise --- include/linux/mm_types.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index fb425aa16c01..da8cf5cc1aa6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -322,6 +322,7 @@ struct mm_rss_stat { atomic_long_t count[NR_MM_COUNTERS]; }; +struct kioctx_table; struct mm_struct { struct vm_area_struct * mmap; /* list of VMAs */ struct rb_root mm_rb; @@ -382,8 +383,8 @@ struct mm_struct { struct core_state *core_state; /* coredumping support */ #ifdef CONFIG_AIO - spinlock_t ioctx_lock; - struct hlist_head ioctx_list; + spinlock_t ioctx_lock; + struct kioctx_table __rcu *ioctx_table; #endif #ifdef CONFIG_MM_OWNER /* -- cgit v1.2.3