From 2b0143b5c986be1ce8408b3aadc4709e0a94429d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 17 Mar 2015 22:25:59 +0000 Subject: VFS: normal filesystems (and lustre): d_inode() annotations that's the bulk of filesystem drivers dealing with inodes of their own Signed-off-by: David Howells Signed-off-by: Al Viro --- include/trace/events/btrfs.h | 4 ++-- include/trace/events/ext3.h | 16 ++++++++-------- include/trace/events/ext4.h | 16 ++++++++-------- 3 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 1faecea101f3..9134bba9f218 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -407,10 +407,10 @@ TRACE_EVENT(btrfs_sync_file, TP_fast_assign( struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = d_inode(dentry); __entry->ino = inode->i_ino; - __entry->parent = dentry->d_parent->d_inode->i_ino; + __entry->parent = d_inode(dentry->d_parent)->i_ino; __entry->datasync = datasync; __entry->root_objectid = BTRFS_I(inode)->root->root_key.objectid; diff --git a/include/trace/events/ext3.h b/include/trace/events/ext3.h index 6797b9de90ed..4a633258cab6 100644 --- a/include/trace/events/ext3.h +++ b/include/trace/events/ext3.h @@ -439,10 +439,10 @@ TRACE_EVENT(ext3_sync_file_enter, TP_fast_assign( struct dentry *dentry = file->f_path.dentry; - __entry->dev = dentry->d_inode->i_sb->s_dev; - __entry->ino = dentry->d_inode->i_ino; + __entry->dev = d_inode(dentry)->i_sb->s_dev; + __entry->ino = d_inode(dentry)->i_ino; __entry->datasync = datasync; - __entry->parent = dentry->d_parent->d_inode->i_ino; + __entry->parent = d_inode(dentry->d_parent)->i_ino; ), TP_printk("dev %d,%d ino %lu parent %ld datasync %d ", @@ -710,9 +710,9 @@ TRACE_EVENT(ext3_unlink_enter, TP_fast_assign( __entry->parent = parent->i_ino; - __entry->ino = dentry->d_inode->i_ino; - __entry->size = dentry->d_inode->i_size; - __entry->dev = dentry->d_inode->i_sb->s_dev; + __entry->ino = d_inode(dentry)->i_ino; + __entry->size = d_inode(dentry)->i_size; + __entry->dev = d_inode(dentry)->i_sb->s_dev; ), TP_printk("dev %d,%d ino %lu size %lld parent %ld", @@ -734,8 +734,8 @@ TRACE_EVENT(ext3_unlink_exit, ), TP_fast_assign( - __entry->ino = dentry->d_inode->i_ino; - __entry->dev = dentry->d_inode->i_sb->s_dev; + __entry->ino = d_inode(dentry)->i_ino; + __entry->dev = d_inode(dentry)->i_sb->s_dev; __entry->ret = ret; ), diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 6e5abd6d38a2..68d2a677c04c 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -872,10 +872,10 @@ TRACE_EVENT(ext4_sync_file_enter, TP_fast_assign( struct dentry *dentry = file->f_path.dentry; - __entry->dev = dentry->d_inode->i_sb->s_dev; - __entry->ino = dentry->d_inode->i_ino; + __entry->dev = d_inode(dentry)->i_sb->s_dev; + __entry->ino = d_inode(dentry)->i_ino; __entry->datasync = datasync; - __entry->parent = dentry->d_parent->d_inode->i_ino; + __entry->parent = d_inode(dentry->d_parent)->i_ino; ), TP_printk("dev %d,%d ino %lu parent %lu datasync %d ", @@ -1453,10 +1453,10 @@ TRACE_EVENT(ext4_unlink_enter, ), TP_fast_assign( - __entry->dev = dentry->d_inode->i_sb->s_dev; - __entry->ino = dentry->d_inode->i_ino; + __entry->dev = d_inode(dentry)->i_sb->s_dev; + __entry->ino = d_inode(dentry)->i_ino; __entry->parent = parent->i_ino; - __entry->size = dentry->d_inode->i_size; + __entry->size = d_inode(dentry)->i_size; ), TP_printk("dev %d,%d ino %lu size %lld parent %lu", @@ -1477,8 +1477,8 @@ TRACE_EVENT(ext4_unlink_exit, ), TP_fast_assign( - __entry->dev = dentry->d_inode->i_sb->s_dev; - __entry->ino = dentry->d_inode->i_ino; + __entry->dev = d_inode(dentry)->i_sb->s_dev; + __entry->ino = d_inode(dentry)->i_ino; __entry->ret = ret; ), -- cgit v1.2.3 From fe0f07d08ee35fb13d2cb048970072fe4f71ad14 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 15 Apr 2015 17:05:48 -0600 Subject: direct-io: only inc/dec inode->i_dio_count for file systems do_blockdev_direct_IO() increments and decrements the inode ->i_dio_count for each IO operation. It does this to protect against truncate of a file. Block devices don't need this sort of protection. For a capable multiqueue setup, this atomic int is the only shared state between applications accessing the device for O_DIRECT, and it presents a scaling wall for that. In my testing, as much as 30% of system time is spent incrementing and decrementing this value. A mixed read/write workload improved from ~2.5M IOPS to ~9.6M IOPS, with better latencies too. Before: clat percentiles (usec): | 1.00th=[ 33], 5.00th=[ 34], 10.00th=[ 34], 20.00th=[ 34], | 30.00th=[ 34], 40.00th=[ 34], 50.00th=[ 35], 60.00th=[ 35], | 70.00th=[ 35], 80.00th=[ 35], 90.00th=[ 37], 95.00th=[ 80], | 99.00th=[ 98], 99.50th=[ 151], 99.90th=[ 155], 99.95th=[ 155], | 99.99th=[ 165] After: clat percentiles (usec): | 1.00th=[ 95], 5.00th=[ 108], 10.00th=[ 129], 20.00th=[ 149], | 30.00th=[ 155], 40.00th=[ 161], 50.00th=[ 167], 60.00th=[ 171], | 70.00th=[ 177], 80.00th=[ 185], 90.00th=[ 201], 95.00th=[ 270], | 99.00th=[ 390], 99.50th=[ 398], 99.90th=[ 418], 99.95th=[ 422], | 99.99th=[ 438] In other setups, Robert Elliott reported seeing good performance improvements: https://lkml.org/lkml/2015/4/3/557 The more applications accessing the device, the worse it gets. Add a new direct-io flags, DIO_SKIP_DIO_COUNT, which tells do_blockdev_direct_IO() that it need not worry about incrementing or decrementing the inode i_dio_count for this caller. Cc: Andrew Morton Cc: Christoph Hellwig Cc: Theodore Ts'o Cc: Elliott, Robert (Server Storage) Cc: Al Viro Signed-off-by: Jens Axboe Signed-off-by: Al Viro --- include/linux/fs.h | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index b1d7db28c13c..9055eefa92c7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2635,6 +2635,9 @@ enum { /* filesystem can handle aio writes beyond i_size */ DIO_ASYNC_EXTEND = 0x04, + + /* inode/fs/bdev does not need truncate protection */ + DIO_SKIP_DIO_COUNT = 0x08, }; void dio_end_io(struct bio *bio, int error); @@ -2657,7 +2660,31 @@ static inline ssize_t blockdev_direct_IO(struct kiocb *iocb, #endif void inode_dio_wait(struct inode *inode); -void inode_dio_done(struct inode *inode); + +/* + * inode_dio_begin - signal start of a direct I/O requests + * @inode: inode the direct I/O happens on + * + * This is called once we've finished processing a direct I/O request, + * and is used to wake up callers waiting for direct I/O to be quiesced. + */ +static inline void inode_dio_begin(struct inode *inode) +{ + atomic_inc(&inode->i_dio_count); +} + +/* + * inode_dio_end - signal finish of a direct I/O requests + * @inode: inode the direct I/O happens on + * + * This is called once we've finished processing a direct I/O request, + * and is used to wake up callers waiting for direct I/O to be quiesced. + */ +static inline void inode_dio_end(struct inode *inode) +{ + if (atomic_dec_and_test(&inode->i_dio_count)) + wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); +} extern void inode_set_flags(struct inode *inode, unsigned int flags, unsigned int mask); -- cgit v1.2.3 From ac74d8d65c83d8061034d0908e1eab6a0c24f923 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 16 Apr 2015 15:04:56 -0500 Subject: fix I_DIO_WAKEUP definition I_DIO_WAKEUP is never directly used, but fix it up anyway. Signed-off-by: Eric Sandeen Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9055eefa92c7..43565607088e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1812,7 +1812,7 @@ struct super_operations { #define I_SYNC (1 << __I_SYNC) #define I_REFERENCED (1 << 8) #define __I_DIO_WAKEUP 9 -#define I_DIO_WAKEUP (1 << I_DIO_WAKEUP) +#define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) #define I_LINKABLE (1 << 10) #define I_DIRTY_TIME (1 << 11) #define __I_DIRTY_TIME_EXPIRED 12 -- cgit v1.2.3