diff options
author | Andi Kleen <ak@linux.intel.com> | 2011-08-01 21:38:08 -0700 |
---|---|---|
committer | root <root@serles.lst.de> | 2011-10-28 14:58:58 +0200 |
commit | ba253fbf6d3502c54e1ac8792e7ac8290a1f5b8d (patch) | |
tree | 08bfd191e714b53ed44ec185f890cc5113e6dafd /fs/direct-io.c | |
parent | 18772641dbe2c89c6122c603f81f6a9574aee556 (diff) | |
download | linux-ba253fbf6d3502c54e1ac8792e7ac8290a1f5b8d.tar.gz linux-ba253fbf6d3502c54e1ac8792e7ac8290a1f5b8d.tar.bz2 linux-ba253fbf6d3502c54e1ac8792e7ac8290a1f5b8d.zip |
direct-io: inline the complete submission path
Add inlines to all the submission path functions. While this increases
code size, it also gives gcc a lot of optimization opportunities
in this critical hot path.
In particular -- together with some other changes -- this
allows gcc to get rid of the unnecessary clearing of
sdio at the beginning and optimize the messy parameter passing.
Failing to inline any function which takes an sdio parameter
would break this optimization, because it cannot be done if the
address of the structure is taken.
Note that benefits are only seen with CONFIG_OPTIMIZE_INLINING
and CONFIG_CC_OPTIMIZE_FOR_SIZE both set to off.
This gives about 2.2% improvement on a large database benchmark
with a high IOPS rate.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'fs/direct-io.c')
-rw-r--r-- | fs/direct-io.c | 36 |
1 files changed, 21 insertions, 15 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c index edf3174afd6a..6d425821be66 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -199,7 +199,7 @@ static inline unsigned dio_pages_present(struct dio_submit *sdio) /* * Go grab and pin some userspace pages. Typically we'll get 64 at a time. */ -static int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) +static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) { int ret; int nr_pages; @@ -245,7 +245,8 @@ out: * decent number of pages, less frequently. To provide nicer use of the * L1 cache. */ -static struct page *dio_get_page(struct dio *dio, struct dio_submit *sdio) +static inline struct page *dio_get_page(struct dio *dio, + struct dio_submit *sdio) { if (dio_pages_present(sdio) == 0) { int ret; @@ -376,7 +377,7 @@ void dio_end_io(struct bio *bio, int error) } EXPORT_SYMBOL_GPL(dio_end_io); -static void +static inline void dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, struct block_device *bdev, sector_t first_sector, int nr_vecs) @@ -407,7 +408,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, * * bios hold a dio reference between submit_bio and ->end_io. */ -static void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) +static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) { struct bio *bio = sdio->bio; unsigned long flags; @@ -435,7 +436,7 @@ static void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) /* * Release any resources in case of a failure */ -static void dio_cleanup(struct dio *dio, struct dio_submit *sdio) +static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio) { while (dio_pages_present(sdio)) page_cache_release(dio_get_page(dio, sdio)); @@ -528,7 +529,7 @@ static void dio_await_completion(struct dio *dio) * * This also helps to limit the peak amount of pinned userspace memory. 
*/ -static int dio_bio_reap(struct dio *dio, struct dio_submit *sdio) +static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio) { int ret = 0; @@ -631,8 +632,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, /* * There is no bio. Make one now. */ -static int dio_new_bio(struct dio *dio, struct dio_submit *sdio, - sector_t start_sector, struct buffer_head *map_bh) +static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio, + sector_t start_sector, struct buffer_head *map_bh) { sector_t sector; int ret, nr_pages; @@ -657,7 +658,7 @@ out: * * Return zero on success. Non-zero means the caller needs to start a new BIO. */ -static int dio_bio_add_page(struct dio_submit *sdio) +static inline int dio_bio_add_page(struct dio_submit *sdio) { int ret; @@ -689,8 +690,8 @@ static int dio_bio_add_page(struct dio_submit *sdio) * The caller of this function is responsible for removing cur_page from the * dio, and for dropping the refcount which came from that presence. */ -static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio, - struct buffer_head *map_bh) +static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio, + struct buffer_head *map_bh) { int ret = 0; @@ -759,7 +760,7 @@ out: * If that doesn't work out then we put the old page into the bio and add this * page to the dio instead. */ -static int +static inline int submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, unsigned offset, unsigned len, sector_t blocknr, struct buffer_head *map_bh) @@ -842,8 +843,8 @@ static void clean_blockdev_aliases(struct dio *dio, struct buffer_head *map_bh) * `end' is zero if we're doing the start of the IO, 1 at the end of the * IO. 
*/ -static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end, - struct buffer_head *map_bh) +static inline void dio_zero_block(struct dio *dio, struct dio_submit *sdio, + int end, struct buffer_head *map_bh) { unsigned dio_blocks_per_fs_block; unsigned this_chunk_blocks; /* In dio_blocks */ @@ -1042,7 +1043,7 @@ out: return ret; } -static ssize_t +static inline ssize_t direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, const struct iovec *iov, loff_t offset, unsigned long nr_segs, unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, @@ -1216,6 +1217,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, * expected that filesystem provide exclusion between new direct I/O * and truncates. For DIO_LOCKING filesystems this is done by i_mutex, * but other filesystems need to take care of this on their own. + * + * NOTE: if you pass "sdio" to anything by pointer make sure that function + * is always inlined. Otherwise gcc is unable to split the structure into + * individual fields and will generate much worse code. This is important + * for the whole file. */ ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, |