From a564da3964db3256069190c2ae95069143ac37fb Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 22 Mar 2006 00:08:47 -0800 Subject: [PATCH] readahead: ->prev_page can overrun the ahead window If get_next_ra_size() does not grow fast enough, ->prev_page can overrun the ahead window. This means the caller will read the pages from ->ahead_start + ->ahead_size to ->prev_page synchronously. Signed-off-by: Oleg Nesterov Cc: Steven Pratt Cc: Ram Pai Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/readahead.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'mm/readahead.c') diff --git a/mm/readahead.c b/mm/readahead.c index 8d6eeaaa6296..57557e294987 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -52,13 +52,24 @@ static inline unsigned long get_min_readahead(struct file_ra_state *ra) return (VM_MIN_READAHEAD * 1024) / PAGE_CACHE_SIZE; } +static inline void reset_ahead_window(struct file_ra_state *ra) +{ + /* + * ... but preserve ahead_start + ahead_size value, + * see 'recheck:' label in page_cache_readahead(). + * Note: We never use ->ahead_size as rvalue without + * checking ->ahead_start != 0 first. + */ + ra->ahead_size += ra->ahead_start; + ra->ahead_start = 0; +} + static inline void ra_off(struct file_ra_state *ra) { ra->start = 0; ra->flags = 0; ra->size = 0; - ra->ahead_start = 0; - ra->ahead_size = 0; + reset_ahead_window(ra); return; } @@ -426,8 +437,7 @@ static int make_ahead_window(struct address_space *mapping, struct file *filp, * congestion. The ahead window will any way be closed * in case we failed due to excessive page cache hits. 
 */ - ra->ahead_start = 0; - ra->ahead_size = 0; + reset_ahead_window(ra); } return ret; @@ -520,11 +530,11 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, * If we get here we are doing sequential IO and this was not the first * occurence (ie we have an existing window) */ - if (ra->ahead_start == 0) { /* no ahead window yet */ if (!make_ahead_window(mapping, filp, ra, 0)) - goto out; + goto recheck; } + /* * Already have an ahead window, check if we crossed into it. * If so, shift windows and issue a new ahead window. @@ -536,6 +546,10 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, ra->start = ra->ahead_start; ra->size = ra->ahead_size; make_ahead_window(mapping, filp, ra, 0); +recheck: + /* prev_page shouldn't overrun the ahead window */ + ra->prev_page = min(ra->prev_page, + ra->ahead_start + ra->ahead_size - 1); } out: -- cgit v1.2.3 From aed75ff3caafce404d9be7f0c088716375be5279 Mon Sep 17 00:00:00 2001 From: Steven Pratt Date: Wed, 22 Mar 2006 00:08:48 -0800 Subject: [PATCH] readahead: fix initial window size calculation The current get_init_ra_size is not optimal across different IO sizes and max_readahead values. Here is a quick summary of sizes computed under current design and under the attached patch. All of these assume 1st IO at offset 0, or 1st detected sequential IO. 
32k max, 4k request old new ----------------- 8k 8k 16k 16k 32k 32k 128k max, 4k request old new ----------------- 32k 16k 64k 32k 128k 64k 128k 128k 128k max, 32k request old new ----------------- 32k 64k <----- 64k 128k 128k 128k 512k max, 4k request old new ----------------- 4k 32k <---- 16k 64k 64k 128k 128k 256k 512k 512k Cc: Oleg Nesterov Cc: Steven Pratt Cc: Ram Pai Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/readahead.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mm/readahead.c') diff --git a/mm/readahead.c b/mm/readahead.c index 57557e294987..301b36c4a0ce 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -83,10 +83,10 @@ static unsigned long get_init_ra_size(unsigned long size, unsigned long max) { unsigned long newsize = roundup_pow_of_two(size); - if (newsize <= max / 64) - newsize = newsize * newsize; + if (newsize <= max / 32) + newsize = newsize * 4; else if (newsize <= max / 4) - newsize = max / 4; + newsize = newsize * 2; else newsize = max; return newsize; -- cgit v1.2.3 From d8733c2956968a01394a4d2a9e97a8b431a78776 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 23 Mar 2006 03:00:11 -0800 Subject: [PATCH] ext3_readdir: use generic readahead Linus points out that ext3_readdir's readahead only cuts in when ext3_readdir() is operating at the very start of the directory. So for large directories we end up performing no readahead at all and we suck. So take it all out and use the core VM's page_cache_readahead(). This means that ext3 directory reads will use all of readahead's dynamic sizing goop. Note that we're using the directory's filp->f_ra to hold the readahead state, but readahead is actually being performed against the underlying blockdev's address_space. Fortunately the readahead code is all set up to handle this. Tested with printk. It works. I was struggling to find a real workload which actually cared. 
(The patch also exports page_cache_readahead() to GPL modules) Cc: "Stephen C. Tweedie" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/readahead.c | 1 + 1 file changed, 1 insertion(+) (limited to 'mm/readahead.c') diff --git a/mm/readahead.c b/mm/readahead.c index 301b36c4a0ce..0f142a40984b 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -555,6 +555,7 @@ recheck: out: return ra->prev_page + 1; } +EXPORT_SYMBOL_GPL(page_cache_readahead); /* * handle_ra_miss() is called when it is known that a page which should have -- cgit v1.2.3