From a22b169df1b9f259391cf3b8ad8bfeea3d7be3f1 Mon Sep 17 00:00:00 2001 From: Vasily Tarasov Date: Wed, 11 Oct 2006 09:24:27 +0200 Subject: [PATCH] block layer: elevator_find function cleanup We can easily search through the elevator list without introducing an additional elevator_type variable. Signed-off-by: Vasily Tarasov Signed-off-by: Jens Axboe --- block/elevator.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'block') diff --git a/block/elevator.c b/block/elevator.c index 487dd3da8853..d8030a84773a 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -93,21 +93,18 @@ static inline int elv_try_merge(struct request *__rq, struct bio *bio) static struct elevator_type *elevator_find(const char *name) { - struct elevator_type *e = NULL; + struct elevator_type *e; struct list_head *entry; list_for_each(entry, &elv_list) { - struct elevator_type *__e; - __e = list_entry(entry, struct elevator_type, list); + e = list_entry(entry, struct elevator_type, list); - if (!strcmp(__e->elevator_name, name)) { - e = __e; - break; - } + if (!strcmp(e->elevator_name, name)) + return e; } - return e; + return NULL; } static void elevator_put(struct elevator_type *e) -- cgit v1.2.3 From c5841642242e9ae817275e09b36b298456dc17d2 Mon Sep 17 00:00:00 2001 From: Vasily Tarasov Date: Wed, 11 Oct 2006 13:26:30 +0200 Subject: [PATCH] block layer: elv_iosched_show should get elv_list_lock elv_iosched_show function iterates over elv_list, hence elv_list_lock should be taken.
Signed-off-by: Vasily Tarasov Signed-off-by: Vasily Tarasov --- block/elevator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/elevator.c b/block/elevator.c index d8030a84773a..8ccd163254b8 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -1085,7 +1085,7 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name) struct list_head *entry; int len = 0; - spin_lock_irq(q->queue_lock); + spin_lock_irq(&elv_list_lock); list_for_each(entry, &elv_list) { struct elevator_type *__e; @@ -1095,7 +1095,7 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name) else len += sprintf(name+len, "%s ", __e->elevator_name); } - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&elv_list_lock); len += sprintf(len+name, "\n"); return len; -- cgit v1.2.3 From 79e2de4bc53d7ca2a8eedee49e4a92479b4b530e Mon Sep 17 00:00:00 2001 From: Thomas Maier Date: Thu, 19 Oct 2006 23:28:15 -0700 Subject: [PATCH] export clear_queue_congested and set_queue_congested Export the clear_queue_congested() and set_queue_congested() functions located in ll_rw_blk.c The functions are renamed to blk_clear_queue_congested() and blk_set_queue_congested(). (needed in the pktcdvd driver's bio write congestion control) Signed-off-by: Thomas Maier Cc: Peter Osterlund Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/ll_rw_blk.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'block') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index c847e17e5caa..132a858ce2c5 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -117,7 +117,7 @@ static void blk_queue_congestion_threshold(struct request_queue *q) * congested queues, and wake up anyone who was waiting for requests to be * put back. 
*/ -static void clear_queue_congested(request_queue_t *q, int rw) +void blk_clear_queue_congested(request_queue_t *q, int rw) { enum bdi_state bit; wait_queue_head_t *wqh = &congestion_wqh[rw]; @@ -128,18 +128,20 @@ static void clear_queue_congested(request_queue_t *q, int rw) if (waitqueue_active(wqh)) wake_up(wqh); } +EXPORT_SYMBOL(blk_clear_queue_congested); /* * A queue has just entered congestion. Flag that in the queue's VM-visible * state flags and increment the global gounter of congested queues. */ -static void set_queue_congested(request_queue_t *q, int rw) +void blk_set_queue_congested(request_queue_t *q, int rw) { enum bdi_state bit; bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; set_bit(bit, &q->backing_dev_info.state); } +EXPORT_SYMBOL(blk_set_queue_congested); /** * blk_get_backing_dev_info - get the address of a queue's backing_dev_info @@ -159,7 +161,6 @@ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) ret = &q->backing_dev_info; return ret; } - EXPORT_SYMBOL(blk_get_backing_dev_info); void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data) @@ -167,7 +168,6 @@ void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data) q->activity_fn = fn; q->activity_data = data; } - EXPORT_SYMBOL(blk_queue_activity_fn); /** @@ -2067,7 +2067,7 @@ static void __freed_request(request_queue_t *q, int rw) struct request_list *rl = &q->rq; if (rl->count[rw] < queue_congestion_off_threshold(q)) - clear_queue_congested(q, rw); + blk_clear_queue_congested(q, rw); if (rl->count[rw] + 1 <= q->nr_requests) { if (waitqueue_active(&rl->wait[rw])) @@ -2137,7 +2137,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, } } } - set_queue_congested(q, rw); + blk_set_queue_congested(q, rw); } /* @@ -3765,14 +3765,14 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) blk_queue_congestion_threshold(q); if (rl->count[READ] >= 
queue_congestion_on_threshold(q)) - set_queue_congested(q, READ); + blk_set_queue_congested(q, READ); else if (rl->count[READ] < queue_congestion_off_threshold(q)) - clear_queue_congested(q, READ); + blk_clear_queue_congested(q, READ); if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) - set_queue_congested(q, WRITE); + blk_set_queue_congested(q, WRITE); else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) - clear_queue_congested(q, WRITE); + blk_clear_queue_congested(q, WRITE); if (rl->count[READ] >= q->nr_requests) { blk_set_queue_full(q, READ); -- cgit v1.2.3 From 3fcfab16c5b86eaa3db3a9a31adba550c5b67141 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 19 Oct 2006 23:28:16 -0700 Subject: [PATCH] separate bdi congestion functions from queue congestion functions Separate out the concept of "queue congestion" from "backing-dev congestion". Congestion is a backing-dev concept, not a queue concept. The blk_* congestion functions are retained, as wrappers around the core backing-dev congestion functions. This proper layering is needed so that NFS can cleanly use the congestion functions, and so that CONFIG_BLOCK=n actually links. 
Cc: "Thomas Maier" Cc: "Jens Axboe" Cc: Trond Myklebust Cc: David Howells Cc: Peter Osterlund Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/ll_rw_blk.c | 71 ------------------------------------------------------- 1 file changed, 71 deletions(-) (limited to 'block') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 132a858ce2c5..136066583c68 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -56,11 +56,6 @@ static kmem_cache_t *requestq_cachep; */ static kmem_cache_t *iocontext_cachep; -static wait_queue_head_t congestion_wqh[2] = { - __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), - __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) - }; - /* * Controlling structure to kblockd */ @@ -112,37 +107,6 @@ static void blk_queue_congestion_threshold(struct request_queue *q) q->nr_congestion_off = nr; } -/* - * A queue has just exitted congestion. Note this in the global counter of - * congested queues, and wake up anyone who was waiting for requests to be - * put back. - */ -void blk_clear_queue_congested(request_queue_t *q, int rw) -{ - enum bdi_state bit; - wait_queue_head_t *wqh = &congestion_wqh[rw]; - - bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; - clear_bit(bit, &q->backing_dev_info.state); - smp_mb__after_clear_bit(); - if (waitqueue_active(wqh)) - wake_up(wqh); -} -EXPORT_SYMBOL(blk_clear_queue_congested); - -/* - * A queue has just entered congestion. Flag that in the queue's VM-visible - * state flags and increment the global gounter of congested queues. - */ -void blk_set_queue_congested(request_queue_t *q, int rw) -{ - enum bdi_state bit; - - bit = (rw == WRITE) ? 
BDI_write_congested : BDI_read_congested; - set_bit(bit, &q->backing_dev_info.state); -} -EXPORT_SYMBOL(blk_set_queue_congested); - /** * blk_get_backing_dev_info - get the address of a queue's backing_dev_info * @bdev: device @@ -2755,41 +2719,6 @@ void blk_end_sync_rq(struct request *rq, int error) } EXPORT_SYMBOL(blk_end_sync_rq); -/** - * blk_congestion_wait - wait for a queue to become uncongested - * @rw: READ or WRITE - * @timeout: timeout in jiffies - * - * Waits for up to @timeout jiffies for a queue (any queue) to exit congestion. - * If no queues are congested then just wait for the next request to be - * returned. - */ -long blk_congestion_wait(int rw, long timeout) -{ - long ret; - DEFINE_WAIT(wait); - wait_queue_head_t *wqh = &congestion_wqh[rw]; - - prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); - ret = io_schedule_timeout(timeout); - finish_wait(wqh, &wait); - return ret; -} - -EXPORT_SYMBOL(blk_congestion_wait); - -/** - * blk_congestion_end - wake up sleepers on a congestion queue - * @rw: READ or WRITE - */ -void blk_congestion_end(int rw) -{ - wait_queue_head_t *wqh = &congestion_wqh[rw]; - - if (waitqueue_active(wqh)) - wake_up(wqh); -} - /* * Has to be called with the request spinlock acquired */ -- cgit v1.2.3 From 0261d6886eb5822867a5310dc1e4479b940a1942 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 30 Oct 2006 19:07:48 +0100 Subject: [PATCH] CFQ: use irq safe locking in cfq_cic_link() If cfq_set_request() is called for a new process AND a non-fs io request (so that __GFP_WAIT may not be set), cfq_cic_link() may use spin_lock_irq() and spin_unlock_irq() with interrupts already disabled. 
Fix is to always use irq safe locking in cfq_cic_link() Acked-By: Arjan van de Ven Acked-by: Ingo Molnar Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- block/cfq-iosched.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index d3d76136f53a..5c3da894a56c 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1362,6 +1362,7 @@ cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, struct rb_node **p; struct rb_node *parent; struct cfq_io_context *__cic; + unsigned long flags; void *k; cic->ioc = ioc; @@ -1391,9 +1392,9 @@ restart: rb_link_node(&cic->rb_node, parent, p); rb_insert_color(&cic->rb_node, &ioc->cic_root); - spin_lock_irq(cfqd->queue->queue_lock); + spin_lock_irqsave(cfqd->queue->queue_lock, flags); list_add(&cic->queue_list, &cfqd->cic_list); - spin_unlock_irq(cfqd->queue->queue_lock); + spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); } /* -- cgit v1.2.3 From c1b707d253fe918b92882cff1dbd926b47e14fd2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 30 Oct 2006 19:54:23 +0100 Subject: [PATCH] CFQ: bad locking in changed_ioprio() When the ioprio code recently got juggled a bit, a bug was introduced. changed_ioprio() is no longer called with interrupts disabled, so using plain spin_lock() on the queue_lock is a bug. 
Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- block/cfq-iosched.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 5c3da894a56c..25c4e7ed0d00 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1215,11 +1215,12 @@ static inline void changed_ioprio(struct cfq_io_context *cic) { struct cfq_data *cfqd = cic->key; struct cfq_queue *cfqq; + unsigned long flags; if (unlikely(!cfqd)) return; - spin_lock(cfqd->queue->queue_lock); + spin_lock_irqsave(cfqd->queue->queue_lock, flags); cfqq = cic->cfqq[ASYNC]; if (cfqq) { @@ -1236,7 +1237,7 @@ static inline void changed_ioprio(struct cfq_io_context *cic) if (cfqq) cfq_mark_cfqq_prio_changed(cfqq); - spin_unlock(cfqd->queue->queue_lock); + spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); } static void cfq_ioc_set_ioprio(struct io_context *ioc) -- cgit v1.2.3 From 5ddfe9691c91a244e8d1be597b6428fcefd58103 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 30 Oct 2006 22:07:21 -0800 Subject: [PATCH] md: check bio address after mapping through partitions. Partitions are not limited to live within a device. So we should range check after partition mapping. Note that 'maxsector' was being used for two different things. I have split off the second usage into 'old_sector' so that maxsector can still be used for its primary usage later in the function.
Cc: Jens Axboe Signed-off-by: Neil Brown Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/ll_rw_blk.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 136066583c68..c7b1dac8bee9 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -2999,6 +2999,7 @@ void generic_make_request(struct bio *bio) { request_queue_t *q; sector_t maxsector; + sector_t old_sector; int ret, nr_sectors = bio_sectors(bio); dev_t old_dev; @@ -3027,7 +3028,7 @@ void generic_make_request(struct bio *bio) * NOTE: we don't repeat the blk_size check for each new device. * Stacking drivers are expected to know what they are doing. */ - maxsector = -1; + old_sector = -1; old_dev = 0; do { char b[BDEVNAME_SIZE]; @@ -3061,15 +3062,30 @@ end_io: */ blk_partition_remap(bio); - if (maxsector != -1) + if (old_sector != -1) blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, - maxsector); + old_sector); blk_add_trace_bio(q, bio, BLK_TA_QUEUE); - maxsector = bio->bi_sector; + old_sector = bio->bi_sector; old_dev = bio->bi_bdev->bd_dev; + maxsector = bio->bi_bdev->bd_inode->i_size >> 9; + if (maxsector) { + sector_t sector = bio->bi_sector; + + if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { + /* + * This may well happen - partitions are not checked + * to make sure they are within the size of the + * whole device. + */ + handle_bad_sector(bio); + goto end_io; + } + } + ret = q->make_request_fn(q, bio); } while (ret); } -- cgit v1.2.3 From 5fccbf61be2a7f32d2002b04afca4c5009612a58 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 31 Oct 2006 14:21:55 +0100 Subject: [PATCH] CFQ: request <-> request merging rr_list fixup In very rare circumstances would we be pruning a merged request and at the same time delete the implicated cfqq from the rr_list, and not readd it when the merged request got added. 
This could cause io stalls until that process issued io again. Fix it up by putting the rr_list add handling into cfq_add_rq_rb(), identical to how pruning is handled in cfq_del_rq_rb(). This fixes a hang reproducible with fsx-linux. Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- block/cfq-iosched.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 25c4e7ed0d00..1d9c3c70a9a0 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -456,6 +456,9 @@ static void cfq_add_rq_rb(struct request *rq) */ while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL) cfq_dispatch_insert(cfqd->queue, __alias); + + if (!cfq_cfqq_on_rr(cfqq)) + cfq_add_cfqq_rr(cfqd, cfqq); } static inline void @@ -1652,9 +1655,6 @@ static void cfq_insert_request(request_queue_t *q, struct request *rq) cfq_add_rq_rb(rq); - if (!cfq_cfqq_on_rr(cfqq)) - cfq_add_cfqq_rr(cfqd, cfqq); - list_add_tail(&rq->queuelist, &cfqq->fifo); cfq_rq_enqueued(cfqd, cfqq, rq); -- cgit v1.2.3 From df66b8552be5fdab5c4b4d53ee08b99388b9bd02 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 2 Nov 2006 22:06:56 -0800 Subject: [PATCH] tidy "md: check bio address after mapping through partitions" Neil's xterms are too wide. Cc: Neil Brown Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/ll_rw_blk.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index c7b1dac8bee9..9eaee6640535 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -3075,11 +3075,12 @@ end_io: if (maxsector) { sector_t sector = bio->bi_sector; - if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { + if (maxsector < nr_sectors || + maxsector - nr_sectors < sector) { /* - * This may well happen - partitions are not checked - * to make sure they are within the size of the - * whole device. 
+ * This may well happen - partitions are not + * checked to make sure they are within the size + * of the whole device. */ handle_bad_sector(bio); goto end_io; -- cgit v1.2.3 From 616e8a091a035c0bd9b871695f4af191df123caa Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 13 Nov 2006 18:04:59 +0100 Subject: [PATCH] Fix bad data direction in SG_IO Contrary to what the name misleads you to believe, SG_DXFER_TO_FROM_DEV is really just a normal read seen from the device side. This patch fixes http://lkml.org/lkml/2006/10/13/100 Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- block/scsi_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 2dc326421a24..a19338e6215d 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -246,10 +246,10 @@ static int sg_io(struct file *file, request_queue_t *q, switch (hdr->dxfer_direction) { default: return -EINVAL; - case SG_DXFER_TO_FROM_DEV: case SG_DXFER_TO_DEV: writing = 1; break; + case SG_DXFER_TO_FROM_DEV: case SG_DXFER_FROM_DEV: break; } -- cgit v1.2.3 From 097b8457dafe7efc22201b4062e2d1e82e494067 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 16 Nov 2006 01:19:31 -0800 Subject: [PATCH] scsi: clear garbage after CDBs on SG_IO ATAPI devices transfer a fixed number of bytes for CDBs (12 or 16). Some ATAPI devices choke when a shorter CDB is used and the remaining bytes contain garbage. Block SG_IO cleared the remaining bytes but SCSI SG_IO didn't. This patch makes SCSI SG_IO clear them and simplifies CDB clearing in block SG_IO.
Signed-off-by: Tejun Heo Cc: Mathieu Fluhr Cc: James Bottomley Cc: Douglas Gilbert Acked-by: Jens Axboe Cc: Acked-by: Jeff Garzik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/scsi_ioctl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'block') diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index a19338e6215d..e55a75621437 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -286,9 +286,8 @@ static int sg_io(struct file *file, request_queue_t *q, * fill in request structure */ rq->cmd_len = hdr->cmd_len; + memset(rq->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */ memcpy(rq->cmd, cmd, hdr->cmd_len); - if (sizeof(rq->cmd) != hdr->cmd_len) - memset(rq->cmd + hdr->cmd_len, 0, sizeof(rq->cmd) - hdr->cmd_len); memset(sense, 0, sizeof(sense)); rq->sense = sense; -- cgit v1.2.3