From 21b2f0c803adaf00fce1b606c50b49ae8b106773 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Mar 2006 17:52:04 +0100 Subject: [SCSI] unify SCSI_IOCTL_SEND_COMMAND implementations We currently have two implementations of this obsolete ioctl, one in the block layer and one in the scsi code. Both of them have drawbacks. This patch kills the scsi layer version after updating the block version with the missing bits: - argument checking - use scatterlist I/O - set number of retries based on the submitted command This is the last user of non-S/G I/O except for the gdth driver, so getting this in ASAP and through the scsi tree would be nie to kill the non-S/G I/O path. Jens, what do you think about adding a check for non-S/G I/O in the midlayer? Thanks to Or Gerlitz for testing this patch. Signed-off-by: Christoph Hellwig Signed-off-by: James Bottomley --- block/scsi_ioctl.c | 101 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 72 insertions(+), 29 deletions(-) (limited to 'block') diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 24f7af9d0abc..b33eda26e205 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -350,16 +350,51 @@ out: return ret; } +/** + * sg_scsi_ioctl -- handle deprecated SCSI_IOCTL_SEND_COMMAND ioctl + * @file: file this ioctl operates on (optional) + * @q: request queue to send scsi commands down + * @disk: gendisk to operate on (option) + * @sic: userspace structure describing the command to perform + * + * Send down the scsi command described by @sic to the device below + * the request queue @q. If @file is non-NULL it's used to perform + * fine-grained permission checks that allow users to send down + * non-destructive SCSI commands. If the caller has a struct gendisk + * available it should be passed in as @disk to allow the low level + * driver to use the information contained in it. A non-NULL @disk + * is only allowed if the caller knows that the low level driver doesn't + * need it (e.g. in the scsi subsystem). + * + * Notes: + * - This interface is deprecated - users should use the SG_IO + * interface instead, as this is a more flexible approach to + * performing SCSI commands on a device. + * - The SCSI command length is determined by examining the 1st byte + * of the given command. There is no way to override this. + * - Data transfers are limited to PAGE_SIZE + * - The length (x + y) must be at least OMAX_SB_LEN bytes long to + * accommodate the sense buffer when an error occurs. + * The sense buffer is truncated to OMAX_SB_LEN (16) bytes so that + * old code will not be surprised. + * - If a Unix error occurs (e.g. ENOMEM) then the user will receive + * a negative return and the Unix error code in 'errno'. + * If the SCSI command succeeds then 0 is returned. + * Positive numbers returned are the compacted SCSI error codes (4 + * bytes in one int) where the lowest byte is the SCSI status. + */ #define OMAX_SB_LEN 16 /* For backward compatibility */ - -static int sg_scsi_ioctl(struct file *file, request_queue_t *q, - struct gendisk *bd_disk, Scsi_Ioctl_Command __user *sic) +int sg_scsi_ioctl(struct file *file, struct request_queue *q, + struct gendisk *disk, struct scsi_ioctl_command __user *sic) { struct request *rq; int err; unsigned int in_len, out_len, bytes, opcode, cmdlen; char *buffer = NULL, sense[SCSI_SENSE_BUFFERSIZE]; + if (!sic) + return -EINVAL; + /* * get in an out lengths, verify they don't exceed a page worth of data */ @@ -393,45 +428,53 @@ static int sg_scsi_ioctl(struct file *file, request_queue_t *q, if (copy_from_user(rq->cmd, sic->data, cmdlen)) goto error; - if (copy_from_user(buffer, sic->data + cmdlen, in_len)) + if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len)) goto error; err = verify_command(file, rq->cmd); if (err) goto error; + /* default. possible overriden later */ + rq->retries = 5; + switch (opcode) { - case SEND_DIAGNOSTIC: - case FORMAT_UNIT: - rq->timeout = FORMAT_UNIT_TIMEOUT; - break; - case START_STOP: - rq->timeout = START_STOP_TIMEOUT; - break; - case MOVE_MEDIUM: - rq->timeout = MOVE_MEDIUM_TIMEOUT; - break; - case READ_ELEMENT_STATUS: - rq->timeout = READ_ELEMENT_STATUS_TIMEOUT; - break; - case READ_DEFECT_DATA: - rq->timeout = READ_DEFECT_DATA_TIMEOUT; - break; - default: - rq->timeout = BLK_DEFAULT_TIMEOUT; - break; + case SEND_DIAGNOSTIC: + case FORMAT_UNIT: + rq->timeout = FORMAT_UNIT_TIMEOUT; + rq->retries = 1; + break; + case START_STOP: + rq->timeout = START_STOP_TIMEOUT; + break; + case MOVE_MEDIUM: + rq->timeout = MOVE_MEDIUM_TIMEOUT; + break; + case READ_ELEMENT_STATUS: + rq->timeout = READ_ELEMENT_STATUS_TIMEOUT; + break; + case READ_DEFECT_DATA: + rq->timeout = READ_DEFECT_DATA_TIMEOUT; + rq->retries = 1; + break; + default: + rq->timeout = BLK_DEFAULT_TIMEOUT; + break; + } + + if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, __GFP_WAIT)) { + err = DRIVER_ERROR << 24; + goto out; } memset(sense, 0, sizeof(sense)); rq->sense = sense; rq->sense_len = 0; - - rq->data = buffer; - rq->data_len = bytes; rq->flags |= REQ_BLOCK_PC; - rq->retries = 0; - blk_execute_rq(q, bd_disk, rq, 0); + blk_execute_rq(q, disk, rq, 0); + +out: err = rq->errors & 0xff; /* only 8 bit SCSI status */ if (err) { if (rq->sense_len && rq->sense) { @@ -450,7 +493,7 @@ error: blk_put_request(rq); return err; } - +EXPORT_SYMBOL_GPL(sg_scsi_ioctl); /* Send basic block requests */ static int __blk_send_generic(request_queue_t *q, struct gendisk *bd_disk, int cmd, int data) -- cgit v1.2.3 From fba822722e3f9d438fca8fd9541d7ddd447d7a48 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Tue, 18 Apr 2006 09:44:06 +0200 Subject: [PATCH 1/2] iosched: fix typo and barrier() On rmmod path, cfq/as waits to make sure all io-contexts was freed. However, it's using complete(), not wait_for_completion(). I think barrier() is not enough in here. To avoid the following case, this patch replaces barrier() with smb_wmb(). cpu0 visibility cpu1 [ioc_gnone=NULL,ioc_count=1] ioc_gnone = &all_gone NULL,ioc_count=1 atomic_read(&ioc_count) NULL,ioc_count=1 wait_for_completion() NULL,ioc_count=0 atomic_sub_and_test() NULL,ioc_count=0 if ( && ioc_gone) [ioc_gone==NULL, so doesn't call complete()] &all_gone,ioc_count=0 Signed-off-by: OGAWA Hirofumi Signed-off-by: Jens Axboe --- block/as-iosched.c | 5 +++-- block/cfq-iosched.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/as-iosched.c b/block/as-iosched.c index 296708ceceb2..e25a5d79ab27 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -1844,9 +1844,10 @@ static void __exit as_exit(void) DECLARE_COMPLETION(all_gone); elv_unregister(&iosched_as); ioc_gone = &all_gone; - barrier(); + /* ioc_gone's update must be visible before reading ioc_count */ + smp_wmb(); if (atomic_read(&ioc_count)) - complete(ioc_gone); + wait_for_completion(ioc_gone); synchronize_rcu(); kmem_cache_destroy(arq_pool); } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 67d446de0227..01820b1094e9 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2439,9 +2439,10 @@ static void __exit cfq_exit(void) DECLARE_COMPLETION(all_gone); elv_unregister(&iosched_cfq); ioc_gone = &all_gone; - barrier(); + /* ioc_gone's update must be visible before reading ioc_count */ + smp_wmb(); if (atomic_read(&ioc_count)) - complete(ioc_gone); + wait_for_completion(ioc_gone); synchronize_rcu(); cfq_slab_kill(); } -- cgit v1.2.3 From dbecf3ab40b5a6cc4499543778cd9f9682c0abad Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Tue, 18 Apr 2006 09:45:18 +0200 Subject: [PATCH 2/2] cfq: fix cic's rbtree traversal When queue dies, we set cic->key=NULL as dead mark. So, when we traverse a rbtree, we must check whether it's still valid key. if it was invalidated, drop it, then restart the traversal from top. Signed-off-by: OGAWA Hirofumi Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) (limited to 'block') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 01820b1094e9..246feae16c60 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1472,15 +1472,31 @@ out: return cfqq; } +static void +cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic) +{ + read_lock(&cfq_exit_lock); + rb_erase(&cic->rb_node, &ioc->cic_root); + read_unlock(&cfq_exit_lock); + kmem_cache_free(cfq_ioc_pool, cic); + atomic_dec(&ioc_count); +} + static struct cfq_io_context * cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc) { - struct rb_node *n = ioc->cic_root.rb_node; + struct rb_node *n; struct cfq_io_context *cic; void *key = cfqd; +restart: + n = ioc->cic_root.rb_node; while (n) { cic = rb_entry(n, struct cfq_io_context, rb_node); + if (unlikely(!cic->key)) { + cfq_drop_dead_cic(ioc, cic); + goto restart; + } if (key < cic->key) n = n->rb_left; @@ -1497,20 +1513,24 @@ static inline void cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, struct cfq_io_context *cic) { - struct rb_node **p = &ioc->cic_root.rb_node; - struct rb_node *parent = NULL; + struct rb_node **p; + struct rb_node *parent; struct cfq_io_context *__cic; - read_lock(&cfq_exit_lock); - cic->ioc = ioc; cic->key = cfqd; ioc->set_ioprio = cfq_ioc_set_ioprio; - +restart: + parent = NULL; + p = &ioc->cic_root.rb_node; while (*p) { parent = *p; __cic = rb_entry(parent, struct cfq_io_context, rb_node); + if (unlikely(!__cic->key)) { + cfq_drop_dead_cic(ioc, cic); + goto restart; + } if (cic->key < __cic->key) p = &(*p)->rb_left; @@ -1520,6 +1540,7 @@ cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, BUG(); } + read_lock(&cfq_exit_lock); rb_link_node(&cic->rb_node, parent, p); rb_insert_color(&cic->rb_node, &ioc->cic_root); list_add(&cic->queue_list, &cfqd->cic_list); -- cgit v1.2.3 From be3b075354e170368a0d29558cae492205e80a64 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Tue, 18 Apr 2006 19:18:31 +0200 Subject: [PATCH] cfq: Further rbtree traversal and cfq_exit_queue() race fix In current code, we are re-reading cic->key after dead cic->key check. So, in theory, it may really re-read *after* cfq_exit_queue() seted NULL. To avoid race, we copy it to stack, then use it. With this change, I guess gcc will assign cic->key to a register or stack, and it wouldn't be re-readed. Signed-off-by: OGAWA Hirofumi Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'block') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 246feae16c60..2540dfaa3e38 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1487,20 +1487,22 @@ cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc) { struct rb_node *n; struct cfq_io_context *cic; - void *key = cfqd; + void *k, *key = cfqd; restart: n = ioc->cic_root.rb_node; while (n) { cic = rb_entry(n, struct cfq_io_context, rb_node); - if (unlikely(!cic->key)) { + /* ->key must be copied to avoid race with cfq_exit_queue() */ + k = cic->key; + if (unlikely(!k)) { cfq_drop_dead_cic(ioc, cic); goto restart; } - if (key < cic->key) + if (key < k) n = n->rb_left; - else if (key > cic->key) + else if (key > k) n = n->rb_right; else return cic; @@ -1516,6 +1518,7 @@ cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, struct rb_node **p; struct rb_node *parent; struct cfq_io_context *__cic; + void *k; cic->ioc = ioc; cic->key = cfqd; @@ -1527,14 +1530,16 @@ restart: while (*p) { parent = *p; __cic = rb_entry(parent, struct cfq_io_context, rb_node); - if (unlikely(!__cic->key)) { + /* ->key must be copied to avoid race with cfq_exit_queue() */ + k = __cic->key; + if (unlikely(!k)) { cfq_drop_dead_cic(ioc, cic); goto restart; } - if (cic->key < __cic->key) + if (cic->key < k) p = &(*p)->rb_left; - else if (cic->key > __cic->key) + else if (cic->key > k) p = &(*p)->rb_right; else BUG(); -- cgit v1.2.3 From 7daac4902053045450fa29db42aba19a4581f850 Mon Sep 17 00:00:00 2001 From: Coywolf Qi Hunt Date: Wed, 19 Apr 2006 10:14:49 +0200 Subject: [patch] cleanup: use blk_queue_stopped This cleanup the source to use blk_queue_stopped. Signed-off-by: Coywolf Qi Hunt Signed-off-by: Jens Axboe --- block/ll_rw_blk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index e112d1a5dab6..1755c053fd68 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -1554,7 +1554,7 @@ void blk_plug_device(request_queue_t *q) * don't plug a stopped queue, it must be paired with blk_start_queue() * which will restart the queueing */ - if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) + if (blk_queue_stopped(q)) return; if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { @@ -1587,7 +1587,7 @@ EXPORT_SYMBOL(blk_remove_plug); */ void __generic_unplug_device(request_queue_t *q) { - if (unlikely(test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))) + if (unlikely(blk_queue_stopped(q))) return; if (!blk_remove_plug(q)) -- cgit v1.2.3 From 4f73247f0e53be1bd4aa519476e6261a8e4a64ab Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 20 Apr 2006 15:45:22 +0200 Subject: [PATCH] block/elevator.c: remove unused exports This patch removes the following unused EXPORT_SYMBOL's: - elv_requeue_request - elv_completed_request They are only used by the block core, hence they need not be exported. Signed-off-by: Adrian Bunk Signed-off-by: Jens Axboe --- block/elevator.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'block') diff --git a/block/elevator.c b/block/elevator.c index 0d6be03d929e..29825792cbd5 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -895,10 +895,8 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name) EXPORT_SYMBOL(elv_dispatch_sort); EXPORT_SYMBOL(elv_add_request); EXPORT_SYMBOL(__elv_add_request); -EXPORT_SYMBOL(elv_requeue_request); EXPORT_SYMBOL(elv_next_request); EXPORT_SYMBOL(elv_dequeue_request); EXPORT_SYMBOL(elv_queue_empty); -EXPORT_SYMBOL(elv_completed_request); EXPORT_SYMBOL(elevator_exit); EXPORT_SYMBOL(elevator_init); -- cgit v1.2.3