diff options
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/Kconfig | 4 | ||||
-rw-r--r-- | drivers/block/brd.c | 3 | ||||
-rw-r--r-- | drivers/block/cryptoloop.c | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.c | 5 | ||||
-rw-r--r-- | drivers/block/floppy.c | 30 | ||||
-rw-r--r-- | drivers/block/loop.c | 141 | ||||
-rw-r--r-- | drivers/block/n64cart.c | 2 | ||||
-rw-r--r-- | drivers/block/nbd.c | 192 | ||||
-rw-r--r-- | drivers/block/null_blk/main.c | 7 | ||||
-rw-r--r-- | drivers/block/paride/pd.c | 2 | ||||
-rw-r--r-- | drivers/block/pktcdvd.c | 8 | ||||
-rw-r--r-- | drivers/block/ps3disk.c | 18 | ||||
-rw-r--r-- | drivers/block/ps3vram.c | 2 | ||||
-rw-r--r-- | drivers/block/rbd.c | 51 | ||||
-rw-r--r-- | drivers/block/rnbd/rnbd-clt-sysfs.c | 33 | ||||
-rw-r--r-- | drivers/block/rnbd/rnbd-clt.c | 2 | ||||
-rw-r--r-- | drivers/block/rnbd/rnbd-srv-sysfs.c | 14 | ||||
-rw-r--r-- | drivers/block/sx8.c | 2 | ||||
-rw-r--r-- | drivers/block/virtio_blk.c | 51 | ||||
-rw-r--r-- | drivers/block/xen-blkfront.c | 127 |
21 files changed, 408 insertions, 290 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 90ed1642304a..ab3e37aa1830 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -212,7 +212,7 @@ config BLK_DEV_LOOP_MIN_COUNT dynamically allocated with the /dev/loop-control interface. config BLK_DEV_CRYPTOLOOP - tristate "Cryptoloop Support" + tristate "Cryptoloop Support (DEPRECATED)" select CRYPTO select CRYPTO_CBC depends on BLK_DEV_LOOP @@ -224,7 +224,7 @@ config BLK_DEV_CRYPTOLOOP WARNING: This device is not safe for journaled file systems like ext3 or Reiserfs. Please use the Device Mapper crypto module instead, which can be configured to be on-disk compatible with the - cryptoloop device. + cryptoloop device. cryptoloop support will be removed in Linux 5.16. source "drivers/block/drbd/Kconfig" diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 95694113e38e..58ec167aa018 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -27,9 +27,6 @@ #include <linux/uaccess.h> -#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) -#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) - /* * Each block ramdisk device has a radix_tree brd_pages of pages that stores * the pages containing the block device's contents. A brd page's ->index is diff --git a/drivers/block/cryptoloop.c b/drivers/block/cryptoloop.c index 3cabc335ae74..f0a91faa43a8 100644 --- a/drivers/block/cryptoloop.c +++ b/drivers/block/cryptoloop.c @@ -189,6 +189,8 @@ init_cryptoloop(void) if (rc) printk(KERN_ERR "cryptoloop: loop_register_transfer failed\n"); + else + pr_warn("the cryptoloop driver has been deprecated and will be removed in in Linux 5.16\n"); return rc; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index e7d0e637e632..44ccf8b4f4b2 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1364,7 +1364,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi if (b) { blk_stack_limits(&q->limits, &b->limits, 0); - blk_queue_update_readahead(q); + disk_update_readahead(device->vdisk); } fixup_discard_if_not_supported(q); fixup_write_zeroes(device, q); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 13beb98a7c5a..5ca233644d70 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -905,13 +905,12 @@ static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t sector, enum drbd_read_balancing rbm) { - struct backing_dev_info *bdi; int stripe_shift; switch (rbm) { case RB_CONGESTED_REMOTE: - bdi = device->ldev->backing_bdev->bd_disk->queue->backing_dev_info; - return bdi_read_congested(bdi); + return bdi_read_congested( + device->ldev->backing_bdev->bd_disk->bdi); case RB_LEAST_PENDING: return atomic_read(&device->local_cnt) > atomic_read(&device->ap_pending_cnt) + atomic_read(&device->rs_pending_cnt); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 87460e0e5c72..fef79ea52e3e 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4029,23 +4029,23 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) if (fdc_state[FDC(drive)].rawcmd == 1) fdc_state[FDC(drive)].rawcmd = 2; - if (mode & (FMODE_READ|FMODE_WRITE)) { - drive_state[drive].last_checked = 0; - clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags); - if (bdev_check_media_change(bdev)) - floppy_revalidate(bdev->bd_disk); - if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags)) - goto out; - if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags)) + if (!(mode & FMODE_NDELAY)) { + if (mode & (FMODE_READ|FMODE_WRITE)) { + drive_state[drive].last_checked = 0; + clear_bit(FD_OPEN_SHOULD_FAIL_BIT, + &drive_state[drive].flags); + if (bdev_check_media_change(bdev)) + floppy_revalidate(bdev->bd_disk); + if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags)) + goto out; + if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags)) + goto out; + } + res = -EROFS; + if ((mode & FMODE_WRITE) && + !test_bit(FD_DISK_WRITABLE_BIT, &drive_state[drive].flags)) goto out; } - - res = -EROFS; - - if ((mode & FMODE_WRITE) && - !test_bit(FD_DISK_WRITABLE_BIT, &drive_state[drive].flags)) - goto out; - mutex_unlock(&open_lock); mutex_unlock(&floppy_mutex); return 0; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f37b9e3d833c..fa1c298a8cfb 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -88,6 +88,47 @@ static DEFINE_IDR(loop_index_idr); static DEFINE_MUTEX(loop_ctl_mutex); +static DEFINE_MUTEX(loop_validate_mutex); + +/** + * loop_global_lock_killable() - take locks for safe loop_validate_file() test + * + * @lo: struct loop_device + * @global: true if @lo is about to bind another "struct loop_device", false otherwise + * + * Returns 0 on success, -EINTR otherwise. + * + * Since loop_validate_file() traverses on other "struct loop_device" if + * is_loop_device() is true, we need a global lock for serializing concurrent + * loop_configure()/loop_change_fd()/__loop_clr_fd() calls. + */ +static int loop_global_lock_killable(struct loop_device *lo, bool global) +{ + int err; + + if (global) { + err = mutex_lock_killable(&loop_validate_mutex); + if (err) + return err; + } + err = mutex_lock_killable(&lo->lo_mutex); + if (err && global) + mutex_unlock(&loop_validate_mutex); + return err; +} + +/** + * loop_global_unlock() - release locks taken by loop_global_lock_killable() + * + * @lo: struct loop_device + * @global: true if @lo was about to bind another "struct loop_device", false otherwise + */ +static void loop_global_unlock(struct loop_device *lo, bool global) +{ + mutex_unlock(&lo->lo_mutex); + if (global) + mutex_unlock(&loop_validate_mutex); +} static int max_part; static int part_shift; @@ -672,13 +713,15 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) while (is_loop_device(f)) { struct loop_device *l; + lockdep_assert_held(&loop_validate_mutex); if (f->f_mapping->host->i_rdev == bdev->bd_dev) return -EBADF; l = I_BDEV(f->f_mapping->host)->bd_disk->private_data; - if (l->lo_state != Lo_bound) { + if (l->lo_state != Lo_bound) return -EINVAL; - } + /* Order wrt setting lo->lo_backing_file in loop_configure(). */ + rmb(); f = l->lo_backing_file; } if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) @@ -697,13 +740,18 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, unsigned int arg) { - struct file *file = NULL, *old_file; - int error; - bool partscan; + struct file *file = fget(arg); + struct file *old_file; + int error; + bool partscan; + bool is_loop; - error = mutex_lock_killable(&lo->lo_mutex); + if (!file) + return -EBADF; + is_loop = is_loop_device(file); + error = loop_global_lock_killable(lo, is_loop); if (error) - return error; + goto out_putf; error = -ENXIO; if (lo->lo_state != Lo_bound) goto out_err; @@ -713,11 +761,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) goto out_err; - error = -EBADF; - file = fget(arg); - if (!file) - goto out_err; - error = loop_validate_file(file, bdev); if (error) goto out_err; @@ -731,6 +774,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, goto out_err; /* and ... switch */ + disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE); blk_mq_freeze_queue(lo->lo_queue); mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); lo->lo_backing_file = file; @@ -740,7 +784,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, loop_update_dio(lo); blk_mq_unfreeze_queue(lo->lo_queue); partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); + + /* + * Flush loop_validate_file() before fput(), for l->lo_backing_file + * might be pointing at old_file which might be the last reference. + */ + if (!is_loop) { + mutex_lock(&loop_validate_mutex); + mutex_unlock(&loop_validate_mutex); + } /* * We must drop file reference outside of lo_mutex as dropping * the file ref can take open_mutex which creates circular locking @@ -752,9 +805,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, return 0; out_err: - mutex_unlock(&lo->lo_mutex); - if (file) - fput(file); + loop_global_unlock(lo, is_loop); +out_putf: + fput(file); return error; } @@ -1136,22 +1189,22 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, struct block_device *bdev, const struct loop_config *config) { - struct file *file; - struct inode *inode; + struct file *file = fget(config->fd); + struct inode *inode; struct address_space *mapping; - int error; - loff_t size; - bool partscan; - unsigned short bsize; + int error; + loff_t size; + bool partscan; + unsigned short bsize; + bool is_loop; + + if (!file) + return -EBADF; + is_loop = is_loop_device(file); /* This is safe, since we have a reference from open(). */ __module_get(THIS_MODULE); - error = -EBADF; - file = fget(config->fd); - if (!file) - goto out; - /* * If we don't hold exclusive handle for the device, upgrade to it * here to avoid changing device under exclusive owner. @@ -1162,7 +1215,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, goto out_putf; } - error = mutex_lock_killable(&lo->lo_mutex); + error = loop_global_lock_killable(lo, is_loop); if (error) goto out_bdev; @@ -1205,6 +1258,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, goto out_unlock; } + disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE); set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn); @@ -1242,6 +1296,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, size = get_loop_size(lo, file); loop_set_size(lo, size); + /* Order wrt reading lo_state in loop_validate_file(). */ + wmb(); + lo->lo_state = Lo_bound; if (part_shift) lo->lo_flags |= LO_FLAGS_PARTSCAN; @@ -1249,11 +1306,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, if (partscan) lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; - /* Grab the block_device to prevent its destruction after we - * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). - */ - bdgrab(bdev); - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); if (partscan) loop_reread_partitions(lo); if (!(mode & FMODE_EXCL)) @@ -1261,13 +1314,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, return 0; out_unlock: - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); out_bdev: if (!(mode & FMODE_EXCL)) bd_abort_claiming(bdev, loop_configure); out_putf: fput(file); -out: /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); return error; @@ -1283,6 +1335,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) int lo_number; struct loop_worker *pos, *worker; + /* + * Flush loop_configure() and loop_change_fd(). It is acceptable for + * loop_validate_file() to succeed, for actual clear operation has not + * started yet. + */ + mutex_lock(&loop_validate_mutex); + mutex_unlock(&loop_validate_mutex); + /* + * loop_validate_file() now fails because l->lo_state != Lo_bound + * became visible. + */ + mutex_lock(&lo->lo_mutex); if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) { err = -ENXIO; @@ -1332,7 +1396,6 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) blk_queue_physical_block_size(lo->lo_queue, 512); blk_queue_io_min(lo->lo_queue, 512); if (bdev) { - bdput(bdev); invalidate_bdev(bdev); bdev->bd_inode->i_mapping->wb_err = 0; } @@ -1349,6 +1412,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) partscan = lo->lo_flags & LO_FLAGS_PARTSCAN && bdev; lo_number = lo->lo_number; + disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE); out_unlock: mutex_unlock(&lo->lo_mutex); if (partscan) { @@ -2269,7 +2333,8 @@ static int loop_add(int i) lo->tag_set.queue_depth = 128; lo->tag_set.numa_node = NUMA_NO_NODE; lo->tag_set.cmd_size = sizeof(struct loop_cmd); - lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING; + lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING | + BLK_MQ_F_NO_SCHED_BY_DEFAULT; lo->tag_set.driver_data = lo; err = blk_mq_alloc_tag_set(&lo->tag_set); @@ -2325,6 +2390,8 @@ static int loop_add(int i) disk->fops = &lo_fops; disk->private_data = lo; disk->queue = lo->lo_queue; + disk->events = DISK_EVENT_MEDIA_CHANGE; + disk->event_flags = DISK_EVENT_FLAG_UEVENT; sprintf(disk->disk_name, "loop%d", i); add_disk(disk); mutex_unlock(&loop_ctl_mutex); diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c index 7b4dd10af9ec..c84be0028f63 100644 --- a/drivers/block/n64cart.c +++ b/drivers/block/n64cart.c @@ -74,7 +74,7 @@ static bool n64cart_do_bvec(struct device *dev, struct bio_vec *bv, u32 pos) n64cart_wait_dma(); - n64cart_write_reg(PI_DRAM_REG, dma_addr + bv->bv_offset); + n64cart_write_reg(PI_DRAM_REG, dma_addr); n64cart_write_reg(PI_CART_REG, (bstart | CART_DOMAIN) & CART_MAX); n64cart_write_reg(PI_WRITE_REG, bv->bv_len - 1); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index c38317979f74..5170a630778d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -49,6 +49,7 @@ static DEFINE_IDR(nbd_index_idr); static DEFINE_MUTEX(nbd_index_mutex); +static struct workqueue_struct *nbd_del_wq; static int nbd_total_devices = 0; struct nbd_sock { @@ -113,12 +114,12 @@ struct nbd_device { struct mutex config_lock; struct gendisk *disk; struct workqueue_struct *recv_workq; + struct work_struct remove_work; struct list_head list; struct task_struct *task_recv; struct task_struct *task_setup; - struct completion *destroy_complete; unsigned long flags; char *backend; @@ -237,32 +238,36 @@ static void nbd_dev_remove(struct nbd_device *nbd) { struct gendisk *disk = nbd->disk; - if (disk) { - del_gendisk(disk); - blk_cleanup_disk(disk); - blk_mq_free_tag_set(&nbd->tag_set); - } + del_gendisk(disk); + blk_cleanup_disk(disk); + blk_mq_free_tag_set(&nbd->tag_set); /* - * Place this in the last just before the nbd is freed to - * make sure that the disk and the related kobject are also - * totally removed to avoid duplicate creation of the same - * one. + * Remove from idr after del_gendisk() completes, so if the same ID is + * reused, the following add_disk() will succeed. */ - if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) && nbd->destroy_complete) - complete(nbd->destroy_complete); + mutex_lock(&nbd_index_mutex); + idr_remove(&nbd_index_idr, nbd->index); + mutex_unlock(&nbd_index_mutex); kfree(nbd); } +static void nbd_dev_remove_work(struct work_struct *work) +{ + nbd_dev_remove(container_of(work, struct nbd_device, remove_work)); +} + static void nbd_put(struct nbd_device *nbd) { - if (refcount_dec_and_mutex_lock(&nbd->refs, - &nbd_index_mutex)) { - idr_remove(&nbd_index_idr, nbd->index); + if (!refcount_dec_and_test(&nbd->refs)) + return; + + /* Call del_gendisk() asynchrounously to prevent deadlock */ + if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags)) + queue_work(nbd_del_wq, &nbd->remove_work); + else nbd_dev_remove(nbd); - mutex_unlock(&nbd_index_mutex); - } } static int nbd_disconnected(struct nbd_config *config) @@ -818,6 +823,10 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); + /* don't abort one completed request */ + if (blk_mq_request_completed(req)) + return true; + mutex_lock(&cmd->lock); cmd->status = BLK_STS_IOERR; mutex_unlock(&cmd->lock); @@ -1384,6 +1393,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, unsigned int cmd, unsigned long arg) { struct nbd_config *config = nbd->config; + loff_t bytesize; switch (cmd) { case NBD_DISCONNECT: @@ -1398,8 +1408,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, case NBD_SET_SIZE: return nbd_set_size(nbd, arg, config->blksize); case NBD_SET_SIZE_BLOCKS: - return nbd_set_size(nbd, arg * config->blksize, - config->blksize); + if (check_mul_overflow((loff_t)arg, config->blksize, &bytesize)) + return -EINVAL; + return nbd_set_size(nbd, bytesize, config->blksize); case NBD_SET_TIMEOUT: nbd_set_cmd_timeout(nbd, arg); return 0; @@ -1661,7 +1672,7 @@ static const struct blk_mq_ops nbd_mq_ops = { .timeout = nbd_xmit_timeout, }; -static int nbd_dev_add(int index) +static struct nbd_device *nbd_dev_add(int index, unsigned int refs) { struct nbd_device *nbd; struct gendisk *disk; @@ -1679,13 +1690,14 @@ static int nbd_dev_add(int index) nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; nbd->tag_set.driver_data = nbd; - nbd->destroy_complete = NULL; + INIT_WORK(&nbd->remove_work, nbd_dev_remove_work); nbd->backend = NULL; err = blk_mq_alloc_tag_set(&nbd->tag_set); if (err) goto out_free_nbd; + mutex_lock(&nbd_index_mutex); if (index >= 0) { err = idr_alloc(&nbd_index_idr, nbd, index, index + 1, GFP_KERNEL); @@ -1696,9 +1708,10 @@ static int nbd_dev_add(int index) if (err >= 0) index = err; } + nbd->index = index; + mutex_unlock(&nbd_index_mutex); if (err < 0) goto out_free_tags; - nbd->index = index; disk = blk_mq_alloc_disk(&nbd->tag_set, NULL); if (IS_ERR(disk)) { @@ -1722,38 +1735,65 @@ static int nbd_dev_add(int index) mutex_init(&nbd->config_lock); refcount_set(&nbd->config_refs, 0); - refcount_set(&nbd->refs, 1); + /* + * Start out with a zero references to keep other threads from using + * this device until it is fully initialized. + */ + refcount_set(&nbd->refs, 0); INIT_LIST_HEAD(&nbd->list); disk->major = NBD_MAJOR; + + /* Too big first_minor can cause duplicate creation of + * sysfs files/links, since first_minor will be truncated to + * byte in __device_add_disk(). + */ disk->first_minor = index << part_shift; + if (disk->first_minor > 0xff) { + err = -EINVAL; + goto out_free_idr; + } + disk->minors = 1 << part_shift; disk->fops = &nbd_fops; disk->private_data = nbd; sprintf(disk->disk_name, "nbd%d", index); add_disk(disk); + + /* + * Now publish the device. + */ + refcount_set(&nbd->refs, refs); nbd_total_devices++; - return index; + return nbd; out_free_idr: + mutex_lock(&nbd_index_mutex); idr_remove(&nbd_index_idr, index); + mutex_unlock(&nbd_index_mutex); out_free_tags: blk_mq_free_tag_set(&nbd->tag_set); out_free_nbd: kfree(nbd); out: - return err; + return ERR_PTR(err); } -static int find_free_cb(int id, void *ptr, void *data) +static struct nbd_device *nbd_find_get_unused(void) { - struct nbd_device *nbd = ptr; - struct nbd_device **found = data; + struct nbd_device *nbd; + int id; - if (!refcount_read(&nbd->config_refs)) { - *found = nbd; - return 1; + lockdep_assert_held(&nbd_index_mutex); + + idr_for_each_entry(&nbd_index_idr, nbd, id) { + if (refcount_read(&nbd->config_refs) || + test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags)) + continue; + if (refcount_inc_not_zero(&nbd->refs)) + return nbd; } - return 0; + + return NULL; } /* Netlink interface. */ @@ -1802,8 +1842,7 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd) static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) { - DECLARE_COMPLETION_ONSTACK(destroy_complete); - struct nbd_device *nbd = NULL; + struct nbd_device *nbd; struct nbd_config *config; int index = -1; int ret; @@ -1825,55 +1864,29 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) again: mutex_lock(&nbd_index_mutex); if (index == -1) { - ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd); - if (ret == 0) { - int new_index; - new_index = nbd_dev_add(-1); - if (new_index < 0) { - mutex_unlock(&nbd_index_mutex); - printk(KERN_ERR "nbd: failed to add new device\n"); - return new_index; - } - nbd = idr_find(&nbd_index_idr, new_index); - } + nbd = nbd_find_get_unused(); } else { nbd = idr_find(&nbd_index_idr, index); - if (!nbd) { - ret = nbd_dev_add(index); - if (ret < 0) { + if (nbd) { + if ((test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) && + test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) || + !refcount_inc_not_zero(&nbd->refs)) { mutex_unlock(&nbd_index_mutex); - printk(KERN_ERR "nbd: failed to add new device\n"); - return ret; + pr_err("nbd: device at index %d is going down\n", + index); + return -EINVAL; } - nbd = idr_find(&nbd_index_idr, index); } } - if (!nbd) { - printk(KERN_ERR "nbd: couldn't find device at index %d\n", - index); - mutex_unlock(&nbd_index_mutex); - return -EINVAL; - } - - if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) && - test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) { - nbd->destroy_complete = &destroy_complete; - mutex_unlock(&nbd_index_mutex); - - /* Wait untill the the nbd stuff is totally destroyed */ - wait_for_completion(&destroy_complete); - goto again; - } + mutex_unlock(&nbd_index_mutex); - if (!refcount_inc_not_zero(&nbd->refs)) { - mutex_unlock(&nbd_index_mutex); - if (index == -1) - goto again; - printk(KERN_ERR "nbd: device at index %d is going down\n", - index); - return -EINVAL; + if (!nbd) { + nbd = nbd_dev_add(index, 2); + if (IS_ERR(nbd)) { + pr_err("nbd: failed to add new device\n"); + return PTR_ERR(nbd); + } } - mutex_unlock(&nbd_index_mutex); mutex_lock(&nbd->config_lock); if (refcount_read(&nbd->config_refs)) { @@ -2004,15 +2017,19 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd) { mutex_lock(&nbd->config_lock); nbd_disconnect(nbd); - nbd_clear_sock(nbd); - mutex_unlock(&nbd->config_lock); + sock_shutdown(nbd); /* * Make sure recv thread has finished, so it does not drop the last * config ref and try to destroy the workqueue from inside the work - * queue. + * queue. And this also ensure that we can safely call nbd_clear_que() + * to cancel the inflight I/Os. */ if (nbd->recv_workq) flush_workqueue(nbd->recv_workq); + nbd_clear_que(nbd); + nbd->task_setup = NULL; + mutex_unlock(&nbd->config_lock); + if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF, &nbd->config->runtime_flags)) nbd_config_put(nbd); @@ -2416,16 +2433,21 @@ static int __init nbd_init(void) if (register_blkdev(NBD_MAJOR, "nbd")) return -EIO; + nbd_del_wq = alloc_workqueue("nbd-del", WQ_UNBOUND, 0); + if (!nbd_del_wq) { + unregister_blkdev(NBD_MAJOR, "nbd"); + return -ENOMEM; + } + if (genl_register_family(&nbd_genl_family)) { + destroy_workqueue(nbd_del_wq); unregister_blkdev(NBD_MAJOR, "nbd"); return -EINVAL; } nbd_dbg_init(); - mutex_lock(&nbd_index_mutex); for (i = 0; i < nbds_max; i++) - nbd_dev_add(i); - mutex_unlock(&nbd_index_mutex); + nbd_dev_add(i, 1); return 0; } @@ -2434,7 +2456,10 @@ static int nbd_exit_cb(int id, void *ptr, void *data) struct list_head *list = (struct list_head *)data; struct nbd_device *nbd = ptr; - list_add_tail(&nbd->list, list); + /* Skip nbd that is being removed asynchronously */ + if (refcount_read(&nbd->refs)) + list_add_tail(&nbd->list, list); + return 0; } @@ -2457,6 +2482,9 @@ static void __exit nbd_cleanup(void) nbd_put(nbd); } + /* Also wait for nbd_dev_remove_work() completes */ + destroy_workqueue(nbd_del_wq); + idr_destroy(&nbd_index_idr); genl_unregister_family(&nbd_genl_family); unregister_blkdev(NBD_MAJOR, "nbd"); diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index d734e9ee1546..187d779c8ca0 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -11,10 +11,6 @@ #include <linux/init.h> #include "null_blk.h" -#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) -#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) -#define SECTOR_MASK (PAGE_SECTORS - 1) - #define FREE_BATCH 16 #define TICKS_PER_SEC 50ULL @@ -1721,8 +1717,7 @@ static int null_gendisk_register(struct nullb *nullb) return ret; } - add_disk(disk); - return 0; + return add_disk(disk); } static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 9b3298926356..675327df6aff 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -892,7 +892,7 @@ static void pd_probe_drive(struct pd_unit *disk) return; p = blk_mq_alloc_disk(&disk->tag_set, disk); - if (!p) { + if (IS_ERR(p)) { blk_mq_free_tag_set(&disk->tag_set); return; } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 538446b652de..0f26b2510a75 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -1183,10 +1183,8 @@ try_next_bio: wakeup = (pd->write_congestion_on > 0 && pd->bio_queue_size <= pd->write_congestion_off); spin_unlock(&pd->lock); - if (wakeup) { - clear_bdi_congested(pd->disk->queue->backing_dev_info, - BLK_RW_ASYNC); - } + if (wakeup) + clear_bdi_congested(pd->disk->bdi, BLK_RW_ASYNC); pkt->sleep_time = max(PACKET_WAIT_TIME, 1); pkt_set_state(pkt, PACKET_WAITING_STATE); @@ -2366,7 +2364,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio) spin_lock(&pd->lock); if (pd->write_congestion_on > 0 && pd->bio_queue_size >= pd->write_congestion_on) { - set_bdi_congested(q->backing_dev_info, BLK_RW_ASYNC); + set_bdi_congested(bio->bi_bdev->bd_disk->bdi, BLK_RW_ASYNC); do { spin_unlock(&pd->lock); congestion_wait(BLK_RW_ASYNC, HZ); diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index f374ea2c67ce..8d51efbe045d 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -83,26 +83,12 @@ static void ps3disk_scatter_gather(struct ps3_storage_device *dev, unsigned int offset = 0; struct req_iterator iter; struct bio_vec bvec; - unsigned int i = 0; - size_t size; - void *buf; rq_for_each_segment(bvec, req, iter) { - unsigned long flags; - dev_dbg(&dev->sbd.core, "%s:%u: bio %u: %u sectors from %llu\n", - __func__, __LINE__, i, bio_sectors(iter.bio), - iter.bio->bi_iter.bi_sector); - - size = bvec.bv_len; - buf = bvec_kmap_irq(&bvec, &flags); if (gather) - memcpy(dev->bounce_buf+offset, buf, size); + memcpy_from_bvec(dev->bounce_buf + offset, &bvec); else - memcpy(buf, dev->bounce_buf+offset, size); - offset += size; - flush_kernel_dcache_page(bvec.bv_page); - bvec_kunmap_irq(buf, &flags); - i++; + memcpy_to_bvec(&bvec, dev->bounce_buf + offset); } } diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 7fbf469651c4..c7b19e128b03 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -541,7 +541,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev, bio_for_each_segment(bvec, bio, iter) { /* PS3 is ppc64, so we don't handle highmem */ - char *ptr = page_address(bvec.bv_page) + bvec.bv_offset; + char *ptr = bvec_virt(&bvec); size_t len = bvec.bv_len, retlen; dev_dbg(&dev->core, " %s %zu bytes at offset %llu\n", op, diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 531d390902dd..e65c9d706f6f 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1219,24 +1219,13 @@ static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev) rbd_dev->mapping.size = 0; } -static void zero_bvec(struct bio_vec *bv) -{ - void *buf; - unsigned long flags; - - buf = bvec_kmap_irq(bv, &flags); - memset(buf, 0, bv->bv_len); - flush_dcache_page(bv->bv_page); - bvec_kunmap_irq(buf, &flags); -} - static void zero_bios(struct ceph_bio_iter *bio_pos, u32 off, u32 bytes) { struct ceph_bio_iter it = *bio_pos; ceph_bio_iter_advance(&it, off); ceph_bio_iter_advance_step(&it, bytes, ({ - zero_bvec(&bv); + memzero_bvec(&bv); })); } @@ -1246,7 +1235,7 @@ static void zero_bvecs(struct ceph_bvec_iter *bvec_pos, u32 off, u32 bytes) ceph_bvec_iter_advance(&it, off); ceph_bvec_iter_advance_step(&it, bytes, ({ - zero_bvec(&bv); + memzero_bvec(&bv); })); } @@ -2997,8 +2986,7 @@ static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes) }; ceph_bvec_iter_advance_step(&it, bytes, ({ - if (memchr_inv(page_address(bv.bv_page) + bv.bv_offset, 0, - bv.bv_len)) + if (memchr_inv(bvec_virt(&bv), 0, bv.bv_len)) return false; })); return true; @@ -4100,8 +4088,6 @@ again: static bool rbd_quiesce_lock(struct rbd_device *rbd_dev) { - bool need_wait; - dout("%s rbd_dev %p\n", __func__, rbd_dev); lockdep_assert_held_write(&rbd_dev->lock_rwsem); @@ -4113,11 +4099,11 @@ static bool rbd_quiesce_lock(struct rbd_device *rbd_dev) */ rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING; rbd_assert(!completion_done(&rbd_dev->releasing_wait)); - need_wait = !list_empty(&rbd_dev->running_list); - downgrade_write(&rbd_dev->lock_rwsem); - if (need_wait) - wait_for_completion(&rbd_dev->releasing_wait); - up_read(&rbd_dev->lock_rwsem); + if (list_empty(&rbd_dev->running_list)) + return true; + + up_write(&rbd_dev->lock_rwsem); + wait_for_completion(&rbd_dev->releasing_wait); down_write(&rbd_dev->lock_rwsem); if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING) @@ -4203,15 +4189,11 @@ static void rbd_handle_acquired_lock(struct rbd_device *rbd_dev, u8 struct_v, if (!rbd_cid_equal(&cid, &rbd_empty_cid)) { down_write(&rbd_dev->lock_rwsem); if (rbd_cid_equal(&cid, &rbd_dev->owner_cid)) { - /* - * we already know that the remote client is - * the owner - */ - up_write(&rbd_dev->lock_rwsem); - return; + dout("%s rbd_dev %p cid %llu-%llu == owner_cid\n", + __func__, rbd_dev, cid.gid, cid.handle); + } else { + rbd_set_owner_cid(rbd_dev, &cid); } - - rbd_set_owner_cid(rbd_dev, &cid); downgrade_write(&rbd_dev->lock_rwsem); } else { down_read(&rbd_dev->lock_rwsem); @@ -4236,14 +4218,12 @@ static void rbd_handle_released_lock(struct rbd_device *rbd_dev, u8 struct_v, if (!rbd_cid_equal(&cid, &rbd_empty_cid)) { down_write(&rbd_dev->lock_rwsem); if (!rbd_cid_equal(&cid, &rbd_dev->owner_cid)) { - dout("%s rbd_dev %p unexpected owner, cid %llu-%llu != owner_cid %llu-%llu\n", + dout("%s rbd_dev %p cid %llu-%llu != owner_cid %llu-%llu\n", __func__, rbd_dev, cid.gid, cid.handle, rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle); - up_write(&rbd_dev->lock_rwsem); - return; + } else { + rbd_set_owner_cid(rbd_dev, &rbd_empty_cid); } - - rbd_set_owner_cid(rbd_dev, &rbd_empty_cid); downgrade_write(&rbd_dev->lock_rwsem); } else { down_read(&rbd_dev->lock_rwsem); @@ -4951,6 +4931,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) disk->minors = RBD_MINORS_PER_MAJOR; } disk->fops = &rbd_bd_ops; + disk->private_data = rbd_dev; blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c index 324afdd63a96..4b93fd83bf79 100644 --- a/drivers/block/rnbd/rnbd-clt-sysfs.c +++ b/drivers/block/rnbd/rnbd-clt-sysfs.c @@ -227,17 +227,17 @@ static ssize_t state_show(struct kobject *kobj, switch (dev->dev_state) { case DEV_STATE_INIT: - return snprintf(page, PAGE_SIZE, "init\n"); + return sysfs_emit(page, "init\n"); case DEV_STATE_MAPPED: /* TODO fix cli tool before changing to proper state */ - return snprintf(page, PAGE_SIZE, "open\n"); + return sysfs_emit(page, "open\n"); case DEV_STATE_MAPPED_DISCONNECTED: /* TODO fix cli tool before changing to proper state */ - return snprintf(page, PAGE_SIZE, "closed\n"); + return sysfs_emit(page, "closed\n"); case DEV_STATE_UNMAPPED: - return snprintf(page, PAGE_SIZE, "unmapped\n"); + return sysfs_emit(page, "unmapped\n"); default: - return snprintf(page, PAGE_SIZE, "unknown\n"); + return sysfs_emit(page, "unknown\n"); } } @@ -263,7 +263,7 @@ static ssize_t mapping_path_show(struct kobject *kobj, dev = container_of(kobj, struct rnbd_clt_dev, kobj); - return scnprintf(page, PAGE_SIZE, "%s\n", dev->pathname); + return sysfs_emit(page, "%s\n", dev->pathname); } static struct kobj_attribute rnbd_clt_mapping_path_attr = @@ -276,8 +276,7 @@ static ssize_t access_mode_show(struct kobject *kobj, dev = container_of(kobj, struct rnbd_clt_dev, kobj); - return snprintf(page, PAGE_SIZE, "%s\n", - rnbd_access_mode_str(dev->access_mode)); + return sysfs_emit(page, "%s\n", rnbd_access_mode_str(dev->access_mode)); } static struct kobj_attribute rnbd_clt_access_mode = @@ -286,8 +285,8 @@ static struct kobj_attribute rnbd_clt_access_mode = static ssize_t rnbd_clt_unmap_dev_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return scnprintf(page, PAGE_SIZE, "Usage: echo <normal|force> > %s\n", - attr->attr.name); + return sysfs_emit(page, "Usage: echo <normal|force> > %s\n", + attr->attr.name); } static ssize_t rnbd_clt_unmap_dev_store(struct kobject *kobj, @@ -357,9 +356,8 @@ static ssize_t rnbd_clt_resize_dev_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return scnprintf(page, PAGE_SIZE, - "Usage: echo <new size in sectors> > %s\n", - attr->attr.name); + return sysfs_emit(page, "Usage: echo <new size in sectors> > %s\n", + attr->attr.name); } static ssize_t rnbd_clt_resize_dev_store(struct kobject *kobj, @@ -390,8 +388,7 @@ static struct kobj_attribute rnbd_clt_resize_dev_attr = static ssize_t rnbd_clt_remap_dev_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return scnprintf(page, PAGE_SIZE, "Usage: echo <1> > %s\n", - attr->attr.name); + return sysfs_emit(page, "Usage: echo <1> > %s\n", attr->attr.name); } static ssize_t rnbd_clt_remap_dev_store(struct kobject *kobj, @@ -436,7 +433,7 @@ static ssize_t session_show(struct kobject *kobj, struct kobj_attribute *attr, dev = container_of(kobj, struct rnbd_clt_dev, kobj); - return scnprintf(page, PAGE_SIZE, "%s\n", dev->sess->sessname); + return sysfs_emit(page, "%s\n", dev->sess->sessname); } static struct kobj_attribute rnbd_clt_session_attr = @@ -499,8 +496,8 @@ static ssize_t rnbd_clt_map_device_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return scnprintf(page, PAGE_SIZE, - "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>] [nr_poll_queues=<number of queues>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n", + return sysfs_emit(page, + "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>] [nr_poll_queues=<number of queues>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n", attr->attr.name); } diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index e9cc413495f0..bd4a41afbbfc 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -271,7 +271,7 @@ unlock: */ if (cpu_q) *cpup = cpu_q->cpu; - put_cpu_var(sess->cpu_rr); + put_cpu_ptr(sess->cpu_rr); if (q) rnbd_clt_dev_requeue(q); diff --git a/drivers/block/rnbd/rnbd-srv-sysfs.c b/drivers/block/rnbd/rnbd-srv-sysfs.c index acf5fced11ef..4db98e0e76f0 100644 --- a/drivers/block/rnbd/rnbd-srv-sysfs.c +++ b/drivers/block/rnbd/rnbd-srv-sysfs.c @@ -90,8 +90,8 @@ static ssize_t read_only_show(struct kobject *kobj, struct kobj_attribute *attr, sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj); - return scnprintf(page, PAGE_SIZE, "%d\n", - !(sess_dev->open_flags & FMODE_WRITE)); + return sysfs_emit(page, "%d\n", + !(sess_dev->open_flags & FMODE_WRITE)); } static struct kobj_attribute rnbd_srv_dev_session_ro_attr = @@ -105,8 +105,8 @@ static ssize_t access_mode_show(struct kobject *kobj, sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj); - return scnprintf(page, PAGE_SIZE, "%s\n", - rnbd_access_mode_str(sess_dev->access_mode)); + return sysfs_emit(page, "%s\n", + rnbd_access_mode_str(sess_dev->access_mode)); } static struct kobj_attribute rnbd_srv_dev_session_access_mode_attr = @@ -119,7 +119,7 @@ static ssize_t mapping_path_show(struct kobject *kobj, sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj); - return scnprintf(page, PAGE_SIZE, "%s\n", sess_dev->pathname); + return sysfs_emit(page, "%s\n", sess_dev->pathname); } static struct kobj_attribute rnbd_srv_dev_session_mapping_path_attr = @@ -128,8 +128,8 @@ static struct kobj_attribute rnbd_srv_dev_session_mapping_path_attr = static ssize_t rnbd_srv_dev_session_force_close_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", - attr->attr.name); + return sysfs_emit(page, "Usage: echo 1 > %s\n", + attr->attr.name); } static ssize_t rnbd_srv_dev_session_force_close_store(struct kobject *kobj, diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 7b54353ee92b..420cd952ddc4 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -1373,7 +1373,7 @@ static void carm_free_disk(struct carm_host *host, unsigned int port_no) if (!disk) return; - if (disk->flags & GENHD_FL_UP) + if (host->state > HST_DEV_ACTIVATE) del_gendisk(disk); blk_cleanup_disk(disk); } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 4b49df2dfd23..57c6ae7debd9 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -166,11 +166,8 @@ static inline void virtblk_request_done(struct request *req) { struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); - if (req->rq_flags & RQF_SPECIAL_PAYLOAD) { - kfree(page_address(req->special_vec.bv_page) + - req->special_vec.bv_offset); - } - + if (req->rq_flags & RQF_SPECIAL_PAYLOAD) + kfree(bvec_virt(&req->special_vec)); blk_mq_end_request(req, virtblk_result(vbr)); } @@ -692,6 +689,28 @@ static const struct blk_mq_ops virtio_mq_ops = { static unsigned int virtblk_queue_depth; module_param_named(queue_depth, virtblk_queue_depth, uint, 0444); +static int virtblk_validate(struct virtio_device *vdev) +{ + u32 blk_size; + + if (!vdev->config->get) { + dev_err(&vdev->dev, "%s failure: config access disabled\n", + __func__); + return -EINVAL; + } + + if (!virtio_has_feature(vdev, VIRTIO_BLK_F_BLK_SIZE)) + return 0; + + blk_size = virtio_cread32(vdev, + offsetof(struct virtio_blk_config, blk_size)); + + if (blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE) + __virtio_clear_bit(vdev, VIRTIO_BLK_F_BLK_SIZE); + + return 0; +} + static int virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; @@ -703,12 +722,6 @@ static int virtblk_probe(struct virtio_device *vdev) u8 physical_block_exp, alignment_offset; unsigned int queue_depth; - if (!vdev->config->get) { - dev_err(&vdev->dev, "%s failure: config access disabled\n", - __func__); - return -EINVAL; - } - err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS), GFP_KERNEL); if (err < 0) @@ -823,6 +836,14 @@ static int virtblk_probe(struct virtio_device *vdev) else blk_size = queue_logical_block_size(q); + if (unlikely(blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE)) { + dev_err(&vdev->dev, + "block size is changed unexpectedly, now is %u\n", + blk_size); + err = -EINVAL; + goto out_cleanup_disk; + } + /* Use topology information if available */ err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, struct virtio_blk_config, physical_block_exp, @@ -878,9 +899,14 @@ static int virtblk_probe(struct virtio_device *vdev) virtblk_update_capacity(vblk, false); virtio_device_ready(vdev); - device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); + err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); + if (err) + goto out_cleanup_disk; + return 0; +out_cleanup_disk: + blk_cleanup_disk(vblk->disk); out_free_tags: blk_mq_free_tag_set(&vblk->tag_set); out_free_vq: @@ -983,6 +1009,7 @@ static struct virtio_driver virtio_blk = { .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, + .validate = virtblk_validate, .probe = virtblk_probe, .remove = virtblk_remove, .config_changed = virtblk_config_changed, diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index d83fee21f6c5..72902104f111 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -80,6 +80,7 @@ enum blkif_state { BLKIF_STATE_DISCONNECTED, BLKIF_STATE_CONNECTED, BLKIF_STATE_SUSPENDED, + BLKIF_STATE_ERROR, }; struct grant { @@ -89,6 +90,7 @@ struct grant { }; enum blk_req_status { + REQ_PROCESSING, REQ_WAITING, REQ_DONE, REQ_ERROR, @@ -530,10 +532,10 @@ static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo, id = get_id_from_freelist(rinfo); rinfo->shadow[id].request = req; - rinfo->shadow[id].status = REQ_WAITING; + rinfo->shadow[id].status = REQ_PROCESSING; rinfo->shadow[id].associated_id = NO_ASSOCIATED_ID; - (*ring_req)->u.rw.id = id; + rinfo->shadow[id].req.u.rw.id = id; return id; } @@ -541,11 +543,12 @@ static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo, static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_info *rinfo) { struct blkfront_info *info = rinfo->dev_info; - struct blkif_request *ring_req; + struct blkif_request *ring_req, *final_ring_req; unsigned long id; /* Fill out a communications ring structure. */ - id = blkif_ring_get_request(rinfo, req, &ring_req); + id = blkif_ring_get_request(rinfo, req, &final_ring_req); + ring_req = &rinfo->shadow[id].req; ring_req->operation = BLKIF_OP_DISCARD; ring_req->u.discard.nr_sectors = blk_rq_sectors(req); @@ -556,8 +559,9 @@ static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_inf else ring_req->u.discard.flag = 0; - /* Keep a private copy so we can reissue requests when recovering. */ - rinfo->shadow[id].req = *ring_req; + /* Copy the request to the ring page. */ + *final_ring_req = *ring_req; + rinfo->shadow[id].status = REQ_WAITING; return 0; } @@ -690,6 +694,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri { struct blkfront_info *info = rinfo->dev_info; struct blkif_request *ring_req, *extra_ring_req = NULL; + struct blkif_request *final_ring_req, *final_extra_ring_req = NULL; unsigned long id, extra_id = NO_ASSOCIATED_ID; bool require_extra_req = false; int i; @@ -734,7 +739,8 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri } /* Fill out a communications ring structure. */ - id = blkif_ring_get_request(rinfo, req, &ring_req); + id = blkif_ring_get_request(rinfo, req, &final_ring_req); + ring_req = &rinfo->shadow[id].req; num_sg = blk_rq_map_sg(req->q, req, rinfo->shadow[id].sg); num_grant = 0; @@ -785,7 +791,9 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri ring_req->u.rw.nr_segments = num_grant; if (unlikely(require_extra_req)) { extra_id = blkif_ring_get_request(rinfo, req, - &extra_ring_req); + &final_extra_ring_req); + extra_ring_req = &rinfo->shadow[extra_id].req; + /* * Only the first request contains the scatter-gather * list. @@ -827,10 +835,13 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri if (setup.segments) kunmap_atomic(setup.segments); - /* Keep a private copy so we can reissue requests when recovering. */ - rinfo->shadow[id].req = *ring_req; - if (unlikely(require_extra_req)) - rinfo->shadow[extra_id].req = *extra_ring_req; + /* Copy request(s) to the ring page. */ + *final_ring_req = *ring_req; + rinfo->shadow[id].status = REQ_WAITING; + if (unlikely(require_extra_req)) { + *final_extra_ring_req = *extra_ring_req; + rinfo->shadow[extra_id].status = REQ_WAITING; + } if (new_persistent_gnts) gnttab_free_grant_references(setup.gref_head); @@ -1092,7 +1103,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, err = xlbd_reserve_minors(minor, nr_minors); if (err) return err; - err = -ENODEV; memset(&info->tag_set, 0, sizeof(info->tag_set)); info->tag_set.ops = &blkfront_mq_ops; @@ -1354,8 +1364,8 @@ static enum blk_req_status blkif_rsp_to_req_status(int rsp) static int blkif_get_final_status(enum blk_req_status s1, enum blk_req_status s2) { - BUG_ON(s1 == REQ_WAITING); - BUG_ON(s2 == REQ_WAITING); + BUG_ON(s1 < REQ_DONE); + BUG_ON(s2 < REQ_DONE); if (s1 == REQ_ERROR || s2 == REQ_ERROR) return BLKIF_RSP_ERROR; @@ -1388,7 +1398,7 @@ static bool blkif_completion(unsigned long *id, s->status = blkif_rsp_to_req_status(bret->status); /* Wait the second response if not yet here. */ - if (s2->status == REQ_WAITING) + if (s2->status < REQ_DONE) return false; bret->status = blkif_get_final_status(s->status, @@ -1496,7 +1506,7 @@ static bool blkif_completion(unsigned long *id, static irqreturn_t blkif_interrupt(int irq, void *dev_id) { struct request *req; - struct blkif_response *bret; + struct blkif_response bret; RING_IDX i, rp; unsigned long flags; struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id; @@ -1507,54 +1517,76 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) spin_lock_irqsave(&rinfo->ring_lock, flags); again: - rp = rinfo->ring.sring->rsp_prod; - rmb(); /* Ensure we see queued responses up to 'rp'. */ + rp = READ_ONCE(rinfo->ring.sring->rsp_prod); + virt_rmb(); /* Ensure we see queued responses up to 'rp'. */ + if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) { + pr_alert("%s: illegal number of responses %u\n", + info->gd->disk_name, rp - rinfo->ring.rsp_cons); + goto err; + } for (i = rinfo->ring.rsp_cons; i != rp; i++) { unsigned long id; + unsigned int op; + + RING_COPY_RESPONSE(&rinfo->ring, i, &bret); + id = bret.id; - bret = RING_GET_RESPONSE(&rinfo->ring, i); - id = bret->id; /* * The backend has messed up and given us an id that we would * never have given to it (we stamp it up to BLK_RING_SIZE - * look in get_id_from_freelist. */ if (id >= BLK_RING_SIZE(info)) { - WARN(1, "%s: response to %s has incorrect id (%ld)\n", - info->gd->disk_name, op_name(bret->operation), id); - /* We can't safely get the 'struct request' as - * the id is busted. */ - continue; + pr_alert("%s: response has incorrect id (%ld)\n", + info->gd->disk_name, id); + goto err; } + if (rinfo->shadow[id].status != REQ_WAITING) { + pr_alert("%s: response references no pending request\n", + info->gd->disk_name); + goto err; + } + + rinfo->shadow[id].status = REQ_PROCESSING; req = rinfo->shadow[id].request; - if (bret->operation != BLKIF_OP_DISCARD) { + op = rinfo->shadow[id].req.operation; + if (op == BLKIF_OP_INDIRECT) + op = rinfo->shadow[id].req.u.indirect.indirect_op; + if (bret.operation != op) { + pr_alert("%s: response has wrong operation (%u instead of %u)\n", + info->gd->disk_name, bret.operation, op); + goto err; + } + + if (bret.operation != BLKIF_OP_DISCARD) { /* * We may need to wait for an extra response if the * I/O request is split in 2 */ - if (!blkif_completion(&id, rinfo, bret)) + if (!blkif_completion(&id, rinfo, &bret)) continue; } if (add_id_to_freelist(rinfo, id)) { WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n", - info->gd->disk_name, op_name(bret->operation), id); + info->gd->disk_name, op_name(bret.operation), id); continue; } - if (bret->status == BLKIF_RSP_OKAY) + if (bret.status == BLKIF_RSP_OKAY) blkif_req(req)->error = BLK_STS_OK; else blkif_req(req)->error = BLK_STS_IOERR; - switch (bret->operation) { + switch (bret.operation) { case BLKIF_OP_DISCARD: - if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { + if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) { struct request_queue *rq = info->rq; - printk(KERN_WARNING "blkfront: %s: %s op failed\n", - info->gd->disk_name, op_name(bret->operation)); + + pr_warn_ratelimited("blkfront: %s: %s op failed\n", + info->gd->disk_name, op_name(bret.operation)); blkif_req(req)->error = BLK_STS_NOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; @@ -1564,15 +1596,15 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) break; case BLKIF_OP_FLUSH_DISKCACHE: case BLKIF_OP_WRITE_BARRIER: - if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { - printk(KERN_WARNING "blkfront: %s: %s op failed\n", - info->gd->disk_name, op_name(bret->operation)); + if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) { + pr_warn_ratelimited("blkfront: %s: %s op failed\n", + info->gd->disk_name, op_name(bret.operation)); blkif_req(req)->error = BLK_STS_NOTSUPP; } - if (unlikely(bret->status == BLKIF_RSP_ERROR && + if (unlikely(bret.status == BLKIF_RSP_ERROR && rinfo->shadow[id].req.u.rw.nr_segments == 0)) { - printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", - info->gd->disk_name, op_name(bret->operation)); + pr_warn_ratelimited("blkfront: %s: empty %s op failed\n", + info->gd->disk_name, op_name(bret.operation)); blkif_req(req)->error = BLK_STS_NOTSUPP; } if (unlikely(blkif_req(req)->error)) { @@ -1585,9 +1617,10 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) fallthrough; case BLKIF_OP_READ: case BLKIF_OP_WRITE: - if (unlikely(bret->status != BLKIF_RSP_OKAY)) - dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " - "request: %x\n", bret->status); + if (unlikely(bret.status != BLKIF_RSP_OKAY)) + dev_dbg_ratelimited(&info->xbdev->dev, + "Bad return from blkdev data request: %#x\n", + bret.status); break; default: @@ -1613,6 +1646,14 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) spin_unlock_irqrestore(&rinfo->ring_lock, flags); return IRQ_HANDLED; + + err: + info->connected = BLKIF_STATE_ERROR; + + spin_unlock_irqrestore(&rinfo->ring_lock, flags); + + pr_alert("%s disabled for further use\n", info->gd->disk_name); + return IRQ_HANDLED; } |