diff options
Diffstat (limited to 'drivers/md/bcache/super.c')
-rw-r--r-- | drivers/md/bcache/super.c | 227 |
1 files changed, 183 insertions, 44 deletions
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 1b63ac876169..26e374fbf57c 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -40,6 +40,7 @@ static const char invalid_uuid[] = { static struct kobject *bcache_kobj; struct mutex bch_register_lock; +bool bcache_is_reboot; LIST_HEAD(bch_cache_sets); static LIST_HEAD(uncached_devices); @@ -49,6 +50,7 @@ static wait_queue_head_t unregister_wait; struct workqueue_struct *bcache_wq; struct workqueue_struct *bch_journal_wq; + #define BTREE_MAX_PAGES (256 * 1024 / PAGE_SIZE) /* limitation of partitions number on single bcache device */ #define BCACHE_MINORS 128 @@ -197,7 +199,9 @@ err: static void write_bdev_super_endio(struct bio *bio) { struct cached_dev *dc = bio->bi_private; - /* XXX: error checking */ + + if (bio->bi_status) + bch_count_backing_io_errors(dc, bio); closure_put(&dc->sb_write); } @@ -691,6 +695,7 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c, { unsigned int i; struct cache *ca; + int ret; for_each_cache(ca, d->c, i) bd_link_disk_holder(ca->bdev, d->disk); @@ -698,9 +703,13 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c, snprintf(d->name, BCACHEDEVNAME_SIZE, "%s%u", name, d->id); - WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") || - sysfs_create_link(&c->kobj, &d->kobj, d->name), - "Couldn't create device <-> cache set symlinks"); + ret = sysfs_create_link(&d->kobj, &c->kobj, "cache"); + if (ret < 0) + pr_err("Couldn't create device -> cache set symlink"); + + ret = sysfs_create_link(&c->kobj, &d->kobj, d->name); + if (ret < 0) + pr_err("Couldn't create cache set -> device symlink"); clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags); } @@ -908,7 +917,7 @@ static int cached_dev_status_update(void *arg) } -void bch_cached_dev_run(struct cached_dev *dc) +int bch_cached_dev_run(struct cached_dev *dc) { struct bcache_device *d = &dc->disk; char *buf = kmemdup_nul(dc->sb.label, SB_LABEL_SIZE, GFP_KERNEL); @@ -919,11 +928,19 @@ void bch_cached_dev_run(struct cached_dev *dc) NULL, }; + if (dc->io_disable) { + pr_err("I/O disabled on cached dev %s", + dc->backing_dev_name); + return -EIO; + } + if (atomic_xchg(&dc->running, 1)) { kfree(env[1]); kfree(env[2]); kfree(buf); - return; + pr_info("cached dev %s is running already", + dc->backing_dev_name); + return -EBUSY; } if (!d->c && @@ -949,8 +966,11 @@ void bch_cached_dev_run(struct cached_dev *dc) kfree(buf); if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") || - sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache")) - pr_debug("error creating sysfs link"); + sysfs_create_link(&disk_to_dev(d->disk)->kobj, + &d->kobj, "bcache")) { + pr_err("Couldn't create bcache dev <-> disk sysfs symlinks"); + return -ENOMEM; + } dc->status_update_thread = kthread_run(cached_dev_status_update, dc, "bcache_status_update"); @@ -959,6 +979,8 @@ void bch_cached_dev_run(struct cached_dev *dc) "continue to run without monitoring backing " "device status"); } + + return 0; } /* @@ -996,7 +1018,6 @@ static void cached_dev_detach_finish(struct work_struct *w) BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)); BUG_ON(refcount_read(&dc->count)); - mutex_lock(&bch_register_lock); if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) cancel_writeback_rate_update_dwork(dc); @@ -1012,6 +1033,8 @@ static void cached_dev_detach_finish(struct work_struct *w) bch_write_bdev_super(dc, &cl); closure_sync(&cl); + mutex_lock(&bch_register_lock); + calc_cached_dev_sectors(dc->disk.c); bcache_device_detach(&dc->disk); list_move(&dc->list, &uncached_devices); @@ -1054,6 +1077,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, uint32_t rtime = cpu_to_le32((u32)ktime_get_real_seconds()); struct uuid_entry *u; struct cached_dev *exist_dc, *t; + int ret = 0; if ((set_uuid && memcmp(set_uuid, c->sb.set_uuid, 16)) || (!set_uuid && memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16))) @@ -1153,6 +1177,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, down_write(&dc->writeback_lock); if (bch_cached_dev_writeback_start(dc)) { up_write(&dc->writeback_lock); + pr_err("Couldn't start writeback facilities for %s", + dc->disk.disk->disk_name); return -ENOMEM; } @@ -1163,7 +1189,22 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, bch_sectors_dirty_init(&dc->disk); - bch_cached_dev_run(dc); + ret = bch_cached_dev_run(dc); + if (ret && (ret != -EBUSY)) { + up_write(&dc->writeback_lock); + /* + * bch_register_lock is held, bcache_device_stop() is not + * able to be directly called. The kthread and kworker + * created previously in bch_cached_dev_writeback_start() + * have to be stopped manually here. + */ + kthread_stop(dc->writeback_thread); + cancel_writeback_rate_update_dwork(dc); + pr_err("Couldn't run cached device %s", + dc->backing_dev_name); + return ret; + } + bcache_device_link(&dc->disk, c, "bdev"); atomic_inc(&c->attached_dev_nr); @@ -1190,18 +1231,16 @@ static void cached_dev_free(struct closure *cl) { struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); - mutex_lock(&bch_register_lock); - if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) cancel_writeback_rate_update_dwork(dc); if (!IS_ERR_OR_NULL(dc->writeback_thread)) kthread_stop(dc->writeback_thread); - if (dc->writeback_write_wq) - destroy_workqueue(dc->writeback_write_wq); if (!IS_ERR_OR_NULL(dc->status_update_thread)) kthread_stop(dc->status_update_thread); + mutex_lock(&bch_register_lock); + if (atomic_read(&dc->running)) bd_unlink_disk_holder(dc->bdev, dc->disk.disk); bcache_device_free(&dc->disk); @@ -1290,6 +1329,7 @@ static int register_bdev(struct cache_sb *sb, struct page *sb_page, { const char *err = "cannot allocate memory"; struct cache_set *c; + int ret = -ENOMEM; bdevname(bdev, dc->backing_dev_name); memcpy(&dc->sb, sb, sizeof(struct cache_sb)); @@ -1319,14 +1359,18 @@ static int register_bdev(struct cache_sb *sb, struct page *sb_page, bch_cached_dev_attach(dc, c, NULL); if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE || - BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) - bch_cached_dev_run(dc); + BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) { + err = "failed to run cached device"; + ret = bch_cached_dev_run(dc); + if (ret) + goto err; + } return 0; err: pr_notice("error %s: %s", dc->backing_dev_name, err); bcache_device_stop(&dc->disk); - return -EIO; + return ret; } /* Flash only volumes */ @@ -1437,8 +1481,6 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size) bool bch_cached_dev_error(struct cached_dev *dc) { - struct cache_set *c; - if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags)) return false; @@ -1449,21 +1491,6 @@ bool bch_cached_dev_error(struct cached_dev *dc) pr_err("stop %s: too many IO errors on backing device %s\n", dc->disk.disk->disk_name, dc->backing_dev_name); - /* - * If the cached device is still attached to a cache set, - * even dc->io_disable is true and no more I/O requests - * accepted, cache device internal I/O (writeback scan or - * garbage collection) may still prevent bcache device from - * being stopped. So here CACHE_SET_IO_DISABLE should be - * set to c->flags too, to make the internal I/O to cache - * device rejected and stopped immediately. - * If c is NULL, that means the bcache device is not attached - * to any cache set, then no CACHE_SET_IO_DISABLE bit to set. - */ - c = dc->disk.c; - if (c && test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags)) - pr_info("CACHE_SET_IO_DISABLE already set"); - bcache_device_stop(&dc->disk); return true; } @@ -1564,19 +1591,23 @@ static void cache_set_flush(struct closure *cl) kobject_put(&c->internal); kobject_del(&c->kobj); - if (c->gc_thread) + if (!IS_ERR_OR_NULL(c->gc_thread)) kthread_stop(c->gc_thread); if (!IS_ERR_OR_NULL(c->root)) list_add(&c->root->list, &c->btree_cache); - /* Should skip this if we're unregistering because of an error */ - list_for_each_entry(b, &c->btree_cache, list) { - mutex_lock(&b->write_lock); - if (btree_node_dirty(b)) - __bch_btree_node_write(b, NULL); - mutex_unlock(&b->write_lock); - } + /* + * Avoid flushing cached nodes if cache set is retiring + * due to too many I/O errors detected. + */ + if (!test_bit(CACHE_SET_IO_DISABLE, &c->flags)) + list_for_each_entry(b, &c->btree_cache, list) { + mutex_lock(&b->write_lock); + if (btree_node_dirty(b)) + __bch_btree_node_write(b, NULL); + mutex_unlock(&b->write_lock); + } for_each_cache(ca, c, i) if (ca->alloc_thread) @@ -1849,6 +1880,23 @@ static int run_cache_set(struct cache_set *c) if (bch_btree_check(c)) goto err; + /* + * bch_btree_check() may occupy too much system memory which + * has negative effects to user space application (e.g. data + * base) performance. Shrink the mca cache memory proactively + * here to avoid competing memory with user space workloads.. + */ + if (!c->shrinker_disabled) { + struct shrink_control sc; + + sc.gfp_mask = GFP_KERNEL; + sc.nr_to_scan = c->btree_cache_used * c->btree_pages; + /* first run to clear b->accessed tag */ + c->shrink.scan_objects(&c->shrink, &sc); + /* second run to reap non-accessed nodes */ + c->shrink.scan_objects(&c->shrink, &sc); + } + bch_journal_mark(c, &journal); bch_initial_gc_finish(c); pr_debug("btree_check() done"); @@ -1957,7 +2005,7 @@ err: } closure_sync(&cl); - /* XXX: test this, it's broken */ + bch_cache_set_error(c, "%s", err); return -EIO; @@ -2251,9 +2299,13 @@ err: static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, const char *buffer, size_t size); +static ssize_t bch_pending_bdevs_cleanup(struct kobject *k, + struct kobj_attribute *attr, + const char *buffer, size_t size); kobj_attribute_write(register, register_bcache); kobj_attribute_write(register_quiet, register_bcache); +kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup); static bool bch_is_open_backing(struct block_device *bdev) { @@ -2301,6 +2353,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (!try_module_get(THIS_MODULE)) return -EBUSY; + /* For latest state of bcache_is_reboot */ + smp_mb(); + if (bcache_is_reboot) + return -EBUSY; + path = kstrndup(buffer, size, GFP_KERNEL); if (!path) goto err; @@ -2378,8 +2435,61 @@ err: goto out; } + +struct pdev { + struct list_head list; + struct cached_dev *dc; +}; + +static ssize_t bch_pending_bdevs_cleanup(struct kobject *k, + struct kobj_attribute *attr, + const char *buffer, + size_t size) +{ + LIST_HEAD(pending_devs); + ssize_t ret = size; + struct cached_dev *dc, *tdc; + struct pdev *pdev, *tpdev; + struct cache_set *c, *tc; + + mutex_lock(&bch_register_lock); + list_for_each_entry_safe(dc, tdc, &uncached_devices, list) { + pdev = kmalloc(sizeof(struct pdev), GFP_KERNEL); + if (!pdev) + break; + pdev->dc = dc; + list_add(&pdev->list, &pending_devs); + } + + list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) { + list_for_each_entry_safe(c, tc, &bch_cache_sets, list) { + char *pdev_set_uuid = pdev->dc->sb.set_uuid; + char *set_uuid = c->sb.uuid; + + if (!memcmp(pdev_set_uuid, set_uuid, 16)) { + list_del(&pdev->list); + kfree(pdev); + break; + } + } + } + mutex_unlock(&bch_register_lock); + + list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) { + pr_info("delete pdev %p", pdev); + list_del(&pdev->list); + bcache_device_stop(&pdev->dc->disk); + kfree(pdev); + } + + return ret; +} + static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) { + if (bcache_is_reboot) + return NOTIFY_DONE; + if (code == SYS_DOWN || code == SYS_HALT || code == SYS_POWER_OFF) { @@ -2392,19 +2502,45 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) mutex_lock(&bch_register_lock); + if (bcache_is_reboot) + goto out; + + /* New registration is rejected since now */ + bcache_is_reboot = true; + /* + * Make registering caller (if there is) on other CPU + * core know bcache_is_reboot set to true earlier + */ + smp_mb(); + if (list_empty(&bch_cache_sets) && list_empty(&uncached_devices)) goto out; + mutex_unlock(&bch_register_lock); + pr_info("Stopping all devices:"); + /* + * The reason bch_register_lock is not held to call + * bch_cache_set_stop() and bcache_device_stop() is to + * avoid potential deadlock during reboot, because cache + * set or bcache device stopping process will acqurie + * bch_register_lock too. + * + * We are safe here because bcache_is_reboot sets to + * true already, register_bcache() will reject new + * registration now. bcache_is_reboot also makes sure + * bcache_reboot() won't be re-entered on by other thread, + * so there is no race in following list iteration by + * list_for_each_entry_safe(). + */ list_for_each_entry_safe(c, tc, &bch_cache_sets, list) bch_cache_set_stop(c); list_for_each_entry_safe(dc, tdc, &uncached_devices, list) bcache_device_stop(&dc->disk); - mutex_unlock(&bch_register_lock); /* * Give an early chance for other kthreads and @@ -2496,6 +2632,7 @@ static int __init bcache_init(void) static const struct attribute *files[] = { &ksysfs_register.attr, &ksysfs_register_quiet.attr, + &ksysfs_pendings_cleanup.attr, NULL }; @@ -2531,6 +2668,8 @@ static int __init bcache_init(void) bch_debug_init(); closure_debug_init(); + bcache_is_reboot = false; + return 0; err: bcache_exit(); |