diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/dm-crypt.c | 214 | ||||
-rw-r--r-- | drivers/md/dm-delay.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-flakey.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-ioctl.c | 5 | ||||
-rw-r--r-- | drivers/md/dm-linear.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-log.c | 13 | ||||
-rw-r--r-- | drivers/md/dm-mpath.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-raid.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-raid1.c | 4 | ||||
-rw-r--r-- | drivers/md/dm-snap.c | 6 | ||||
-rw-r--r-- | drivers/md/dm-stripe.c | 4 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-thin-metadata.c | 744 | ||||
-rw-r--r-- | drivers/md/dm-thin-metadata.h | 23 | ||||
-rw-r--r-- | drivers/md/dm-thin.c | 447 | ||||
-rw-r--r-- | drivers/md/dm-verity.c | 2 | ||||
-rw-r--r-- | drivers/md/dm.h | 5 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-block-manager.c | 105 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-block-manager.h | 21 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-space-map-common.c | 12 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-space-map-common.h | 1 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-transaction-manager.c | 64 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-transaction-manager.h | 11 |
23 files changed, 1060 insertions, 634 deletions
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index e2b32401ecc7..664743d6a6cd 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -42,21 +42,21 @@ struct convert_context { unsigned int offset_out; unsigned int idx_in; unsigned int idx_out; - sector_t sector; - atomic_t pending; + sector_t cc_sector; + atomic_t cc_pending; }; /* * per bio private data */ struct dm_crypt_io { - struct dm_target *target; + struct crypt_config *cc; struct bio *base_bio; struct work_struct work; struct convert_context ctx; - atomic_t pending; + atomic_t io_pending; int error; sector_t sector; struct dm_crypt_io *base_io; @@ -109,9 +109,6 @@ enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID }; */ struct crypt_cpu { struct ablkcipher_request *req; - /* ESSIV: struct crypto_cipher *essiv_tfm */ - void *iv_private; - struct crypto_ablkcipher *tfms[0]; }; /* @@ -151,6 +148,10 @@ struct crypt_config { * per_cpu_ptr() only. */ struct crypt_cpu __percpu *cpu; + + /* ESSIV: struct crypto_cipher *essiv_tfm */ + void *iv_private; + struct crypto_ablkcipher **tfms; unsigned tfms_count; /* @@ -193,7 +194,7 @@ static struct crypt_cpu *this_crypt_config(struct crypt_config *cc) */ static struct crypto_ablkcipher *any_tfm(struct crypt_config *cc) { - return __this_cpu_ptr(cc->cpu)->tfms[0]; + return cc->tfms[0]; } /* @@ -258,7 +259,7 @@ static int crypt_iv_essiv_init(struct crypt_config *cc) struct hash_desc desc; struct scatterlist sg; struct crypto_cipher *essiv_tfm; - int err, cpu; + int err; sg_init_one(&sg, cc->key, cc->key_size); desc.tfm = essiv->hash_tfm; @@ -268,14 +269,12 @@ static int crypt_iv_essiv_init(struct crypt_config *cc) if (err) return err; - for_each_possible_cpu(cpu) { - essiv_tfm = per_cpu_ptr(cc->cpu, cpu)->iv_private, + essiv_tfm = cc->iv_private; - err = crypto_cipher_setkey(essiv_tfm, essiv->salt, - crypto_hash_digestsize(essiv->hash_tfm)); - if (err) - return err; - } + err = crypto_cipher_setkey(essiv_tfm, essiv->salt, + crypto_hash_digestsize(essiv->hash_tfm)); + if (err) + return err; return 0; } @@ -286,16 +285,14 @@ static int crypt_iv_essiv_wipe(struct crypt_config *cc) struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; unsigned salt_size = crypto_hash_digestsize(essiv->hash_tfm); struct crypto_cipher *essiv_tfm; - int cpu, r, err = 0; + int r, err = 0; memset(essiv->salt, 0, salt_size); - for_each_possible_cpu(cpu) { - essiv_tfm = per_cpu_ptr(cc->cpu, cpu)->iv_private; - r = crypto_cipher_setkey(essiv_tfm, essiv->salt, salt_size); - if (r) - err = r; - } + essiv_tfm = cc->iv_private; + r = crypto_cipher_setkey(essiv_tfm, essiv->salt, salt_size); + if (r) + err = r; return err; } @@ -335,8 +332,6 @@ static struct crypto_cipher *setup_essiv_cpu(struct crypt_config *cc, static void crypt_iv_essiv_dtr(struct crypt_config *cc) { - int cpu; - struct crypt_cpu *cpu_cc; struct crypto_cipher *essiv_tfm; struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; @@ -346,15 +341,12 @@ static void crypt_iv_essiv_dtr(struct crypt_config *cc) kzfree(essiv->salt); essiv->salt = NULL; - for_each_possible_cpu(cpu) { - cpu_cc = per_cpu_ptr(cc->cpu, cpu); - essiv_tfm = cpu_cc->iv_private; + essiv_tfm = cc->iv_private; - if (essiv_tfm) - crypto_free_cipher(essiv_tfm); + if (essiv_tfm) + crypto_free_cipher(essiv_tfm); - cpu_cc->iv_private = NULL; - } + cc->iv_private = NULL; } static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, @@ -363,7 +355,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, struct crypto_cipher *essiv_tfm = NULL; struct crypto_hash *hash_tfm = NULL; u8 *salt = NULL; - int err, cpu; + int err; if (!opts) { ti->error = "Digest algorithm missing for ESSIV mode"; @@ -388,15 +380,13 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, cc->iv_gen_private.essiv.salt = salt; cc->iv_gen_private.essiv.hash_tfm = hash_tfm; - for_each_possible_cpu(cpu) { - essiv_tfm = setup_essiv_cpu(cc, ti, salt, - crypto_hash_digestsize(hash_tfm)); - if (IS_ERR(essiv_tfm)) { - crypt_iv_essiv_dtr(cc); - return PTR_ERR(essiv_tfm); - } - per_cpu_ptr(cc->cpu, cpu)->iv_private = essiv_tfm; + essiv_tfm = setup_essiv_cpu(cc, ti, salt, + crypto_hash_digestsize(hash_tfm)); + if (IS_ERR(essiv_tfm)) { + crypt_iv_essiv_dtr(cc); + return PTR_ERR(essiv_tfm); } + cc->iv_private = essiv_tfm; return 0; @@ -410,7 +400,7 @@ bad: static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, struct dm_crypt_request *dmreq) { - struct crypto_cipher *essiv_tfm = this_crypt_config(cc)->iv_private; + struct crypto_cipher *essiv_tfm = cc->iv_private; memset(iv, 0, cc->iv_size); *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector); @@ -664,7 +654,7 @@ static void crypt_convert_init(struct crypt_config *cc, ctx->offset_out = 0; ctx->idx_in = bio_in ? bio_in->bi_idx : 0; ctx->idx_out = bio_out ? bio_out->bi_idx : 0; - ctx->sector = sector + cc->iv_offset; + ctx->cc_sector = sector + cc->iv_offset; init_completion(&ctx->restart); } @@ -695,12 +685,12 @@ static int crypt_convert_block(struct crypt_config *cc, struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out); struct dm_crypt_request *dmreq; u8 *iv; - int r = 0; + int r; dmreq = dmreq_of_req(cc, req); iv = iv_of_dmreq(cc, dmreq); - dmreq->iv_sector = ctx->sector; + dmreq->iv_sector = ctx->cc_sector; dmreq->ctx = ctx; sg_init_table(&dmreq->sg_in, 1); sg_set_page(&dmreq->sg_in, bv_in->bv_page, 1 << SECTOR_SHIFT, @@ -749,12 +739,12 @@ static void crypt_alloc_req(struct crypt_config *cc, struct convert_context *ctx) { struct crypt_cpu *this_cc = this_crypt_config(cc); - unsigned key_index = ctx->sector & (cc->tfms_count - 1); + unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1); if (!this_cc->req) this_cc->req = mempool_alloc(cc->req_pool, GFP_NOIO); - ablkcipher_request_set_tfm(this_cc->req, this_cc->tfms[key_index]); + ablkcipher_request_set_tfm(this_cc->req, cc->tfms[key_index]); ablkcipher_request_set_callback(this_cc->req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, kcryptd_async_done, dmreq_of_req(cc, this_cc->req)); @@ -769,14 +759,14 @@ static int crypt_convert(struct crypt_config *cc, struct crypt_cpu *this_cc = this_crypt_config(cc); int r; - atomic_set(&ctx->pending, 1); + atomic_set(&ctx->cc_pending, 1); while(ctx->idx_in < ctx->bio_in->bi_vcnt && ctx->idx_out < ctx->bio_out->bi_vcnt) { crypt_alloc_req(cc, ctx); - atomic_inc(&ctx->pending); + atomic_inc(&ctx->cc_pending); r = crypt_convert_block(cc, ctx, this_cc->req); @@ -788,19 +778,19 @@ static int crypt_convert(struct crypt_config *cc, /* fall through*/ case -EINPROGRESS: this_cc->req = NULL; - ctx->sector++; + ctx->cc_sector++; continue; /* sync */ case 0: - atomic_dec(&ctx->pending); - ctx->sector++; + atomic_dec(&ctx->cc_pending); + ctx->cc_sector++; cond_resched(); continue; /* error */ default: - atomic_dec(&ctx->pending); + atomic_dec(&ctx->cc_pending); return r; } } @@ -811,7 +801,7 @@ static int crypt_convert(struct crypt_config *cc, static void dm_crypt_bio_destructor(struct bio *bio) { struct dm_crypt_io *io = bio->bi_private; - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; bio_free(bio, cc->bs); } @@ -825,7 +815,7 @@ static void dm_crypt_bio_destructor(struct bio *bio) static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size, unsigned *out_of_pages) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; struct bio *clone; unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM; @@ -884,26 +874,25 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) } } -static struct dm_crypt_io *crypt_io_alloc(struct dm_target *ti, +static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc, struct bio *bio, sector_t sector) { - struct crypt_config *cc = ti->private; struct dm_crypt_io *io; io = mempool_alloc(cc->io_pool, GFP_NOIO); - io->target = ti; + io->cc = cc; io->base_bio = bio; io->sector = sector; io->error = 0; io->base_io = NULL; - atomic_set(&io->pending, 0); + atomic_set(&io->io_pending, 0); return io; } static void crypt_inc_pending(struct dm_crypt_io *io) { - atomic_inc(&io->pending); + atomic_inc(&io->io_pending); } /* @@ -913,12 +902,12 @@ static void crypt_inc_pending(struct dm_crypt_io *io) */ static void crypt_dec_pending(struct dm_crypt_io *io) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; struct bio *base_bio = io->base_bio; struct dm_crypt_io *base_io = io->base_io; int error = io->error; - if (!atomic_dec_and_test(&io->pending)) + if (!atomic_dec_and_test(&io->io_pending)) return; mempool_free(io, cc->io_pool); @@ -952,7 +941,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io) static void crypt_endio(struct bio *clone, int error) { struct dm_crypt_io *io = clone->bi_private; - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; unsigned rw = bio_data_dir(clone); if (unlikely(!bio_flagged(clone, BIO_UPTODATE) && !error)) @@ -979,7 +968,7 @@ static void crypt_endio(struct bio *clone, int error) static void clone_init(struct dm_crypt_io *io, struct bio *clone) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; clone->bi_private = io; clone->bi_end_io = crypt_endio; @@ -990,7 +979,7 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone) static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; struct bio *base_bio = io->base_bio; struct bio *clone; @@ -1038,7 +1027,7 @@ static void kcryptd_io(struct work_struct *work) static void kcryptd_queue_io(struct dm_crypt_io *io) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; INIT_WORK(&io->work, kcryptd_io); queue_work(cc->io_queue, &io->work); @@ -1047,7 +1036,7 @@ static void kcryptd_queue_io(struct dm_crypt_io *io) static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) { struct bio *clone = io->ctx.bio_out; - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; if (unlikely(io->error < 0)) { crypt_free_buffer_pages(cc, clone); @@ -1069,7 +1058,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; struct bio *clone; struct dm_crypt_io *new_io; int crypt_finished; @@ -1107,7 +1096,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) if (r < 0) io->error = -EIO; - crypt_finished = atomic_dec_and_test(&io->ctx.pending); + crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending); /* Encryption was already finished, submit io now */ if (crypt_finished) { @@ -1135,7 +1124,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) * between fragments, so switch to a new dm_crypt_io structure. */ if (unlikely(!crypt_finished && remaining)) { - new_io = crypt_io_alloc(io->target, io->base_bio, + new_io = crypt_io_alloc(io->cc, io->base_bio, sector); crypt_inc_pending(new_io); crypt_convert_init(cc, &new_io->ctx, NULL, @@ -1169,7 +1158,7 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io) static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; int r = 0; crypt_inc_pending(io); @@ -1181,7 +1170,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) if (r < 0) io->error = -EIO; - if (atomic_dec_and_test(&io->ctx.pending)) + if (atomic_dec_and_test(&io->ctx.cc_pending)) kcryptd_crypt_read_done(io); crypt_dec_pending(io); @@ -1193,7 +1182,7 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, struct dm_crypt_request *dmreq = async_req->data; struct convert_context *ctx = dmreq->ctx; struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx); - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; if (error == -EINPROGRESS) { complete(&ctx->restart); @@ -1208,7 +1197,7 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool); - if (!atomic_dec_and_test(&ctx->pending)) + if (!atomic_dec_and_test(&ctx->cc_pending)) return; if (bio_data_dir(io->base_bio) == READ) @@ -1229,7 +1218,7 @@ static void kcryptd_crypt(struct work_struct *work) static void kcryptd_queue_crypt(struct dm_crypt_io *io) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; INIT_WORK(&io->work, kcryptd_crypt); queue_work(cc->crypt_queue, &io->work); @@ -1273,29 +1262,38 @@ static void crypt_encode_key(char *hex, u8 *key, unsigned int size) } } -static void crypt_free_tfms(struct crypt_config *cc, int cpu) +static void crypt_free_tfms(struct crypt_config *cc) { - struct crypt_cpu *cpu_cc = per_cpu_ptr(cc->cpu, cpu); unsigned i; + if (!cc->tfms) + return; + for (i = 0; i < cc->tfms_count; i++) - if (cpu_cc->tfms[i] && !IS_ERR(cpu_cc->tfms[i])) { - crypto_free_ablkcipher(cpu_cc->tfms[i]); - cpu_cc->tfms[i] = NULL; + if (cc->tfms[i] && !IS_ERR(cc->tfms[i])) { + crypto_free_ablkcipher(cc->tfms[i]); + cc->tfms[i] = NULL; } + + kfree(cc->tfms); + cc->tfms = NULL; } -static int crypt_alloc_tfms(struct crypt_config *cc, int cpu, char *ciphermode) +static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode) { - struct crypt_cpu *cpu_cc = per_cpu_ptr(cc->cpu, cpu); unsigned i; int err; + cc->tfms = kmalloc(cc->tfms_count * sizeof(struct crypto_ablkcipher *), + GFP_KERNEL); + if (!cc->tfms) + return -ENOMEM; + for (i = 0; i < cc->tfms_count; i++) { - cpu_cc->tfms[i] = crypto_alloc_ablkcipher(ciphermode, 0, 0); - if (IS_ERR(cpu_cc->tfms[i])) { - err = PTR_ERR(cpu_cc->tfms[i]); - crypt_free_tfms(cc, cpu); + cc->tfms[i] = crypto_alloc_ablkcipher(ciphermode, 0, 0); + if (IS_ERR(cc->tfms[i])) { + err = PTR_ERR(cc->tfms[i]); + crypt_free_tfms(cc); return err; } } @@ -1306,15 +1304,14 @@ static int crypt_alloc_tfms(struct crypt_config *cc, int cpu, char *ciphermode) static int crypt_setkey_allcpus(struct crypt_config *cc) { unsigned subkey_size = cc->key_size >> ilog2(cc->tfms_count); - int cpu, err = 0, i, r; - - for_each_possible_cpu(cpu) { - for (i = 0; i < cc->tfms_count; i++) { - r = crypto_ablkcipher_setkey(per_cpu_ptr(cc->cpu, cpu)->tfms[i], - cc->key + (i * subkey_size), subkey_size); - if (r) - err = r; - } + int err = 0, i, r; + + for (i = 0; i < cc->tfms_count; i++) { + r = crypto_ablkcipher_setkey(cc->tfms[i], + cc->key + (i * subkey_size), + subkey_size); + if (r) + err = r; } return err; @@ -1376,9 +1373,10 @@ static void crypt_dtr(struct dm_target *ti) cpu_cc = per_cpu_ptr(cc->cpu, cpu); if (cpu_cc->req) mempool_free(cpu_cc->req, cc->req_pool); - crypt_free_tfms(cc, cpu); } + crypt_free_tfms(cc); + if (cc->bs) bioset_free(cc->bs); @@ -1411,7 +1409,7 @@ static int crypt_ctr_cipher(struct dm_target *ti, struct crypt_config *cc = ti->private; char *tmp, *cipher, *chainmode, *ivmode, *ivopts, *keycount; char *cipher_api = NULL; - int cpu, ret = -EINVAL; + int ret = -EINVAL; char dummy; /* Convert to crypto api definition? */ @@ -1452,8 +1450,7 @@ static int crypt_ctr_cipher(struct dm_target *ti, if (tmp) DMWARN("Ignoring unexpected additional cipher options"); - cc->cpu = __alloc_percpu(sizeof(*(cc->cpu)) + - cc->tfms_count * sizeof(*(cc->cpu->tfms)), + cc->cpu = __alloc_percpu(sizeof(*(cc->cpu)), __alignof__(struct crypt_cpu)); if (!cc->cpu) { ti->error = "Cannot allocate per cpu state"; @@ -1486,12 +1483,10 @@ static int crypt_ctr_cipher(struct dm_target *ti, } /* Allocate cipher */ - for_each_possible_cpu(cpu) { - ret = crypt_alloc_tfms(cc, cpu, cipher_api); - if (ret < 0) { - ti->error = "Error allocating crypto tfm"; - goto bad; - } + ret = crypt_alloc_tfms(cc, cipher_api); + if (ret < 0) { + ti->error = "Error allocating crypto tfm"; + goto bad; } /* Initialize and set key */ @@ -1699,7 +1694,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ti->num_flush_requests = 1; - ti->discard_zeroes_data_unsupported = 1; + ti->discard_zeroes_data_unsupported = true; return 0; @@ -1712,7 +1707,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { struct dm_crypt_io *io; - struct crypt_config *cc; + struct crypt_config *cc = ti->private; /* * If bio is REQ_FLUSH or REQ_DISCARD, just bypass crypt queues. @@ -1720,14 +1715,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, * - for REQ_DISCARD caller must use flush if IO ordering matters */ if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) { - cc = ti->private; bio->bi_bdev = cc->dev->bdev; if (bio_sectors(bio)) bio->bi_sector = cc->start + dm_target_offset(ti, bio->bi_sector); return DM_MAPIO_REMAPPED; } - io = crypt_io_alloc(ti, bio, dm_target_offset(ti, bio->bi_sector)); + io = crypt_io_alloc(cc, bio, dm_target_offset(ti, bio->bi_sector)); if (bio_data_dir(io->base_bio) == READ) { if (kcryptd_io_read(io, GFP_NOWAIT)) @@ -1739,7 +1733,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, } static int crypt_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct crypt_config *cc = ti->private; unsigned int sz = 0; diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 2dc22dddb2ae..f53846f9ab50 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -295,7 +295,7 @@ static int delay_map(struct dm_target *ti, struct bio *bio, } static int delay_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct delay_c *dc = ti->private; int sz = 0; diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index ac49c01f1a44..cc15543a6ad7 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -333,7 +333,7 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, } static int flakey_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { unsigned sz = 0; struct flakey_c *fc = ti->private; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index a1a3e6df17b8..afd95986d099 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1054,6 +1054,7 @@ static void retrieve_status(struct dm_table *table, char *outbuf, *outptr; status_type_t type; size_t remaining, len, used = 0; + unsigned status_flags = 0; outptr = outbuf = get_result_buffer(param, param_size, &len); @@ -1090,7 +1091,9 @@ static void retrieve_status(struct dm_table *table, /* Get the status/table string from the target driver */ if (ti->type->status) { - if (ti->type->status(ti, type, outptr, remaining)) { + if (param->flags & DM_NOFLUSH_FLAG) + status_flags |= DM_STATUS_NOFLUSH_FLAG; + if (ti->type->status(ti, type, status_flags, outptr, remaining)) { param->flags |= DM_BUFFER_FULL_FLAG; break; } diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 3639eeab6042..1bf19a93eef0 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -96,7 +96,7 @@ static int linear_map(struct dm_target *ti, struct bio *bio, } static int linear_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct linear_c *lc = (struct linear_c *) ti->private; diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 65ebaebf502b..627d19186d5a 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -571,16 +571,6 @@ static void disk_dtr(struct dm_dirty_log *log) destroy_log_context(lc); } -static int count_bits32(uint32_t *addr, unsigned size) -{ - int count = 0, i; - - for (i = 0; i < size; i++) { - count += hweight32(*(addr+i)); - } - return count; -} - static void fail_log_device(struct log_c *lc) { if (lc->log_dev_failed) @@ -629,7 +619,8 @@ static int disk_resume(struct dm_dirty_log *log) /* copy clean across to sync */ memcpy(lc->sync_bits, lc->clean_bits, size); - lc->sync_count = count_bits32(lc->clean_bits, lc->bitset_uint32_count); + lc->sync_count = memweight(lc->clean_bits, + lc->bitset_uint32_count * sizeof(uint32_t)); lc->sync_search = 0; /* set the correct number of regions in the header */ diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 8a3b2d53f81b..d8abb90a6c2f 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1378,7 +1378,7 @@ static void multipath_resume(struct dm_target *ti) * num_paths num_selector_args [path_dev [selector_args]* ]+ ]+ */ static int multipath_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { int sz = 0; unsigned long flags; diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 691b3c59088e..982e3e390c45 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1157,7 +1157,7 @@ static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_c } static int raid_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct raid_set *rs = ti->private; unsigned raid_param_cnt = 1; /* at least 1 for chunksize */ diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 819ccba65912..bc5ddba8045b 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1088,7 +1088,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = 1; ti->num_discard_requests = 1; - ti->discard_zeroes_data_unsupported = 1; + ti->discard_zeroes_data_unsupported = true; ms->kmirrord_wq = alloc_workqueue("kmirrord", WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); @@ -1367,7 +1367,7 @@ static char device_status_char(struct mirror *m) static int mirror_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { unsigned int m, sz = 0; struct mirror_set *ms = (struct mirror_set *) ti->private; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 6c0f3e33923a..a143921feaf6 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1849,7 +1849,7 @@ static void snapshot_merge_resume(struct dm_target *ti) } static int snapshot_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { unsigned sz = 0; struct dm_snapshot *snap = ti->private; @@ -2151,8 +2151,8 @@ static void origin_resume(struct dm_target *ti) ti->max_io_len = get_origin_minimum_chunksize(dev->bdev); } -static int origin_status(struct dm_target *ti, status_type_t type, char *result, - unsigned int maxlen) +static int origin_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct dm_dev *dev = ti->private; diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 9e8f4cc63d6c..a087bf2a8d66 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -311,8 +311,8 @@ static int stripe_map(struct dm_target *ti, struct bio *bio, * */ -static int stripe_status(struct dm_target *ti, - status_type_t type, char *result, unsigned int maxlen) +static int stripe_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct stripe_c *sc = (struct stripe_c *) ti->private; char buffer[sc->stripes + 1]; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 2e227fbf1622..f90069029aae 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1319,6 +1319,9 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) if (!ti->num_flush_requests) continue; + if (ti->flush_supported) + return 1; + if (ti->type->iterate_devices && ti->type->iterate_devices(ti, device_flush_capable, &flush)) return 1; diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 31f9827dfb56..693e149e9727 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011 Red Hat, Inc. + * Copyright (C) 2011-2012 Red Hat, Inc. * * This file is released under the GPL. */ @@ -178,13 +178,20 @@ struct dm_pool_metadata { struct rw_semaphore root_lock; uint32_t time; - int need_commit; dm_block_t root; dm_block_t details_root; struct list_head thin_devices; uint64_t trans_id; unsigned long flags; sector_t data_block_size; + bool read_only:1; + + /* + * Set if a transaction has to be aborted but the attempt to roll back + * to the previous (good) transaction failed. The only pool metadata + * operation possible in this state is the closing of the device. + */ + bool fail_io:1; }; struct dm_thin_device { @@ -193,7 +200,8 @@ struct dm_thin_device { dm_thin_id id; int open_count; - int changed; + bool changed:1; + bool aborted_with_changes:1; uint64_t mapped_blocks; uint64_t transaction_id; uint32_t creation_time; @@ -344,7 +352,21 @@ static int subtree_equal(void *context, void *value1_le, void *value2_le) /*----------------------------------------------------------------*/ -static int superblock_all_zeroes(struct dm_block_manager *bm, int *result) +static int superblock_lock_zero(struct dm_pool_metadata *pmd, + struct dm_block **sblock) +{ + return dm_bm_write_lock_zero(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, sblock); +} + +static int superblock_lock(struct dm_pool_metadata *pmd, + struct dm_block **sblock) +{ + return dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, sblock); +} + +static int __superblock_all_zeroes(struct dm_block_manager *bm, int *result) { int r; unsigned i; @@ -371,72 +393,9 @@ static int superblock_all_zeroes(struct dm_block_manager *bm, int *result) return dm_bm_unlock(b); } -static int init_pmd(struct dm_pool_metadata *pmd, - struct dm_block_manager *bm, - dm_block_t nr_blocks, int create) +static void __setup_btree_details(struct dm_pool_metadata *pmd) { - int r; - struct dm_space_map *sm, *data_sm; - struct dm_transaction_manager *tm; - struct dm_block *sblock; - - if (create) { - r = dm_tm_create_with_sm(bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &tm, &sm, &sblock); - if (r < 0) { - DMERR("tm_create_with_sm failed"); - return r; - } - - data_sm = dm_sm_disk_create(tm, nr_blocks); - if (IS_ERR(data_sm)) { - DMERR("sm_disk_create failed"); - dm_tm_unlock(tm, sblock); - r = PTR_ERR(data_sm); - goto bad; - } - } else { - struct thin_disk_superblock *disk_super = NULL; - size_t space_map_root_offset = - offsetof(struct thin_disk_superblock, metadata_space_map_root); - - r = dm_tm_open_with_sm(bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, space_map_root_offset, - SPACE_MAP_ROOT_SIZE, &tm, &sm, &sblock); - if (r < 0) { - DMERR("tm_open_with_sm failed"); - return r; - } - - disk_super = dm_block_data(sblock); - data_sm = dm_sm_disk_open(tm, disk_super->data_space_map_root, - sizeof(disk_super->data_space_map_root)); - if (IS_ERR(data_sm)) { - DMERR("sm_disk_open failed"); - r = PTR_ERR(data_sm); - goto bad; - } - } - - - r = dm_tm_unlock(tm, sblock); - if (r < 0) { - DMERR("couldn't unlock superblock"); - goto bad_data_sm; - } - - pmd->bm = bm; - pmd->metadata_sm = sm; - pmd->data_sm = data_sm; - pmd->tm = tm; - pmd->nb_tm = dm_tm_create_non_blocking_clone(tm); - if (!pmd->nb_tm) { - DMERR("could not create clone tm"); - r = -ENOMEM; - goto bad_data_sm; - } - - pmd->info.tm = tm; + pmd->info.tm = pmd->tm; pmd->info.levels = 2; pmd->info.value_type.context = pmd->data_sm; pmd->info.value_type.size = sizeof(__le64); @@ -447,7 +406,7 @@ static int init_pmd(struct dm_pool_metadata *pmd, memcpy(&pmd->nb_info, &pmd->info, sizeof(pmd->nb_info)); pmd->nb_info.tm = pmd->nb_tm; - pmd->tl_info.tm = tm; + pmd->tl_info.tm = pmd->tm; pmd->tl_info.levels = 1; pmd->tl_info.value_type.context = &pmd->info; pmd->tl_info.value_type.size = sizeof(__le64); @@ -455,7 +414,7 @@ static int init_pmd(struct dm_pool_metadata *pmd, pmd->tl_info.value_type.dec = subtree_dec; pmd->tl_info.value_type.equal = subtree_equal; - pmd->bl_info.tm = tm; + pmd->bl_info.tm = pmd->tm; pmd->bl_info.levels = 1; pmd->bl_info.value_type.context = pmd->data_sm; pmd->bl_info.value_type.size = sizeof(__le64); @@ -463,48 +422,266 @@ static int init_pmd(struct dm_pool_metadata *pmd, pmd->bl_info.value_type.dec = data_block_dec; pmd->bl_info.value_type.equal = data_block_equal; - pmd->details_info.tm = tm; + pmd->details_info.tm = pmd->tm; pmd->details_info.levels = 1; pmd->details_info.value_type.context = NULL; pmd->details_info.value_type.size = sizeof(struct disk_device_details); pmd->details_info.value_type.inc = NULL; pmd->details_info.value_type.dec = NULL; pmd->details_info.value_type.equal = NULL; +} + +static int __write_initial_superblock(struct dm_pool_metadata *pmd) +{ + int r; + struct dm_block *sblock; + size_t metadata_len, data_len; + struct thin_disk_superblock *disk_super; + sector_t bdev_size = i_size_read(pmd->bdev->bd_inode) >> SECTOR_SHIFT; - pmd->root = 0; + if (bdev_size > THIN_METADATA_MAX_SECTORS) + bdev_size = THIN_METADATA_MAX_SECTORS; - init_rwsem(&pmd->root_lock); - pmd->time = 0; - pmd->need_commit = 0; - pmd->details_root = 0; - pmd->trans_id = 0; - pmd->flags = 0; - INIT_LIST_HEAD(&pmd->thin_devices); + r = dm_sm_root_size(pmd->metadata_sm, &metadata_len); + if (r < 0) + return r; + + r = dm_sm_root_size(pmd->data_sm, &data_len); + if (r < 0) + return r; + + r = dm_sm_commit(pmd->data_sm); + if (r < 0) + return r; + + r = dm_tm_pre_commit(pmd->tm); + if (r < 0) + return r; + + r = superblock_lock_zero(pmd, &sblock); + if (r) + return r; + + disk_super = dm_block_data(sblock); + disk_super->flags = 0; + memset(disk_super->uuid, 0, sizeof(disk_super->uuid)); + disk_super->magic = cpu_to_le64(THIN_SUPERBLOCK_MAGIC); + disk_super->version = cpu_to_le32(THIN_VERSION); + disk_super->time = 0; + disk_super->trans_id = 0; + disk_super->held_root = 0; + + r = dm_sm_copy_root(pmd->metadata_sm, &disk_super->metadata_space_map_root, + metadata_len); + if (r < 0) + goto bad_locked; + + r = dm_sm_copy_root(pmd->data_sm, &disk_super->data_space_map_root, + data_len); + if (r < 0) + goto bad_locked; + + disk_super->data_mapping_root = cpu_to_le64(pmd->root); + disk_super->device_details_root = cpu_to_le64(pmd->details_root); + disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); + disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT); + disk_super->data_block_size = cpu_to_le32(pmd->data_block_size); + + return dm_tm_commit(pmd->tm, sblock); + +bad_locked: + dm_bm_unlock(sblock); + return r; +} + +static int __format_metadata(struct dm_pool_metadata *pmd) +{ + int r; + + r = dm_tm_create_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &pmd->tm, &pmd->metadata_sm); + if (r < 0) { + DMERR("tm_create_with_sm failed"); + return r; + } + + pmd->data_sm = dm_sm_disk_create(pmd->tm, 0); + if (IS_ERR(pmd->data_sm)) { + DMERR("sm_disk_create failed"); + r = PTR_ERR(pmd->data_sm); + goto bad_cleanup_tm; + } + + pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm); + if (!pmd->nb_tm) { + DMERR("could not create non-blocking clone tm"); + r = -ENOMEM; + goto bad_cleanup_data_sm; + } + + __setup_btree_details(pmd); + + r = dm_btree_empty(&pmd->info, &pmd->root); + if (r < 0) + goto bad_cleanup_nb_tm; + + r = dm_btree_empty(&pmd->details_info, &pmd->details_root); + if (r < 0) { + DMERR("couldn't create devices root"); + goto bad_cleanup_nb_tm; + } + + r = __write_initial_superblock(pmd); + if (r) + goto bad_cleanup_nb_tm; return 0; -bad_data_sm: - dm_sm_destroy(data_sm); -bad: - dm_tm_destroy(tm); - dm_sm_destroy(sm); +bad_cleanup_nb_tm: + dm_tm_destroy(pmd->nb_tm); +bad_cleanup_data_sm: + dm_sm_destroy(pmd->data_sm); +bad_cleanup_tm: + dm_tm_destroy(pmd->tm); + dm_sm_destroy(pmd->metadata_sm); return r; } +static int __check_incompat_features(struct thin_disk_superblock *disk_super, + struct dm_pool_metadata *pmd) +{ + uint32_t features; + + features = le32_to_cpu(disk_super->incompat_flags) & ~THIN_FEATURE_INCOMPAT_SUPP; + if (features) { + DMERR("could not access metadata due to unsupported optional features (%lx).", + (unsigned long)features); + return -EINVAL; + } + + /* + * Check for read-only metadata to skip the following RDWR checks. + */ + if (get_disk_ro(pmd->bdev->bd_disk)) + return 0; + + features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP; + if (features) { + DMERR("could not access metadata RDWR due to unsupported optional features (%lx).", + (unsigned long)features); + return -EINVAL; + } + + return 0; +} + +static int __open_metadata(struct dm_pool_metadata *pmd) +{ + int r; + struct dm_block *sblock; + struct thin_disk_superblock *disk_super; + + r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, &sblock); + if (r < 0) { + DMERR("couldn't read superblock"); + return r; + } + + disk_super = dm_block_data(sblock); + + r = __check_incompat_features(disk_super, pmd); + if (r < 0) + goto bad_unlock_sblock; + + r = dm_tm_open_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, + disk_super->metadata_space_map_root, + sizeof(disk_super->metadata_space_map_root), + &pmd->tm, &pmd->metadata_sm); + if (r < 0) { + DMERR("tm_open_with_sm failed"); + goto bad_unlock_sblock; + } + + pmd->data_sm = dm_sm_disk_open(pmd->tm, disk_super->data_space_map_root, + sizeof(disk_super->data_space_map_root)); + if (IS_ERR(pmd->data_sm)) { + DMERR("sm_disk_open failed"); + r = PTR_ERR(pmd->data_sm); + goto bad_cleanup_tm; + } + + pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm); + if (!pmd->nb_tm) { + DMERR("could not create non-blocking clone tm"); + r = -ENOMEM; + goto bad_cleanup_data_sm; + } + + __setup_btree_details(pmd); + return dm_bm_unlock(sblock); + +bad_cleanup_data_sm: + dm_sm_destroy(pmd->data_sm); +bad_cleanup_tm: + dm_tm_destroy(pmd->tm); + dm_sm_destroy(pmd->metadata_sm); +bad_unlock_sblock: + dm_bm_unlock(sblock); + + return r; +} + +static int __open_or_format_metadata(struct dm_pool_metadata *pmd, bool format_device) +{ + int r, unformatted; + + r = __superblock_all_zeroes(pmd->bm, &unformatted); + if (r) + return r; + + if (unformatted) + return format_device ? __format_metadata(pmd) : -EPERM; + + return __open_metadata(pmd); +} + +static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool format_device) +{ + int r; + + pmd->bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE, + THIN_METADATA_CACHE_SIZE, + THIN_MAX_CONCURRENT_LOCKS); + if (IS_ERR(pmd->bm)) { + DMERR("could not create block manager"); + return PTR_ERR(pmd->bm); + } + + r = __open_or_format_metadata(pmd, format_device); + if (r) + dm_block_manager_destroy(pmd->bm); + + return r; +} + +static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd) +{ + dm_sm_destroy(pmd->data_sm); + dm_sm_destroy(pmd->metadata_sm); + dm_tm_destroy(pmd->nb_tm); + dm_tm_destroy(pmd->tm); + dm_block_manager_destroy(pmd->bm); +} + static int __begin_transaction(struct dm_pool_metadata *pmd) { int r; - u32 features; struct thin_disk_superblock *disk_super; struct dm_block *sblock; /* - * __maybe_commit_transaction() resets these - */ - WARN_ON(pmd->need_commit); - - /* * We re-read the superblock every time. Shouldn't need to do this * really. */ @@ -521,32 +698,8 @@ static int __begin_transaction(struct dm_pool_metadata *pmd) pmd->flags = le32_to_cpu(disk_super->flags); pmd->data_block_size = le32_to_cpu(disk_super->data_block_size); - features = le32_to_cpu(disk_super->incompat_flags) & ~THIN_FEATURE_INCOMPAT_SUPP; - if (features) { - DMERR("could not access metadata due to " - "unsupported optional features (%lx).", - (unsigned long)features); - r = -EINVAL; - goto out; - } - - /* - * Check for read-only metadata to skip the following RDWR checks. - */ - if (get_disk_ro(pmd->bdev->bd_disk)) - goto out; - - features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP; - if (features) { - DMERR("could not access metadata RDWR due to " - "unsupported optional features (%lx).", - (unsigned long)features); - r = -EINVAL; - } - -out: dm_bm_unlock(sblock); - return r; + return 0; } static int __write_changed_details(struct dm_pool_metadata *pmd) @@ -579,8 +732,6 @@ static int __write_changed_details(struct dm_pool_metadata *pmd) list_del(&td->list); kfree(td); } - - pmd->need_commit = 1; } return 0; @@ -588,9 +739,6 @@ static int __write_changed_details(struct dm_pool_metadata *pmd) static int __commit_transaction(struct dm_pool_metadata *pmd) { - /* - * FIXME: Associated pool should be made read-only on failure. - */ int r; size_t metadata_len, data_len; struct thin_disk_superblock *disk_super; @@ -605,9 +753,6 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) if (r < 0) return r; - if (!pmd->need_commit) - return r; - r = dm_sm_commit(pmd->data_sm); if (r < 0) return r; @@ -624,8 +769,7 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) if (r < 0) return r; - r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); + r = superblock_lock(pmd, &sblock); if (r) return r; @@ -646,11 +790,7 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) if (r < 0) goto out_locked; - r = dm_tm_commit(pmd->tm, sblock); - if (!r) - pmd->need_commit = 0; - - return r; + return dm_tm_commit(pmd->tm, sblock); out_locked: dm_bm_unlock(sblock); @@ -658,15 +798,11 @@ out_locked: } struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, - sector_t data_block_size) + sector_t data_block_size, + bool format_device) { int r; - struct thin_disk_superblock *disk_super; struct dm_pool_metadata *pmd; - sector_t bdev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; - struct dm_block_manager *bm; - int create; - struct dm_block *sblock; pmd = kmalloc(sizeof(*pmd), GFP_KERNEL); if (!pmd) { @@ -674,86 +810,28 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, return ERR_PTR(-ENOMEM); } - bm = dm_block_manager_create(bdev, THIN_METADATA_BLOCK_SIZE, - THIN_METADATA_CACHE_SIZE, - THIN_MAX_CONCURRENT_LOCKS); - if (!bm) { - DMERR("could not create block manager"); - kfree(pmd); - return ERR_PTR(-ENOMEM); - } - - r = superblock_all_zeroes(bm, &create); - if (r) { - dm_block_manager_destroy(bm); - kfree(pmd); - return ERR_PTR(r); - } - + init_rwsem(&pmd->root_lock); + pmd->time = 0; + INIT_LIST_HEAD(&pmd->thin_devices); + pmd->read_only = false; + pmd->fail_io = false; + pmd->bdev = bdev; + pmd->data_block_size = data_block_size; - r = init_pmd(pmd, bm, 0, create); + r = __create_persistent_data_objects(pmd, format_device); if (r) { - dm_block_manager_destroy(bm); kfree(pmd); return ERR_PTR(r); } - pmd->bdev = bdev; - - if (!create) { - r = __begin_transaction(pmd); - if (r < 0) - goto bad; - return pmd; - } - - /* - * Create. - */ - r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); - if (r) - goto bad; - - if (bdev_size > THIN_METADATA_MAX_SECTORS) - bdev_size = THIN_METADATA_MAX_SECTORS; - disk_super = dm_block_data(sblock); - disk_super->magic = cpu_to_le64(THIN_SUPERBLOCK_MAGIC); - disk_super->version = cpu_to_le32(THIN_VERSION); - disk_super->time = 0; - disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); - disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT); - disk_super->data_block_size = cpu_to_le32(data_block_size); - - r = dm_bm_unlock(sblock); - if (r < 0) - goto bad; - - r = dm_btree_empty(&pmd->info, &pmd->root); - if (r < 0) - goto bad; - - r = dm_btree_empty(&pmd->details_info, &pmd->details_root); - if (r < 0) { - DMERR("couldn't create devices root"); - goto bad; - } - - pmd->flags = 0; - pmd->need_commit = 1; - r = dm_pool_commit_metadata(pmd); + r = __begin_transaction(pmd); if (r < 0) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - goto bad; + if (dm_pool_metadata_close(pmd) < 0) + DMWARN("%s: dm_pool_metadata_close() failed.", __func__); + return ERR_PTR(r); } return pmd; - -bad: - if (dm_pool_metadata_close(pmd) < 0) - DMWARN("%s: dm_pool_metadata_close() failed.", __func__); - return ERR_PTR(r); } int dm_pool_metadata_close(struct dm_pool_metadata *pmd) @@ -779,18 +857,17 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) return -EBUSY; } - r = __commit_transaction(pmd); - if (r < 0) - DMWARN("%s: __commit_transaction() failed, error = %d", - __func__, r); + if (!pmd->read_only && !pmd->fail_io) { + r = __commit_transaction(pmd); + if (r < 0) + DMWARN("%s: __commit_transaction() failed, error = %d", + __func__, r); + } - dm_tm_destroy(pmd->tm); - dm_tm_destroy(pmd->nb_tm); - dm_block_manager_destroy(pmd->bm); - dm_sm_destroy(pmd->metadata_sm); - dm_sm_destroy(pmd->data_sm); - kfree(pmd); + if (!pmd->fail_io) + __destroy_persistent_data_objects(pmd); + kfree(pmd); return 0; } @@ -851,6 +928,7 @@ static int __open_device(struct dm_pool_metadata *pmd, (*td)->id = dev; (*td)->open_count = 1; (*td)->changed = changed; + (*td)->aborted_with_changes = false; (*td)->mapped_blocks = le64_to_cpu(details_le.mapped_blocks); (*td)->transaction_id = le64_to_cpu(details_le.transaction_id); (*td)->creation_time = le32_to_cpu(details_le.creation_time); @@ -912,10 +990,11 @@ static int __create_thin(struct dm_pool_metadata *pmd, int dm_pool_create_thin(struct dm_pool_metadata *pmd, dm_thin_id dev) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __create_thin(pmd, dev); + if (!pmd->fail_io) + r = __create_thin(pmd, dev); up_write(&pmd->root_lock); return r; @@ -1002,10 +1081,11 @@ int dm_pool_create_snap(struct dm_pool_metadata *pmd, dm_thin_id dev, dm_thin_id origin) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __create_snap(pmd, dev, origin); + if (!pmd->fail_io) + r = __create_snap(pmd, dev, origin); up_write(&pmd->root_lock); return r; @@ -1038,18 +1118,17 @@ static int __delete_device(struct dm_pool_metadata *pmd, dm_thin_id dev) if (r) return r; - pmd->need_commit = 1; - return 0; } int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __delete_device(pmd, dev); + if (!pmd->fail_io) + r = __delete_device(pmd, dev); up_write(&pmd->root_lock); return r; @@ -1059,28 +1138,40 @@ int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd, uint64_t current_id, uint64_t new_id) { + int r = -EINVAL; + down_write(&pmd->root_lock); + + if (pmd->fail_io) + goto out; + if (pmd->trans_id != current_id) { - up_write(&pmd->root_lock); DMERR("mismatched transaction id"); - return -EINVAL; + goto out; } pmd->trans_id = new_id; - pmd->need_commit = 1; + r = 0; + +out: up_write(&pmd->root_lock); - return 0; + return r; } int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd, uint64_t *result) { + int r = -EINVAL; + down_read(&pmd->root_lock); - *result = pmd->trans_id; + if (!pmd->fail_io) { + *result = pmd->trans_id; + r = 0; + } up_read(&pmd->root_lock); - return 0; + return r; } static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) @@ -1109,8 +1200,6 @@ static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) dm_tm_dec(pmd->tm, held_root); dm_tm_unlock(pmd->tm, copy); - pmd->need_commit = 1; - return -EBUSY; } @@ -1132,29 +1221,25 @@ static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) /* * Write the held root into the superblock. */ - r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); + r = superblock_lock(pmd, &sblock); if (r) { dm_tm_dec(pmd->tm, held_root); - pmd->need_commit = 1; return r; } disk_super = dm_block_data(sblock); disk_super->held_root = cpu_to_le64(held_root); dm_bm_unlock(sblock); - - pmd->need_commit = 1; - return 0; } int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __reserve_metadata_snap(pmd); + if (!pmd->fail_io) + r = __reserve_metadata_snap(pmd); up_write(&pmd->root_lock); return r; @@ -1167,15 +1252,13 @@ static int __release_metadata_snap(struct dm_pool_metadata *pmd) struct dm_block *sblock, *copy; dm_block_t held_root; - r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); + r = superblock_lock(pmd, &sblock); if (r) return r; disk_super = dm_block_data(sblock); held_root = le64_to_cpu(disk_super->held_root); disk_super->held_root = cpu_to_le64(0); - pmd->need_commit = 1; dm_bm_unlock(sblock); @@ -1198,10 +1281,11 @@ static int __release_metadata_snap(struct dm_pool_metadata *pmd) int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __release_metadata_snap(pmd); + if (!pmd->fail_io) + r = __release_metadata_snap(pmd); up_write(&pmd->root_lock); return r; @@ -1228,10 +1312,11 @@ static int __get_metadata_snap(struct dm_pool_metadata *pmd, int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = __get_metadata_snap(pmd, result); + if (!pmd->fail_io) + r = __get_metadata_snap(pmd, result); up_read(&pmd->root_lock); return r; @@ -1240,10 +1325,11 @@ int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd, int dm_pool_open_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev, struct dm_thin_device **td) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __open_device(pmd, dev, 0, td); + if (!pmd->fail_io) + r = __open_device(pmd, dev, 0, td); up_write(&pmd->root_lock); return r; @@ -1271,28 +1357,31 @@ static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time) int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block, int can_block, struct dm_thin_lookup_result *result) { - int r; + int r = -EINVAL; uint64_t block_time = 0; __le64 value; struct dm_pool_metadata *pmd = td->pmd; dm_block_t keys[2] = { td->id, block }; + struct dm_btree_info *info; if (can_block) { down_read(&pmd->root_lock); - r = dm_btree_lookup(&pmd->info, pmd->root, keys, &value); - if (!r) - block_time = le64_to_cpu(value); - up_read(&pmd->root_lock); - - } else if (down_read_trylock(&pmd->root_lock)) { - r = dm_btree_lookup(&pmd->nb_info, pmd->root, keys, &value); - if (!r) - block_time = le64_to_cpu(value); - up_read(&pmd->root_lock); - - } else + info = &pmd->info; + } else if (down_read_trylock(&pmd->root_lock)) + info = &pmd->nb_info; + else return -EWOULDBLOCK; + if (pmd->fail_io) + goto out; + + r = dm_btree_lookup(info, pmd->root, keys, &value); + if (!r) + block_time = le64_to_cpu(value); + +out: + up_read(&pmd->root_lock); + if (!r) { dm_block_t exception_block; uint32_t exception_time; @@ -1313,7 +1402,6 @@ static int __insert(struct dm_thin_device *td, dm_block_t block, struct dm_pool_metadata *pmd = td->pmd; dm_block_t keys[2] = { td->id, block }; - pmd->need_commit = 1; value = cpu_to_le64(pack_block_time(data_block, pmd->time)); __dm_bless_for_disk(&value); @@ -1322,10 +1410,9 @@ static int __insert(struct dm_thin_device *td, dm_block_t block, if (r) return r; - if (inserted) { + td->changed = 1; + if (inserted) td->mapped_blocks++; - td->changed = 1; - } return 0; } @@ -1333,10 +1420,11 @@ static int __insert(struct dm_thin_device *td, dm_block_t block, int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block, dm_block_t data_block) { - int r; + int r = -EINVAL; down_write(&td->pmd->root_lock); - r = __insert(td, block, data_block); + if (!td->pmd->fail_io) + r = __insert(td, block, data_block); up_write(&td->pmd->root_lock); return r; @@ -1354,31 +1442,51 @@ static int __remove(struct dm_thin_device *td, dm_block_t block) td->mapped_blocks--; td->changed = 1; - pmd->need_commit = 1; return 0; } int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block) { - int r; + int r = -EINVAL; down_write(&td->pmd->root_lock); - r = __remove(td, block); + if (!td->pmd->fail_io) + r = __remove(td, block); up_write(&td->pmd->root_lock); return r; } -int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) +bool dm_thin_changed_this_transaction(struct dm_thin_device *td) { int r; - down_write(&pmd->root_lock); + down_read(&td->pmd->root_lock); + r = td->changed; + up_read(&td->pmd->root_lock); - r = dm_sm_new_block(pmd->data_sm, result); - pmd->need_commit = 1; + return r; +} +bool dm_thin_aborted_changes(struct dm_thin_device *td) +{ + bool r; + + down_read(&td->pmd->root_lock); + r = td->aborted_with_changes; + up_read(&td->pmd->root_lock); + + return r; +} + +int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) +{ + int r = -EINVAL; + + down_write(&pmd->root_lock); + if (!pmd->fail_io) + r = dm_sm_new_block(pmd->data_sm, result); up_write(&pmd->root_lock); return r; @@ -1386,9 +1494,11 @@ int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) int dm_pool_commit_metadata(struct dm_pool_metadata *pmd) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); + if (pmd->fail_io) + goto out; r = __commit_transaction(pmd); if (r <= 0) @@ -1403,12 +1513,41 @@ out: return r; } +static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd) +{ + struct dm_thin_device *td; + + list_for_each_entry(td, &pmd->thin_devices, list) + td->aborted_with_changes = td->changed; +} + +int dm_pool_abort_metadata(struct dm_pool_metadata *pmd) +{ + int r = -EINVAL; + + down_write(&pmd->root_lock); + if (pmd->fail_io) + goto out; + + __set_abort_with_changes_flags(pmd); + __destroy_persistent_data_objects(pmd); + r = __create_persistent_data_objects(pmd, false); + if (r) + pmd->fail_io = true; + +out: + up_write(&pmd->root_lock); + + return r; +} + int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = dm_sm_get_nr_free(pmd->data_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_free(pmd->data_sm, result); up_read(&pmd->root_lock); return r; @@ -1417,10 +1556,11 @@ int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *resul int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = dm_sm_get_nr_free(pmd->metadata_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_free(pmd->metadata_sm, result); up_read(&pmd->root_lock); return r; @@ -1429,10 +1569,11 @@ int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = dm_sm_get_nr_blocks(pmd->metadata_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_blocks(pmd->metadata_sm, result); up_read(&pmd->root_lock); return r; @@ -1449,10 +1590,11 @@ int dm_pool_get_data_block_size(struct dm_pool_metadata *pmd, sector_t *result) int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = dm_sm_get_nr_blocks(pmd->data_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_blocks(pmd->data_sm, result); up_read(&pmd->root_lock); return r; @@ -1460,13 +1602,17 @@ int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result) { + int r = -EINVAL; struct dm_pool_metadata *pmd = td->pmd; down_read(&pmd->root_lock); - *result = td->mapped_blocks; + if (!pmd->fail_io) { + *result = td->mapped_blocks; + r = 0; + } up_read(&pmd->root_lock); - return 0; + return r; } static int __highest_block(struct dm_thin_device *td, dm_block_t *result) @@ -1488,11 +1634,12 @@ static int __highest_block(struct dm_thin_device *td, dm_block_t *result) int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, dm_block_t *result) { - int r; + int r = -EINVAL; struct dm_pool_metadata *pmd = td->pmd; down_read(&pmd->root_lock); - r = __highest_block(td, result); + if (!pmd->fail_io) + r = __highest_block(td, result); up_read(&pmd->root_lock); return r; @@ -1515,20 +1662,25 @@ static int __resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) return -EINVAL; } - r = dm_sm_extend(pmd->data_sm, new_count - old_count); - if (!r) - pmd->need_commit = 1; - - return r; + return dm_sm_extend(pmd->data_sm, new_count - old_count); } int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __resize_data_dev(pmd, new_count); + if (!pmd->fail_io) + r = __resize_data_dev(pmd, new_count); up_write(&pmd->root_lock); return r; } + +void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd) +{ + down_write(&pmd->root_lock); + pmd->read_only = true; + dm_bm_set_read_only(pmd->bm); + up_write(&pmd->root_lock); +} diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index 7b47c0a9a8e3..0cecc3702885 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -38,7 +38,8 @@ typedef uint64_t dm_thin_id; * Reopens or creates a new, empty metadata volume. */ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, - sector_t data_block_size); + sector_t data_block_size, + bool format_device); int dm_pool_metadata_close(struct dm_pool_metadata *pmd); @@ -79,6 +80,16 @@ int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd, int dm_pool_commit_metadata(struct dm_pool_metadata *pmd); /* + * Discards all uncommitted changes. Rereads the superblock, rolling back + * to the last good transaction. Thin devices remain open. + * dm_thin_aborted_changes() tells you if they had uncommitted changes. + * + * If this call fails it's only useful to call dm_pool_metadata_close(). + * All other methods will fail with -EINVAL. + */ +int dm_pool_abort_metadata(struct dm_pool_metadata *pmd); + +/* * Set/get userspace transaction id. */ int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd, @@ -147,6 +158,10 @@ int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block); /* * Queries. */ +bool dm_thin_changed_this_transaction(struct dm_thin_device *td); + +bool dm_thin_aborted_changes(struct dm_thin_device *td); + int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, dm_block_t *highest_mapped); @@ -171,6 +186,12 @@ int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result); */ int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_size); +/* + * Flicks the underlying block manager into read only mode, so you know + * that nothing is changing. + */ +void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd); + /*----------------------------------------------------------------*/ #endif diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 93e3e542cff9..af1fc3b2c2ad 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1,10 +1,11 @@ /* - * Copyright (C) 2011 Red Hat UK. + * Copyright (C) 2011-2012 Red Hat UK. * * This file is released under the GPL. */ #include "dm-thin-metadata.h" +#include "dm.h" #include <linux/device-mapper.h> #include <linux/dm-io.h> @@ -496,12 +497,27 @@ static void build_virtual_key(struct dm_thin_device *td, dm_block_t b, */ struct dm_thin_new_mapping; +/* + * The pool runs in 3 modes. Ordered in degraded order for comparisons. + */ +enum pool_mode { + PM_WRITE, /* metadata may be changed */ + PM_READ_ONLY, /* metadata may not be changed */ + PM_FAIL, /* all I/O fails */ +}; + struct pool_features { + enum pool_mode mode; + unsigned zero_new_blocks:1; unsigned discard_enabled:1; unsigned discard_passdown:1; }; +struct thin_c; +typedef void (*process_bio_fn)(struct thin_c *tc, struct bio *bio); +typedef void (*process_mapping_fn)(struct dm_thin_new_mapping *m); + struct pool { struct list_head list; struct dm_target *ti; /* Only set if a pool target is bound */ @@ -542,8 +558,17 @@ struct pool { struct dm_thin_new_mapping *next_mapping; mempool_t *mapping_pool; mempool_t *endio_hook_pool; + + process_bio_fn process_bio; + process_bio_fn process_discard; + + process_mapping_fn process_prepared_mapping; + process_mapping_fn process_prepared_discard; }; +static enum pool_mode get_pool_mode(struct pool *pool); +static void set_pool_mode(struct pool *pool, enum pool_mode mode); + /* * Target context for a pool. */ @@ -707,21 +732,39 @@ static void remap_to_origin(struct thin_c *tc, struct bio *bio) bio->bi_bdev = tc->origin_dev->bdev; } +static int bio_triggers_commit(struct thin_c *tc, struct bio *bio) +{ + return (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && + dm_thin_changed_this_transaction(tc->td); +} + static void issue(struct thin_c *tc, struct bio *bio) { struct pool *pool = tc->pool; unsigned long flags; + if (!bio_triggers_commit(tc, bio)) { + generic_make_request(bio); + return; + } + /* - * Batch together any FUA/FLUSH bios we find and then issue - * a single commit for them in process_deferred_bios(). + * Complete bio with an error if earlier I/O caused changes to + * the metadata that can't be committed e.g, due to I/O errors + * on the metadata device. */ - if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { - spin_lock_irqsave(&pool->lock, flags); - bio_list_add(&pool->deferred_flush_bios, bio); - spin_unlock_irqrestore(&pool->lock, flags); - } else - generic_make_request(bio); + if (dm_thin_aborted_changes(tc->td)) { + bio_io_error(bio); + return; + } + + /* + * Batch together any bios that trigger commits and then issue a + * single commit for them in process_deferred_bios(). + */ + spin_lock_irqsave(&pool->lock, flags); + bio_list_add(&pool->deferred_flush_bios, bio); + spin_unlock_irqrestore(&pool->lock, flags); } static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio) @@ -858,6 +901,14 @@ static void cell_defer_except(struct thin_c *tc, struct dm_bio_prison_cell *cell wake_worker(pool); } +static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) +{ + if (m->bio) + m->bio->bi_end_io = m->saved_bi_end_io; + cell_error(m->cell); + list_del(&m->list); + mempool_free(m, m->tc->pool->mapping_pool); +} static void process_prepared_mapping(struct dm_thin_new_mapping *m) { struct thin_c *tc = m->tc; @@ -870,7 +921,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) if (m->err) { cell_error(m->cell); - return; + goto out; } /* @@ -882,7 +933,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) if (r) { DMERR("dm_thin_insert_block() failed"); cell_error(m->cell); - return; + goto out; } /* @@ -897,22 +948,25 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) } else cell_defer(tc, m->cell, m->data_block); +out: list_del(&m->list); mempool_free(m, tc->pool->mapping_pool); } -static void process_prepared_discard(struct dm_thin_new_mapping *m) +static void process_prepared_discard_fail(struct dm_thin_new_mapping *m) { - int r; struct thin_c *tc = m->tc; - r = dm_thin_remove_block(tc->td, m->virt_block); - if (r) - DMERR("dm_thin_remove_block() failed"); + bio_io_error(m->bio); + cell_defer_except(tc, m->cell); + cell_defer_except(tc, m->cell2); + mempool_free(m, tc->pool->mapping_pool); +} + +static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m) +{ + struct thin_c *tc = m->tc; - /* - * Pass the discard down to the underlying device? - */ if (m->pass_discard) remap_and_issue(tc, m->bio, m->data_block); else @@ -923,8 +977,20 @@ static void process_prepared_discard(struct dm_thin_new_mapping *m) mempool_free(m, tc->pool->mapping_pool); } +static void process_prepared_discard(struct dm_thin_new_mapping *m) +{ + int r; + struct thin_c *tc = m->tc; + + r = dm_thin_remove_block(tc->td, m->virt_block); + if (r) + DMERR("dm_thin_remove_block() failed"); + + process_prepared_discard_passdown(m); +} + static void process_prepared(struct pool *pool, struct list_head *head, - void (*fn)(struct dm_thin_new_mapping *)) + process_mapping_fn *fn) { unsigned long flags; struct list_head maps; @@ -936,7 +1002,7 @@ static void process_prepared(struct pool *pool, struct list_head *head, spin_unlock_irqrestore(&pool->lock, flags); list_for_each_entry_safe(m, tmp, &maps, list) - fn(m); + (*fn)(m); } /* @@ -1102,6 +1168,35 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, } } +static int commit(struct pool *pool) +{ + int r; + + r = dm_pool_commit_metadata(pool->pmd); + if (r) + DMERR("commit failed, error = %d", r); + + return r; +} + +/* + * A non-zero return indicates read_only or fail_io mode. + * Many callers don't care about the return value. + */ +static int commit_or_fallback(struct pool *pool) +{ + int r; + + if (get_pool_mode(pool) != PM_WRITE) + return -EINVAL; + + r = commit(pool); + if (r) + set_pool_mode(pool, PM_READ_ONLY); + + return r; +} + static int alloc_data_block(struct thin_c *tc, dm_block_t *result) { int r; @@ -1130,12 +1225,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) * Try to commit to see if that will free up some * more space. */ - r = dm_pool_commit_metadata(pool->pmd); - if (r) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - return r; - } + (void) commit_or_fallback(pool); r = dm_pool_get_free_block_count(pool->pmd, &free_blocks); if (r) @@ -1314,7 +1404,7 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio, if (bio_detain(pool->prison, &key, bio, &cell)) return; - if (bio_data_dir(bio) == WRITE) + if (bio_data_dir(bio) == WRITE && bio->bi_size) break_sharing(tc, bio, block, &key, lookup_result, cell); else { struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; @@ -1366,6 +1456,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block default: DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); + set_pool_mode(tc->pool, PM_READ_ONLY); cell_error(cell); break; } @@ -1423,6 +1514,49 @@ static void process_bio(struct thin_c *tc, struct bio *bio) } } +static void process_bio_read_only(struct thin_c *tc, struct bio *bio) +{ + int r; + int rw = bio_data_dir(bio); + dm_block_t block = get_bio_block(tc, bio); + struct dm_thin_lookup_result lookup_result; + + r = dm_thin_find_block(tc->td, block, 1, &lookup_result); + switch (r) { + case 0: + if (lookup_result.shared && (rw == WRITE) && bio->bi_size) + bio_io_error(bio); + else + remap_and_issue(tc, bio, lookup_result.block); + break; + + case -ENODATA: + if (rw != READ) { + bio_io_error(bio); + break; + } + + if (tc->origin_dev) { + remap_to_origin_and_issue(tc, bio); + break; + } + + zero_fill_bio(bio); + bio_endio(bio, 0); + break; + + default: + DMERR("dm_thin_find_block() failed, error = %d", r); + bio_io_error(bio); + break; + } +} + +static void process_bio_fail(struct thin_c *tc, struct bio *bio) +{ + bio_io_error(bio); +} + static int need_commit_due_to_time(struct pool *pool) { return jiffies < pool->last_commit_jiffies || @@ -1434,7 +1568,6 @@ static void process_deferred_bios(struct pool *pool) unsigned long flags; struct bio *bio; struct bio_list bios; - int r; bio_list_init(&bios); @@ -1461,9 +1594,9 @@ static void process_deferred_bios(struct pool *pool) } if (bio->bi_rw & REQ_DISCARD) - process_discard(tc, bio); + pool->process_discard(tc, bio); else - process_bio(tc, bio); + pool->process_bio(tc, bio); } /* @@ -1479,10 +1612,7 @@ static void process_deferred_bios(struct pool *pool) if (bio_list_empty(&bios) && !need_commit_due_to_time(pool)) return; - r = dm_pool_commit_metadata(pool->pmd); - if (r) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); + if (commit_or_fallback(pool)) { while ((bio = bio_list_pop(&bios))) bio_io_error(bio); return; @@ -1497,8 +1627,8 @@ static void do_worker(struct work_struct *ws) { struct pool *pool = container_of(ws, struct pool, worker); - process_prepared(pool, &pool->prepared_mappings, process_prepared_mapping); - process_prepared(pool, &pool->prepared_discards, process_prepared_discard); + process_prepared(pool, &pool->prepared_mappings, &pool->process_prepared_mapping); + process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard); process_deferred_bios(pool); } @@ -1515,6 +1645,52 @@ static void do_waker(struct work_struct *ws) /*----------------------------------------------------------------*/ +static enum pool_mode get_pool_mode(struct pool *pool) +{ + return pool->pf.mode; +} + +static void set_pool_mode(struct pool *pool, enum pool_mode mode) +{ + int r; + + pool->pf.mode = mode; + + switch (mode) { + case PM_FAIL: + DMERR("switching pool to failure mode"); + pool->process_bio = process_bio_fail; + pool->process_discard = process_bio_fail; + pool->process_prepared_mapping = process_prepared_mapping_fail; + pool->process_prepared_discard = process_prepared_discard_fail; + break; + + case PM_READ_ONLY: + DMERR("switching pool to read-only mode"); + r = dm_pool_abort_metadata(pool->pmd); + if (r) { + DMERR("aborting transaction failed"); + set_pool_mode(pool, PM_FAIL); + } else { + dm_pool_metadata_read_only(pool->pmd); + pool->process_bio = process_bio_read_only; + pool->process_discard = process_discard; + pool->process_prepared_mapping = process_prepared_mapping_fail; + pool->process_prepared_discard = process_prepared_discard_passdown; + } + break; + + case PM_WRITE: + pool->process_bio = process_bio; + pool->process_discard = process_discard; + pool->process_prepared_mapping = process_prepared_mapping; + pool->process_prepared_discard = process_prepared_discard; + break; + } +} + +/*----------------------------------------------------------------*/ + /* * Mapping functions. */ @@ -1560,6 +1736,12 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, struct dm_thin_lookup_result result; map_context->ptr = thin_hook_bio(tc, bio); + + if (get_pool_mode(tc->pool) == PM_FAIL) { + bio_io_error(bio); + return DM_MAPIO_SUBMITTED; + } + if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) { thin_defer_bio(tc, bio); return DM_MAPIO_SUBMITTED; @@ -1596,14 +1778,35 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, break; case -ENODATA: + if (get_pool_mode(tc->pool) == PM_READ_ONLY) { + /* + * This block isn't provisioned, and we have no way + * of doing so. Just error it. + */ + bio_io_error(bio); + r = DM_MAPIO_SUBMITTED; + break; + } + /* fall through */ + + case -EWOULDBLOCK: /* * In future, the failed dm_thin_find_block above could * provide the hint to load the metadata into cache. */ - case -EWOULDBLOCK: thin_defer_bio(tc, bio); r = DM_MAPIO_SUBMITTED; break; + + default: + /* + * Must always call bio_io_error on failure. + * dm_thin_find_block can fail with -EINVAL if the + * pool is switched to fail-io mode. + */ + bio_io_error(bio); + r = DM_MAPIO_SUBMITTED; + break; } return r; @@ -1640,15 +1843,26 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) { struct pool_c *pt = ti->private; + /* + * We want to make sure that degraded pools are never upgraded. + */ + enum pool_mode old_mode = pool->pf.mode; + enum pool_mode new_mode = pt->pf.mode; + + if (old_mode > new_mode) + new_mode = old_mode; + pool->ti = ti; pool->low_water_blocks = pt->low_water_blocks; pool->pf = pt->pf; + set_pool_mode(pool, new_mode); /* * If discard_passdown was enabled verify that the data device * supports discards. Disable discard_passdown if not; otherwise * -EOPNOTSUPP will be returned. */ + /* FIXME: pull this out into a sep fn. */ if (pt->pf.discard_passdown) { struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); if (!q || !blk_queue_discard(q)) { @@ -1674,6 +1888,7 @@ static void unbind_control_target(struct pool *pool, struct dm_target *ti) /* Initialize pool features. */ static void pool_features_init(struct pool_features *pf) { + pf->mode = PM_WRITE; pf->zero_new_blocks = 1; pf->discard_enabled = 1; pf->discard_passdown = 1; @@ -1704,14 +1919,16 @@ static struct kmem_cache *_endio_hook_cache; static struct pool *pool_create(struct mapped_device *pool_md, struct block_device *metadata_dev, - unsigned long block_size, char **error) + unsigned long block_size, + int read_only, char **error) { int r; void *err_p; struct pool *pool; struct dm_pool_metadata *pmd; + bool format_device = read_only ? false : true; - pmd = dm_pool_metadata_open(metadata_dev, block_size); + pmd = dm_pool_metadata_open(metadata_dev, block_size, format_device); if (IS_ERR(pmd)) { *error = "Error creating metadata object"; return (struct pool *)pmd; @@ -1828,8 +2045,8 @@ static void __pool_dec(struct pool *pool) static struct pool *__pool_find(struct mapped_device *pool_md, struct block_device *metadata_dev, - unsigned long block_size, char **error, - int *created) + unsigned long block_size, int read_only, + char **error, int *created) { struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev); @@ -1850,7 +2067,7 @@ static struct pool *__pool_find(struct mapped_device *pool_md, __pool_inc(pool); } else { - pool = pool_create(pool_md, metadata_dev, block_size, error); + pool = pool_create(pool_md, metadata_dev, block_size, read_only, error); *created = 1; } } @@ -1901,19 +2118,23 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, arg_name = dm_shift_arg(as); argc--; - if (!strcasecmp(arg_name, "skip_block_zeroing")) { + if (!strcasecmp(arg_name, "skip_block_zeroing")) pf->zero_new_blocks = 0; - continue; - } else if (!strcasecmp(arg_name, "ignore_discard")) { + + else if (!strcasecmp(arg_name, "ignore_discard")) pf->discard_enabled = 0; - continue; - } else if (!strcasecmp(arg_name, "no_discard_passdown")) { + + else if (!strcasecmp(arg_name, "no_discard_passdown")) pf->discard_passdown = 0; - continue; - } - ti->error = "Unrecognised pool feature requested"; - r = -EINVAL; + else if (!strcasecmp(arg_name, "read_only")) + pf->mode = PM_READ_ONLY; + + else { + ti->error = "Unrecognised pool feature requested"; + r = -EINVAL; + break; + } } return r; @@ -2006,7 +2227,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) } pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev, - block_size, &ti->error, &pool_created); + block_size, pf.mode == PM_READ_ONLY, &ti->error, &pool_created); if (IS_ERR(pool)) { r = PTR_ERR(pool); goto out_free_pt; @@ -2052,7 +2273,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) * stacking of discard limits (this keeps the pool and * thin devices' discard limits consistent). */ - ti->discards_supported = 1; + ti->discards_supported = true; } ti->private = pt; @@ -2139,15 +2360,12 @@ static int pool_preresume(struct dm_target *ti) r = dm_pool_resize_data_dev(pool->pmd, data_size); if (r) { DMERR("failed to resize data device"); + /* FIXME Stricter than necessary: Rollback transaction instead here */ + set_pool_mode(pool, PM_READ_ONLY); return r; } - r = dm_pool_commit_metadata(pool->pmd); - if (r) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - return r; - } + (void) commit_or_fallback(pool); } return 0; @@ -2170,19 +2388,12 @@ static void pool_resume(struct dm_target *ti) static void pool_postsuspend(struct dm_target *ti) { - int r; struct pool_c *pt = ti->private; struct pool *pool = pt->pool; cancel_delayed_work(&pool->waker); flush_workqueue(pool->wq); - - r = dm_pool_commit_metadata(pool->pmd); - if (r < 0) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - /* FIXME: invalidate device? error the next FUA or FLUSH bio ?*/ - } + (void) commit_or_fallback(pool); } static int check_arg_count(unsigned argc, unsigned args_required) @@ -2316,12 +2527,7 @@ static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct if (r) return r; - r = dm_pool_commit_metadata(pool->pmd); - if (r) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - return r; - } + (void) commit_or_fallback(pool); r = dm_pool_reserve_metadata_snap(pool->pmd); if (r) @@ -2382,25 +2588,41 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv) else DMWARN("Unrecognised thin pool target message received: %s", argv[0]); - if (!r) { - r = dm_pool_commit_metadata(pool->pmd); - if (r) - DMERR("%s message: dm_pool_commit_metadata() failed, error = %d", - argv[0], r); - } + if (!r) + (void) commit_or_fallback(pool); return r; } +static void emit_flags(struct pool_features *pf, char *result, + unsigned sz, unsigned maxlen) +{ + unsigned count = !pf->zero_new_blocks + !pf->discard_enabled + + !pf->discard_passdown + (pf->mode == PM_READ_ONLY); + DMEMIT("%u ", count); + + if (!pf->zero_new_blocks) + DMEMIT("skip_block_zeroing "); + + if (!pf->discard_enabled) + DMEMIT("ignore_discard "); + + if (!pf->discard_passdown) + DMEMIT("no_discard_passdown "); + + if (pf->mode == PM_READ_ONLY) + DMEMIT("read_only "); +} + /* * Status line is: * <transaction id> <used metadata sectors>/<total metadata sectors> * <used data sectors>/<total data sectors> <held metadata root> */ static int pool_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { - int r, count; + int r; unsigned sz = 0; uint64_t transaction_id; dm_block_t nr_free_blocks_data; @@ -2415,6 +2637,15 @@ static int pool_status(struct dm_target *ti, status_type_t type, switch (type) { case STATUSTYPE_INFO: + if (get_pool_mode(pool) == PM_FAIL) { + DMEMIT("Fail"); + break; + } + + /* Commit to ensure statistics aren't out-of-date */ + if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) + (void) commit_or_fallback(pool); + r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id); if (r) @@ -2450,9 +2681,19 @@ static int pool_status(struct dm_target *ti, status_type_t type, (unsigned long long)nr_blocks_data); if (held_root) - DMEMIT("%llu", held_root); + DMEMIT("%llu ", held_root); + else + DMEMIT("- "); + + if (pool->pf.mode == PM_READ_ONLY) + DMEMIT("ro "); else - DMEMIT("-"); + DMEMIT("rw "); + + if (pool->pf.discard_enabled && pool->pf.discard_passdown) + DMEMIT("discard_passdown"); + else + DMEMIT("no_discard_passdown"); break; @@ -2462,20 +2703,7 @@ static int pool_status(struct dm_target *ti, status_type_t type, format_dev_t(buf2, pt->data_dev->bdev->bd_dev), (unsigned long)pool->sectors_per_block, (unsigned long long)pt->low_water_blocks); - - count = !pool->pf.zero_new_blocks + !pool->pf.discard_enabled + - !pt->pf.discard_passdown; - DMEMIT("%u ", count); - - if (!pool->pf.zero_new_blocks) - DMEMIT("skip_block_zeroing "); - - if (!pool->pf.discard_enabled) - DMEMIT("ignore_discard "); - - if (!pt->pf.discard_passdown) - DMEMIT("no_discard_passdown "); - + emit_flags(&pt->pf, result, sz, maxlen); break; } @@ -2535,7 +2763,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 2, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -2640,6 +2868,11 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) } __pool_inc(tc->pool); + if (get_pool_mode(tc->pool) == PM_FAIL) { + ti->error = "Couldn't open thin device, Pool is in fail mode"; + goto bad_thin_open; + } + r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td); if (r) { ti->error = "Couldn't open thin internal device"; @@ -2651,14 +2884,15 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad_thin_open; ti->num_flush_requests = 1; + ti->flush_supported = true; /* In case the pool supports discards, pass them on. */ if (tc->pool->pf.discard_enabled) { - ti->discards_supported = 1; + ti->discards_supported = true; ti->num_discard_requests = 1; - ti->discard_zeroes_data_unsupported = 1; + ti->discard_zeroes_data_unsupported = true; /* Discard requests must be split on a block boundary */ - ti->split_discard_requests = 1; + ti->split_discard_requests = true; } dm_put(pool_md); @@ -2739,7 +2973,7 @@ static void thin_postsuspend(struct dm_target *ti) * <nr mapped sectors> <highest mapped sector> */ static int thin_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { int r; ssize_t sz = 0; @@ -2747,6 +2981,11 @@ static int thin_status(struct dm_target *ti, status_type_t type, char buf[BDEVNAME_SIZE]; struct thin_c *tc = ti->private; + if (get_pool_mode(tc->pool) == PM_FAIL) { + DMEMIT("Fail"); + return 0; + } + if (!tc->td) DMEMIT("-"); else { @@ -2815,7 +3054,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type thin_target = { .name = "thin", - .version = {1, 2, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index fa365d39b612..254d19268ad2 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -515,7 +515,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio, * Status: V (valid) or C (corruption found) */ static int verity_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct dm_verity *v = ti->private; unsigned sz = 0; diff --git a/drivers/md/dm.h b/drivers/md/dm.h index b7dacd59d8d7..52eef493d266 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -23,6 +23,11 @@ #define DM_SUSPEND_NOFLUSH_FLAG (1 << 1) /* + * Status feature flags + */ +#define DM_STATUS_NOFLUSH_FLAG (1 << 0) + +/* * Type of table and mapped_device's mempool */ #define DM_TYPE_NONE 0 diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 0317ecdc6e53..5ba277768d99 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -325,11 +325,6 @@ static struct dm_buffer *to_buffer(struct dm_block *b) return (struct dm_buffer *) b; } -static struct dm_bufio_client *to_bufio(struct dm_block_manager *bm) -{ - return (struct dm_bufio_client *) bm; -} - dm_block_t dm_block_location(struct dm_block *b) { return dm_bufio_get_block_number(to_buffer(b)); @@ -367,34 +362,60 @@ static void dm_block_manager_write_callback(struct dm_buffer *buf) /*---------------------------------------------------------------- * Public interface *--------------------------------------------------------------*/ +struct dm_block_manager { + struct dm_bufio_client *bufio; + bool read_only:1; +}; + struct dm_block_manager *dm_block_manager_create(struct block_device *bdev, unsigned block_size, unsigned cache_size, unsigned max_held_per_thread) { - return (struct dm_block_manager *) - dm_bufio_client_create(bdev, block_size, max_held_per_thread, - sizeof(struct buffer_aux), - dm_block_manager_alloc_callback, - dm_block_manager_write_callback); + int r; + struct dm_block_manager *bm; + + bm = kmalloc(sizeof(*bm), GFP_KERNEL); + if (!bm) { + r = -ENOMEM; + goto bad; + } + + bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread, + sizeof(struct buffer_aux), + dm_block_manager_alloc_callback, + dm_block_manager_write_callback); + if (IS_ERR(bm->bufio)) { + r = PTR_ERR(bm->bufio); + kfree(bm); + goto bad; + } + + bm->read_only = false; + + return bm; + +bad: + return ERR_PTR(r); } EXPORT_SYMBOL_GPL(dm_block_manager_create); void dm_block_manager_destroy(struct dm_block_manager *bm) { - return dm_bufio_client_destroy(to_bufio(bm)); + dm_bufio_client_destroy(bm->bufio); + kfree(bm); } EXPORT_SYMBOL_GPL(dm_block_manager_destroy); unsigned dm_bm_block_size(struct dm_block_manager *bm) { - return dm_bufio_get_block_size(to_bufio(bm)); + return dm_bufio_get_block_size(bm->bufio); } EXPORT_SYMBOL_GPL(dm_bm_block_size); dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm) { - return dm_bufio_get_device_size(to_bufio(bm)); + return dm_bufio_get_device_size(bm->bufio); } static int dm_bm_validate_buffer(struct dm_block_manager *bm, @@ -406,7 +427,7 @@ static int dm_bm_validate_buffer(struct dm_block_manager *bm, int r; if (!v) return 0; - r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(to_bufio(bm))); + r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio)); if (unlikely(r)) return r; aux->validator = v; @@ -430,7 +451,7 @@ int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b, void *p; int r; - p = dm_bufio_read(to_bufio(bm), b, (struct dm_buffer **) result); + p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); if (unlikely(IS_ERR(p))) return PTR_ERR(p); @@ -463,7 +484,10 @@ int dm_bm_write_lock(struct dm_block_manager *bm, void *p; int r; - p = dm_bufio_read(to_bufio(bm), b, (struct dm_buffer **) result); + if (bm->read_only) + return -EPERM; + + p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); if (unlikely(IS_ERR(p))) return PTR_ERR(p); @@ -496,7 +520,7 @@ int dm_bm_read_try_lock(struct dm_block_manager *bm, void *p; int r; - p = dm_bufio_get(to_bufio(bm), b, (struct dm_buffer **) result); + p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result); if (unlikely(IS_ERR(p))) return PTR_ERR(p); if (unlikely(!p)) @@ -529,7 +553,10 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, struct buffer_aux *aux; void *p; - p = dm_bufio_new(to_bufio(bm), b, (struct dm_buffer **) result); + if (bm->read_only) + return -EPERM; + + p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result); if (unlikely(IS_ERR(p))) return PTR_ERR(p); @@ -547,6 +574,7 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, return 0; } +EXPORT_SYMBOL_GPL(dm_bm_write_lock_zero); int dm_bm_unlock(struct dm_block *b) { @@ -565,45 +593,30 @@ int dm_bm_unlock(struct dm_block *b) } EXPORT_SYMBOL_GPL(dm_bm_unlock); -int dm_bm_unlock_move(struct dm_block *b, dm_block_t n) -{ - struct buffer_aux *aux; - - aux = dm_bufio_get_aux_data(to_buffer(b)); - - if (aux->write_locked) { - dm_bufio_mark_buffer_dirty(to_buffer(b)); - bl_up_write(&aux->lock); - } else - bl_up_read(&aux->lock); - - dm_bufio_release_move(to_buffer(b), n); - return 0; -} - int dm_bm_flush_and_unlock(struct dm_block_manager *bm, struct dm_block *superblock) { int r; - r = dm_bufio_write_dirty_buffers(to_bufio(bm)); - if (unlikely(r)) - return r; - r = dm_bufio_issue_flush(to_bufio(bm)); - if (unlikely(r)) + if (bm->read_only) + return -EPERM; + + r = dm_bufio_write_dirty_buffers(bm->bufio); + if (unlikely(r)) { + dm_bm_unlock(superblock); return r; + } dm_bm_unlock(superblock); - r = dm_bufio_write_dirty_buffers(to_bufio(bm)); - if (unlikely(r)) - return r; - r = dm_bufio_issue_flush(to_bufio(bm)); - if (unlikely(r)) - return r; + return dm_bufio_write_dirty_buffers(bm->bufio); +} - return 0; +void dm_bm_set_read_only(struct dm_block_manager *bm) +{ + bm->read_only = true; } +EXPORT_SYMBOL_GPL(dm_bm_set_read_only); u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor) { diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h index 924833d2dfa6..be5bff61be28 100644 --- a/drivers/md/persistent-data/dm-block-manager.h +++ b/drivers/md/persistent-data/dm-block-manager.h @@ -97,14 +97,6 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, dm_block_t b, int dm_bm_unlock(struct dm_block *b); /* - * An optimisation; we often want to copy a block's contents to a new - * block. eg, as part of the shadowing operation. It's far better for - * bufio to do this move behind the scenes than hold 2 locks and memcpy the - * data. - */ -int dm_bm_unlock_move(struct dm_block *b, dm_block_t n); - -/* * It's a common idiom to have a superblock that should be committed last. * * @superblock should be write-locked on entry. It will be unlocked during @@ -116,6 +108,19 @@ int dm_bm_unlock_move(struct dm_block *b, dm_block_t n); int dm_bm_flush_and_unlock(struct dm_block_manager *bm, struct dm_block *superblock); +/* + * Switches the bm to a read only mode. Once read-only mode + * has been entered the following functions will return -EPERM. + * + * dm_bm_write_lock + * dm_bm_write_lock_zero + * dm_bm_flush_and_unlock + * + * Additionally you should not use dm_bm_unlock_move, however no error will + * be returned if you do. + */ +void dm_bm_set_read_only(struct dm_block_manager *bm); + u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor); /*----------------------------------------------------------------*/ diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index ff3beed6ad2d..d77602d63c83 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -224,6 +224,7 @@ static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm) ll->nr_blocks = 0; ll->bitmap_root = 0; ll->ref_count_root = 0; + ll->bitmap_index_changed = false; return 0; } @@ -476,7 +477,15 @@ int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) int sm_ll_commit(struct ll_disk *ll) { - return ll->commit(ll); + int r = 0; + + if (ll->bitmap_index_changed) { + r = ll->commit(ll); + if (!r) + ll->bitmap_index_changed = false; + } + + return r; } /*----------------------------------------------------------------*/ @@ -491,6 +500,7 @@ static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index, static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index, struct disk_index_entry *ie) { + ll->bitmap_index_changed = true; memcpy(ll->mi_le.index + index, ie, sizeof(*ie)); return 0; } diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h index 8f220821a9a9..b3078d5eda0c 100644 --- a/drivers/md/persistent-data/dm-space-map-common.h +++ b/drivers/md/persistent-data/dm-space-map-common.h @@ -78,6 +78,7 @@ struct ll_disk { open_index_fn open_index; max_index_entries_fn max_entries; commit_fn commit; + bool bitmap_index_changed:1; }; struct disk_sm_root { diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index 86c3705052a4..d247a35da3c6 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c @@ -219,13 +219,24 @@ static int __shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, if (r < 0) return r; - r = dm_bm_unlock_move(orig_block, new); - if (r < 0) { + /* + * It would be tempting to use dm_bm_unlock_move here, but some + * code, such as the space maps, keeps using the old data structures + * secure in the knowledge they won't be changed until the next + * transaction. Using unlock_move would force a synchronous read + * since the old block would no longer be in the cache. + */ + r = dm_bm_write_lock_zero(tm->bm, new, v, result); + if (r) { dm_bm_unlock(orig_block); return r; } - return dm_bm_write_lock(tm->bm, new, v, result); + memcpy(dm_block_data(*result), dm_block_data(orig_block), + dm_bm_block_size(tm->bm)); + + dm_bm_unlock(orig_block); + return r; } int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, @@ -310,12 +321,10 @@ struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm) static int dm_tm_create_internal(struct dm_block_manager *bm, dm_block_t sb_location, - struct dm_block_validator *sb_validator, - size_t root_offset, size_t root_max_len, struct dm_transaction_manager **tm, struct dm_space_map **sm, - struct dm_block **sblock, - int create) + int create, + void *sm_root, size_t sm_len) { int r; @@ -330,64 +339,43 @@ static int dm_tm_create_internal(struct dm_block_manager *bm, } if (create) { - r = dm_bm_write_lock_zero(dm_tm_get_bm(*tm), sb_location, - sb_validator, sblock); - if (r < 0) { - DMERR("couldn't lock superblock"); - goto bad1; - } - r = dm_sm_metadata_create(*sm, *tm, dm_bm_nr_blocks(bm), sb_location); if (r) { DMERR("couldn't create metadata space map"); - goto bad2; + goto bad; } } else { - r = dm_bm_write_lock(dm_tm_get_bm(*tm), sb_location, - sb_validator, sblock); - if (r < 0) { - DMERR("couldn't lock superblock"); - goto bad1; - } - - r = dm_sm_metadata_open(*sm, *tm, - dm_block_data(*sblock) + root_offset, - root_max_len); + r = dm_sm_metadata_open(*sm, *tm, sm_root, sm_len); if (r) { DMERR("couldn't open metadata space map"); - goto bad2; + goto bad; } } return 0; -bad2: - dm_tm_unlock(*tm, *sblock); -bad1: +bad: dm_tm_destroy(*tm); + dm_sm_destroy(*sm); return r; } int dm_tm_create_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, - struct dm_block_validator *sb_validator, struct dm_transaction_manager **tm, - struct dm_space_map **sm, struct dm_block **sblock) + struct dm_space_map **sm) { - return dm_tm_create_internal(bm, sb_location, sb_validator, - 0, 0, tm, sm, sblock, 1); + return dm_tm_create_internal(bm, sb_location, tm, sm, 1, NULL, 0); } EXPORT_SYMBOL_GPL(dm_tm_create_with_sm); int dm_tm_open_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, - struct dm_block_validator *sb_validator, - size_t root_offset, size_t root_max_len, + void *sm_root, size_t root_len, struct dm_transaction_manager **tm, - struct dm_space_map **sm, struct dm_block **sblock) + struct dm_space_map **sm) { - return dm_tm_create_internal(bm, sb_location, sb_validator, root_offset, - root_max_len, tm, sm, sblock, 0); + return dm_tm_create_internal(bm, sb_location, tm, sm, 0, sm_root, root_len); } EXPORT_SYMBOL_GPL(dm_tm_open_with_sm); diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h index 6da784871db4..b5b139076ca5 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.h +++ b/drivers/md/persistent-data/dm-transaction-manager.h @@ -115,16 +115,17 @@ struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm); * * Returns a tm that has an open transaction to write the new disk sm. * Caller should store the new sm root and commit. + * + * The superblock location is passed so the metadata space map knows it + * shouldn't be used. */ int dm_tm_create_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, - struct dm_block_validator *sb_validator, struct dm_transaction_manager **tm, - struct dm_space_map **sm, struct dm_block **sblock); + struct dm_space_map **sm); int dm_tm_open_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, - struct dm_block_validator *sb_validator, - size_t root_offset, size_t root_max_len, + void *sm_root, size_t root_len, struct dm_transaction_manager **tm, - struct dm_space_map **sm, struct dm_block **sblock); + struct dm_space_map **sm); #endif /* _LINUX_DM_TRANSACTION_MANAGER_H */ |