diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-09-13 19:12:55 -1000 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-09-13 19:12:55 -1000 |
commit | a0efc03b7925c4b606b1c49feb394859754e1cc8 (patch) | |
tree | 998b4fbd2af2d78cb164493e6013aa89ea086700 | |
parent | 0f9aeeac1df7b7c984dea04384a2b54c343710ab (diff) | |
parent | 3ab91828166895600efd9cdc3a0eb32001f7204a (diff) | |
download | linux-a0efc03b7925c4b606b1c49feb394859754e1cc8.tar.gz linux-a0efc03b7925c4b606b1c49feb394859754e1cc8.tar.bz2 linux-a0efc03b7925c4b606b1c49feb394859754e1cc8.zip |
Merge tag 'for-4.19/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper fixes from Mike Snitzer:
- DM verity fix for crash due to using vmalloc'd buffers with the
asynchronous crypto hadsh API.
- Fix to both DM crypt and DM integrity targets to discontinue using
CRYPTO_TFM_REQ_MAY_SLEEP because its use of GFP_KERNEL can lead to
deadlock by recursing back into a filesystem.
- Various DM raid fixes related to reshape and rebuild races.
- Fix for DM thin-provisioning to avoid data corruption that was a
side-effect of needing to abort DM thin metadata transaction due to
running out of metadata space. Fix is to reserve a small amount of
metadata space so that once it is used the DM thin-pool can finish
its active transaction before switching to read-only mode.
* tag 'for-4.19/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
dm thin metadata: try to avoid ever aborting transactions
dm raid: bump target version, update comments and documentation
dm raid: fix RAID leg rebuild errors
dm raid: fix rebuild of specific devices by updating superblock
dm raid: fix stripe adding reshape deadlock
dm raid: fix reshape race on small devices
dm: disable CRYPTO_TFM_REQ_MAY_SLEEP to fix a GFP_KERNEL recursion deadlock
dm verity: fix crash on bufio buffer that was allocated with vmalloc
-rw-r--r-- | Documentation/device-mapper/dm-raid.txt | 4 | ||||
-rw-r--r-- | drivers/md/dm-crypt.c | 10 | ||||
-rw-r--r-- | drivers/md/dm-integrity.c | 4 | ||||
-rw-r--r-- | drivers/md/dm-raid.c | 154 | ||||
-rw-r--r-- | drivers/md/dm-thin-metadata.c | 36 | ||||
-rw-r--r-- | drivers/md/dm-thin.c | 73 | ||||
-rw-r--r-- | drivers/md/dm-verity-target.c | 24 |
7 files changed, 192 insertions, 113 deletions
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt index 390c145f01d7..52a719b49afd 100644 --- a/Documentation/device-mapper/dm-raid.txt +++ b/Documentation/device-mapper/dm-raid.txt @@ -348,3 +348,7 @@ Version History 1.13.1 Fix deadlock caused by early md_stop_writes(). Also fix size an state races. 1.13.2 Fix raid redundancy validation and avoid keeping raid set frozen +1.14.0 Fix reshape race on small devices. Fix stripe adding reshape + deadlock/potential data corruption. Update superblock when + specific devices are requested via rebuild. Fix RAID leg + rebuild errors. diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index f266c81f396f..0481223b1deb 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -332,7 +332,7 @@ static int crypt_iv_essiv_init(struct crypt_config *cc) int err; desc->tfm = essiv->hash_tfm; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + desc->flags = 0; err = crypto_shash_digest(desc, cc->key, cc->key_size, essiv->salt); shash_desc_zero(desc); @@ -606,7 +606,7 @@ static int crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv, int i, r; desc->tfm = lmk->hash_tfm; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + desc->flags = 0; r = crypto_shash_init(desc); if (r) @@ -768,7 +768,7 @@ static int crypt_iv_tcw_whitening(struct crypt_config *cc, /* calculate crc32 for every 32bit part and xor it */ desc->tfm = tcw->crc32_tfm; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + desc->flags = 0; for (i = 0; i < 4; i++) { r = crypto_shash_init(desc); if (r) @@ -1251,7 +1251,7 @@ static void crypt_alloc_req_skcipher(struct crypt_config *cc, * requests if driver request queue is full. */ skcipher_request_set_callback(ctx->r.req, - CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + CRYPTO_TFM_REQ_MAY_BACKLOG, kcryptd_async_done, dmreq_of_req(cc, ctx->r.req)); } @@ -1268,7 +1268,7 @@ static void crypt_alloc_req_aead(struct crypt_config *cc, * requests if driver request queue is full. */ aead_request_set_callback(ctx->r.req_aead, - CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + CRYPTO_TFM_REQ_MAY_BACKLOG, kcryptd_async_done, dmreq_of_req(cc, ctx->r.req_aead)); } diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 378878599466..89ccb64342de 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -532,7 +532,7 @@ static void section_mac(struct dm_integrity_c *ic, unsigned section, __u8 result unsigned j, size; desc->tfm = ic->journal_mac; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + desc->flags = 0; r = crypto_shash_init(desc); if (unlikely(r)) { @@ -676,7 +676,7 @@ static void complete_journal_encrypt(struct crypto_async_request *req, int err) static bool do_crypt(bool encrypt, struct skcipher_request *req, struct journal_completion *comp) { int r; - skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, complete_journal_encrypt, comp); if (likely(encrypt)) r = crypto_skcipher_encrypt(req); diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index cae689de75fd..5ba067fa0c72 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2010-2011 Neil Brown - * Copyright (C) 2010-2017 Red Hat, Inc. All rights reserved. + * Copyright (C) 2010-2018 Red Hat, Inc. All rights reserved. * * This file is released under the GPL. */ @@ -29,9 +29,6 @@ */ #define MIN_RAID456_JOURNAL_SPACE (4*2048) -/* Global list of all raid sets */ -static LIST_HEAD(raid_sets); - static bool devices_handle_discard_safely = false; /* @@ -227,7 +224,6 @@ struct rs_layout { struct raid_set { struct dm_target *ti; - struct list_head list; uint32_t stripe_cache_entries; unsigned long ctr_flags; @@ -273,19 +269,6 @@ static void rs_config_restore(struct raid_set *rs, struct rs_layout *l) mddev->new_chunk_sectors = l->new_chunk_sectors; } -/* Find any raid_set in active slot for @rs on global list */ -static struct raid_set *rs_find_active(struct raid_set *rs) -{ - struct raid_set *r; - struct mapped_device *md = dm_table_get_md(rs->ti->table); - - list_for_each_entry(r, &raid_sets, list) - if (r != rs && dm_table_get_md(r->ti->table) == md) - return r; - - return NULL; -} - /* raid10 algorithms (i.e. formats) */ #define ALGORITHM_RAID10_DEFAULT 0 #define ALGORITHM_RAID10_NEAR 1 @@ -764,7 +747,6 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r mddev_init(&rs->md); - INIT_LIST_HEAD(&rs->list); rs->raid_disks = raid_devs; rs->delta_disks = 0; @@ -782,9 +764,6 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r for (i = 0; i < raid_devs; i++) md_rdev_init(&rs->dev[i].rdev); - /* Add @rs to global list. */ - list_add(&rs->list, &raid_sets); - /* * Remaining items to be initialized by further RAID params: * rs->md.persistent @@ -797,7 +776,7 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r return rs; } -/* Free all @rs allocations and remove it from global list. */ +/* Free all @rs allocations */ static void raid_set_free(struct raid_set *rs) { int i; @@ -815,8 +794,6 @@ static void raid_set_free(struct raid_set *rs) dm_put_device(rs->ti, rs->dev[i].data_dev); } - list_del(&rs->list); - kfree(rs); } @@ -2649,7 +2626,7 @@ static int rs_adjust_data_offsets(struct raid_set *rs) return 0; } - /* HM FIXME: get InSync raid_dev? */ + /* HM FIXME: get In_Sync raid_dev? */ rdev = &rs->dev[0].rdev; if (rs->delta_disks < 0) { @@ -3149,6 +3126,11 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); rs_set_new(rs); } else if (rs_is_recovering(rs)) { + /* Rebuild particular devices */ + if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) { + set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); + rs_setup_recovery(rs, MaxSector); + } /* A recovering raid set may be resized */ ; /* skip setup rs */ } else if (rs_is_reshaping(rs)) { @@ -3242,6 +3224,8 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) /* Start raid set read-only and assumed clean to change in raid_resume() */ rs->md.ro = 1; rs->md.in_sync = 1; + + /* Keep array frozen */ set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery); /* Has to be held on running the array */ @@ -3265,7 +3249,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) rs->callbacks.congested_fn = raid_is_congested; dm_table_add_target_callbacks(ti->table, &rs->callbacks); - /* If raid4/5/6 journal mode explictely requested (only possible with journal dev) -> set it */ + /* If raid4/5/6 journal mode explicitly requested (only possible with journal dev) -> set it */ if (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) { r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode); if (r) { @@ -3350,32 +3334,53 @@ static int raid_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } -/* Return string describing the current sync action of @mddev */ -static const char *decipher_sync_action(struct mddev *mddev, unsigned long recovery) +/* Return sync state string for @state */ +enum sync_state { st_frozen, st_reshape, st_resync, st_check, st_repair, st_recover, st_idle }; +static const char *sync_str(enum sync_state state) +{ + /* Has to be in above sync_state order! */ + static const char *sync_strs[] = { + "frozen", + "reshape", + "resync", + "check", + "repair", + "recover", + "idle" + }; + + return __within_range(state, 0, ARRAY_SIZE(sync_strs) - 1) ? sync_strs[state] : "undef"; +}; + +/* Return enum sync_state for @mddev derived from @recovery flags */ +static const enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long recovery) { if (test_bit(MD_RECOVERY_FROZEN, &recovery)) - return "frozen"; + return st_frozen; - /* The MD sync thread can be done with io but still be running */ + /* The MD sync thread can be done with io or be interrupted but still be running */ if (!test_bit(MD_RECOVERY_DONE, &recovery) && (test_bit(MD_RECOVERY_RUNNING, &recovery) || (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery)))) { if (test_bit(MD_RECOVERY_RESHAPE, &recovery)) - return "reshape"; + return st_reshape; if (test_bit(MD_RECOVERY_SYNC, &recovery)) { if (!test_bit(MD_RECOVERY_REQUESTED, &recovery)) - return "resync"; - else if (test_bit(MD_RECOVERY_CHECK, &recovery)) - return "check"; - return "repair"; + return st_resync; + if (test_bit(MD_RECOVERY_CHECK, &recovery)) + return st_check; + return st_repair; } if (test_bit(MD_RECOVERY_RECOVER, &recovery)) - return "recover"; + return st_recover; + + if (mddev->reshape_position != MaxSector) + return st_reshape; } - return "idle"; + return st_idle; } /* @@ -3409,6 +3414,7 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery, sector_t resync_max_sectors) { sector_t r; + enum sync_state state; struct mddev *mddev = &rs->md; clear_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags); @@ -3419,20 +3425,14 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery, set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags); } else { - if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags) && - !test_bit(MD_RECOVERY_INTR, &recovery) && - (test_bit(MD_RECOVERY_NEEDED, &recovery) || - test_bit(MD_RECOVERY_RESHAPE, &recovery) || - test_bit(MD_RECOVERY_RUNNING, &recovery))) - r = mddev->curr_resync_completed; - else + state = decipher_sync_action(mddev, recovery); + + if (state == st_idle && !test_bit(MD_RECOVERY_INTR, &recovery)) r = mddev->recovery_cp; + else + r = mddev->curr_resync_completed; - if (r >= resync_max_sectors && - (!test_bit(MD_RECOVERY_REQUESTED, &recovery) || - (!test_bit(MD_RECOVERY_FROZEN, &recovery) && - !test_bit(MD_RECOVERY_NEEDED, &recovery) && - !test_bit(MD_RECOVERY_RUNNING, &recovery)))) { + if (state == st_idle && r >= resync_max_sectors) { /* * Sync complete. */ @@ -3440,24 +3440,20 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery, if (test_bit(MD_RECOVERY_RECOVER, &recovery)) set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags); - } else if (test_bit(MD_RECOVERY_RECOVER, &recovery)) { + } else if (state == st_recover) /* * In case we are recovering, the array is not in sync * and health chars should show the recovering legs. */ ; - - } else if (test_bit(MD_RECOVERY_SYNC, &recovery) && - !test_bit(MD_RECOVERY_REQUESTED, &recovery)) { + else if (state == st_resync) /* * If "resync" is occurring, the raid set * is or may be out of sync hence the health * characters shall be 'a'. */ set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags); - - } else if (test_bit(MD_RECOVERY_RESHAPE, &recovery) && - !test_bit(MD_RECOVERY_REQUESTED, &recovery)) { + else if (state == st_reshape) /* * If "reshape" is occurring, the raid set * is or may be out of sync hence the health @@ -3465,7 +3461,7 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery, */ set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags); - } else if (test_bit(MD_RECOVERY_REQUESTED, &recovery)) { + else if (state == st_check || state == st_repair) /* * If "check" or "repair" is occurring, the raid set has * undergone an initial sync and the health characters @@ -3473,12 +3469,12 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery, */ set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags); - } else { + else { struct md_rdev *rdev; /* * We are idle and recovery is needed, prevent 'A' chars race - * caused by components still set to in-sync by constrcuctor. + * caused by components still set to in-sync by constructor. */ if (test_bit(MD_RECOVERY_NEEDED, &recovery)) set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags); @@ -3542,7 +3538,7 @@ static void raid_status(struct dm_target *ti, status_type_t type, progress = rs_get_progress(rs, recovery, resync_max_sectors); resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ? atomic64_read(&mddev->resync_mismatches) : 0; - sync_action = decipher_sync_action(&rs->md, recovery); + sync_action = sync_str(decipher_sync_action(&rs->md, recovery)); /* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */ for (i = 0; i < rs->raid_disks; i++) @@ -3892,14 +3888,13 @@ static int rs_start_reshape(struct raid_set *rs) struct mddev *mddev = &rs->md; struct md_personality *pers = mddev->pers; + /* Don't allow the sync thread to work until the table gets reloaded. */ + set_bit(MD_RECOVERY_WAIT, &mddev->recovery); + r = rs_setup_reshape(rs); if (r) return r; - /* Need to be resumed to be able to start reshape, recovery is frozen until raid_resume() though */ - if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) - mddev_resume(mddev); - /* * Check any reshape constraints enforced by the personalility * @@ -3923,10 +3918,6 @@ static int rs_start_reshape(struct raid_set *rs) } } - /* Suspend because a resume will happen in raid_resume() */ - set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags); - mddev_suspend(mddev); - /* * Now reshape got set up, update superblocks to * reflect the fact so that a table reload will @@ -3947,29 +3938,6 @@ static int raid_preresume(struct dm_target *ti) if (test_and_set_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags)) return 0; - if (!test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) { - struct raid_set *rs_active = rs_find_active(rs); - - if (rs_active) { - /* - * In case no rebuilds have been requested - * and an active table slot exists, copy - * current resynchonization completed and - * reshape position pointers across from - * suspended raid set in the active slot. - * - * This resumes the new mapping at current - * offsets to continue recover/reshape without - * necessarily redoing a raid set partially or - * causing data corruption in case of a reshape. - */ - if (rs_active->md.curr_resync_completed != MaxSector) - mddev->curr_resync_completed = rs_active->md.curr_resync_completed; - if (rs_active->md.reshape_position != MaxSector) - mddev->reshape_position = rs_active->md.reshape_position; - } - } - /* * The superblocks need to be updated on disk if the * array is new or new devices got added (thus zeroed @@ -4046,7 +4014,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 13, 2}, + .version = {1, 14, 0}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 72142021b5c9..74f6770c70b1 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -189,6 +189,12 @@ struct dm_pool_metadata { sector_t data_block_size; /* + * We reserve a section of the metadata for commit overhead. + * All reported space does *not* include this. + */ + dm_block_t metadata_reserve; + + /* * Set if a transaction has to be aborted but the attempt to roll back * to the previous (good) transaction failed. The only pool metadata * operation possible in this state is the closing of the device. @@ -816,6 +822,22 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) return dm_tm_commit(pmd->tm, sblock); } +static void __set_metadata_reserve(struct dm_pool_metadata *pmd) +{ + int r; + dm_block_t total; + dm_block_t max_blocks = 4096; /* 16M */ + + r = dm_sm_get_nr_blocks(pmd->metadata_sm, &total); + if (r) { + DMERR("could not get size of metadata device"); + pmd->metadata_reserve = max_blocks; + } else { + sector_div(total, 10); + pmd->metadata_reserve = min(max_blocks, total); + } +} + struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, sector_t data_block_size, bool format_device) @@ -849,6 +871,8 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, return ERR_PTR(r); } + __set_metadata_reserve(pmd); + return pmd; } @@ -1820,6 +1844,13 @@ int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, down_read(&pmd->root_lock); if (!pmd->fail_io) r = dm_sm_get_nr_free(pmd->metadata_sm, result); + + if (!r) { + if (*result < pmd->metadata_reserve) + *result = 0; + else + *result -= pmd->metadata_reserve; + } up_read(&pmd->root_lock); return r; @@ -1932,8 +1963,11 @@ int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_cou int r = -EINVAL; down_write(&pmd->root_lock); - if (!pmd->fail_io) + if (!pmd->fail_io) { r = __resize_space_map(pmd->metadata_sm, new_count); + if (!r) + __set_metadata_reserve(pmd); + } up_write(&pmd->root_lock); return r; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 7bd60a150f8f..aaf1ad481ee8 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -200,7 +200,13 @@ struct dm_thin_new_mapping; enum pool_mode { PM_WRITE, /* metadata may be changed */ PM_OUT_OF_DATA_SPACE, /* metadata may be changed, though data may not be allocated */ + + /* + * Like READ_ONLY, except may switch back to WRITE on metadata resize. Reported as READ_ONLY. + */ + PM_OUT_OF_METADATA_SPACE, PM_READ_ONLY, /* metadata may not be changed */ + PM_FAIL, /* all I/O fails */ }; @@ -1371,7 +1377,35 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode); static void requeue_bios(struct pool *pool); -static void check_for_space(struct pool *pool) +static bool is_read_only_pool_mode(enum pool_mode mode) +{ + return (mode == PM_OUT_OF_METADATA_SPACE || mode == PM_READ_ONLY); +} + +static bool is_read_only(struct pool *pool) +{ + return is_read_only_pool_mode(get_pool_mode(pool)); +} + +static void check_for_metadata_space(struct pool *pool) +{ + int r; + const char *ooms_reason = NULL; + dm_block_t nr_free; + + r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free); + if (r) + ooms_reason = "Could not get free metadata blocks"; + else if (!nr_free) + ooms_reason = "No free metadata blocks"; + + if (ooms_reason && !is_read_only(pool)) { + DMERR("%s", ooms_reason); + set_pool_mode(pool, PM_OUT_OF_METADATA_SPACE); + } +} + +static void check_for_data_space(struct pool *pool) { int r; dm_block_t nr_free; @@ -1397,14 +1431,16 @@ static int commit(struct pool *pool) { int r; - if (get_pool_mode(pool) >= PM_READ_ONLY) + if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE) return -EINVAL; r = dm_pool_commit_metadata(pool->pmd); if (r) metadata_operation_failed(pool, "dm_pool_commit_metadata", r); - else - check_for_space(pool); + else { + check_for_metadata_space(pool); + check_for_data_space(pool); + } return r; } @@ -1470,6 +1506,19 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) return r; } + r = dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks); + if (r) { + metadata_operation_failed(pool, "dm_pool_get_free_metadata_block_count", r); + return r; + } + + if (!free_blocks) { + /* Let's commit before we use up the metadata reserve. */ + r = commit(pool); + if (r) + return r; + } + return 0; } @@ -1501,6 +1550,7 @@ static blk_status_t should_error_unserviceable_bio(struct pool *pool) case PM_OUT_OF_DATA_SPACE: return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0; + case PM_OUT_OF_METADATA_SPACE: case PM_READ_ONLY: case PM_FAIL: return BLK_STS_IOERR; @@ -2464,8 +2514,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) error_retry_list(pool); break; + case PM_OUT_OF_METADATA_SPACE: case PM_READ_ONLY: - if (old_mode != new_mode) + if (!is_read_only_pool_mode(old_mode)) notify_of_pool_mode_change(pool, "read-only"); dm_pool_metadata_read_only(pool->pmd); pool->process_bio = process_bio_read_only; @@ -3403,6 +3454,10 @@ static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit) DMINFO("%s: growing the metadata device from %llu to %llu blocks", dm_device_name(pool->pool_md), sb_metadata_dev_size, metadata_dev_size); + + if (get_pool_mode(pool) == PM_OUT_OF_METADATA_SPACE) + set_pool_mode(pool, PM_WRITE); + r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size); if (r) { metadata_operation_failed(pool, "dm_pool_resize_metadata_dev", r); @@ -3707,7 +3762,7 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv, struct pool_c *pt = ti->private; struct pool *pool = pt->pool; - if (get_pool_mode(pool) >= PM_READ_ONLY) { + if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE) { DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode", dm_device_name(pool->pool_md)); return -EOPNOTSUPP; @@ -3781,6 +3836,7 @@ static void pool_status(struct dm_target *ti, status_type_t type, dm_block_t nr_blocks_data; dm_block_t nr_blocks_metadata; dm_block_t held_root; + enum pool_mode mode; char buf[BDEVNAME_SIZE]; char buf2[BDEVNAME_SIZE]; struct pool_c *pt = ti->private; @@ -3851,9 +3907,10 @@ static void pool_status(struct dm_target *ti, status_type_t type, else DMEMIT("- "); - if (pool->pf.mode == PM_OUT_OF_DATA_SPACE) + mode = get_pool_mode(pool); + if (mode == PM_OUT_OF_DATA_SPACE) DMEMIT("out_of_data_space "); - else if (pool->pf.mode == PM_READ_ONLY) + else if (is_read_only_pool_mode(mode)) DMEMIT("ro "); else DMEMIT("rw "); diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 12decdbd722d..fc65f0dedf7f 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -99,10 +99,26 @@ static int verity_hash_update(struct dm_verity *v, struct ahash_request *req, { struct scatterlist sg; - sg_init_one(&sg, data, len); - ahash_request_set_crypt(req, &sg, NULL, len); - - return crypto_wait_req(crypto_ahash_update(req), wait); + if (likely(!is_vmalloc_addr(data))) { + sg_init_one(&sg, data, len); + ahash_request_set_crypt(req, &sg, NULL, len); + return crypto_wait_req(crypto_ahash_update(req), wait); + } else { + do { + int r; + size_t this_step = min_t(size_t, len, PAGE_SIZE - offset_in_page(data)); + flush_kernel_vmap_range((void *)data, this_step); + sg_init_table(&sg, 1); + sg_set_page(&sg, vmalloc_to_page(data), this_step, offset_in_page(data)); + ahash_request_set_crypt(req, &sg, NULL, this_step); + r = crypto_wait_req(crypto_ahash_update(req), wait); + if (unlikely(r)) + return r; + data += this_step; + len -= this_step; + } while (len); + return 0; + } } /* |