From 7b06e09a6d81868309f68069a6dca7ff62d47beb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 9 Aug 2017 11:32:13 -0700 Subject: dm mpath: avoid that building with W=1 causes gcc 7 to complain about fall-through Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index d24e4b05f5da..97bca9464395 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1379,6 +1379,7 @@ static void pg_init_done(void *data, int errors) case SCSI_DH_RETRY: /* Wait before retrying. */ delay_retry = 1; + /* fall through */ case SCSI_DH_IMM_RETRY: case SCSI_DH_RES_TEMP_UNAVAIL: if (pg_init_limit_reached(m, pgpath)) -- cgit v1.2.3 From 9157c8d3e2d318581ecc8bdf34367d57f19c5380 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 9 Aug 2017 11:32:14 -0700 Subject: dm mpath: complain about unsupported __multipath_map_bio() return values WARN_ONCE() if __multipath_map_bio() returns an unsupported return value. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 97bca9464395..7406ededf875 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -632,6 +632,10 @@ static void process_queued_bios(struct work_struct *work) case DM_MAPIO_REMAPPED: generic_make_request(bio); break; + case 0: + break; + default: + WARN_ONCE(true, "__multipath_map_bio() returned %d\n", r); } } blk_finish_plug(&plug); -- cgit v1.2.3 From d5c27f3ffbc2ee2d2f74ebfa1b2d789f67e9b3f1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 9 Aug 2017 11:32:16 -0700 Subject: dm rq: make dm-sq requeuing behavior consistent with dm-mq behavior DM_MAPIO_DELAY_REQUEUE causes dm-mq to requeue after a delay but causes dm-sq to requeue immediately. 
Make the behavior of dm-sq consistent with that of dm-mq. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Mike Snitzer --- drivers/md/dm-rq.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index c6ebc5b1e00e..cbee09054d1e 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -237,14 +237,14 @@ static void dm_end_request(struct request *clone, blk_status_t error) /* * Requeue the original request of a clone. */ -static void dm_old_requeue_request(struct request *rq) +static void dm_old_requeue_request(struct request *rq, unsigned long delay_ms) { struct request_queue *q = rq->q; unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); blk_requeue_request(q, rq); - blk_run_queue_async(q); + blk_delay_queue(q, delay_ms); spin_unlock_irqrestore(q->queue_lock, flags); } @@ -270,6 +270,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_ struct mapped_device *md = tio->md; struct request *rq = tio->orig; int rw = rq_data_dir(rq); + unsigned long delay_ms = delay_requeue ? 100 : 0; rq_end_stats(md, rq); if (tio->clone) { @@ -278,9 +279,9 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_ } if (!rq->q->mq_ops) - dm_old_requeue_request(rq); + dm_old_requeue_request(rq, delay_ms); else - dm_mq_delay_requeue_request(rq, delay_requeue ? 100/*ms*/ : 0); + dm_mq_delay_requeue_request(rq, delay_ms); rq_completed(md, rw, false); } -- cgit v1.2.3 From dc6364b5170dc446fca076d6523aaebc339d6511 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 24 Aug 2017 20:19:52 +0800 Subject: dm rq: do not update rq partially in each ending bio We don't need to update the original dm request partially when ending each cloned bio: just update original dm request once when the whole cloned request is finished. 
This still allows full support for partial completion because a new 'completed' counter accounts for incremental progress as the clone bios complete. Partial request update can be a bit expensive, so we should try to avoid it, especially because it is run in softirq context. Avoiding all the partial request updates fixes both hard lockup and soft lockups that were easily reproduced while running Laurence's test[1] on IB/SRP. BTW, after d4acf3650c7c ("block: Make blk_mq_delay_kick_requeue_list() rerun the queue at a quiet time"), we need to make the test more aggressive for reproducing the lockup: 1) run hammer_write.sh 32 or 64 concurrently. 2) write 8M each time [1] https://marc.info/?l=linux-block&m=150220185510245&w=2 Signed-off-by: Ming Lei Signed-off-by: Mike Snitzer --- drivers/md/dm-rq.c | 18 +++++++----------- drivers/md/dm-rq.h | 1 + 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index cbee09054d1e..eadfcfd106ff 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -117,9 +117,9 @@ static void end_clone_bio(struct bio *clone) struct dm_rq_clone_bio_info *info = container_of(clone, struct dm_rq_clone_bio_info, clone); struct dm_rq_target_io *tio = info->tio; - struct bio *bio = info->orig; unsigned int nr_bytes = info->orig->bi_iter.bi_size; blk_status_t error = clone->bi_status; + bool is_last = !clone->bi_next; bio_put(clone); @@ -137,28 +137,23 @@ static void end_clone_bio(struct bio *clone) * when the request is completed. */ tio->error = error; - return; + goto exit; } /* * I/O for the bio successfully completed. * Notice the data completion to the upper layer. */ - - /* - * bios are processed from the head of the list. - * So the completing bio should always be rq->bio. - * If it's not, something wrong is happening. - */ - if (tio->orig->bio != bio) - DMERR("bio completion is going in the middle of the request"); + tio->completed += nr_bytes; /* * Update the original request. 
* Do not use blk_end_request() here, because it may complete * the original request before the clone, and break the ordering. */ - blk_update_request(tio->orig, BLK_STS_OK, nr_bytes); + if (is_last) + exit: + blk_update_request(tio->orig, BLK_STS_OK, tio->completed); } static struct dm_rq_target_io *tio_from_request(struct request *rq) @@ -456,6 +451,7 @@ static void init_tio(struct dm_rq_target_io *tio, struct request *rq, tio->clone = NULL; tio->orig = rq; tio->error = 0; + tio->completed = 0; /* * Avoid initializing info for blk-mq; it passes * target-specific data through info.ptr diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h index 9813922e4fe5..f43c45460aac 100644 --- a/drivers/md/dm-rq.h +++ b/drivers/md/dm-rq.h @@ -29,6 +29,7 @@ struct dm_rq_target_io { struct dm_stats_aux stats_aux; unsigned long duration_jiffies; unsigned n_sectors; + unsigned completed; }; /* -- cgit v1.2.3 From 1e3b21c6fb671a5ce9d77a05a8bde805d8908467 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 30 Apr 2017 17:31:22 -0400 Subject: dm integrity: optimize writing dm-bufio buffers that are partially changed Rather than write the entire dm-bufio buffer when only a subset is changed, improve dm-bufio (and dm-integrity) by only writing the subset of the buffer that changed. Update dm-integrity to make use of dm-bufio's new dm_bufio_mark_partial_buffer_dirty() interface. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-bufio.c | 95 +++++++++++++++++++++++++++++++++-------------- drivers/md/dm-bufio.h | 9 +++++ drivers/md/dm-integrity.c | 2 +- 3 files changed, 77 insertions(+), 29 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 44f4a8ac95bd..94e050b395df 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -63,6 +63,12 @@ #define DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT (PAGE_SIZE >> 1) #define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT (PAGE_SIZE << (MAX_ORDER - 1)) +/* + * Align buffer writes to this boundary. 
+ * Tests show that SSDs have the highest IOPS when using 4k writes. + */ +#define DM_BUFIO_WRITE_ALIGN 4096 + /* * dm_buffer->list_mode */ @@ -149,6 +155,10 @@ struct dm_buffer { blk_status_t write_error; unsigned long state; unsigned long last_accessed; + unsigned dirty_start; + unsigned dirty_end; + unsigned write_start; + unsigned write_end; struct dm_bufio_client *c; struct list_head write_list; struct bio bio; @@ -560,7 +570,7 @@ static void dmio_complete(unsigned long error, void *context) } static void use_dmio(struct dm_buffer *b, int rw, sector_t sector, - unsigned n_sectors, bio_end_io_t *end_io) + unsigned n_sectors, unsigned offset, bio_end_io_t *end_io) { int r; struct dm_io_request io_req = { @@ -578,10 +588,10 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector, if (b->data_mode != DATA_MODE_VMALLOC) { io_req.mem.type = DM_IO_KMEM; - io_req.mem.ptr.addr = b->data; + io_req.mem.ptr.addr = (char *)b->data + offset; } else { io_req.mem.type = DM_IO_VMA; - io_req.mem.ptr.vma = b->data; + io_req.mem.ptr.vma = (char *)b->data + offset; } b->bio.bi_end_io = end_io; @@ -609,10 +619,10 @@ static void inline_endio(struct bio *bio) } static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector, - unsigned n_sectors, bio_end_io_t *end_io) + unsigned n_sectors, unsigned offset, bio_end_io_t *end_io) { char *ptr; - int len; + unsigned len; bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS); b->bio.bi_iter.bi_sector = sector; @@ -625,29 +635,20 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector, b->bio.bi_private = end_io; bio_set_op_attrs(&b->bio, rw, 0); - /* - * We assume that if len >= PAGE_SIZE ptr is page-aligned. - * If len < PAGE_SIZE the buffer doesn't cross page boundary. 
- */ - ptr = b->data; + ptr = (char *)b->data + offset; len = n_sectors << SECTOR_SHIFT; - if (len >= PAGE_SIZE) - BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1)); - else - BUG_ON((unsigned long)ptr & (len - 1)); - do { - if (!bio_add_page(&b->bio, virt_to_page(ptr), - len < PAGE_SIZE ? len : PAGE_SIZE, + unsigned this_step = min((unsigned)(PAGE_SIZE - offset_in_page(ptr)), len); + if (!bio_add_page(&b->bio, virt_to_page(ptr), this_step, offset_in_page(ptr))) { BUG_ON(b->c->block_size <= PAGE_SIZE); - use_dmio(b, rw, sector, n_sectors, end_io); + use_dmio(b, rw, sector, n_sectors, offset, end_io); return; } - len -= PAGE_SIZE; - ptr += PAGE_SIZE; + len -= this_step; + ptr += this_step; } while (len > 0); submit_bio(&b->bio); @@ -657,18 +658,33 @@ static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io) { unsigned n_sectors; sector_t sector; - - if (rw == WRITE && b->c->write_callback) - b->c->write_callback(b); + unsigned offset, end; sector = (b->block << b->c->sectors_per_block_bits) + b->c->start; - n_sectors = 1 << b->c->sectors_per_block_bits; + + if (rw != WRITE) { + n_sectors = 1 << b->c->sectors_per_block_bits; + offset = 0; + } else { + if (b->c->write_callback) + b->c->write_callback(b); + offset = b->write_start; + end = b->write_end; + offset &= -DM_BUFIO_WRITE_ALIGN; + end += DM_BUFIO_WRITE_ALIGN - 1; + end &= -DM_BUFIO_WRITE_ALIGN; + if (unlikely(end > b->c->block_size)) + end = b->c->block_size; + + sector += offset >> SECTOR_SHIFT; + n_sectors = (end - offset) >> SECTOR_SHIFT; + } if (n_sectors <= ((DM_BUFIO_INLINE_VECS * PAGE_SIZE) >> SECTOR_SHIFT) && b->data_mode != DATA_MODE_VMALLOC) - use_inline_bio(b, rw, sector, n_sectors, end_io); + use_inline_bio(b, rw, sector, n_sectors, offset, end_io); else - use_dmio(b, rw, sector, n_sectors, end_io); + use_dmio(b, rw, sector, n_sectors, offset, end_io); } /*---------------------------------------------------------------- @@ -720,6 +736,9 @@ static void __write_dirty_buffer(struct dm_buffer 
*b, clear_bit(B_DIRTY, &b->state); wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE); + b->write_start = b->dirty_start; + b->write_end = b->dirty_end; + if (!write_list) submit_io(b, WRITE, write_endio); else @@ -1221,19 +1240,37 @@ void dm_bufio_release(struct dm_buffer *b) } EXPORT_SYMBOL_GPL(dm_bufio_release); -void dm_bufio_mark_buffer_dirty(struct dm_buffer *b) +void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b, + unsigned start, unsigned end) { struct dm_bufio_client *c = b->c; + BUG_ON(start >= end); + BUG_ON(end > b->c->block_size); + dm_bufio_lock(c); BUG_ON(test_bit(B_READING, &b->state)); - if (!test_and_set_bit(B_DIRTY, &b->state)) + if (!test_and_set_bit(B_DIRTY, &b->state)) { + b->dirty_start = start; + b->dirty_end = end; __relink_lru(b, LIST_DIRTY); + } else { + if (start < b->dirty_start) + b->dirty_start = start; + if (end > b->dirty_end) + b->dirty_end = end; + } dm_bufio_unlock(c); } +EXPORT_SYMBOL_GPL(dm_bufio_mark_partial_buffer_dirty); + +void dm_bufio_mark_buffer_dirty(struct dm_buffer *b) +{ + dm_bufio_mark_partial_buffer_dirty(b, 0, b->c->block_size); +} EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty); void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c) @@ -1398,6 +1435,8 @@ retry: wait_on_bit_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE); set_bit(B_DIRTY, &b->state); + b->dirty_start = 0; + b->dirty_end = c->block_size; __unlink_buffer(b); __link_buffer(b, new_block, LIST_DIRTY); } else { diff --git a/drivers/md/dm-bufio.h b/drivers/md/dm-bufio.h index b6d8f53ec15b..be732d3f8611 100644 --- a/drivers/md/dm-bufio.h +++ b/drivers/md/dm-bufio.h @@ -93,6 +93,15 @@ void dm_bufio_release(struct dm_buffer *b); */ void dm_bufio_mark_buffer_dirty(struct dm_buffer *b); +/* + * Mark a part of the buffer dirty. + * + * The specified part of the buffer is scheduled to be written. dm-bufio may + * write the specified part of the buffer or it may write a larger superset. 
+ */ +void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b, + unsigned start, unsigned end); + /* * Initiate writing of dirty buffers, without waiting for completion. */ diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 3acce09bba35..689f89d8eeef 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1040,7 +1040,7 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se memcpy(tag, dp, to_copy); } else if (op == TAG_WRITE) { memcpy(dp, tag, to_copy); - dm_bufio_mark_buffer_dirty(b); + dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy); } else { /* e.g.: op == TAG_CMP */ if (unlikely(memcmp(dp, tag, to_copy))) { -- cgit v1.2.3 From 3f2e539359bd0e709eb35127dc04df6bf8c3e8de Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Jul 2017 12:00:00 -0400 Subject: dm integrity: count and display checksum failures This changes DM integrity to count the number of checksum failures and report the counter in response to STATUSTYPE_INFO request (via 'dmsetup status'). 
Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 689f89d8eeef..47fd409b2e2a 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -225,6 +225,8 @@ struct dm_integrity_c { struct alg_spec internal_hash_alg; struct alg_spec journal_crypt_alg; struct alg_spec journal_mac_alg; + + atomic64_t number_of_mismatches; }; struct dm_integrity_range { @@ -309,6 +311,8 @@ static void dm_integrity_dtr(struct dm_target *ti); static void dm_integrity_io_error(struct dm_integrity_c *ic, const char *msg, int err) { + if (err == -EILSEQ) + atomic64_inc(&ic->number_of_mismatches); if (!cmpxchg(&ic->failed, 0, err)) DMERR("Error on %s: %d", msg, err); } @@ -1273,6 +1277,7 @@ again: DMERR("Checksum failed at sector 0x%llx", (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size))); r = -EILSEQ; + atomic64_inc(&ic->number_of_mismatches); } if (likely(checksums != checksums_onstack)) kfree(checksums); @@ -2230,7 +2235,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, switch (type) { case STATUSTYPE_INFO: - result[0] = '\0'; + DMEMIT("%llu", (unsigned long long)atomic64_read(&ic->number_of_mismatches)); break; case STATUSTYPE_TABLE: { @@ -2803,6 +2808,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) bio_list_init(&ic->flush_bio_list); init_waitqueue_head(&ic->copy_to_journal_wait); init_completion(&ic->crypto_backoff); + atomic64_set(&ic->number_of_mismatches, 0); r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ic->dev); if (r) { @@ -3199,7 +3205,7 @@ static void dm_integrity_dtr(struct dm_target *ti) static struct target_type integrity_target = { .name = "integrity", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, .ctr = 
dm_integrity_ctr, -- cgit v1.2.3 From 5916a22b83041b07d63191fe06206ae0fff6ec7a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 22 Jun 2017 11:32:45 -0700 Subject: dm: constify argument arrays The arrays of 'struct dm_arg' are never modified by the device-mapper core, so constify them so that they are placed in .rodata. (Exception: the args array in dm-raid cannot be constified because it is allocated on the stack and modified.) Signed-off-by: Eric Biggers Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 4 ++-- drivers/md/dm-crypt.c | 2 +- drivers/md/dm-flakey.c | 4 ++-- drivers/md/dm-integrity.c | 2 +- drivers/md/dm-mpath.c | 10 +++++----- drivers/md/dm-switch.c | 2 +- drivers/md/dm-table.c | 7 ++++--- drivers/md/dm-thin.c | 2 +- drivers/md/dm-verity-target.c | 2 +- include/linux/device-mapper.h | 4 ++-- 10 files changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index c5ea03fc7ee1..b0a5503a2fd3 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2306,7 +2306,7 @@ static void init_features(struct cache_features *cf) static int parse_features(struct cache_args *ca, struct dm_arg_set *as, char **error) { - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, 2, "Invalid number of cache feature arguments"}, }; @@ -2348,7 +2348,7 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as, static int parse_policy(struct cache_args *ca, struct dm_arg_set *as, char **error) { - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, 1024, "Invalid number of policy arguments"}, }; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index cdf6b1e12460..abf16559ed49 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2533,7 +2533,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar { struct crypt_config *cc = ti->private; struct dm_arg_set as; - 
static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, 6, "Invalid number of feature args"}, }; unsigned int opt_params, val; diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index e2c7234931bc..d8bb371e63d7 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -51,7 +51,7 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, unsigned argc; const char *arg_name; - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, 6, "Invalid number of feature args"}, {1, UINT_MAX, "Invalid corrupt bio byte"}, {0, 255, "Invalid corrupt value to write into bio byte (0-255)"}, @@ -178,7 +178,7 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, */ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) { - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, UINT_MAX, "Invalid up interval"}, {0, UINT_MAX, "Invalid down interval"}, }; diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 47fd409b2e2a..293a19652d55 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2780,7 +2780,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) int r; unsigned extra_args; struct dm_arg_set as; - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, 9, "Invalid number of feature args"}, }; unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 7406ededf875..bf280a99fa81 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -702,7 +702,7 @@ static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg, struct path_selector_type *pst; unsigned ps_argc; - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, 1024, "invalid number of path selector args"}, }; @@ -826,7 +826,7 @@ retain: static struct 
priority_group *parse_priority_group(struct dm_arg_set *as, struct multipath *m) { - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {1, 1024, "invalid number of paths"}, {0, 1024, "invalid number of selector args"} }; @@ -902,7 +902,7 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) int ret; struct dm_target *ti = m->ti; - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, 1024, "invalid number of hardware handler args"}, }; @@ -954,7 +954,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) struct dm_target *ti = m->ti; const char *arg_name; - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, 8, "invalid number of feature args"}, {1, 50, "pg_init_retries must be between 1 and 50"}, {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, @@ -1023,7 +1023,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) { /* target arguments */ - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, 1024, "invalid number of priority groups"}, {0, 1024, "invalid initial priority group number"}, }; diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c index 871c18fe000d..83a371d54412 100644 --- a/drivers/md/dm-switch.c +++ b/drivers/md/dm-switch.c @@ -251,7 +251,7 @@ static void switch_dtr(struct dm_target *ti) */ static int switch_ctr(struct dm_target *ti, unsigned argc, char **argv) { - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {1, (KMALLOC_MAX_SIZE - sizeof(struct switch_ctx)) / sizeof(struct switch_path), "Invalid number of paths"}, {1, UINT_MAX, "Invalid region size"}, {0, 0, "Invalid number of optional args"}, diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 28a4071cdf85..ef7b8f201f73 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -806,7 +806,8 @@ 
int dm_table_add_target(struct dm_table *t, const char *type, /* * Target argument parsing helpers. */ -static int validate_next_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, +static int validate_next_arg(const struct dm_arg *arg, + struct dm_arg_set *arg_set, unsigned *value, char **error, unsigned grouped) { const char *arg_str = dm_shift_arg(arg_set); @@ -824,14 +825,14 @@ static int validate_next_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, return 0; } -int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, +int dm_read_arg(const struct dm_arg *arg, struct dm_arg_set *arg_set, unsigned *value, char **error) { return validate_next_arg(arg, arg_set, value, error, 0); } EXPORT_SYMBOL(dm_read_arg); -int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set, +int dm_read_arg_group(const struct dm_arg *arg, struct dm_arg_set *arg_set, unsigned *value, char **error) { return validate_next_arg(arg, arg_set, value, error, 1); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 9dec2f8cc739..9736621c2963 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -3041,7 +3041,7 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, unsigned argc; const char *arg_name; - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, 4, "Invalid number of pool feature arguments"}, }; diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index b46705ebf01f..79f18d4d7f02 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -839,7 +839,7 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v) struct dm_target *ti = v->ti; const char *arg_name; - static struct dm_arg _args[] = { + static const struct dm_arg _args[] = { {0, DM_VERITY_OPTS_MAX, "Invalid number of feature args"}, }; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 4f2b3b2076c4..3b5fdf308148 100644 --- 
a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -387,7 +387,7 @@ struct dm_arg { * Validate the next argument, either returning it as *value or, if invalid, * returning -EINVAL and setting *error. */ -int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, +int dm_read_arg(const struct dm_arg *arg, struct dm_arg_set *arg_set, unsigned *value, char **error); /* @@ -395,7 +395,7 @@ int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, * arg->min and arg->max further arguments. Either return the size as * *num_args or, if invalid, return -EINVAL and set *error. */ -int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set, +int dm_read_arg_group(const struct dm_arg *arg, struct dm_arg_set *arg_set, unsigned *num_args, char **error); /* -- cgit v1.2.3 From cf0dec6674c1e2a7ba326cfbbe7f05a70458afc9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 22 Jun 2017 11:33:47 -0700 Subject: dm ioctl: constify ioctl lookup table Constify the lookup table for device-mapper ioctls so that it is placed in .rodata. 
Signed-off-by: Eric Biggers Signed-off-by: Mike Snitzer --- drivers/md/dm-ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index e06f0ef7d2ec..8756a6850431 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1629,7 +1629,7 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para *---------------------------------------------------------------*/ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags) { - static struct { + static const struct { int cmd; int flags; ioctl_fn fn; -- cgit v1.2.3 From 0c79c62021d23f0b5c942cf59d43a7ce6c24cd1b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 28 Jul 2017 10:42:24 -0400 Subject: dm log writes: don't use all the cpu while waiting to log blocks The check to see if the logging kthread needs to go to sleep is wrong: it checks lc->pending_blocks, which will be non-0 if there are any blocks that are pending, whether they are ready to be logged or not. What we really want is to go to sleep until it's time to log blocks, so change this check so we do actually go to sleep in between flushes.
Signed-off-by: Josef Bacik Signed-off-by: Mike Snitzer --- drivers/md/dm-log-writes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index a1da0eb58a93..9aab510a1709 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -399,7 +399,7 @@ next: if (!try_to_freeze()) { set_current_state(TASK_INTERRUPTIBLE); if (!kthread_should_stop() && - !atomic_read(&lc->pending_blocks)) + list_empty(&lc->logging_blocks)) schedule(); __set_current_state(TASK_RUNNING); } -- cgit v1.2.3 From 228bb5b26038a7d58b7c11af1297f34b534b59cd Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 28 Jul 2017 10:42:25 -0400 Subject: dm log writes: fix >512b sectorsize support 512b sectors vs device's physical sectorsize was not maintained consistently and as such the support for >512b sector devices has bugs. The log metadata expects native sectorsize but 512b sectors were being stored. Also, device's sectorsize was assumed when assigning the bi_sector for blocks that were being logged. Fix this up by adding two helpers to convert between bio and dev sectors, and use these in the appropriate places to fix the problem and make it clear which units go where. Doing so allows dm-log-writes use with 4k devices. 
Signed-off-by: Josef Bacik Signed-off-by: Mike Snitzer --- drivers/md/dm-log-writes.c | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 9aab510a1709..09979bdb6fe3 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -100,6 +100,7 @@ struct log_writes_c { struct dm_dev *logdev; u64 logged_entries; u32 sectorsize; + u32 sectorshift; atomic_t io_blocks; atomic_t pending_blocks; sector_t next_sector; @@ -128,6 +129,18 @@ struct per_bio_data { struct pending_block *block; }; +static inline sector_t bio_to_dev_sectors(struct log_writes_c *lc, + sector_t sectors) +{ + return sectors >> (lc->sectorshift - SECTOR_SHIFT); +} + +static inline sector_t dev_to_bio_sectors(struct log_writes_c *lc, + sector_t sectors) +{ + return sectors << (lc->sectorshift - SECTOR_SHIFT); +} + static void put_pending_block(struct log_writes_c *lc) { if (atomic_dec_and_test(&lc->pending_blocks)) { @@ -253,7 +266,7 @@ static int log_one_block(struct log_writes_c *lc, if (!block->vec_cnt) goto out; - sector++; + sector += dev_to_bio_sectors(lc, 1); atomic_inc(&lc->io_blocks); bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES)); @@ -354,10 +367,9 @@ static int log_writes_kthread(void *arg) goto next; sector = lc->next_sector; - if (block->flags & LOG_DISCARD_FLAG) - lc->next_sector++; - else - lc->next_sector += block->nr_sectors + 1; + if (!(block->flags & LOG_DISCARD_FLAG)) + lc->next_sector += dev_to_bio_sectors(lc, block->nr_sectors); + lc->next_sector += dev_to_bio_sectors(lc, 1); /* * Apparently the size of the device may not be known @@ -435,7 +447,6 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) INIT_LIST_HEAD(&lc->unflushed_blocks); INIT_LIST_HEAD(&lc->logging_blocks); init_waitqueue_head(&lc->wait); - lc->sectorsize = 1 << SECTOR_SHIFT; atomic_set(&lc->io_blocks, 0); atomic_set(&lc->pending_blocks, 
0); @@ -455,6 +466,8 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } + lc->sectorsize = bdev_logical_block_size(lc->dev->bdev); + lc->sectorshift = ilog2(lc->sectorsize); lc->log_kthread = kthread_run(log_writes_kthread, lc, "log-write"); if (IS_ERR(lc->log_kthread)) { ret = PTR_ERR(lc->log_kthread); @@ -464,8 +477,12 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - /* We put the super at sector 0, start logging at sector 1 */ - lc->next_sector = 1; + /* + * next_sector is in 512b sectors to correspond to what bi_sector expects. + * The super starts at sector 0, and the next_sector is the next logical + * one based on the sectorsize of the device. + */ + lc->next_sector = lc->sectorsize >> SECTOR_SHIFT; lc->logging_enabled = true; lc->end_sector = logdev_last_sector(lc); lc->device_supports_discard = true; @@ -599,8 +616,8 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio) if (discard_bio) block->flags |= LOG_DISCARD_FLAG; - block->sector = bio->bi_iter.bi_sector; - block->nr_sectors = bio_sectors(bio); + block->sector = bio_to_dev_sectors(lc, bio->bi_iter.bi_sector); + block->nr_sectors = bio_to_dev_sectors(lc, bio_sectors(bio)); /* We don't need the data, just submit */ if (discard_bio) { @@ -767,9 +784,12 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit if (!q || !blk_queue_discard(q)) { lc->device_supports_discard = false; - limits->discard_granularity = 1 << SECTOR_SHIFT; + limits->discard_granularity = lc->sectorsize; limits->max_discard_sectors = (UINT_MAX >> SECTOR_SHIFT); } + limits->logical_block_size = bdev_logical_block_size(lc->dev->bdev); + limits->physical_block_size = bdev_physical_block_size(lc->dev->bdev); + limits->io_min = limits->physical_block_size; } static struct target_type log_writes_target = { -- cgit v1.2.3 From b7e326f7b7375392d06f9cfbc27a7c63181f69d7 Mon Sep 17 00:00:00 2001 From: Hyunchul 
Lee Date: Mon, 31 Jul 2017 16:22:20 +0900 Subject: dm integrity: do not check integrity for failed read operations Even though read operations fail, dm_integrity_map_continue() calls integrity_metadata() to check integrity. In this case, just complete these. This also makes it so read I/O errors do not generate integrity warnings in the kernel log. Cc: stable@vger.kernel.org Signed-off-by: Hyunchul Lee Acked-by: Milan Broz Acked-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 293a19652d55..fe5ad640a0aa 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1702,7 +1702,11 @@ sleep: if (need_sync_io) { wait_for_completion_io(&read_comp); - integrity_metadata(&dio->work); + if (likely(!bio->bi_status)) + integrity_metadata(&dio->work); + else + dec_in_flight(dio); + } else { INIT_WORK(&dio->work, integrity_metadata); queue_work(ic->metadata_wq, &dio->work); -- cgit v1.2.3 From 7c373d660420f74c3e16d148629b810f3a36ac9e Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Sun, 6 Aug 2017 22:54:00 +0530 Subject: dm integrity: make blk_integrity_profile structure const Make this structure const as it is only stored in the profile field of a blk_integrity structure. This field is of type const, so make structure as const. Signed-off-by: Bhumika Goyal Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index fe5ad640a0aa..2b32342da556 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -299,7 +299,7 @@ static void __DEBUG_bytes(__u8 *bytes, size_t len, const char *msg, ...) 
/* * DM Integrity profile, protection is performed layer above (dm-crypt) */ -static struct blk_integrity_profile dm_integrity_profile = { +static const struct blk_integrity_profile dm_integrity_profile = { .name = "DM-DIF-EXT-TAG", .generate_fn = NULL, .verify_fn = NULL, -- cgit v1.2.3 From b5e8ad92c3ac0b073bbf08ffd1a6a31d3449caae Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 15 Aug 2017 17:11:59 +0200 Subject: dm integrity: use init_completion instead of COMPLETION_INITIALIZER_ONSTACK The new lockdep support for completions caused the stack usage in dm-integrity to explode, in case of write_journal from 504 bytes to 1120 (using arm gcc-7.1.1): drivers/md/dm-integrity.c: In function 'write_journal': drivers/md/dm-integrity.c:827:1: error: the frame size of 1120 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] The problem is that not only the size of 'struct completion' grows significantly, but we end up having multiple copies of it on the stack when we assign it from a local variable after the initial declaration. COMPLETION_INITIALIZER_ONSTACK() is the right thing to use when we want to declare and initialize a completion on the stack. However, this driver doesn't do that and instead initializes the completion just before it is used. In this case, init_completion() does the same thing more efficiently, and drops the stack usage for the function above down to 496 bytes. While the other functions in this file are not bad enough to cause a warning, they benefit equally from the change, so I do the change across the entire file. In the one place where we reuse a completion, I picked the cheaper reinit_completion() over init_completion().
Fixes: cd8084f91c02 ("locking/lockdep: Apply crossrelease to completions") Signed-off-by: Arnd Bergmann Acked-by: Mikulas Patocka Acked-by: Mike Snitzer Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 2b32342da556..ac0d7759594b 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -773,13 +773,13 @@ static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsi unsigned i; io_comp.ic = ic; - io_comp.comp = COMPLETION_INITIALIZER_ONSTACK(io_comp.comp); + init_completion(&io_comp.comp); if (commit_start + commit_sections <= ic->journal_sections) { io_comp.in_flight = (atomic_t)ATOMIC_INIT(1); if (ic->journal_io) { crypt_comp_1.ic = ic; - crypt_comp_1.comp = COMPLETION_INITIALIZER_ONSTACK(crypt_comp_1.comp); + init_completion(&crypt_comp_1.comp); crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); encrypt_journal(ic, true, commit_start, commit_sections, &crypt_comp_1); wait_for_completion_io(&crypt_comp_1.comp); @@ -795,18 +795,18 @@ static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsi to_end = ic->journal_sections - commit_start; if (ic->journal_io) { crypt_comp_1.ic = ic; - crypt_comp_1.comp = COMPLETION_INITIALIZER_ONSTACK(crypt_comp_1.comp); + init_completion(&crypt_comp_1.comp); crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); encrypt_journal(ic, true, commit_start, to_end, &crypt_comp_1); if (try_wait_for_completion(&crypt_comp_1.comp)) { rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp); - crypt_comp_1.comp = COMPLETION_INITIALIZER_ONSTACK(crypt_comp_1.comp); + reinit_completion(&crypt_comp_1.comp); crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_1); wait_for_completion_io(&crypt_comp_1.comp); } else { crypt_comp_2.ic = ic; - crypt_comp_2.comp = 
COMPLETION_INITIALIZER_ONSTACK(crypt_comp_2.comp); + init_completion(&crypt_comp_2.comp); crypt_comp_2.in_flight = (atomic_t)ATOMIC_INIT(0); encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_2); wait_for_completion_io(&crypt_comp_1.comp); @@ -1679,7 +1679,7 @@ sleep: dio->in_flight = (atomic_t)ATOMIC_INIT(2); if (need_sync_io) { - read_comp = COMPLETION_INITIALIZER_ONSTACK(read_comp); + init_completion(&read_comp); dio->completion = &read_comp; } else dio->completion = NULL; @@ -1840,7 +1840,7 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, comp.ic = ic; comp.in_flight = (atomic_t)ATOMIC_INIT(1); - comp.comp = COMPLETION_INITIALIZER_ONSTACK(comp.comp); + init_completion(&comp.comp); i = write_start; for (n = 0; n < write_sections; n++, i++, wraparound_section(ic, &i)) { @@ -2067,7 +2067,7 @@ static void replay_journal(struct dm_integrity_c *ic) if (ic->journal_io) { struct journal_completion crypt_comp; crypt_comp.ic = ic; - crypt_comp.comp = COMPLETION_INITIALIZER_ONSTACK(crypt_comp.comp); + init_completion(&crypt_comp.comp); crypt_comp.in_flight = (atomic_t)ATOMIC_INIT(0); encrypt_journal(ic, false, 0, ic->journal_sections, &crypt_comp); wait_for_completion(&crypt_comp.comp); @@ -2640,7 +2640,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error) memset(iv, 0x00, ivsize); skcipher_request_set_crypt(req, sg, sg, PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, iv); - comp.comp = COMPLETION_INITIALIZER_ONSTACK(comp.comp); + init_completion(&comp.comp); comp.in_flight = (atomic_t)ATOMIC_INIT(1); if (do_crypt(true, req, &comp)) wait_for_completion(&comp.comp); @@ -2697,7 +2697,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error) sg_init_one(&sg, crypt_data, crypt_len); skcipher_request_set_crypt(req, &sg, &sg, crypt_len, iv); - comp.comp = COMPLETION_INITIALIZER_ONSTACK(comp.comp); + init_completion(&comp.comp); comp.in_flight = (atomic_t)ATOMIC_INIT(1); if (do_crypt(true, 
req, &comp)) wait_for_completion(&comp.comp); -- cgit v1.2.3 From c3ca015fab6df124c933b91902f3f2a3473f9da5 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 31 Aug 2017 21:47:43 -0400 Subject: dax: remove the pmem_dax_ops->flush abstraction Commit abebfbe2f731 ("dm: add ->flush() dax operation support") is buggy. A DM device may be composed of multiple underlying devices and all of them need to be flushed. That commit just routes the flush request to the first device and ignores the other devices. It could be fixed by adding more complex logic to the device mapper. But there is only one implementation of the method pmem_dax_ops->flush - that is pmem_dax_flush() - and it calls arch_wb_cache_pmem(). Consequently, we don't need the pmem_dax_ops->flush abstraction at all, we can call arch_wb_cache_pmem() directly from dax_flush() because dax_dev->ops->flush can't ever reach anything different from arch_wb_cache_pmem(). It should also be pointed out that some uses of persistent memory need to flush only a very small amount of data (such as 1 cacheline), and it would be overkill to go through the device mapper machinery for a single flushed cache line. Fix this by removing the pmem_dax_ops->flush abstraction and calling arch_wb_cache_pmem() directly from dax_flush(). Also, remove the device mapper code that forwards the flushes.
Fixes: abebfbe2f731 ("dm: add ->flush() dax operation support") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Reviewed-by: Dan Williams Signed-off-by: Mike Snitzer --- drivers/dax/super.c | 21 ++++++++++++++------- drivers/md/dm-linear.c | 15 --------------- drivers/md/dm-stripe.c | 20 -------------------- drivers/md/dm.c | 19 ------------------- drivers/nvdimm/pmem.c | 7 ------- fs/dax.c | 4 ++-- include/linux/dax.h | 5 +---- include/linux/device-mapper.h | 3 --- 8 files changed, 17 insertions(+), 77 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 938eb4868f7f..8b458f1b30c7 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -189,8 +189,10 @@ static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n) if (!dax_dev) return 0; - if (a == &dev_attr_write_cache.attr && !dax_dev->ops->flush) +#ifndef CONFIG_ARCH_HAS_PMEM_API + if (a == &dev_attr_write_cache.attr) return 0; +#endif return a->mode; } @@ -255,18 +257,23 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, } EXPORT_SYMBOL_GPL(dax_copy_from_iter); -void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, - size_t size) +#ifdef CONFIG_ARCH_HAS_PMEM_API +void arch_wb_cache_pmem(void *addr, size_t size); +void dax_flush(struct dax_device *dax_dev, void *addr, size_t size) { - if (!dax_alive(dax_dev)) + if (unlikely(!dax_alive(dax_dev))) return; - if (!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags)) + if (unlikely(!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags))) return; - if (dax_dev->ops->flush) - dax_dev->ops->flush(dax_dev, pgoff, addr, size); + arch_wb_cache_pmem(addr, size); } +#else +void dax_flush(struct dax_device *dax_dev, void *addr, size_t size) +{ +} +#endif EXPORT_SYMBOL_GPL(dax_flush); void dax_write_cache(struct dax_device *dax_dev, bool wc) diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 41971a090e34..208800610af8 100644 --- a/drivers/md/dm-linear.c +++ 
b/drivers/md/dm-linear.c @@ -184,20 +184,6 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } -static void linear_dax_flush(struct dm_target *ti, pgoff_t pgoff, void *addr, - size_t size) -{ - struct linear_c *lc = ti->private; - struct block_device *bdev = lc->dev->bdev; - struct dax_device *dax_dev = lc->dev->dax_dev; - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; - - dev_sector = linear_map_sector(ti, sector); - if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(size, PAGE_SIZE), &pgoff)) - return; - dax_flush(dax_dev, pgoff, addr, size); -} - static struct target_type linear_target = { .name = "linear", .version = {1, 4, 0}, @@ -212,7 +198,6 @@ static struct target_type linear_target = { .iterate_devices = linear_iterate_devices, .direct_access = linear_dax_direct_access, .dax_copy_from_iter = linear_dax_copy_from_iter, - .dax_flush = linear_dax_flush, }; int __init dm_linear_init(void) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index a0375530b07f..1690bb299b3f 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -351,25 +351,6 @@ static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } -static void stripe_dax_flush(struct dm_target *ti, pgoff_t pgoff, void *addr, - size_t size) -{ - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; - struct stripe_c *sc = ti->private; - struct dax_device *dax_dev; - struct block_device *bdev; - uint32_t stripe; - - stripe_map_sector(sc, sector, &stripe, &dev_sector); - dev_sector += sc->stripe[stripe].physical_start; - dax_dev = sc->stripe[stripe].dev->dax_dev; - bdev = sc->stripe[stripe].dev->bdev; - - if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(size, PAGE_SIZE), &pgoff)) - return; - dax_flush(dax_dev, pgoff, addr, size); -} - /* * Stripe status: * @@ -491,7 +472,6 @@ static struct target_type stripe_target = { .io_hints = 
stripe_io_hints, .direct_access = stripe_dax_direct_access, .dax_copy_from_iter = stripe_dax_copy_from_iter, - .dax_flush = stripe_dax_flush, }; int __init dm_stripe_init(void) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d669fddd9290..825eaffc24da 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -987,24 +987,6 @@ static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, return ret; } -static void dm_dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, - size_t size) -{ - struct mapped_device *md = dax_get_private(dax_dev); - sector_t sector = pgoff * PAGE_SECTORS; - struct dm_target *ti; - int srcu_idx; - - ti = dm_dax_get_live_target(md, sector, &srcu_idx); - - if (!ti) - goto out; - if (ti->type->dax_flush) - ti->type->dax_flush(ti, pgoff, addr, size); - out: - dm_put_live_table(md, srcu_idx); -} - /* * A target may call dm_accept_partial_bio only from the map routine. It is * allowed for all bio types except REQ_PREFLUSH. @@ -2992,7 +2974,6 @@ static const struct block_device_operations dm_blk_dops = { static const struct dax_operations dm_dax_ops = { .direct_access = dm_dax_direct_access, .copy_from_iter = dm_dax_copy_from_iter, - .flush = dm_dax_flush, }; /* diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index f7099adaabc0..88c128258760 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -243,16 +243,9 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, return copy_from_iter_flushcache(addr, bytes, i); } -static void pmem_dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, - void *addr, size_t size) -{ - arch_wb_cache_pmem(addr, size); -} - static const struct dax_operations pmem_dax_ops = { .direct_access = pmem_dax_direct_access, .copy_from_iter = pmem_copy_from_iter, - .flush = pmem_dax_flush, }; static const struct attribute_group *pmem_attribute_groups[] = { diff --git a/fs/dax.c b/fs/dax.c index 865d42c63e23..18d970fb0e09 100644 --- a/fs/dax.c +++ 
b/fs/dax.c @@ -783,7 +783,7 @@ static int dax_writeback_one(struct block_device *bdev, } dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(pfn)); - dax_flush(dax_dev, pgoff, kaddr, size); + dax_flush(dax_dev, kaddr, size); /* * After we have flushed the cache, we can clear the dirty tag. There * cannot be new dirty data in the pfn after the flush has completed as @@ -978,7 +978,7 @@ int __dax_zero_page_range(struct block_device *bdev, return rc; } memset(kaddr + offset, 0, size); - dax_flush(dax_dev, pgoff, kaddr + offset, size); + dax_flush(dax_dev, kaddr + offset, size); dax_read_unlock(id); } return 0; diff --git a/include/linux/dax.h b/include/linux/dax.h index df97b7af7e2c..0d8f35f6c53d 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -19,8 +19,6 @@ struct dax_operations { /* copy_from_iter: required operation for fs-dax direct-i/o */ size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, struct iov_iter *); - /* flush: optional driver-specific cache management after writes */ - void (*flush)(struct dax_device *, pgoff_t, void *, size_t); }; extern struct attribute_group dax_attribute_group; @@ -84,8 +82,7 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); -void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, - size_t size); +void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); void dax_write_cache(struct dax_device *dax_dev, bool wc); bool dax_write_cache_enabled(struct dax_device *dax_dev); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 3b5fdf308148..a5538433c927 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -134,8 +134,6 @@ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); typedef 
size_t (*dm_dax_copy_from_iter_fn)(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); -typedef void (*dm_dax_flush_fn)(struct dm_target *ti, pgoff_t pgoff, void *addr, - size_t size); #define PAGE_SECTORS (PAGE_SIZE / 512) void dm_error(const char *message); @@ -186,7 +184,6 @@ struct target_type { dm_io_hints_fn io_hints; dm_dax_direct_access_fn direct_access; dm_dax_copy_from_iter_fn dax_copy_from_iter; - dm_dax_flush_fn dax_flush; /* For internal device-mapper use. */ struct list_head list; -- cgit v1.2.3