From 75f187aba5e7a3eea259041f85099029774a4c5b Mon Sep 17 00:00:00 2001 From: Alex Bligh Date: Wed, 27 Feb 2013 17:05:23 -0800 Subject: nbd: support FLUSH requests Currently, the NBD device does not accept flush requests from the Linux block layer. If the NBD server opened the target with neither O_SYNC nor O_DSYNC, however, the device will be effectively backed by a writeback cache. Without issuing flushes properly, operation of the NBD device will not be safe against power losses. The NBD protocol has support for both a cache flush command and a FUA command flag; the server will also pass a flag to note its support for these features. This patch adds support for the cache flush command and flag. In the kernel, we receive the flags via the NBD_SET_FLAGS ioctl, and map NBD_FLAG_SEND_FLUSH to the argument of blk_queue_flush. When the flag is active the block layer will send REQ_FLUSH requests, which we translate to NBD_CMD_FLUSH commands. FUA support is not included in this patch because all free software servers implement it with a full fdatasync; thus it has no advantage over supporting flush only. Because I [Paolo] cannot really benchmark it in a realistic scenario, I cannot tell if it is a good idea or not. It is also not clear if it is valid for an NBD server to support FUA but not flush. The Linux block layer gives a warning for this combination, the NBD protocol documentation says nothing about it. The patch also fixes a small problem in the handling of flags: nbd->flags must be cleared at the end of NBD_DO_IT, but the driver was not doing that. The bug manifests itself as follows. Suppose you two different client/server pairs to start the NBD device. Suppose also that the first client supports NBD_SET_FLAGS, and the first server sends NBD_FLAG_SEND_FLUSH; the second pair instead does neither of these two things. Before this patch, the second invocation of NBD_DO_IT will use a stale value of nbd->flags, and the second server will issue an error every time it receives an NBD_CMD_FLUSH command. This bug is pre-existing, but it becomes much more important after this patch; flush failures make the device pretty much unusable, unlike Signed-off-by: Paolo Bonzini Signed-off-by: Alex Bligh Acked-by: Paul Clements Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/nbd.c | 22 ++++++++++++++++++++-- include/uapi/linux/nbd.h | 3 ++- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index ade146bf65e5..695c68fedd32 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -98,6 +98,7 @@ static const char *nbdcmd_to_ascii(int cmd) case NBD_CMD_READ: return "read"; case NBD_CMD_WRITE: return "write"; case NBD_CMD_DISC: return "disconnect"; + case NBD_CMD_FLUSH: return "flush"; case NBD_CMD_TRIM: return "trim/discard"; } return "invalid"; @@ -244,8 +245,15 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req) request.magic = htonl(NBD_REQUEST_MAGIC); request.type = htonl(nbd_cmd(req)); - request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); - request.len = htonl(size); + + if (nbd_cmd(req) == NBD_CMD_FLUSH) { + /* Other values are reserved for FLUSH requests. */ + request.from = 0; + request.len = 0; + } else { + request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); + request.len = htonl(size); + } memcpy(request.handle, &req, sizeof(req)); dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n", @@ -482,6 +490,11 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req) } } + if (req->cmd_flags & REQ_FLUSH) { + BUG_ON(unlikely(blk_rq_sectors(req))); + nbd_cmd(req) = NBD_CMD_FLUSH; + } + req->errors = 0; mutex_lock(&nbd->tx_lock); @@ -684,6 +697,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, if (nbd->flags & NBD_FLAG_SEND_TRIM) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); + if (nbd->flags & NBD_FLAG_SEND_FLUSH) + blk_queue_flush(nbd->disk->queue, REQ_FLUSH); + else + blk_queue_flush(nbd->disk->queue, 0); thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name); if (IS_ERR(thread)) { @@ -705,6 +722,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); if (file) fput(file); + nbd->flags = 0; nbd->bytesize = 0; bdev->bd_inode->i_size = 0; set_capacity(nbd->disk, 0); diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index dfb514472cbc..4f52549b23ff 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -33,13 +33,14 @@ enum { NBD_CMD_READ = 0, NBD_CMD_WRITE = 1, NBD_CMD_DISC = 2, - /* there is a gap here to match userspace */ + NBD_CMD_FLUSH = 3, NBD_CMD_TRIM = 4 }; /* values for flags field */ #define NBD_FLAG_HAS_FLAGS (1 << 0) /* nbd-server supports flags */ #define NBD_FLAG_READ_ONLY (1 << 1) /* device is read-only */ +#define NBD_FLAG_SEND_FLUSH (1 << 2) /* can flush writeback cache */ /* there is a gap here to match userspace */ #define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */ -- cgit v1.2.3