Merge tag 'for-5.8/drivers-2020-06-01' of git://git.kernel.dk/linux-block

Pull block driver updates from Jens Axboe: "On top of the core changes, here are the block driver changes for this merge window: - NVMe changes: - NVMe over Fibre Channel protocol updates, which also reach over to drivers/scsi/lpfc (James Smart) - namespace revalidation support on the target (Anthony Iliopoulos) - gcc zero length array fix (Arnd Bergmann) - nvmet cleanups (Chaitanya Kulkarni) - misc cleanups and fixes (me, Keith Busch, Sagi Grimberg) - use a SRQ per completion vector (Max Gurtovoy) - fix handling of runtime changes to the queue count (Weiping Zhang) - t10 protection information support for nvme-rdma and nvmet-rdma (Israel Rukshin and Max Gurtovoy) - target side AEN improvements (Chaitanya Kulkarni) - various fixes and minor improvements all over, icluding the nvme part of the lpfc driver" - Floppy code cleanup series (Willy, Denis) - Floppy contention fix (Jiri) - Loop CONFIGURE support (Martijn) - bcache fixes/improvements (Coly, Joe, Colin) - q->queuedata cleanups (Christoph) - Get rid of ioctl_by_bdev (Christoph, Stefan) - md/raid5 allocation fixes (Coly) - zero length array fixes (Gustavo) - swim3 task state fix (Xu)" * tag 'for-5.8/drivers-2020-06-01' of git://git.kernel.dk/linux-block: (166 commits) bcache: configure the asynchronous registertion to be experimental bcache: asynchronous devices registration bcache: fix refcount underflow in bcache_device_free() bcache: Convert pr_<level> uses to a more typical style bcache: remove redundant variables i and n lpfc: Fix return value in __lpfc_nvme_ls_abort lpfc: fix axchg pointer reference after free and double frees lpfc: Fix pointer checks and comments in LS receive refactoring nvme: set dma alignment to qword nvmet: cleanups the loop in nvmet_async_events_process nvmet: fix memory leak when removing namespaces and controllers concurrently nvmet-rdma: add metadata/T10-PI support nvmet: add metadata support for block devices nvmet: add metadata/T10-PI support nvme: add Metadata Capabilities enumerations nvmet: rename nvmet_check_data_len to nvmet_check_transfer_len nvmet: rename nvmet_rw_len to nvmet_rw_data_len nvmet: add metadata characteristics for a namespace nvme-rdma: add metadata/T10-PI support nvme-rdma: introduce nvme_rdma_sgl structure ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2020-06-02 15:37:03 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2020-06-02 15:37:03 -0700
commit: bce159d734091fe31340976081577333f52a85e4 (patch)
tree: 8396be51e6703797a60aefb4992e729f327d27c2 /drivers/nvme/host/tcp.c
parent: 750a02ab8d3c49ca7d23102be90d3d1db19e2827 (diff)
parent: 0c8d3fceade2ab1bbac68bca013e62bfdb851d19 (diff)
download: linux-stable-bce159d734091fe31340976081577333f52a85e4.tar.gz
linux-stable-bce159d734091fe31340976081577333f52a85e4.tar.bz2
linux-stable-bce159d734091fe31340976081577333f52a85e4.zip
1 files changed, 47 insertions, 17 deletions
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index c15a92163c1f..7c7c1886642f 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -60,6 +60,7 @@ struct nvme_tcp_request {
 enum nvme_tcp_queue_flags {
 	NVME_TCP_Q_ALLOCATED	= 0,
 	NVME_TCP_Q_LIVE		= 1,
+	NVME_TCP_Q_POLLING	= 2,
 };
 
 enum nvme_tcp_recv_state {
@@ -75,6 +76,7 @@ struct nvme_tcp_queue {
 	int			io_cpu;
 
 	spinlock_t		lock;
+	struct mutex		send_mutex;
 	struct list_head	send_list;
 
 	/* recv state */
@@ -131,6 +133,7 @@ static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
 static struct workqueue_struct *nvme_tcp_wq;
 static struct blk_mq_ops nvme_tcp_mq_ops;
 static struct blk_mq_ops nvme_tcp_admin_mq_ops;
+static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);
 
 static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
 {
@@ -257,15 +260,29 @@ static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req,
 	}
 }
 
-static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req)
+static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
+		bool sync)
 {
 	struct nvme_tcp_queue *queue = req->queue;
+	bool empty;
 
 	spin_lock(&queue->lock);
+	empty = list_empty(&queue->send_list) && !queue->request;
 	list_add_tail(&req->entry, &queue->send_list);
 	spin_unlock(&queue->lock);
 
-	queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+	/*
+	 * if we're the first on the send_list and we can try to send
+	 * directly, otherwise queue io_work. Also, only do that if we
+	 * are on the same cpu, so we don't introduce contention.
+	 */
+	if (queue->io_cpu == smp_processor_id() &&
+	    sync && empty && mutex_trylock(&queue->send_mutex)) {
+		nvme_tcp_try_send(queue);
+		mutex_unlock(&queue->send_mutex);
+	} else {
+		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+	}
 }
 
 static inline struct nvme_tcp_request *
@@ -578,7 +595,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
 	req->state = NVME_TCP_SEND_H2C_PDU;
 	req->offset = 0;
 
-	nvme_tcp_queue_request(req);
+	nvme_tcp_queue_request(req, false);
 
 	return 0;
 }
@@ -794,11 +811,12 @@ static void nvme_tcp_data_ready(struct sock *sk)
 {
 	struct nvme_tcp_queue *queue;
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	queue = sk->sk_user_data;
-	if (likely(queue && queue->rd_enabled))
+	if (likely(queue && queue->rd_enabled) &&
+	    !test_bit(NVME_TCP_Q_POLLING, &queue->flags))
 		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 }
 
 static void nvme_tcp_write_space(struct sock *sk)
@@ -867,7 +885,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 		if (last && !queue->data_digest)
 			flags |= MSG_EOR;
 		else
-			flags |= MSG_MORE;
+			flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
 
 		/* can't zcopy slab pages */
 		if (unlikely(PageSlab(page))) {
@@ -906,11 +924,16 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
 	struct nvme_tcp_queue *queue = req->queue;
 	struct nvme_tcp_cmd_pdu *pdu = req->pdu;
 	bool inline_data = nvme_tcp_has_inline_data(req);
-	int flags = MSG_DONTWAIT | (inline_data ? MSG_MORE : MSG_EOR);
 	u8 hdgst = nvme_tcp_hdgst_len(queue);
 	int len = sizeof(*pdu) + hdgst - req->offset;
+	int flags = MSG_DONTWAIT;
 	int ret;
 
+	if (inline_data)
+		flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
+	else
+		flags |= MSG_EOR;
+
 	if (queue->hdr_digest && !req->offset)
 		nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
 
@@ -949,7 +972,7 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
 
 	ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
 			offset_in_page(pdu) + req->offset, len,
-			MSG_DONTWAIT | MSG_MORE);
+			MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
 	if (unlikely(ret <= 0))
 		return ret;
 
@@ -1063,11 +1086,14 @@ static void nvme_tcp_io_work(struct work_struct *w)
 		bool pending = false;
 		int result;
 
-		result = nvme_tcp_try_send(queue);
-		if (result > 0)
-			pending = true;
-		else if (unlikely(result < 0))
-			break;
+		if (mutex_trylock(&queue->send_mutex)) {
+			result = nvme_tcp_try_send(queue);
+			mutex_unlock(&queue->send_mutex);
+			if (result > 0)
+				pending = true;
+			else if (unlikely(result < 0))
+				break;
+		}
 
 		result = nvme_tcp_try_recv(queue);
 		if (result > 0)
@@ -1319,6 +1345,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 	queue->ctrl = ctrl;
 	INIT_LIST_HEAD(&queue->send_list);
 	spin_lock_init(&queue->lock);
+	mutex_init(&queue->send_mutex);
 	INIT_WORK(&queue->io_work, nvme_tcp_io_work);
 	queue->queue_size = queue_size;
 
@@ -1543,6 +1570,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
 		set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
 		set->reserved_tags = 2; /* connect + keep-alive */
 		set->numa_node = NUMA_NO_NODE;
+		set->flags = BLK_MQ_F_BLOCKING;
 		set->cmd_size = sizeof(struct nvme_tcp_request);
 		set->driver_data = ctrl;
 		set->nr_hw_queues = 1;
@@ -1554,7 +1582,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
 		set->queue_depth = nctrl->sqsize + 1;
 		set->reserved_tags = 1; /* fabric connect */
 		set->numa_node = NUMA_NO_NODE;
-		set->flags = BLK_MQ_F_SHOULD_MERGE;
+		set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
 		set->cmd_size = sizeof(struct nvme_tcp_request);
 		set->driver_data = ctrl;
 		set->nr_hw_queues = nctrl->queue_count - 1;
@@ -2113,7 +2141,7 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg)
 	ctrl->async_req.curr_bio = NULL;
 	ctrl->async_req.data_len = 0;
 
-	nvme_tcp_queue_request(&ctrl->async_req);
+	nvme_tcp_queue_request(&ctrl->async_req, true);
 }
 
 static enum blk_eh_timer_return
@@ -2244,7 +2272,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	blk_mq_start_request(rq);
 
-	nvme_tcp_queue_request(req);
+	nvme_tcp_queue_request(req, true);
 
 	return BLK_STS_OK;
 }
@@ -2302,9 +2330,11 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
 	if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
 		return 0;
 
+	set_bit(NVME_TCP_Q_POLLING, &queue->flags);
 	if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
 		sk_busy_loop(sk, true);
 	nvme_tcp_try_recv(queue);
+	clear_bit(NVME_TCP_Q_POLLING, &queue->flags);
 	return queue->nr_cqe;
 }
author	Linus Torvalds <torvalds@linux-foundation.org>	2020-06-02 15:37:03 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2020-06-02 15:37:03 -0700
commit	bce159d734091fe31340976081577333f52a85e4 (patch)
tree	8396be51e6703797a60aefb4992e729f327d27c2 /drivers/nvme/host/tcp.c
parent	750a02ab8d3c49ca7d23102be90d3d1db19e2827 (diff)
parent	0c8d3fceade2ab1bbac68bca013e62bfdb851d19 (diff)
download	linux-stable-bce159d734091fe31340976081577333f52a85e4.tar.gz linux-stable-bce159d734091fe31340976081577333f52a85e4.tar.bz2 linux-stable-bce159d734091fe31340976081577333f52a85e4.zip