io_uring: fix CQE reordering

Overflowing CQEs may result in reordering, which is buggy in case of links, F_MORE and so on. If we guarantee that we don't reorder for the unlikely event of a CQ ring overflow, then we can further extend this to not have to terminate multishot requests if it happens. For other operations, like zerocopy sends, we have no choice but to honor CQE ordering.

Reported-by: Dylan Yudaken <dylany@fb.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/ec3bc55687b0768bbe20fb62d7d06cfced7d7e70.1663892031.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
author: Pavel Begunkov <asml.silence@gmail.com> 2022-09-23 14:53:25 +0100
committer: Jens Axboe <axboe@kernel.dk> 2022-09-23 15:04:20 -0600
commit: aa1df3a360a0c50e0f0086a785d75c2785c29967 (patch)
tree: a02d7afe126c810f9e0c32dcf7982c7a01027f2e /io_uring/io_uring.c
parent: a75155faef4efcb9791f77e2652e29ce8906e05a (diff)
download: linux-aa1df3a360a0c50e0f0086a785d75c2785c29967.tar.gz
linux-aa1df3a360a0c50e0f0086a785d75c2785c29967.tar.bz2
linux-aa1df3a360a0c50e0f0086a785d75c2785c29967.zip
1 file changed, 10 insertions, 2 deletions
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index f359e24b46c3..62d1f55fde55 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -609,7 +609,7 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 
 	io_cq_lock(ctx);
 	while (!list_empty(&ctx->cq_overflow_list)) {
-		struct io_uring_cqe *cqe = io_get_cqe(ctx);
+		struct io_uring_cqe *cqe = io_get_cqe_overflow(ctx, true);
 		struct io_overflow_cqe *ocqe;
 
 		if (!cqe && !force)
@@ -736,12 +736,19 @@ bool io_req_cqe_overflow(struct io_kiocb *req)
  * control dependency is enough as we're using WRITE_ONCE to
  * fill the cq entry
  */
-struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx)
+struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx, bool overflow)
 {
 	struct io_rings *rings = ctx->rings;
 	unsigned int off = ctx->cached_cq_tail & (ctx->cq_entries - 1);
 	unsigned int free, queued, len;
 
+	/*
+	 * Posting into the CQ when there are pending overflowed CQEs may break
+	 * ordering guarantees, which will affect links, F_MORE users and more.
+	 * Force overflow the completion.
+	 */
+	if (!overflow && (ctx->check_cq & BIT(IO_CHECK_CQ_OVERFLOW_BIT)))
+		return NULL;
 
 	/* userspace may cheat modifying the tail, be safe and do min */
 	queued = min(__io_cqring_events(ctx), ctx->cq_entries);
@@ -2394,6 +2401,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 		if (ret < 0)
 			return ret;
 		io_cqring_overflow_flush(ctx);
+
 		if (io_cqring_events(ctx) >= min_events)
 			return 0;
 	} while (ret > 0);
author	Pavel Begunkov <asml.silence@gmail.com>	2022-09-23 14:53:25 +0100
committer	Jens Axboe <axboe@kernel.dk>	2022-09-23 15:04:20 -0600
commit	aa1df3a360a0c50e0f0086a785d75c2785c29967 (patch)
tree	a02d7afe126c810f9e0c32dcf7982c7a01027f2e /io_uring/io_uring.c
parent	a75155faef4efcb9791f77e2652e29ce8906e05a (diff)
download	linux-aa1df3a360a0c50e0f0086a785d75c2785c29967.tar.gz linux-aa1df3a360a0c50e0f0086a785d75c2785c29967.tar.bz2 linux-aa1df3a360a0c50e0f0086a785d75c2785c29967.zip