summaryrefslogtreecommitdiffstats
path: root/net/ceph/messenger_v1.c
diff options
context:
space:
mode:
authorDavid Howells <dhowells@redhat.com>2023-06-27 14:49:48 +0100
committerJakub Kicinski <kuba@kernel.org>2023-06-27 09:32:40 -0700
commit5da4d7b8e6dfd5bd7d61f7fe1338b1e5d5b4f76c (patch)
tree801376fb6d1cf15d76806968cf02ed946ff16b11 /net/ceph/messenger_v1.c
parent528a08bcd820d07887edeae706df88ceb06db109 (diff)
downloadlinux-5da4d7b8e6dfd5bd7d61f7fe1338b1e5d5b4f76c.tar.gz
linux-5da4d7b8e6dfd5bd7d61f7fe1338b1e5d5b4f76c.tar.bz2
linux-5da4d7b8e6dfd5bd7d61f7fe1338b1e5d5b4f76c.zip
libceph: Partially revert changes to support MSG_SPLICE_PAGES
Fix the mishandling of MSG_DONTWAIT and also reinstates the per-page checking of the source pages (which might have come from a DIO write by userspace) by partially reverting the changes to support MSG_SPLICE_PAGES and doing things a little differently. In messenger_v1: (1) The ceph_tcp_sendpage() is resurrected and the callers reverted to use that. (2) The callers now pass MSG_MORE unconditionally. Previously, they were passing in MSG_MORE|MSG_SENDPAGE_NOTLAST and then degrading that to just MSG_MORE on the last call to ->sendpage(). (3) Make ceph_tcp_sendpage() a wrapper around sendmsg() rather than sendpage(), setting MSG_SPLICE_PAGES if sendpage_ok() returns true on the page. In messenger_v2: (4) Bring back do_try_sendpage() and make the callers use that. (5) Make do_try_sendpage() use sendmsg() for both cases and set MSG_SPLICE_PAGES if sendpage_ok() is set. Fixes: 40a8c17aa770 ("ceph: Use sendmsg(MSG_SPLICE_PAGES) rather than sendpage") Fixes: fa094ccae1e7 ("ceph: Use sendmsg(MSG_SPLICE_PAGES) rather than sendpage()") Reported-by: Ilya Dryomov <idryomov@gmail.com> Link: https://lore.kernel.org/r/CAOi1vP9vjLfk3W+AJFeexC93jqPaPUn2dD_4NrzxwoZTbYfOnw@mail.gmail.com/ Link: https://lore.kernel.org/r/CAOi1vP_Bn918j24S94MuGyn+Gxk212btw7yWeDrRcW1U8pc_BA@mail.gmail.com/ Signed-off-by: David Howells <dhowells@redhat.com> cc: Xiubo Li <xiubli@redhat.com> cc: Jeff Layton <jlayton@kernel.org> cc: Jens Axboe <axboe@kernel.dk> cc: Matthew Wilcox <willy@infradead.org> Link: https://lore.kernel.org/r/3101881.1687801973@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/3111635.1687813501@warthog.procyon.org.uk/ # v2 Reviewed-by: Ilya Dryomov <idryomov@gmail.com> Link: https://lore.kernel.org/r/3199652.1687873788@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ceph/messenger_v1.c')
-rw-r--r--net/ceph/messenger_v1.c58
1 files changed, 37 insertions, 21 deletions
diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c
index 814579f27f04..3d57bb48a2b4 100644
--- a/net/ceph/messenger_v1.c
+++ b/net/ceph/messenger_v1.c
@@ -74,6 +74,39 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
return r;
}
+/*
+ * @more: MSG_MORE or 0.
+ */
+static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
+ int offset, size_t size, int more)
+{
+ struct msghdr msg = {
+ .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL | more,
+ };
+ struct bio_vec bvec;
+ int ret;
+
+ /*
+ * MSG_SPLICE_PAGES cannot properly handle pages with page_count == 0,
+ * we need to fall back to sendmsg if that's the case.
+ *
+ * Same goes for slab pages: skb_can_coalesce() allows
+ * coalescing neighboring slab objects into a single frag which
+ * triggers one of hardened usercopy checks.
+ */
+ if (sendpage_ok(page))
+ msg.msg_flags |= MSG_SPLICE_PAGES;
+
+ bvec_set_page(&bvec, page, size, offset);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+
+ ret = sock_sendmsg(sock, &msg);
+ if (ret == -EAGAIN)
+ ret = 0;
+
+ return ret;
+}
+
static void con_out_kvec_reset(struct ceph_connection *con)
{
BUG_ON(con->v1.out_skip);
@@ -450,10 +483,6 @@ static int write_partial_message_data(struct ceph_connection *con)
*/
crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0;
while (cursor->total_resid) {
- struct bio_vec bvec;
- struct msghdr msghdr = {
- .msg_flags = MSG_SPLICE_PAGES,
- };
struct page *page;
size_t page_offset;
size_t length;
@@ -465,13 +494,8 @@ static int write_partial_message_data(struct ceph_connection *con)
}
page = ceph_msg_data_next(cursor, &page_offset, &length);
- if (length != cursor->total_resid)
- msghdr.msg_flags |= MSG_MORE;
-
- bvec_set_page(&bvec, page, length, page_offset);
- iov_iter_bvec(&msghdr.msg_iter, ITER_SOURCE, &bvec, 1, length);
-
- ret = sock_sendmsg(con->sock, &msghdr);
+ ret = ceph_tcp_sendpage(con->sock, page, page_offset, length,
+ MSG_MORE);
if (ret <= 0) {
if (do_datacrc)
msg->footer.data_crc = cpu_to_le32(crc);
@@ -501,22 +525,14 @@ static int write_partial_message_data(struct ceph_connection *con)
*/
static int write_partial_skip(struct ceph_connection *con)
{
- struct bio_vec bvec;
- struct msghdr msghdr = {
- .msg_flags = MSG_SPLICE_PAGES | MSG_MORE,
- };
int ret;
dout("%s %p %d left\n", __func__, con, con->v1.out_skip);
while (con->v1.out_skip > 0) {
size_t size = min(con->v1.out_skip, (int)PAGE_SIZE);
- if (size == con->v1.out_skip)
- msghdr.msg_flags &= ~MSG_MORE;
- bvec_set_page(&bvec, ZERO_PAGE(0), size, 0);
- iov_iter_bvec(&msghdr.msg_iter, ITER_SOURCE, &bvec, 1, size);
-
- ret = sock_sendmsg(con->sock, &msghdr);
+ ret = ceph_tcp_sendpage(con->sock, ceph_zero_page, 0, size,
+ MSG_MORE);
if (ret <= 0)
goto out;
con->v1.out_skip -= ret;