summaryrefslogtreecommitdiffstats
path: root/net/ceph/osd_client.c
diff options
context:
space:
mode:
authorIlya Dryomov <idryomov@gmail.com>2018-10-15 17:38:23 +0200
committerIlya Dryomov <idryomov@gmail.com>2018-10-22 10:28:22 +0200
commit0d9c1ab3be4c0187663096a6a084421d0a1e45c6 (patch)
tree06a57638913d5badd76fe695bcb4f08992f55579 /net/ceph/osd_client.c
parent26f887e0a3c43f67b550e2e5d8a76e86ca11d188 (diff)
downloadlinux-stable-0d9c1ab3be4c0187663096a6a084421d0a1e45c6.tar.gz
linux-stable-0d9c1ab3be4c0187663096a6a084421d0a1e45c6.tar.bz2
linux-stable-0d9c1ab3be4c0187663096a6a084421d0a1e45c6.zip
libceph: preallocate message data items
Currently message data items are allocated with ceph_msg_data_create() in setup_request_data() inside send_request(). send_request() has never been allowed to fail, so each allocation is followed by a BUG_ON: data = ceph_msg_data_create(...); BUG_ON(!data); It's been this way since support for multiple message data items was added in commit 6644ed7b7e04 ("libceph: make message data be a pointer") in 3.10. There is no reason to delay the allocation of message data items until the last possible moment and we certainly don't need a linked list of them as they are only ever appended to the end and never erased. Make ceph_msg_new2() take max_data_items and adapt the rest of the code. Reported-by: Jerry Lee <leisurelysw24@gmail.com> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'net/ceph/osd_client.c')
-rw-r--r--net/ceph/osd_client.c102
1 files changed, 88 insertions, 14 deletions
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 7ac7f21ff317..cf0bd2cce848 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -614,12 +614,15 @@ static int ceph_oloc_encoding_size(const struct ceph_object_locator *oloc)
return 8 + 4 + 4 + 4 + (oloc->pool_ns ? oloc->pool_ns->len : 0);
}
-int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp)
+static int __ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp,
+ int num_request_data_items,
+ int num_reply_data_items)
{
struct ceph_osd_client *osdc = req->r_osdc;
struct ceph_msg *msg;
int msg_size;
+ WARN_ON(req->r_request || req->r_reply);
WARN_ON(ceph_oid_empty(&req->r_base_oid));
WARN_ON(ceph_oloc_empty(&req->r_base_oloc));
@@ -641,9 +644,11 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp)
msg_size += 4 + 8; /* retry_attempt, features */
if (req->r_mempool)
- msg = ceph_msgpool_get(&osdc->msgpool_op, msg_size);
+ msg = ceph_msgpool_get(&osdc->msgpool_op, msg_size,
+ num_request_data_items);
else
- msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp, true);
+ msg = ceph_msg_new2(CEPH_MSG_OSD_OP, msg_size,
+ num_request_data_items, gfp, true);
if (!msg)
return -ENOMEM;
@@ -656,9 +661,11 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp)
msg_size += req->r_num_ops * sizeof(struct ceph_osd_op);
if (req->r_mempool)
- msg = ceph_msgpool_get(&osdc->msgpool_op_reply, msg_size);
+ msg = ceph_msgpool_get(&osdc->msgpool_op_reply, msg_size,
+ num_reply_data_items);
else
- msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, msg_size, gfp, true);
+ msg = ceph_msg_new2(CEPH_MSG_OSD_OPREPLY, msg_size,
+ num_reply_data_items, gfp, true);
if (!msg)
return -ENOMEM;
@@ -666,7 +673,6 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp)
return 0;
}
-EXPORT_SYMBOL(ceph_osdc_alloc_messages);
static bool osd_req_opcode_valid(u16 opcode)
{
@@ -679,6 +685,64 @@ __CEPH_FORALL_OSD_OPS(GENERATE_CASE)
}
}
+static void get_num_data_items(struct ceph_osd_request *req,
+ int *num_request_data_items,
+ int *num_reply_data_items)
+{
+ struct ceph_osd_req_op *op;
+
+ *num_request_data_items = 0;
+ *num_reply_data_items = 0;
+
+ for (op = req->r_ops; op != &req->r_ops[req->r_num_ops]; op++) {
+ switch (op->op) {
+ /* request */
+ case CEPH_OSD_OP_WRITE:
+ case CEPH_OSD_OP_WRITEFULL:
+ case CEPH_OSD_OP_SETXATTR:
+ case CEPH_OSD_OP_CMPXATTR:
+ case CEPH_OSD_OP_NOTIFY_ACK:
+ *num_request_data_items += 1;
+ break;
+
+ /* reply */
+ case CEPH_OSD_OP_STAT:
+ case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_LIST_WATCHERS:
+ *num_reply_data_items += 1;
+ break;
+
+ /* both */
+ case CEPH_OSD_OP_NOTIFY:
+ *num_request_data_items += 1;
+ *num_reply_data_items += 1;
+ break;
+ case CEPH_OSD_OP_CALL:
+ *num_request_data_items += 2;
+ *num_reply_data_items += 1;
+ break;
+
+ default:
+ WARN_ON(!osd_req_opcode_valid(op->op));
+ break;
+ }
+ }
+}
+
+/*
+ * oid, oloc and OSD op opcode(s) must be filled in before this function
+ * is called.
+ */
+int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp)
+{
+ int num_request_data_items, num_reply_data_items;
+
+ get_num_data_items(req, &num_request_data_items, &num_reply_data_items);
+ return __ceph_osdc_alloc_messages(req, gfp, num_request_data_items,
+ num_reply_data_items);
+}
+EXPORT_SYMBOL(ceph_osdc_alloc_messages);
+
/*
* This is an osd op init function for opcodes that have no data or
* other information associated with them. It also serves as a
@@ -1035,7 +1099,15 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
if (flags & CEPH_OSD_FLAG_WRITE)
req->r_data_offset = off;
- r = ceph_osdc_alloc_messages(req, GFP_NOFS);
+ if (num_ops > 1)
+ /*
+ * This is a special case for ceph_writepages_start(), but it
+ * also covers ceph_uninline_data(). If more multi-op request
+ * use cases emerge, we will need a separate helper.
+ */
+ r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_ops, 0);
+ else
+ r = ceph_osdc_alloc_messages(req, GFP_NOFS);
if (r)
goto fail;
@@ -1842,13 +1914,16 @@ static bool should_plug_request(struct ceph_osd_request *req)
return true;
}
+/*
+ * Keep get_num_data_items() in sync with this function.
+ */
static void setup_request_data(struct ceph_osd_request *req,
struct ceph_msg *msg)
{
u32 data_len = 0;
int i;
- if (!list_empty(&msg->data))
+ if (msg->num_data_items)
return;
WARN_ON(msg->data_length);
@@ -4325,9 +4400,7 @@ static void handle_watch_notify(struct ceph_osd_client *osdc,
lreq->notify_id, notify_id);
} else if (!completion_done(&lreq->notify_finish_wait)) {
struct ceph_msg_data *data =
- list_first_entry_or_null(&msg->data,
- struct ceph_msg_data,
- links);
+ msg->num_data_items ? &msg->data[0] : NULL;
if (data) {
if (lreq->preply_pages) {
@@ -5036,11 +5109,12 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
goto out_map;
err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP,
- PAGE_SIZE, 10, true, "osd_op");
+ PAGE_SIZE, CEPH_OSD_SLAB_OPS, 10, "osd_op");
if (err < 0)
goto out_mempool;
err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY,
- PAGE_SIZE, 10, true, "osd_op_reply");
+ PAGE_SIZE, CEPH_OSD_SLAB_OPS, 10,
+ "osd_op_reply");
if (err < 0)
goto out_msgpool;
@@ -5310,7 +5384,7 @@ static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr)
u32 front_len = le32_to_cpu(hdr->front_len);
u32 data_len = le32_to_cpu(hdr->data_len);
- m = ceph_msg_new(type, front_len, GFP_NOIO, false);
+ m = ceph_msg_new2(type, front_len, 1, GFP_NOIO, false);
if (!m)
return NULL;