From 17ddc49b9c5c8847be745d13d91259248d59114b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 16 Nov 2015 21:46:32 +0800 Subject: libceph: use list_next_entry instead of list_entry_next list_next_entry has been defined in list.h, so I replace list_entry_next with it. Signed-off-by: Geliang Tang Signed-off-by: Ilya Dryomov --- net/ceph/messenger.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 9981039ef4ff..b1d1489543b1 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -23,9 +23,6 @@ #include #include -#define list_entry_next(pos, member) \ - list_entry(pos->member.next, typeof(*pos), member) - /* * Ceph uses the messenger to exchange ceph_msg messages with other * hosts in the system. The messenger provides ordered and reliable @@ -1042,7 +1039,7 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor, /* Move on to the next page */ BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head)); - cursor->page = list_entry_next(cursor->page, lru); + cursor->page = list_next_entry(cursor->page, lru); cursor->last_piece = cursor->resid <= PAGE_SIZE; return true; @@ -1166,7 +1163,7 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, if (!cursor->resid && cursor->total_resid) { WARN_ON(!cursor->last_piece); BUG_ON(list_is_last(&cursor->data->links, cursor->data_head)); - cursor->data = list_entry_next(cursor->data, links); + cursor->data = list_next_entry(cursor->data, links); __ceph_msg_data_cursor_init(cursor); new_piece = true; } -- cgit v1.2.3 From 10bcee149f62e7c5122f79aefc30d610b413280b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 18 Dec 2015 23:33:30 +0800 Subject: libceph: use list_for_each_entry_safe Use list_for_each_entry_safe() instead of list_for_each_safe() to simplify the code. Signed-off-by: Geliang Tang [idryomov@gmail.com: nuke call to list_splice_init() as well] Signed-off-by: Ilya Dryomov --- net/ceph/messenger.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index b1d1489543b1..de3eb19a6968 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -3358,9 +3358,7 @@ static void ceph_msg_free(struct ceph_msg *m) static void ceph_msg_release(struct kref *kref) { struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); - LIST_HEAD(data); - struct list_head *links; - struct list_head *next; + struct ceph_msg_data *data, *next; dout("%s %p\n", __func__, m); WARN_ON(!list_empty(&m->list_head)); @@ -3373,12 +3371,8 @@ static void ceph_msg_release(struct kref *kref) m->middle = NULL; } - list_splice_init(&m->data, &data); - list_for_each_safe(links, next, &data) { - struct ceph_msg_data *data; - - data = list_entry(links, struct ceph_msg_data, links); - list_del_init(links); + list_for_each_entry_safe(data, next, &m->data, links) { + list_del_init(&data->links); ceph_msg_data_destroy(data); } m->data_length = 0; -- cgit v1.2.3 From 67645d7619738e51c668ca69f097cb90b5470422 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 28 Dec 2015 13:18:34 +0300 Subject: libceph: fix ceph_msg_revoke() There are a number of problems with revoking a "was sending" message: (1) We never make any attempt to revoke data - only kvecs contibute to con->out_skip. However, once the header (envelope) is written to the socket, our peer learns data_len and sets itself to expect at least data_len bytes to follow front or front+middle. If ceph_msg_revoke() is called while the messenger is sending message's data portion, anything we send after that call is counted by the OSD towards the now revoked message's data portion. The effects vary, the most common one is the eventual hang - higher layers get stuck waiting for the reply to the message that was sent out after ceph_msg_revoke() returned and treated by the OSD as a bunch of data bytes. This is what Matt ran into. (2) Flat out zeroing con->out_kvec_bytes worth of bytes to handle kvecs is wrong. If ceph_msg_revoke() is called before the tag is sent out or while the messenger is sending the header, we will get a connection reset, either due to a bad tag (0 is not a valid tag) or a bad header CRC, which kind of defeats the purpose of revoke. Currently the kernel client refuses to work with header CRCs disabled, but that will likely change in the future, making this even worse. (3) con->out_skip is not reset on connection reset, leading to one or more spurious connection resets if we happen to get a real one between con->out_skip is set in ceph_msg_revoke() and before it's cleared in write_partial_skip(). Fixing (1) and (3) is trivial. The idea behind fixing (2) is to never zero the tag or the header, i.e. send out tag+header regardless of when ceph_msg_revoke() is called. That way the header is always correct, no unnecessary resets are induced and revoke stands ready for disabled CRCs. Since ceph_msg_revoke() rips out con->out_msg, introduce a new "message out temp" and copy the header into it before sending. Cc: stable@vger.kernel.org # 4.0+ Reported-by: Matt Conner Signed-off-by: Ilya Dryomov Tested-by: Matt Conner Reviewed-by: Sage Weil --- net/ceph/messenger.c | 76 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 58 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index de3eb19a6968..3850d1a5bd7c 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -669,6 +669,8 @@ static void reset_connection(struct ceph_connection *con) } con->in_seq = 0; con->in_seq_acked = 0; + + con->out_skip = 0; } /* @@ -768,6 +770,8 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt) static void con_out_kvec_reset(struct ceph_connection *con) { + BUG_ON(con->out_skip); + con->out_kvec_left = 0; con->out_kvec_bytes = 0; con->out_kvec_cur = &con->out_kvec[0]; @@ -776,9 +780,9 @@ static void con_out_kvec_reset(struct ceph_connection *con) static void con_out_kvec_add(struct ceph_connection *con, size_t size, void *data) { - int index; + int index = con->out_kvec_left; - index = con->out_kvec_left; + BUG_ON(con->out_skip); BUG_ON(index >= ARRAY_SIZE(con->out_kvec)); con->out_kvec[index].iov_len = size; @@ -787,6 +791,27 @@ static void con_out_kvec_add(struct ceph_connection *con, con->out_kvec_bytes += size; } +/* + * Chop off a kvec from the end. Return residual number of bytes for + * that kvec, i.e. how many bytes would have been written if the kvec + * hadn't been nuked. + */ +static int con_out_kvec_skip(struct ceph_connection *con) +{ + int off = con->out_kvec_cur - con->out_kvec; + int skip = 0; + + if (con->out_kvec_bytes > 0) { + skip = con->out_kvec[off + con->out_kvec_left - 1].iov_len; + BUG_ON(con->out_kvec_bytes < skip); + BUG_ON(!con->out_kvec_left); + con->out_kvec_bytes -= skip; + con->out_kvec_left--; + } + + return skip; +} + #ifdef CONFIG_BLOCK /* @@ -1194,7 +1219,6 @@ static void prepare_write_message_footer(struct ceph_connection *con) m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; dout("prepare_write_message_footer %p\n", con); - con->out_kvec_is_msg = true; con->out_kvec[v].iov_base = &m->footer; if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { if (con->ops->sign_message) @@ -1222,7 +1246,6 @@ static void prepare_write_message(struct ceph_connection *con) u32 crc; con_out_kvec_reset(con); - con->out_kvec_is_msg = true; con->out_msg_done = false; /* Sneak an ack in there first? If we can get it into the same @@ -1262,18 +1285,19 @@ static void prepare_write_message(struct ceph_connection *con) /* tag + hdr + front + middle */ con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); - con_out_kvec_add(con, sizeof (m->hdr), &m->hdr); + con_out_kvec_add(con, sizeof(con->out_hdr), &con->out_hdr); con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); if (m->middle) con_out_kvec_add(con, m->middle->vec.iov_len, m->middle->vec.iov_base); - /* fill in crc (except data pages), footer */ + /* fill in hdr crc and finalize hdr */ crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); con->out_msg->hdr.crc = cpu_to_le32(crc); - con->out_msg->footer.flags = 0; + memcpy(&con->out_hdr, &con->out_msg->hdr, sizeof(con->out_hdr)); + /* fill in front and middle crc, footer */ crc = crc32c(0, m->front.iov_base, m->front.iov_len); con->out_msg->footer.front_crc = cpu_to_le32(crc); if (m->middle) { @@ -1285,6 +1309,7 @@ static void prepare_write_message(struct ceph_connection *con) dout("%s front_crc %u middle_crc %u\n", __func__, le32_to_cpu(con->out_msg->footer.front_crc), le32_to_cpu(con->out_msg->footer.middle_crc)); + con->out_msg->footer.flags = 0; /* is there a data payload? */ con->out_msg->footer.data_crc = 0; @@ -1489,7 +1514,6 @@ static int write_partial_kvec(struct ceph_connection *con) } } con->out_kvec_left = 0; - con->out_kvec_is_msg = false; ret = 1; out: dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con, @@ -1581,6 +1605,7 @@ static int write_partial_skip(struct ceph_connection *con) { int ret; + dout("%s %p %d left\n", __func__, con, con->out_skip); while (con->out_skip > 0) { size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE); @@ -2503,13 +2528,13 @@ more: more_kvec: /* kvec data queued? */ - if (con->out_skip) { - ret = write_partial_skip(con); + if (con->out_kvec_left) { + ret = write_partial_kvec(con); if (ret <= 0) goto out; } - if (con->out_kvec_left) { - ret = write_partial_kvec(con); + if (con->out_skip) { + ret = write_partial_skip(con); if (ret <= 0) goto out; } @@ -3047,16 +3072,31 @@ void ceph_msg_revoke(struct ceph_msg *msg) ceph_msg_put(msg); } if (con->out_msg == msg) { - dout("%s %p msg %p - was sending\n", __func__, con, msg); - con->out_msg = NULL; - if (con->out_kvec_is_msg) { - con->out_skip = con->out_kvec_bytes; - con->out_kvec_is_msg = false; + BUG_ON(con->out_skip); + /* footer */ + if (con->out_msg_done) { + con->out_skip += con_out_kvec_skip(con); + } else { + BUG_ON(!msg->data_length); + if (con->peer_features & CEPH_FEATURE_MSG_AUTH) + con->out_skip += sizeof(msg->footer); + else + con->out_skip += sizeof(msg->old_footer); } + /* data, middle, front */ + if (msg->data_length) + con->out_skip += msg->cursor.total_resid; + if (msg->middle) + con->out_skip += con_out_kvec_skip(con); + con->out_skip += con_out_kvec_skip(con); + + dout("%s %p msg %p - was sending, will write %d skip %d\n", + __func__, con, msg, con->out_kvec_bytes, con->out_skip); msg->hdr.seq = 0; - + con->out_msg = NULL; ceph_msg_put(msg); } + mutex_unlock(&con->mutex); } -- cgit v1.2.3 From f6330cc1f04b7dcb84b572d402cdacf7e275a022 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 13 Jan 2016 14:32:57 +0100 Subject: libceph: clear messenger auth_retry flag if we fault Commit 20e55c4cc758 ("libceph: clear messenger auth_retry flag when we authenticate") got us only half way there. We clear the flag if the second attempt succeeds, but it also needs to be cleared if that attempt fails, to allow for the exponential backoff to kick in. Otherwise, if ->should_authenticate() thinks our keys are valid, we will busy loop, incrementing auth_retry to no avail: process_connect ffff880079a63830 got BADAUTHORIZER attempt 1 process_connect ffff880079a63830 got BADAUTHORIZER attempt 2 process_connect ffff880079a63830 got BADAUTHORIZER attempt 3 process_connect ffff880079a63830 got BADAUTHORIZER attempt 4 process_connect ffff880079a63830 got BADAUTHORIZER attempt 5 ... Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/messenger.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 3850d1a5bd7c..9cfedf565f5b 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2827,13 +2827,17 @@ static bool con_backoff(struct ceph_connection *con) static void con_fault_finish(struct ceph_connection *con) { + dout("%s %p\n", __func__, con); + /* * in case we faulted due to authentication, invalidate our * current tickets so that we can get new ones. */ - if (con->auth_retry && con->ops->invalidate_authorizer) { - dout("calling invalidate_authorizer()\n"); - con->ops->invalidate_authorizer(con); + if (con->auth_retry) { + dout("auth_retry %d, invalidating\n", con->auth_retry); + if (con->ops->invalidate_authorizer) + con->ops->invalidate_authorizer(con); + con->auth_retry = 0; } if (con->ops->fault) -- cgit v1.2.3 From 6abe097db59e1a5af7f082709f38bd95c54ccca1 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 14 Jan 2016 16:35:35 +0100 Subject: libceph: fix authorizer invalidation, take 2 Back in 2013, commit 4b8e8b5d78b8 ("libceph: fix authorizer invalidation") tried to fix authorizer invalidation issues by clearing validity field. However, nothing ever consults this field, so it doesn't force us to request any new secrets in any way and therefore we never get out of the exponential backoff mode: [ 129.973812] libceph: osd2 192.168.122.1:6810 connect authorization failure [ 130.706785] libceph: osd2 192.168.122.1:6810 connect authorization failure [ 131.710088] libceph: osd2 192.168.122.1:6810 connect authorization failure [ 133.708321] libceph: osd2 192.168.122.1:6810 connect authorization failure [ 137.706598] libceph: osd2 192.168.122.1:6810 connect authorization failure ... AFAICT this was the case at the time 4b8e8b5d78b8 was merged, too. Using timespec solely as a bool isn't nice, so introduce a new have_key flag, specifically for this purpose. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/auth_x.c | 27 ++++++++++++++++++++++----- net/ceph/auth_x.h | 1 + 2 files changed, 23 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 10d87753ed87..ab080bb18254 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -237,6 +237,7 @@ static int process_one_ticket(struct ceph_auth_client *ac, th->secret_id = new_secret_id; th->expires = new_expires; th->renew_after = new_renew_after; + th->have_key = true; dout(" got ticket service %d (%s) secret_id %lld len %d\n", type, ceph_entity_type_name(type), th->secret_id, (int)th->ticket_blob->vec.iov_len); @@ -384,6 +385,24 @@ bad: return -ERANGE; } +static bool need_key(struct ceph_x_ticket_handler *th) +{ + if (!th->have_key) + return true; + + return get_seconds() >= th->renew_after; +} + +static bool have_key(struct ceph_x_ticket_handler *th) +{ + if (th->have_key) { + if (get_seconds() >= th->expires) + th->have_key = false; + } + + return th->have_key; +} + static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed) { int want = ac->want_keys; @@ -402,20 +421,18 @@ static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed) continue; th = get_ticket_handler(ac, service); - if (IS_ERR(th)) { *pneed |= service; continue; } - if (get_seconds() >= th->renew_after) + if (need_key(th)) *pneed |= service; - if (get_seconds() >= th->expires) + if (!have_key(th)) xi->have_keys &= ~service; } } - static int ceph_x_build_request(struct ceph_auth_client *ac, void *buf, void *end) { @@ -674,7 +691,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, th = get_ticket_handler(ac, peer_type); if (!IS_ERR(th)) - memset(&th->validity, 0, sizeof(th->validity)); + th->have_key = false; } static int calcu_signature(struct ceph_x_authorizer *au, diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h index e8b7c6917d47..5334b9b159c5 100644 --- a/net/ceph/auth_x.h +++ b/net/ceph/auth_x.h @@ -17,6 +17,7 @@ struct ceph_x_ticket_handler { struct ceph_crypto_key session_key; struct ceph_timespec validity; + bool have_key; u64 secret_id; struct ceph_buffer *ticket_blob; -- cgit v1.2.3 From 187d131dd983fb1ab1c5d0d9ee98ab6511f252cd Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 14 Jan 2016 17:31:51 +0100 Subject: libceph: invalidate AUTH in addition to a service ticket If we fault due to authentication, we invalidate the service ticket we have and request a new one - the idea being that if a service rejected our authorizer, it must have expired, despite mon_client's attempts at periodic renewal. (The other possibility is that our ticket is too new and the service hasn't gotten it yet, in which case invalidating isn't necessary but doesn't hurt.) Invalidating just the service ticket is not enough, though. If we assume a failure on mon_client's part to renew a service ticket, we have to assume the same for the AUTH ticket. If our AUTH ticket is bad, we won't get any service tickets no matter how hard we try, so invalidate AUTH ticket along with the service ticket. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/auth_x.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index ab080bb18254..05e9fc21d460 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -684,8 +684,7 @@ static void ceph_x_destroy(struct ceph_auth_client *ac) ac->private = NULL; } -static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, - int peer_type) +static void invalidate_ticket(struct ceph_auth_client *ac, int peer_type) { struct ceph_x_ticket_handler *th; @@ -694,6 +693,19 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, th->have_key = false; } +static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, + int peer_type) +{ + /* + * We are to invalidate a service ticket in the hopes of + * getting a new, hopefully more valid, one. But, we won't get + * it unless our AUTH ticket is good, so invalidate AUTH ticket + * as well, just in case. + */ + invalidate_ticket(ac, peer_type); + invalidate_ticket(ac, CEPH_ENTITY_TYPE_AUTH); +} + static int calcu_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg, __le64 *sig) { -- cgit v1.2.3 From f6cdb2928df87c2277ec691101e5843db6cb96ea Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 15 Jan 2016 13:20:01 +0100 Subject: libceph: kill off ceph_x_ticket_handler::validity With it gone, no need to preserve ceph_timespec in process_one_ticket() either. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/auth_x.c | 6 ++---- net/ceph/auth_x.h | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 05e9fc21d460..9e43a315e662 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -152,7 +152,6 @@ static int process_one_ticket(struct ceph_auth_client *ac, void *ticket_buf = NULL; void *tp, *tpend; void **ptp; - struct ceph_timespec new_validity; struct ceph_crypto_key new_session_key; struct ceph_buffer *new_ticket_blob; unsigned long new_expires, new_renew_after; @@ -193,8 +192,8 @@ static int process_one_ticket(struct ceph_auth_client *ac, if (ret) goto out; - ceph_decode_copy(&dp, &new_validity, sizeof(new_validity)); - ceph_decode_timespec(&validity, &new_validity); + ceph_decode_timespec(&validity, dp); + dp += sizeof(struct ceph_timespec); new_expires = get_seconds() + validity.tv_sec; new_renew_after = new_expires - (validity.tv_sec / 4); dout(" expires=%lu renew_after=%lu\n", new_expires, @@ -233,7 +232,6 @@ static int process_one_ticket(struct ceph_auth_client *ac, ceph_buffer_put(th->ticket_blob); th->session_key = new_session_key; th->ticket_blob = new_ticket_blob; - th->validity = new_validity; th->secret_id = new_secret_id; th->expires = new_expires; th->renew_after = new_renew_after; diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h index 5334b9b159c5..40b1a3cf7397 100644 --- a/net/ceph/auth_x.h +++ b/net/ceph/auth_x.h @@ -16,7 +16,6 @@ struct ceph_x_ticket_handler { unsigned int service; struct ceph_crypto_key session_key; - struct ceph_timespec validity; bool have_key; u64 secret_id; -- cgit v1.2.3 From 7e01726a6853e032536ed7e75c1e1232872ff318 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 18 Jan 2016 16:53:31 +0100 Subject: libceph: remove outdated comment MClientMount{,Ack} are long gone. The receipt of bare monmap doesn't actually indicate a mount success as we are yet to authenticate at that point in time. Signed-off-by: Ilya Dryomov --- net/ceph/mon_client.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index edda01626a45..de85dddc3dc0 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -364,10 +364,6 @@ static bool have_debugfs_info(struct ceph_mon_client *monc) return monc->client->have_fsid && monc->auth->global_id > 0; } -/* - * The monitor responds with mount ack indicate mount success. The - * included client ticket allows the client to talk to MDSs and OSDs. - */ static void ceph_monc_handle_map(struct ceph_mon_client *monc, struct ceph_msg *msg) { -- cgit v1.2.3