From aa91647c898d62e869fcf35e977ab3c533be8fc1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 24 May 2010 11:15:51 -0700 Subject: ceph: make mds requests killable, not interruptible The underlying problem is that many mds requests can't be restarted. For example, a restarted create() would return -EEXIST if the original request succeeds. However, we do not want a hung MDS to hang the client too. So, use the _killable wait_for_completion variants to abort on SIGKILL but nothing else. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 885aa5710cfd..08413c8a85b2 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1768,12 +1768,12 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, mutex_unlock(&mdsc->mutex); dout("do_request waiting\n"); if (req->r_timeout) { - err = (long)wait_for_completion_interruptible_timeout( + err = (long)wait_for_completion_killable_timeout( &req->r_completion, req->r_timeout); if (err == 0) err = -EIO; } else { - err = wait_for_completion_interruptible(&req->r_completion); + err = wait_for_completion_killable(&req->r_completion); } dout("do_request waited, got %d\n", err); mutex_lock(&mdsc->mutex); -- cgit v1.2.3 From e95e9a7ae4c1e7655a0438579f891b3c60178d77 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 25 May 2010 09:24:42 -0700 Subject: ceph: avoid possible null dereference ac->ops may be null; use protocol id in error message instead. Reported-by: Dan Carpenter Signed-off-by: Sage Weil --- fs/ceph/auth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index 9f46de2ba7a7..01c4a1cd53c0 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c @@ -217,8 +217,8 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, if (ac->protocol != protocol) { ret = ceph_auth_init_protocol(ac, protocol); if (ret) { - pr_err("error %d on auth method %s init\n", - ret, ac->ops->name); + pr_err("error %d on auth protocol %d init\n", + ret, protocol); goto out; } } -- cgit v1.2.3 From 984c76908efd3c6795aa03dff16a8fc3496af99f Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Sun, 23 May 2010 21:47:58 +0200 Subject: ceph: removed duplicated #includes fs/ceph/auth.c: linux/slab.h is included more than once. fs/ceph/super.h: linux/slab.h is included more than once. Acked-by: Christoph Lameter Signed-off-by: Andrea Gelmini Signed-off-by: Sage Weil --- fs/ceph/auth.c | 1 - fs/ceph/super.h | 1 - 2 files changed, 2 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index 01c4a1cd53c0..a28ebdf465d7 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c @@ -1,7 +1,6 @@ #include "ceph_debug.h" #include -#include #include #include diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 3725c9ee9d08..d84a2527046c 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 09c4d6a7d40dd26c1b35674c582382b7ea551368 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 25 May 2010 15:38:06 -0700 Subject: ceph: do not resend mon requests on auth ticket renewal We only want to send pending mon requests when we successfully authenticate. If we are already authenticated, like when we renew our ticket, there is no need to resend pending requests. Signed-off-by: Sage Weil --- fs/ceph/mon_client.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index f6510a476e7e..21c62e9b7d1d 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -704,8 +704,11 @@ static void handle_auth_reply(struct ceph_mon_client *monc, struct ceph_msg *msg) { int ret; + int was_auth = 0; mutex_lock(&monc->mutex); + if (monc->auth->ops) + was_auth = monc->auth->ops->is_authenticated(monc->auth); monc->pending_auth = 0; ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base, msg->front.iov_len, @@ -716,7 +719,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc, wake_up(&monc->client->auth_wq); } else if (ret > 0) { __send_prepared_auth_request(monc, ret); - } else if (monc->auth->ops->is_authenticated(monc->auth)) { + } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) { dout("authenticated, starting session\n"); monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT; -- cgit v1.2.3 From a41359fa355e7b450c610ed8e913d5d75c3c9c3b Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 25 May 2010 15:39:06 -0700 Subject: ceph: renew auth tickets before they expire We were only requesting renewal after our tickets expire; do so before that. Most of the low-level logic for this was already there; just use it. Signed-off-by: Sage Weil --- fs/ceph/auth.c | 2 +- fs/ceph/auth.h | 6 ++++++ fs/ceph/auth_none.c | 8 ++++++++ fs/ceph/auth_x.c | 12 ++++++++++++ 4 files changed, 27 insertions(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index a28ebdf465d7..89490beaf537 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c @@ -246,7 +246,7 @@ int ceph_build_auth(struct ceph_auth_client *ac, if (!ac->protocol) return ceph_auth_build_hello(ac, msg_buf, msg_len); BUG_ON(!ac->ops); - if (!ac->ops->is_authenticated(ac)) + if (ac->ops->should_authenticate(ac)) return ceph_build_auth_request(ac, msg_buf, msg_len); return 0; } diff --git a/fs/ceph/auth.h b/fs/ceph/auth.h index 4429a707c021..d38a2fb4a137 100644 --- a/fs/ceph/auth.h +++ b/fs/ceph/auth.h @@ -23,6 +23,12 @@ struct ceph_auth_client_ops { */ int (*is_authenticated)(struct ceph_auth_client *ac); + /* + * true if we should (re)authenticate, e.g., when our tickets + * are getting old and crusty. + */ + int (*should_authenticate)(struct ceph_auth_client *ac); + /* * build requests and process replies during monitor * handshake. if handle_reply returns -EAGAIN, we build diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c index 24407c119291..ad1dc21286c7 100644 --- a/fs/ceph/auth_none.c +++ b/fs/ceph/auth_none.c @@ -31,6 +31,13 @@ static int is_authenticated(struct ceph_auth_client *ac) return !xi->starting; } +static int should_authenticate(struct ceph_auth_client *ac) +{ + struct ceph_auth_none_info *xi = ac->private; + + return xi->starting; +} + /* * the generic auth code decode the global_id, and we carry no actual * authenticate state, so nothing happens here. @@ -98,6 +105,7 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = { .reset = reset, .destroy = destroy, .is_authenticated = is_authenticated, + .should_authenticate = should_authenticate, .handle_reply = handle_reply, .create_authorizer = ceph_auth_none_create_authorizer, .destroy_authorizer = ceph_auth_none_destroy_authorizer, diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index 7b206231566d..83d4d2785ffe 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c @@ -27,6 +27,17 @@ static int ceph_x_is_authenticated(struct ceph_auth_client *ac) return (ac->want_keys & xi->have_keys) == ac->want_keys; } +static int ceph_x_should_authenticate(struct ceph_auth_client *ac) +{ + struct ceph_x_info *xi = ac->private; + int need; + + ceph_x_validate_tickets(ac, &need); + dout("ceph_x_should_authenticate want=%d need=%d have=%d\n", + ac->want_keys, need, xi->have_keys); + return need != 0; +} + static int ceph_x_encrypt_buflen(int ilen) { return sizeof(struct ceph_x_encrypt_header) + ilen + 16 + @@ -620,6 +631,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, static const struct ceph_auth_client_ops ceph_x_ops = { .name = "x", .is_authenticated = ceph_x_is_authenticated, + .should_authenticate = ceph_x_should_authenticate, .build_request = ceph_x_build_request, .handle_reply = ceph_x_handle_reply, .create_authorizer = ceph_x_create_authorizer, -- cgit v1.2.3 From 7e34bc524ecae3a04d8cc427ee76ddad826a937b Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 22 May 2010 12:01:14 +0200 Subject: fs/ceph: Use ERR_CAST Use ERR_CAST(x) rather than ERR_PTR(PTR_ERR(x)). The former makes more clear what is the purpose of the operation, which otherwise looks like a no-op. In the case of fs/ceph/inode.c, ERR_CAST is not needed, because the type of the returned value is the same as the type of the enclosing function. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ type T; T x; identifier f; @@ T f (...) { <+... - ERR_PTR(PTR_ERR(x)) + x ...+> } @@ expression x; @@ - ERR_PTR(PTR_ERR(x)) + ERR_CAST(x) // Signed-off-by: Julia Lawall Signed-off-by: Sage Weil --- fs/ceph/dir.c | 2 +- fs/ceph/export.c | 2 +- fs/ceph/file.c | 2 +- fs/ceph/inode.c | 2 +- fs/ceph/osdmap.c | 2 +- fs/ceph/super.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 4fd30900eff7..a86c1d5bf84f 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -587,7 +587,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP; req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS); if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); req->r_dentry = dget(dentry); req->r_num_caps = 2; /* we only need inode linkage */ diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 17447644d675..4480cb1c63e7 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -133,7 +133,7 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb, req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPHASH, USE_ANY_MDS); if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); req->r_ino1 = vino; req->r_ino2.ino = cfh->parent_ino; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 0611ec3698aa..f06f902367e1 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -230,7 +230,7 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, /* do the open */ req = prepare_open_request(dir->i_sb, flags, mode); if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); req->r_dentry = dget(dentry); req->r_num_caps = 2; if (flags & O_CREAT) { diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index a81b8b662c7b..226f5a50d362 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -69,7 +69,7 @@ struct inode *ceph_get_snapdir(struct inode *parent) BUG_ON(!S_ISDIR(parent->i_mode)); if (IS_ERR(inode)) - return ERR_PTR(PTR_ERR(inode)); + return inode; inode->i_mode = parent->i_mode; inode->i_uid = parent->i_uid; inode->i_gid = parent->i_gid; diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index cfdd8f4388b7..ddc656fb5c05 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c @@ -706,7 +706,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, len, *p, end); newcrush = crush_decode(*p, min(*p+len, end)); if (IS_ERR(newcrush)) - return ERR_PTR(PTR_ERR(newcrush)); + return ERR_CAST(newcrush); } /* new flags? */ diff --git a/fs/ceph/super.c b/fs/ceph/super.c index bac13898b943..9b46bb951e1f 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -738,7 +738,7 @@ static struct dentry *open_root_dentry(struct ceph_client *client, dout("open_root_inode opening '%s'\n", path); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); req->r_path1 = kstrdup(path, GFP_NOFS); req->r_ino1.ino = CEPH_INO_ROOT; req->r_ino1.snap = CEPH_NOSNAP; -- cgit v1.2.3 From dd1c9057366f329911180e9000e2b425f23fc287 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 25 May 2010 16:45:25 -0700 Subject: ceph: make lease code DN specific The lease code includes a mask in the CEPH_LOCK_* namespace, but that namespace is changing, and only one mask (formerly _DN == 1) is used, so hard code for that value for now. If we ever extend this code to handle leases over different data types we can extend it accordingly. Signed-off-by: Sage Weil --- fs/ceph/ceph_fs.h | 21 +++++++++++---------- fs/ceph/mds_client.c | 4 ++-- 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h index 3b9eeed097b3..2fa992eaf7da 100644 --- a/fs/ceph/ceph_fs.h +++ b/fs/ceph/ceph_fs.h @@ -265,16 +265,17 @@ extern const char *ceph_mds_state_name(int s); * - they also define the lock ordering by the MDS * - a few of these are internal to the mds */ -#define CEPH_LOCK_DN 1 -#define CEPH_LOCK_ISNAP 2 -#define CEPH_LOCK_IVERSION 4 /* mds internal */ -#define CEPH_LOCK_IFILE 8 /* mds internal */ -#define CEPH_LOCK_IAUTH 32 -#define CEPH_LOCK_ILINK 64 -#define CEPH_LOCK_IDFT 128 /* dir frag tree */ -#define CEPH_LOCK_INEST 256 /* mds internal */ -#define CEPH_LOCK_IXATTR 512 -#define CEPH_LOCK_INO 2048 /* immutable inode bits; not a lock */ +#define CEPH_LOCK_DVERSION 1 +#define CEPH_LOCK_DN 2 +#define CEPH_LOCK_ISNAP 16 +#define CEPH_LOCK_IVERSION 32 /* mds internal */ +#define CEPH_LOCK_IFILE 64 +#define CEPH_LOCK_IAUTH 128 +#define CEPH_LOCK_ILINK 256 +#define CEPH_LOCK_IDFT 512 /* dir frag tree */ +#define CEPH_LOCK_INEST 1024 /* mds internal */ +#define CEPH_LOCK_IXATTR 2048 +#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ /* client_session ops */ enum { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 08413c8a85b2..5a88b7bb3798 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2541,7 +2541,7 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, return; lease = msg->front.iov_base; lease->action = action; - lease->mask = cpu_to_le16(CEPH_LOCK_DN); + lease->mask = cpu_to_le16(1); lease->ino = cpu_to_le64(ceph_vino(inode).ino); lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap); lease->seq = cpu_to_le32(seq); @@ -2571,7 +2571,7 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode, BUG_ON(inode == NULL); BUG_ON(dentry == NULL); - BUG_ON(mask != CEPH_LOCK_DN); + BUG_ON(mask == 0); /* is dentry lease valid? */ spin_lock(&dentry->d_lock); -- cgit v1.2.3 From a922d38fd10d55d5033f10df15baf966e8f5b18c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 29 May 2010 09:41:23 -0700 Subject: ceph: close out mds, osd connections before stopping auth The auth module (part of the mon_client) is needed to free any ceph_authorizer(s) used by the mds and osd connections. Flush the msgr workqueue before stopping monc to ensure that the destroy_authorizer auth op is available when those connections are closed out. Signed-off-by: Sage Weil --- fs/ceph/messenger.c | 6 ++++++ fs/ceph/messenger.h | 1 + fs/ceph/super.c | 10 +++++++++- 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 60b74839ebec..64b8b1f7863d 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -120,6 +120,12 @@ void ceph_msgr_exit(void) destroy_workqueue(ceph_msgr_wq); } +void ceph_msgr_flush() +{ + flush_workqueue(ceph_msgr_wq); +} + + /* * socket callback functions */ diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index 00a9430b1ffc..76fbc957bc13 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h @@ -213,6 +213,7 @@ extern int ceph_parse_ips(const char *c, const char *end, extern int ceph_msgr_init(void); extern void ceph_msgr_exit(void); +extern void ceph_msgr_flush(void); extern struct ceph_messenger *ceph_messenger_create( struct ceph_entity_addr *myaddr); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 9b46bb951e1f..5cf6fba8b705 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -669,9 +669,17 @@ static void ceph_destroy_client(struct ceph_client *client) /* unmount */ ceph_mdsc_stop(&client->mdsc); - ceph_monc_stop(&client->monc); ceph_osdc_stop(&client->osdc); + /* + * make sure mds and osd connections close out before destroying + * the auth module, which is needed to free those connections' + * ceph_authorizers. + */ + ceph_msgr_flush(); + + ceph_monc_stop(&client->monc); + ceph_adjust_min_caps(-client->min_caps); ceph_debugfs_client_cleanup(client); -- cgit v1.2.3 From 79494d1b9b92259eb40ea6e939ba5aff4b8de5f1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 27 May 2010 14:15:49 -0700 Subject: ceph: fix leak of osd authorizer Release the ceph_authorizer when releasing osd state. Signed-off-by: Sage Weil --- fs/ceph/osd_client.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index afa7bb3895c4..d25b4add85b4 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -361,8 +361,13 @@ static void put_osd(struct ceph_osd *osd) { dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref), atomic_read(&osd->o_ref) - 1); - if (atomic_dec_and_test(&osd->o_ref)) + if (atomic_dec_and_test(&osd->o_ref)) { + struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth; + + if (osd->o_authorizer) + ac->ops->destroy_authorizer(ac, osd->o_authorizer); kfree(osd); + } } /* -- cgit v1.2.3 From 2a8e5e3637e2fc058798f5d3626f525729ffaaaf Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 28 May 2010 16:43:16 -0700 Subject: ceph: clean up on forwarded aborted mds request If an mds request is aborted (timeout, SIGKILL), it is left registered to keep our state in sync with the mds. If we get a forward notification, though, we know the request didn't succeed and we can unregister it safely. We were trying to resend it, but then bailing out (and not unregistering) in __do_request. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 5a88b7bb3798..b49f12822cbc 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2014,16 +2014,21 @@ static void handle_forward(struct ceph_mds_client *mdsc, mutex_lock(&mdsc->mutex); req = __lookup_request(mdsc, tid); if (!req) { - dout("forward %llu to mds%d - req dne\n", tid, next_mds); + dout("forward tid %llu to mds%d - req dne\n", tid, next_mds); goto out; /* dup reply? */ } - if (fwd_seq <= req->r_num_fwd) { - dout("forward %llu to mds%d - old seq %d <= %d\n", + if (req->r_aborted) { + dout("forward tid %llu aborted, unregistering\n", tid); + __unregister_request(mdsc, req); + } else if (fwd_seq <= req->r_num_fwd) { + dout("forward tid %llu to mds%d - old seq %d <= %d\n", tid, next_mds, req->r_num_fwd, fwd_seq); } else { /* resend. forward race not possible; mds would drop */ - dout("forward %llu to mds%d (we resend)\n", tid, next_mds); + dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds); + BUG_ON(req->r_err); + BUG_ON(req->r_got_result); req->r_num_fwd = fwd_seq; req->r_resend_mds = next_mds; put_request_session(req); -- cgit v1.2.3