From 7f47f7f3b3c33fd2b4a662cd43cd1af96e1a297e Mon Sep 17 00:00:00 2001
From: Niels Dossche
Date: Tue, 15 Mar 2022 16:29:47 +0100
Subject: ceph: get snap_rwsem read lock in handle_cap_export for ceph_add_cap

ceph_add_cap says in its function documentation that the caller should
hold the read lock on the session snap_rwsem. Furthermore, not only does
ceph_add_cap need that lock: when it calls ceph_lookup_snap_realm, it
eventually calls ceph_get_snap_realm, which asserts via lockdep that
snap_rwsem must be held. handle_cap_export calls ceph_add_cap without
mdsc->snap_rwsem held. Since ceph_get_snap_realm and ceph_add_cap both
need the lock, the common place to acquire it is in handle_cap_export.

Signed-off-by: Niels Dossche
Reviewed-by: Xiubo Li
Reviewed-by: Jeff Layton
Signed-off-by: Ilya Dryomov
---
 fs/ceph/caps.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs/ceph')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index f1ad6884d4da..31204cdc2fbf 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3870,6 +3870,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
     dout("handle_cap_export inode %p ci %p mds%d mseq %d target %d\n",
          inode, ci, mds, mseq, target);
 retry:
+    down_read(&mdsc->snap_rwsem);
     spin_lock(&ci->i_ceph_lock);
     cap = __get_cap_for_mds(ci, mds);
     if (!cap || cap->cap_id != le64_to_cpu(ex->cap_id))
@@ -3933,6 +3934,7 @@ retry:
     }
 
     spin_unlock(&ci->i_ceph_lock);
+    up_read(&mdsc->snap_rwsem);
     mutex_unlock(&session->s_mutex);
 
     /* open target session */
@@ -3958,6 +3960,7 @@ retry:
 
 out_unlock:
     spin_unlock(&ci->i_ceph_lock);
+    up_read(&mdsc->snap_rwsem);
     mutex_unlock(&session->s_mutex);
     if (tsession) {
         mutex_unlock(&tsession->s_mutex);
--
cgit v1.2.3

From 396ea1681892211dd3fbc1a58bfc2c2c971433e3 Mon Sep 17 00:00:00 2001
From: Xiubo Li
Date: Tue, 22 Mar 2022 11:03:13 +0800
Subject: ceph: remove incorrect session state check

Once the session is opened, s->s_ttl will be set. When a new mdsmap is
received and the MDS map has changed, some sessions may be closed and
new ones opened, so sessions can end up in the CLOSING state even
without unmounting.

URL: https://tracker.ceph.com/issues/54979
Signed-off-by: Xiubo Li
Reviewed-by: Jeff Layton
Signed-off-by: Ilya Dryomov
---
 fs/ceph/mds_client.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'fs/ceph')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index fa38c013126d..00c3de177dd6 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4434,8 +4434,6 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
 
 bool check_session_state(struct ceph_mds_session *s)
 {
-    struct ceph_fs_client *fsc = s->s_mdsc->fsc;
-
     switch (s->s_state) {
     case CEPH_MDS_SESSION_OPEN:
         if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
@@ -4444,10 +4442,6 @@ bool check_session_state(struct ceph_mds_session *s)
         }
         break;
     case CEPH_MDS_SESSION_CLOSING:
-        /* Should never reach this when not force unmounting */
-        WARN_ON_ONCE(s->s_ttl &&
-                     READ_ONCE(fsc->mount_state) != CEPH_MOUNT_SHUTDOWN);
-        fallthrough;
     case CEPH_MDS_SESSION_NEW:
     case CEPH_MDS_SESSION_RESTARTING:
     case CEPH_MDS_SESSION_CLOSED:
--
cgit v1.2.3
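
As a side note on the first patch above: the caller-side locking pattern it
establishes in handle_cap_export boils down to the fragment below. This is a
simplified sketch for illustration only, not the literal kernel code; the
retry loop, the cap lookup and the error paths are omitted, and the
ceph_add_cap() call is elided to a comment.

    /*
     * mdsc->snap_rwsem is an rw_semaphore and ci->i_ceph_lock is a spinlock.
     * Taking snap_rwsem for read before the spinlock means that
     * ceph_add_cap() -> ceph_lookup_snap_realm() -> ceph_get_snap_realm()
     * runs with snap_rwsem held, which is what ceph_add_cap()'s
     * documentation and ceph_get_snap_realm()'s lockdep annotation expect.
     */
    down_read(&mdsc->snap_rwsem);
    spin_lock(&ci->i_ceph_lock);

    /* ... look up the exported cap and call ceph_add_cap() here ... */

    spin_unlock(&ci->i_ceph_lock);
    up_read(&mdsc->snap_rwsem);

Note that the read side of an rw_semaphore may sleep, so it has to be taken
before the spinlock, never while the spinlock is held.
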
From 7acae6183cf37c48b8da48bbbdb78820fb3913f3 Mon Sep 17 00:00:00 2001
From: Xiubo Li
Date: Thu, 14 Apr 2022 09:07:21 +0800
Subject: ceph: fix possible NULL pointer dereference for req->r_session

The request is inserted into ci->i_unsafe_dirops before req->r_session
is assigned, so it's possible to hit a NULL pointer dereference here.

Cc: stable@vger.kernel.org
URL: https://tracker.ceph.com/issues/55327
Signed-off-by: Xiubo Li
Reviewed-by: Jeff Layton
Tested-by: Aaron Tomlin
Signed-off-by: Ilya Dryomov
---
 fs/ceph/caps.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs/ceph')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 31204cdc2fbf..5c14ef04e474 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2274,6 +2274,8 @@ retry:
         list_for_each_entry(req, &ci->i_unsafe_dirops,
                             r_unsafe_dir_item) {
             s = req->r_session;
+            if (!s)
+                continue;
             if (unlikely(s->s_mds >= max_sessions)) {
                 spin_unlock(&ci->i_unsafe_lock);
                 for (i = 0; i < max_sessions; i++) {
@@ -2294,6 +2296,8 @@ retry:
         list_for_each_entry(req, &ci->i_unsafe_iops,
                             r_unsafe_target_item) {
             s = req->r_session;
+            if (!s)
+                continue;
             if (unlikely(s->s_mds >= max_sessions)) {
                 spin_unlock(&ci->i_unsafe_lock);
                 for (i = 0; i < max_sessions; i++) {
--
cgit v1.2.3

From 620239d9a32e9fe27c9204ec11e40058671aeeb6 Mon Sep 17 00:00:00 2001
From: Jeff Layton
Date: Mon, 25 Apr 2022 15:54:27 -0400
Subject: ceph: fix setting of xattrs on async created inodes

Currently when we create a file, we spin up an xattr buffer to send
along with the create request. If we end up doing an async create,
however, we currently pass down a zero-length xattr buffer.

Fix the code to send down the xattr buffer in req->r_pagelist. If the
xattrs span more than a page, however, give up and don't attempt an
async create.

Cc: stable@vger.kernel.org
URL: https://bugzilla.redhat.com/show_bug.cgi?id=2063929
Fixes: 9a8d03ca2e2c ("ceph: attempt to do async create when possible")
Reported-by: John Fortin
Reported-by: Sri Ramanujam
Signed-off-by: Jeff Layton
Reviewed-by: Xiubo Li
Signed-off-by: Ilya Dryomov
---
 fs/ceph/file.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'fs/ceph')

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 6c9e837aa1d3..8c8226c0feac 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -629,9 +629,15 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
     iinfo.change_attr = 1;
     ceph_encode_timespec64(&iinfo.btime, &now);
 
-    iinfo.xattr_len = ARRAY_SIZE(xattr_buf);
-    iinfo.xattr_data = xattr_buf;
-    memset(iinfo.xattr_data, 0, iinfo.xattr_len);
+    if (req->r_pagelist) {
+        iinfo.xattr_len = req->r_pagelist->length;
+        iinfo.xattr_data = req->r_pagelist->mapped_tail;
+    } else {
+        /* fake it */
+        iinfo.xattr_len = ARRAY_SIZE(xattr_buf);
+        iinfo.xattr_data = xattr_buf;
+        memset(iinfo.xattr_data, 0, iinfo.xattr_len);
+    }
 
     in.ino = cpu_to_le64(vino.ino);
     in.snapid = cpu_to_le64(CEPH_NOSNAP);
@@ -743,6 +749,10 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
         err = ceph_security_init_secctx(dentry, mode, &as_ctx);
         if (err < 0)
             goto out_ctx;
+        /* Async create can't handle more than a page of xattrs */
+        if (as_ctx.pagelist &&
+            !list_is_singular(&as_ctx.pagelist->head))
+            try_async = false;
     } else if (!d_in_lookup(dentry)) {
         /* If it's not being looked up, it's negative */
         return -ENOENT;
--
cgit v1.2.3
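
To make the shape of the async-create fix above easier to follow, its two
halves can be sketched as below. This is a condensed illustration rather than
the complete functions; surrounding setup and error handling are omitted, and
the rationale in the first comment is an inference from the code, not wording
taken from the patch itself.

    /*
     * In ceph_atomic_open(): the xattr blob is carried in a ceph_pagelist,
     * but ceph_finish_async_create() only hands the MDS the single page
     * mapped at the tail of that pagelist, so anything larger than one
     * page falls back to a synchronous create.
     */
    if (as_ctx.pagelist && !list_is_singular(&as_ctx.pagelist->head))
        try_async = false;

    /*
     * In ceph_finish_async_create(): if an xattr pagelist was queued on
     * the request, use it; otherwise keep the zeroed dummy buffer as
     * before.
     */
    if (req->r_pagelist) {
        iinfo.xattr_len = req->r_pagelist->length;
        iinfo.xattr_data = req->r_pagelist->mapped_tail;
    }
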
From 642d51fb0775a41dd6bb3d99b5a40c24df131c20 Mon Sep 17 00:00:00 2001
From: Xiubo Li
Date: Thu, 5 May 2022 18:53:09 +0800
Subject: ceph: check folio PG_private bit instead of folio->private

The pages in the file mapping may be reclaimed and reused by other
subsystems, and page->private may then be used as a flags field or
for something else entirely. If those pages are later brought back
into the page cache, page->private may not have been cleared as
expected. Check the PG_private bit instead of folio->private.

Cc: stable@vger.kernel.org
URL: https://tracker.ceph.com/issues/55421
Signed-off-by: Xiubo Li
Reviewed-by: Luis Henriques
Reviewed-by: Jeff Layton
Signed-off-by: Ilya Dryomov
---
 fs/ceph/addr.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'fs/ceph')

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index aa25bffd4823..b6edcf89a429 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -85,7 +85,7 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio)
     if (folio_test_dirty(folio)) {
         dout("%p dirty_folio %p idx %lu -- already dirty\n",
              mapping->host, folio, folio->index);
-        BUG_ON(!folio_get_private(folio));
+        VM_BUG_ON_FOLIO(!folio_test_private(folio), folio);
         return false;
     }
 
@@ -122,7 +122,7 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio)
      * Reference snap context in folio->private. Also set
      * PagePrivate so that we get invalidate_folio callback.
      */
-    BUG_ON(folio_get_private(folio));
+    VM_BUG_ON_FOLIO(folio_test_private(folio), folio);
     folio_attach_private(folio, snapc);
 
     return ceph_fscache_dirty_folio(mapping, folio);
@@ -150,7 +150,7 @@ static void ceph_invalidate_folio(struct folio *folio, size_t offset,
     }
 
     WARN_ON(!folio_test_locked(folio));
-    if (folio_get_private(folio)) {
+    if (folio_test_private(folio)) {
         dout("%p invalidate_folio idx %lu full dirty page\n",
              inode, folio->index);
 
@@ -729,8 +729,11 @@ static void writepages_finish(struct ceph_osd_request *req)
 
     /* clean all pages */
     for (i = 0; i < req->r_num_ops; i++) {
-        if (req->r_ops[i].op != CEPH_OSD_OP_WRITE)
+        if (req->r_ops[i].op != CEPH_OSD_OP_WRITE) {
+            pr_warn("%s incorrect op %d req %p index %d tid %llu\n",
+                    __func__, req->r_ops[i].op, req, i, req->r_tid);
             break;
+        }
 
         osd_data = osd_req_op_extent_osd_data(req, i);
         BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
--
cgit v1.2.3
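
The distinction the last patch relies on: folio_get_private() simply returns
whatever value happens to sit in folio->private, while folio_test_private()
tests the PG_private flag, which is only set when a page-cache user
explicitly attaches private data. A minimal sketch of the resulting pattern,
for illustration only and not a copy of the ceph code:

    /*
     * folio->private can hold a stale, non-NULL value left behind by a
     * previous user of the page, so testing the pointer is not a reliable
     * way to ask "does this folio carry our snap context?".  PG_private,
     * on the other hand, is set by folio_attach_private() and cleared by
     * folio_detach_private(), so it tracks ownership precisely.
     */
    if (folio_test_private(folio)) {
        struct ceph_snap_context *snapc = folio_detach_private(folio);

        ceph_put_snap_context(snapc);
    }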