summaryrefslogtreecommitdiffstats
path: root/net/ceph
diff options
context:
space:
mode:
authorIlya Dryomov <idryomov@gmail.com>2023-08-01 19:14:24 +0200
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2023-08-11 11:57:51 +0200
commit513bfdde8a3b58cdfac37083c38f7b0100f72ae6 (patch)
tree2995292d9a111495f68fb1f2558fd2f247ce765d /net/ceph
parent181274d2f3de26cbf5fd66cc6987672d9048cb30 (diff)
downloadlinux-stable-513bfdde8a3b58cdfac37083c38f7b0100f72ae6.tar.gz
linux-stable-513bfdde8a3b58cdfac37083c38f7b0100f72ae6.tar.bz2
linux-stable-513bfdde8a3b58cdfac37083c38f7b0100f72ae6.zip
libceph: fix potential hang in ceph_osdc_notify()
commit e6e2843230799230fc5deb8279728a7218b0d63c upstream. If the cluster becomes unavailable, ceph_osdc_notify() may hang even with osd_request_timeout option set because linger_notify_finish_wait() waits for MWatchNotify NOTIFY_COMPLETE message with no associated OSD request in flight -- it's completely asynchronous. Introduce an additional timeout, derived from the specified notify timeout. While at it, switch both waits to killable which is more correct. Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov <idryomov@gmail.com> Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn> Reviewed-by: Xiubo Li <xiubli@redhat.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'net/ceph')
-rw-r--r--net/ceph/osd_client.c20
1 files changed, 14 insertions, 6 deletions
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 1e9fab79e245..d594cd501861 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -3330,17 +3330,24 @@ static int linger_reg_commit_wait(struct ceph_osd_linger_request *lreq)
int ret;
dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
- ret = wait_for_completion_interruptible(&lreq->reg_commit_wait);
+ ret = wait_for_completion_killable(&lreq->reg_commit_wait);
return ret ?: lreq->reg_commit_error;
}
-static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq)
+static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq,
+ unsigned long timeout)
{
- int ret;
+ long left;
dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
- ret = wait_for_completion_interruptible(&lreq->notify_finish_wait);
- return ret ?: lreq->notify_finish_error;
+ left = wait_for_completion_killable_timeout(&lreq->notify_finish_wait,
+ ceph_timeout_jiffies(timeout));
+ if (left <= 0)
+ left = left ?: -ETIMEDOUT;
+ else
+ left = lreq->notify_finish_error; /* completed */
+
+ return left;
}
/*
@@ -4888,7 +4895,8 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
linger_submit(lreq);
ret = linger_reg_commit_wait(lreq);
if (!ret)
- ret = linger_notify_finish_wait(lreq);
+ ret = linger_notify_finish_wait(lreq,
+ msecs_to_jiffies(2 * timeout * MSEC_PER_SEC));
else
dout("lreq %p failed to initiate notify %d\n", lreq, ret);