diff options
Diffstat (limited to 'net/ceph/osd_client.c')
-rw-r--r-- | net/ceph/osd_client.c | 60 |
1 files changed, 38 insertions, 22 deletions
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ce310eee708d..733e46008b89 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -217,6 +217,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, INIT_LIST_HEAD(&req->r_unsafe_item); INIT_LIST_HEAD(&req->r_linger_item); INIT_LIST_HEAD(&req->r_linger_osd); + INIT_LIST_HEAD(&req->r_req_lru_item); req->r_flags = flags; WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); @@ -226,7 +227,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); else msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, - OSD_OPREPLY_FRONT_LEN, gfp_flags); + OSD_OPREPLY_FRONT_LEN, gfp_flags, true); if (!msg) { ceph_osdc_put_request(req); return NULL; @@ -249,7 +250,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op, 0); else - msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp_flags); + msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp_flags, true); if (!msg) { ceph_osdc_put_request(req); return NULL; @@ -685,6 +686,18 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) put_osd(osd); } +static void remove_all_osds(struct ceph_osd_client *osdc) +{ + dout("__remove_old_osds %p\n", osdc); + mutex_lock(&osdc->request_mutex); + while (!RB_EMPTY_ROOT(&osdc->osds)) { + struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds), + struct ceph_osd, o_node); + __remove_osd(osdc, osd); + } + mutex_unlock(&osdc->request_mutex); +} + static void __move_osd_to_lru(struct ceph_osd_client *osdc, struct ceph_osd *osd) { @@ -701,14 +714,14 @@ static void __remove_osd_from_lru(struct ceph_osd *osd) list_del_init(&osd->o_osd_lru); } -static void remove_old_osds(struct ceph_osd_client *osdc, int remove_all) +static void remove_old_osds(struct ceph_osd_client *osdc) { struct ceph_osd *osd, *nosd; dout("__remove_old_osds %p\n", osdc); mutex_lock(&osdc->request_mutex); list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) { - if (!remove_all && time_before(jiffies, osd->lru_ttl)) + if (time_before(jiffies, osd->lru_ttl)) break; __remove_osd(osdc, osd); } @@ -751,6 +764,7 @@ static void __insert_osd(struct ceph_osd_client *osdc, struct ceph_osd *new) struct rb_node *parent = NULL; struct ceph_osd *osd = NULL; + dout("__insert_osd %p osd%d\n", new, new->o_osd); while (*p) { parent = *p; osd = rb_entry(parent, struct ceph_osd, o_node); @@ -803,13 +817,10 @@ static void __register_request(struct ceph_osd_client *osdc, { req->r_tid = ++osdc->last_tid; req->r_request->hdr.tid = cpu_to_le64(req->r_tid); - INIT_LIST_HEAD(&req->r_req_lru_item); - dout("__register_request %p tid %lld\n", req, req->r_tid); __insert_request(osdc, req); ceph_osdc_get_request(req); osdc->num_requests++; - if (osdc->num_requests == 1) { dout(" first request, scheduling timeout\n"); __schedule_osd_timeout(osdc); @@ -932,7 +943,7 @@ EXPORT_SYMBOL(ceph_osdc_set_request_linger); * Caller should hold map_sem for read and request_mutex. */ static int __map_request(struct ceph_osd_client *osdc, - struct ceph_osd_request *req) + struct ceph_osd_request *req, int force_resend) { struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; struct ceph_pg pgid; @@ -956,7 +967,8 @@ static int __map_request(struct ceph_osd_client *osdc, num = err; } - if ((req->r_osd && req->r_osd->o_osd == o && + if ((!force_resend && + req->r_osd && req->r_osd->o_osd == o && req->r_sent >= req->r_osd->o_incarnation && req->r_num_pg_osds == num && memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) || @@ -1144,7 +1156,7 @@ static void handle_osds_timeout(struct work_struct *work) dout("osds timeout\n"); down_read(&osdc->map_sem); - remove_old_osds(osdc, 0); + remove_old_osds(osdc); up_read(&osdc->map_sem); schedule_delayed_work(&osdc->osds_timeout_work, @@ -1278,18 +1290,18 @@ static void reset_changed_osds(struct ceph_osd_client *osdc) * * Caller should hold map_sem for read and request_mutex. */ -static void kick_requests(struct ceph_osd_client *osdc) +static void kick_requests(struct ceph_osd_client *osdc, int force_resend) { struct ceph_osd_request *req, *nreq; struct rb_node *p; int needmap = 0; int err; - dout("kick_requests\n"); + dout("kick_requests %s\n", force_resend ? " (force resend)" : ""); mutex_lock(&osdc->request_mutex); for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { req = rb_entry(p, struct ceph_osd_request, r_node); - err = __map_request(osdc, req); + err = __map_request(osdc, req, force_resend); if (err < 0) continue; /* error */ if (req->r_osd == NULL) { @@ -1307,7 +1319,7 @@ static void kick_requests(struct ceph_osd_client *osdc) r_linger_item) { dout("linger req=%p req->r_osd=%p\n", req, req->r_osd); - err = __map_request(osdc, req); + err = __map_request(osdc, req, force_resend); if (err == 0) continue; /* no change and no osd was specified */ if (err < 0) @@ -1384,7 +1396,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) ceph_osdmap_destroy(osdc->osdmap); osdc->osdmap = newmap; } - kick_requests(osdc); + kick_requests(osdc, 0); reset_changed_osds(osdc); } else { dout("ignoring incremental map %u len %d\n", @@ -1412,6 +1424,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) "older than our %u\n", epoch, maplen, osdc->osdmap->epoch); } else { + int skipped_map = 0; + dout("taking full map %u len %d\n", epoch, maplen); newmap = osdmap_decode(&p, p+maplen); if (IS_ERR(newmap)) { @@ -1421,9 +1435,12 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) BUG_ON(!newmap); oldmap = osdc->osdmap; osdc->osdmap = newmap; - if (oldmap) + if (oldmap) { + if (oldmap->epoch + 1 < newmap->epoch) + skipped_map = 1; ceph_osdmap_destroy(oldmap); - kick_requests(osdc); + } + kick_requests(osdc, skipped_map); } p += maplen; nr_maps--; @@ -1696,7 +1713,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, * the request still han't been touched yet. */ if (req->r_sent == 0) { - rc = __map_request(osdc, req); + rc = __map_request(osdc, req, 0); if (rc < 0) { if (nofail) { dout("osdc_start_request failed map, " @@ -1862,8 +1879,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc) ceph_osdmap_destroy(osdc->osdmap); osdc->osdmap = NULL; } - remove_old_osds(osdc, 1); - WARN_ON(!RB_EMPTY_ROOT(&osdc->osds)); + remove_all_osds(osdc); mempool_destroy(osdc->req_mempool); ceph_msgpool_destroy(&osdc->msgpool_op); ceph_msgpool_destroy(&osdc->msgpool_op_reply); @@ -2022,7 +2038,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, if (front > req->r_reply->front.iov_len) { pr_warning("get_reply front %d > preallocated %d\n", front, (int)req->r_reply->front.iov_len); - m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, GFP_NOFS); + m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, GFP_NOFS, false); if (!m) goto out; ceph_msg_put(req->r_reply); @@ -2070,7 +2086,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con, switch (type) { case CEPH_MSG_OSD_MAP: case CEPH_MSG_WATCH_NOTIFY: - return ceph_msg_new(type, front, GFP_NOFS); + return ceph_msg_new(type, front, GFP_NOFS, false); case CEPH_MSG_OSD_OPREPLY: return get_reply(con, hdr, skip); default: |