summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/block/rbd.c 26
-rw-r--r-- fs/ceph/addr.c 38
-rw-r--r-- fs/ceph/caps.c 51
-rw-r--r-- fs/ceph/dir.c 48
-rw-r--r-- fs/ceph/mds_client.c 61
-rw-r--r-- fs/ceph/strings.c 1
-rw-r--r-- fs/ceph/super.c 56
-rw-r--r-- fs/ceph/super.h 4
-rw-r--r-- fs/ceph/xattr.c 23
-rw-r--r-- include/linux/ceph/ceph_features.h 16
-rw-r--r-- include/linux/ceph/ceph_fs.h 1
-rw-r--r-- include/linux/ceph/debugfs.h 8
-rw-r--r-- include/linux/ceph/libceph.h 2
-rw-r--r-- include/linux/ceph/osdmap.h 5
-rw-r--r-- include/linux/crush/crush.h 12
-rw-r--r-- net/ceph/ceph_common.c 37
-rw-r--r-- net/ceph/crush/crush.c 14
-rw-r--r-- net/ceph/crush/crush_ln_table.h 166
-rw-r--r-- net/ceph/crush/mapper.c 118
-rw-r--r-- net/ceph/debugfs.c 24
-rw-r--r-- net/ceph/messenger.c 25
-rw-r--r-- net/ceph/osdmap.c 25
22 files changed, 633 insertions, 128 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index b40af3203089..812523330a78 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3762,8 +3762,8 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
goto out_tag_set;
}
- /* We use the default size, but let's be explicit about it. */
- blk_queue_physical_block_size(q, SECTOR_SIZE);
+ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
+ /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
/* set io sizes to object size */
segment_size = rbd_obj_bytes(&rbd_dev->header);
@@ -5301,8 +5301,13 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
if (mapping) {
ret = rbd_dev_header_watch_sync(rbd_dev);
- if (ret)
+ if (ret) {
+ if (ret == -ENOENT)
+ pr_info("image %s/%s does not exist\n",
+ rbd_dev->spec->pool_name,
+ rbd_dev->spec->image_name);
goto out_header_name;
+ }
}
ret = rbd_dev_header_info(rbd_dev);
@@ -5319,8 +5324,14 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
ret = rbd_spec_fill_snap_id(rbd_dev);
else
ret = rbd_spec_fill_names(rbd_dev);
- if (ret)
+ if (ret) {
+ if (ret == -ENOENT)
+ pr_info("snap %s/%s@%s does not exist\n",
+ rbd_dev->spec->pool_name,
+ rbd_dev->spec->image_name,
+ rbd_dev->spec->snap_name);
goto err_out_probe;
+ }
if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
ret = rbd_dev_v2_parent_info(rbd_dev);
@@ -5390,8 +5401,11 @@ static ssize_t do_rbd_add(struct bus_type *bus,
/* pick the pool */
rc = rbd_add_get_pool_id(rbdc, spec->pool_name);
- if (rc < 0)
+ if (rc < 0) {
+ if (rc == -ENOENT)
+ pr_info("pool %s does not exist\n", spec->pool_name);
goto err_out_client;
+ }
spec->pool_id = (u64)rc;
/* The ceph file layout needs to fit pool id in 32 bits */
@@ -5673,7 +5687,7 @@ static int __init rbd_init(void)
/*
* The number of active work items is limited by the number of
- * rbd devices, so leave @max_active at default.
+ * rbd devices * queue depth, so leave @max_active at default.
*/
rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM, 0);
if (!rbd_wq) {
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 155ab9c0246b..e162bcd105ee 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1146,6 +1146,10 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
inode, page, (int)pos, (int)len);
r = ceph_update_writeable_page(file, pos, len, page);
+ if (r < 0)
+ page_cache_release(page);
+ else
+ *pagep = page;
} while (r == -EAGAIN);
return r;
@@ -1534,19 +1538,27 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false);
- err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
- "inline_version", &inline_version,
- sizeof(inline_version),
- CEPH_OSD_CMPXATTR_OP_GT,
- CEPH_OSD_CMPXATTR_MODE_U64);
- if (err)
- goto out_put;
-
- err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
- "inline_version", &inline_version,
- sizeof(inline_version), 0, 0);
- if (err)
- goto out_put;
+ {
+ __le64 xattr_buf = cpu_to_le64(inline_version);
+ err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
+ "inline_version", &xattr_buf,
+ sizeof(xattr_buf),
+ CEPH_OSD_CMPXATTR_OP_GT,
+ CEPH_OSD_CMPXATTR_MODE_U64);
+ if (err)
+ goto out_put;
+ }
+
+ {
+ char xattr_buf[32];
+ int xattr_len = snprintf(xattr_buf, sizeof(xattr_buf),
+ "%llu", inline_version);
+ err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
+ "inline_version",
+ xattr_buf, xattr_len, 0, 0);
+ if (err)
+ goto out_put;
+ }
ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 8172775428a0..11631c4c7d14 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -896,6 +896,18 @@ int ceph_is_any_caps(struct inode *inode)
return ret;
}
+static void drop_inode_snap_realm(struct ceph_inode_info *ci)
+{
+ struct ceph_snap_realm *realm = ci->i_snap_realm;
+ spin_lock(&realm->inodes_with_caps_lock);
+ list_del_init(&ci->i_snap_realm_item);
+ ci->i_snap_realm_counter++;
+ ci->i_snap_realm = NULL;
+ spin_unlock(&realm->inodes_with_caps_lock);
+ ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc,
+ realm);
+}
+
/*
* Remove a cap. Take steps to deal with a racing iterate_session_caps.
*
@@ -946,15 +958,13 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
if (removed)
ceph_put_cap(mdsc, cap);
- if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
- struct ceph_snap_realm *realm = ci->i_snap_realm;
- spin_lock(&realm->inodes_with_caps_lock);
- list_del_init(&ci->i_snap_realm_item);
- ci->i_snap_realm_counter++;
- ci->i_snap_realm = NULL;
- spin_unlock(&realm->inodes_with_caps_lock);
- ceph_put_snap_realm(mdsc, realm);
- }
+ /* When a reconnect is denied, we remove session caps forcibly,
+ * so i_wr_ref can be non-zero. If there are ongoing writes,
+ * keep i_snap_realm.
+ */
+ if (!__ceph_is_any_caps(ci) && ci->i_wr_ref == 0 && ci->i_snap_realm)
+ drop_inode_snap_realm(ci);
+
if (!__ceph_is_any_real_caps(ci))
__cap_delay_cancel(mdsc, ci);
}
@@ -1394,6 +1404,13 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
int was = ci->i_dirty_caps;
int dirty = 0;
+ if (!ci->i_auth_cap) {
+ pr_warn("__mark_dirty_caps %p %llx mask %s, "
+ "but no auth cap (session was closed?)\n",
+ inode, ceph_ino(inode), ceph_cap_string(mask));
+ return 0;
+ }
+
dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode,
ceph_cap_string(mask), ceph_cap_string(was),
ceph_cap_string(was | mask));
@@ -1404,7 +1421,6 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
ci->i_snap_realm->cached_context);
dout(" inode %p now dirty snapc %p auth cap %p\n",
&ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap);
- WARN_ON(!ci->i_auth_cap);
BUG_ON(!list_empty(&ci->i_dirty_item));
spin_lock(&mdsc->cap_dirty_lock);
list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
@@ -1545,7 +1561,19 @@ retry_locked:
if (!mdsc->stopping && inode->i_nlink > 0) {
if (want) {
retain |= CEPH_CAP_ANY; /* be greedy */
+ } else if (S_ISDIR(inode->i_mode) &&
+ (issued & CEPH_CAP_FILE_SHARED) &&
+ __ceph_dir_is_complete(ci)) {
+ /*
+ * If a directory is complete, we want to keep
+ * the exclusive cap, so that the MDS does not end up
+ * revoking the shared cap on every create/unlink
+ * operation.
+ */
+ want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
+ retain |= want;
} else {
+
retain |= CEPH_CAP_ANY_SHARED;
/*
* keep RD only if we didn't have the file open RW,
@@ -2309,6 +2337,9 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
wake = 1;
}
}
+ /* see comment in __ceph_remove_cap() */
+ if (!__ceph_is_any_caps(ci) && ci->i_snap_realm)
+ drop_inode_snap_realm(ci);
}
spin_unlock(&ci->i_ceph_lock);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 83e9976f7189..e729b79812b4 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -281,6 +281,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
/* can we use the dcache? */
spin_lock(&ci->i_ceph_lock);
if ((ctx->pos == 2 || fi->dentry) &&
+ ceph_test_mount_opt(fsc, DCACHE) &&
!ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
ceph_snap(inode) != CEPH_SNAPDIR &&
__ceph_dir_is_complete_ordered(ci) &&
@@ -336,16 +337,23 @@ more:
ceph_mdsc_put_request(req);
return err;
}
- req->r_inode = inode;
- ihold(inode);
- req->r_dentry = dget(file->f_path.dentry);
/* hints to request -> mds selection code */
req->r_direct_mode = USE_AUTH_MDS;
req->r_direct_hash = ceph_frag_value(frag);
req->r_direct_is_hash = true;
- req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
+ if (fi->last_name) {
+ req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
+ if (!req->r_path2) {
+ ceph_mdsc_put_request(req);
+ return -ENOMEM;
+ }
+ }
req->r_readdir_offset = fi->next_offset;
req->r_args.readdir.frag = cpu_to_le32(frag);
+
+ req->r_inode = inode;
+ ihold(inode);
+ req->r_dentry = dget(file->f_path.dentry);
err = ceph_mdsc_do_request(mdsc, NULL, req);
if (err < 0) {
ceph_mdsc_put_request(req);
@@ -629,6 +637,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
fsc->mount_options->snapdir_name,
dentry->d_name.len) &&
!is_root_ceph_dentry(dir, dentry) &&
+ ceph_test_mount_opt(fsc, DCACHE) &&
__ceph_dir_is_complete(ci) &&
(__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
spin_unlock(&ci->i_ceph_lock);
@@ -755,10 +764,15 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
err = PTR_ERR(req);
goto out;
}
- req->r_dentry = dget(dentry);
- req->r_num_caps = 2;
req->r_path2 = kstrdup(dest, GFP_NOFS);
+ if (!req->r_path2) {
+ err = -ENOMEM;
+ ceph_mdsc_put_request(req);
+ goto out;
+ }
req->r_locked_dir = dir;
+ req->r_dentry = dget(dentry);
+ req->r_num_caps = 2;
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
err = ceph_mdsc_do_request(mdsc, dir, req);
@@ -933,16 +947,20 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
+ int op = CEPH_MDS_OP_RENAME;
int err;
if (ceph_snap(old_dir) != ceph_snap(new_dir))
return -EXDEV;
- if (ceph_snap(old_dir) != CEPH_NOSNAP ||
- ceph_snap(new_dir) != CEPH_NOSNAP)
- return -EROFS;
+ if (ceph_snap(old_dir) != CEPH_NOSNAP) {
+ if (old_dir == new_dir && ceph_snap(old_dir) == CEPH_SNAPDIR)
+ op = CEPH_MDS_OP_RENAMESNAP;
+ else
+ return -EROFS;
+ }
dout("rename dir %p dentry %p to dir %p dentry %p\n",
old_dir, old_dentry, new_dir, new_dentry);
- req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS);
+ req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
ihold(old_dir);
@@ -1240,11 +1258,12 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
dout("dir_fsync %p wait on tid %llu (until %llu)\n",
inode, req->r_tid, last_tid);
if (req->r_timeout) {
- ret = wait_for_completion_timeout(
- &req->r_safe_completion, req->r_timeout);
- if (ret > 0)
+ unsigned long time_left = wait_for_completion_timeout(
+ &req->r_safe_completion,
+ req->r_timeout);
+ if (time_left > 0)
ret = 0;
- else if (ret == 0)
+ else
ret = -EIO; /* timed out */
} else {
wait_for_completion(&req->r_safe_completion);
@@ -1372,6 +1391,7 @@ const struct inode_operations ceph_snapdir_iops = {
.getattr = ceph_getattr,
.mkdir = ceph_mkdir,
.rmdir = ceph_unlink,
+ .rename = ceph_rename,
};
const struct dentry_operations ceph_dentry_ops = {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 71c073f38e54..0a2eb32ffe43 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1021,6 +1021,33 @@ static void cleanup_cap_releases(struct ceph_mds_session *session)
spin_unlock(&session->s_cap_lock);
}
+static void cleanup_session_requests(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ struct ceph_mds_request *req;
+ struct rb_node *p;
+
+ dout("cleanup_session_requests mds%d\n", session->s_mds);
+ mutex_lock(&mdsc->mutex);
+ while (!list_empty(&session->s_unsafe)) {
+ req = list_first_entry(&session->s_unsafe,
+ struct ceph_mds_request, r_unsafe_item);
+ list_del_init(&req->r_unsafe_item);
+ pr_info(" dropping unsafe request %llu\n", req->r_tid);
+ __unregister_request(mdsc, req);
+ }
+ /* zero r_attempts, so kick_requests() will re-send requests */
+ p = rb_first(&mdsc->request_tree);
+ while (p) {
+ req = rb_entry(p, struct ceph_mds_request, r_node);
+ p = rb_next(p);
+ if (req->r_session &&
+ req->r_session->s_mds == session->s_mds)
+ req->r_attempts = 0;
+ }
+ mutex_unlock(&mdsc->mutex);
+}
+
/*
* Helper to safely iterate over all caps associated with a session, with
* special care taken to handle a racing __ceph_remove_cap().
@@ -1098,7 +1125,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
cap, ci, &ci->vfs_inode);
spin_lock(&ci->i_ceph_lock);
__ceph_remove_cap(cap, false);
- if (!__ceph_is_any_real_caps(ci)) {
+ if (!ci->i_auth_cap) {
struct ceph_mds_client *mdsc =
ceph_sb_to_client(inode->i_sb)->mdsc;
@@ -1120,13 +1147,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
mdsc->num_cap_flushing--;
drop = 1;
}
- if (drop && ci->i_wrbuffer_ref) {
- pr_info(" dropping dirty data for %p %lld\n",
- inode, ceph_ino(inode));
- ci->i_wrbuffer_ref = 0;
- ci->i_wrbuffer_ref_head = 0;
- drop++;
- }
spin_unlock(&mdsc->cap_dirty_lock);
}
spin_unlock(&ci->i_ceph_lock);
@@ -1853,7 +1873,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
*/
static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
struct ceph_mds_request *req,
- int mds)
+ int mds, bool drop_cap_releases)
{
struct ceph_msg *msg;
struct ceph_mds_request_head *head;
@@ -1937,6 +1957,12 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
releases += ceph_encode_inode_release(&p,
req->r_old_dentry->d_inode,
mds, req->r_old_inode_drop, req->r_old_inode_unless, 0);
+
+ if (drop_cap_releases) {
+ releases = 0;
+ p = msg->front.iov_base + req->r_request_release_offset;
+ }
+
head->num_releases = cpu_to_le16(releases);
/* time stamp */
@@ -1989,7 +2015,7 @@ static void complete_request(struct ceph_mds_client *mdsc,
*/
static int __prepare_send_request(struct ceph_mds_client *mdsc,
struct ceph_mds_request *req,
- int mds)
+ int mds, bool drop_cap_releases)
{
struct ceph_mds_request_head *rhead;
struct ceph_msg *msg;
@@ -2048,7 +2074,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
ceph_msg_put(req->r_request);
req->r_request = NULL;
}
- msg = create_request_message(mdsc, req, mds);
+ msg = create_request_message(mdsc, req, mds, drop_cap_releases);
if (IS_ERR(msg)) {
req->r_err = PTR_ERR(msg);
complete_request(mdsc, req);
@@ -2132,7 +2158,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
if (req->r_request_started == 0) /* note request start time */
req->r_request_started = jiffies;
- err = __prepare_send_request(mdsc, req, mds);
+ err = __prepare_send_request(mdsc, req, mds, false);
if (!err) {
ceph_msg_get(req->r_request);
ceph_con_send(&session->s_con, req->r_request);
@@ -2590,6 +2616,7 @@ static void handle_session(struct ceph_mds_session *session,
case CEPH_SESSION_CLOSE:
if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
pr_info("mds%d reconnect denied\n", session->s_mds);
+ cleanup_session_requests(mdsc, session);
remove_session_caps(session);
wake = 2; /* for good measure */
wake_up_all(&mdsc->session_close_wq);
@@ -2658,7 +2685,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
mutex_lock(&mdsc->mutex);
list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) {
- err = __prepare_send_request(mdsc, req, session->s_mds);
+ err = __prepare_send_request(mdsc, req, session->s_mds, true);
if (!err) {
ceph_msg_get(req->r_request);
ceph_con_send(&session->s_con, req->r_request);
@@ -2679,7 +2706,8 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
continue; /* only old requests */
if (req->r_session &&
req->r_session->s_mds == session->s_mds) {
- err = __prepare_send_request(mdsc, req, session->s_mds);
+ err = __prepare_send_request(mdsc, req,
+ session->s_mds, true);
if (!err) {
ceph_msg_get(req->r_request);
ceph_con_send(&session->s_con, req->r_request);
@@ -2864,7 +2892,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
spin_unlock(&session->s_cap_lock);
/* trim unused caps to reduce MDS's cache rejoin time */
- shrink_dcache_parent(mdsc->fsc->sb->s_root);
+ if (mdsc->fsc->sb->s_root)
+ shrink_dcache_parent(mdsc->fsc->sb->s_root);
ceph_con_close(&session->s_con);
ceph_con_open(&session->s_con,
@@ -3133,7 +3162,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
di->lease_renew_from &&
di->lease_renew_after == 0) {
unsigned long duration =
- le32_to_cpu(h->duration_ms) * HZ / 1000;
+ msecs_to_jiffies(le32_to_cpu(h->duration_ms));
di->lease_seq = seq;
dentry->d_time = di->lease_renew_from + duration;
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c
index 51cc23e48111..89e6bc321df3 100644
--- a/fs/ceph/strings.c
+++ b/fs/ceph/strings.c
@@ -75,6 +75,7 @@ const char *ceph_mds_op_name(int op)
case CEPH_MDS_OP_LSSNAP: return "lssnap";
case CEPH_MDS_OP_MKSNAP: return "mksnap";
case CEPH_MDS_OP_RMSNAP: return "rmsnap";
+ case CEPH_MDS_OP_RENAMESNAP: return "renamesnap";
case CEPH_MDS_OP_SETFILELOCK: return "setfilelock";
case CEPH_MDS_OP_GETFILELOCK: return "getfilelock";
}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index a63997b8bcff..e463ebd69a9c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -345,6 +345,11 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
fsopt->rsize = CEPH_RSIZE_DEFAULT;
fsopt->rasize = CEPH_RASIZE_DEFAULT;
fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
+ if (!fsopt->snapdir_name) {
+ err = -ENOMEM;
+ goto out;
+ }
+
fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
@@ -406,31 +411,20 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
struct ceph_mount_options *fsopt = fsc->mount_options;
- struct ceph_options *opt = fsc->client->options;
-
- if (opt->flags & CEPH_OPT_FSID)
- seq_printf(m, ",fsid=%pU", &opt->fsid);
- if (opt->flags & CEPH_OPT_NOSHARE)
- seq_puts(m, ",noshare");
- if (opt->flags & CEPH_OPT_NOCRC)
- seq_puts(m, ",nocrc");
- if (opt->flags & CEPH_OPT_NOMSGAUTH)
- seq_puts(m, ",nocephx_require_signatures");
- if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
- seq_puts(m, ",notcp_nodelay");
-
- if (opt->name)
- seq_printf(m, ",name=%s", opt->name);
- if (opt->key)
- seq_puts(m, ",secret=<hidden>");
-
- if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
- seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
- if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
- seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
- if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
- seq_printf(m, ",osdkeepalivetimeout=%d",
- opt->osd_keepalive_timeout);
+ size_t pos;
+ int ret;
+
+ /* a comma between MNT/MS and client options */
+ seq_putc(m, ',');
+ pos = m->count;
+
+ ret = ceph_print_client_options(m, fsc->client);
+ if (ret)
+ return ret;
+
+ /* retract our comma if no client options */
+ if (m->count == pos)
+ m->count--;
if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
seq_puts(m, ",dirstat");
@@ -438,14 +432,10 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
seq_puts(m, ",norbytes");
if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
seq_puts(m, ",noasyncreaddir");
- if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE)
- seq_puts(m, ",dcache");
- else
+ if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
seq_puts(m, ",nodcache");
if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
seq_puts(m, ",fsc");
- else
- seq_puts(m, ",nofsc");
#ifdef CONFIG_CEPH_FS_POSIX_ACL
if (fsopt->sb_flags & MS_POSIXACL)
@@ -477,6 +467,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
+
return 0;
}
@@ -730,6 +721,11 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
if (IS_ERR(req))
return ERR_CAST(req);
req->r_path1 = kstrdup(path, GFP_NOFS);
+ if (!req->r_path1) {
+ root = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
req->r_ino1.ino = CEPH_INO_ROOT;
req->r_ino1.snap = CEPH_NOSNAP;
req->r_started = started;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 04c8124ed30e..fa20e1318939 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -36,7 +36,8 @@
#define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */
#define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */
-#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES)
+#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES | \
+ CEPH_MOUNT_OPT_DCACHE)
#define ceph_set_mount_opt(fsc, opt) \
(fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt;
@@ -881,7 +882,6 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
/* file.c */
extern const struct file_operations ceph_file_fops;
-extern const struct address_space_operations ceph_aops;
extern int ceph_open(struct inode *inode, struct file *file);
extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 5a492caf34cb..5c4c9c256931 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -877,16 +877,23 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
err = PTR_ERR(req);
goto out;
}
- req->r_inode = inode;
- ihold(inode);
- req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
- req->r_num_caps = 1;
+
req->r_args.setxattr.flags = cpu_to_le32(flags);
req->r_path2 = kstrdup(name, GFP_NOFS);
+ if (!req->r_path2) {
+ ceph_mdsc_put_request(req);
+ err = -ENOMEM;
+ goto out;
+ }
req->r_pagelist = pagelist;
pagelist = NULL;
+ req->r_inode = inode;
+ ihold(inode);
+ req->r_num_caps = 1;
+ req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
+
dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
err = ceph_mdsc_do_request(mdsc, NULL, req);
ceph_mdsc_put_request(req);
@@ -1019,12 +1026,17 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
+ req->r_path2 = kstrdup(name, GFP_NOFS);
+ if (!req->r_path2) {
+ /* release req on failure, as ceph_sync_setxattr() does */
+ ceph_mdsc_put_request(req);
+ return -ENOMEM;
+ }
+
req->r_inode = inode;
ihold(inode);
- req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
req->r_num_caps = 1;
- req->r_path2 = kstrdup(name, GFP_NOFS);
-
+ req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
err = ceph_mdsc_do_request(mdsc, NULL, req);
ceph_mdsc_put_request(req);
return err;
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 71e05bbf8ceb..4763ad64e832 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -50,6 +50,19 @@
#define CEPH_FEATURE_MDS_INLINE_DATA (1ULL<<40)
#define CEPH_FEATURE_CRUSH_TUNABLES3 (1ULL<<41)
#define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41) /* overlap w/ tunables3 */
+#define CEPH_FEATURE_MSGR_KEEPALIVE2 (1ULL<<42)
+#define CEPH_FEATURE_OSD_POOLRESEND (1ULL<<43)
+#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 (1ULL<<44)
+#define CEPH_FEATURE_OSD_SET_ALLOC_HINT (1ULL<<45)
+#define CEPH_FEATURE_OSD_FADVISE_FLAGS (1ULL<<46)
+#define CEPH_FEATURE_OSD_REPOP (1ULL<<46) /* overlap with fadvise */
+#define CEPH_FEATURE_OSD_OBJECT_DIGEST (1ULL<<46) /* overlap with fadvise */
+#define CEPH_FEATURE_OSD_TRANSACTION_MAY_LAYOUT (1ULL<<46) /* overlap w/ fadvise */
+#define CEPH_FEATURE_MDS_QUOTA (1ULL<<47)
+#define CEPH_FEATURE_CRUSH_V4 (1ULL<<48) /* straw2 buckets */
+#define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49)
+// duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY
+#define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49) /* overlap w/ above */
/*
* The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
@@ -93,7 +106,8 @@ static inline u64 ceph_sanitize_features(u64 features)
CEPH_FEATURE_EXPORT_PEER | \
CEPH_FEATURE_OSDMAP_ENC | \
CEPH_FEATURE_CRUSH_TUNABLES3 | \
- CEPH_FEATURE_OSD_PRIMARY_AFFINITY)
+ CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \
+ CEPH_FEATURE_CRUSH_V4)
#define CEPH_FEATURES_REQUIRED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR | \
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 31eb03d0c766..d7d072a25c27 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -323,6 +323,7 @@ enum {
CEPH_MDS_OP_MKSNAP = 0x01400,
CEPH_MDS_OP_RMSNAP = 0x01401,
CEPH_MDS_OP_LSSNAP = 0x00402,
+ CEPH_MDS_OP_RENAMESNAP = 0x01403,
};
extern const char *ceph_mds_op_name(int op);
diff --git a/include/linux/ceph/debugfs.h b/include/linux/ceph/debugfs.h
index 1df086d7882d..29cf897cc5cd 100644
--- a/include/linux/ceph/debugfs.h
+++ b/include/linux/ceph/debugfs.h
@@ -7,13 +7,7 @@
#define CEPH_DEFINE_SHOW_FUNC(name) \
static int name##_open(struct inode *inode, struct file *file) \
{ \
- struct seq_file *sf; \
- int ret; \
- \
- ret = single_open(file, name, NULL); \
- sf = file->private_data; \
- sf->private = inode->i_private; \
- return ret; \
+ return single_open(file, name, inode->i_private); \
} \
\
static const struct file_operations name##_fops = { \
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 16fff9608848..30f92cefaa72 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -135,6 +135,7 @@ struct ceph_client {
struct dentry *debugfs_dir;
struct dentry *debugfs_monmap;
struct dentry *debugfs_osdmap;
+ struct dentry *debugfs_options;
#endif
};
@@ -191,6 +192,7 @@ extern struct ceph_options *ceph_parse_options(char *options,
const char *dev_name, const char *dev_name_end,
int (*parse_extra_token)(char *c, void *private),
void *private);
+int ceph_print_client_options(struct seq_file *m, struct ceph_client *client);
extern void ceph_destroy_options(struct ceph_options *opt);
extern int ceph_compare_options(struct ceph_options *new_opt,
struct ceph_client *client);
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 561ea896c657..e55c08bc3a96 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -175,13 +175,12 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
__u8 version;
if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) {
- pr_warning("incomplete pg encoding");
-
+ pr_warn("incomplete pg encoding\n");
return -EINVAL;
}
version = ceph_decode_8(p);
if (version > 1) {
- pr_warning("do not understand pg encoding %d > 1",
+ pr_warn("do not understand pg encoding %d > 1\n",
(int)version);
return -EINVAL;
}
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index 4fad5f8ee01d..48a1a7d100f1 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -96,13 +96,15 @@ struct crush_rule {
* uniform O(1) poor poor
* list O(n) optimal poor
* tree O(log n) good good
- * straw O(n) optimal optimal
+ * straw O(n) better better
+ * straw2 O(n) optimal optimal
*/
enum {
CRUSH_BUCKET_UNIFORM = 1,
CRUSH_BUCKET_LIST = 2,
CRUSH_BUCKET_TREE = 3,
- CRUSH_BUCKET_STRAW = 4
+ CRUSH_BUCKET_STRAW = 4,
+ CRUSH_BUCKET_STRAW2 = 5,
};
extern const char *crush_bucket_alg_name(int alg);
@@ -149,6 +151,11 @@ struct crush_bucket_straw {
__u32 *straws; /* 16-bit fixed point */
};
+struct crush_bucket_straw2 {
+ struct crush_bucket h;
+ __u32 *item_weights; /* 16-bit fixed point */
+};
+
/*
@@ -189,6 +196,7 @@ extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
+extern void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b);
extern void crush_destroy_bucket(struct crush_bucket *b);
extern void crush_destroy_rule(struct crush_rule *r);
extern void crush_destroy(struct crush_map *map);
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index ec565508e904..79e8f71aef5b 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -490,6 +490,43 @@ out:
}
EXPORT_SYMBOL(ceph_parse_options);
+int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
+{
+ struct ceph_options *opt = client->options;
+ size_t pos = m->count;
+
+ if (opt->name)
+ seq_printf(m, "name=%s,", opt->name);
+ if (opt->key)
+ seq_puts(m, "secret=<hidden>,");
+
+ if (opt->flags & CEPH_OPT_FSID)
+ seq_printf(m, "fsid=%pU,", &opt->fsid);
+ if (opt->flags & CEPH_OPT_NOSHARE)
+ seq_puts(m, "noshare,");
+ if (opt->flags & CEPH_OPT_NOCRC)
+ seq_puts(m, "nocrc,");
+ if (opt->flags & CEPH_OPT_NOMSGAUTH)
+ seq_puts(m, "nocephx_require_signatures,");
+ if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
+ seq_puts(m, "notcp_nodelay,");
+
+ if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
+ seq_printf(m, "mount_timeout=%d,", opt->mount_timeout);
+ if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
+ seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl);
+ if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
+ seq_printf(m, "osdkeepalivetimeout=%d,",
+ opt->osd_keepalive_timeout);
+
+ /* drop redundant comma */
+ if (m->count != pos)
+ m->count--;
+
+ return 0;
+}
+EXPORT_SYMBOL(ceph_print_client_options);
+
u64 ceph_client_id(struct ceph_client *client)
{
return client->monc.auth->global_id;
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 16bc199d9a62..9d84ce4ea0df 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -17,6 +17,7 @@ const char *crush_bucket_alg_name(int alg)
case CRUSH_BUCKET_LIST: return "list";
case CRUSH_BUCKET_TREE: return "tree";
case CRUSH_BUCKET_STRAW: return "straw";
+ case CRUSH_BUCKET_STRAW2: return "straw2";
default: return "unknown";
}
}
@@ -40,6 +41,8 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
case CRUSH_BUCKET_STRAW:
return ((struct crush_bucket_straw *)b)->item_weights[p];
+ case CRUSH_BUCKET_STRAW2:
+ return ((struct crush_bucket_straw2 *)b)->item_weights[p];
}
return 0;
}
@@ -77,6 +80,14 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
kfree(b);
}
+void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b)
+{
+ kfree(b->item_weights);
+ kfree(b->h.perm);
+ kfree(b->h.items);
+ kfree(b);
+}
+
void crush_destroy_bucket(struct crush_bucket *b)
{
switch (b->alg) {
@@ -92,6 +103,9 @@ void crush_destroy_bucket(struct crush_bucket *b)
case CRUSH_BUCKET_STRAW:
crush_destroy_bucket_straw((struct crush_bucket_straw *)b);
break;
+ case CRUSH_BUCKET_STRAW2:
+ crush_destroy_bucket_straw2((struct crush_bucket_straw2 *)b);
+ break;
}
}
diff --git a/net/ceph/crush/crush_ln_table.h b/net/ceph/crush/crush_ln_table.h
new file mode 100644
index 000000000000..6192c7fc958c
--- /dev/null
+++ b/net/ceph/crush/crush_ln_table.h
@@ -0,0 +1,166 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Intel Corporation All Rights Reserved
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#if defined(__linux__)
+#include <linux/types.h>
+#elif defined(__FreeBSD__)
+#include <sys/types.h>
+#endif
+
+#ifndef CEPH_CRUSH_LN_H
+#define CEPH_CRUSH_LN_H
+
+
+// __RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0), for k = 0..128
+// __RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0)
+
+static int64_t __RH_LH_tbl[128*2+2] = {
+ 0x0001000000000000ll, 0x0000000000000000ll, 0x0000fe03f80fe040ll, 0x000002dfca16dde1ll,
+ 0x0000fc0fc0fc0fc1ll, 0x000005b9e5a170b4ll, 0x0000fa232cf25214ll, 0x0000088e68ea899all,
+ 0x0000f83e0f83e0f9ll, 0x00000b5d69bac77ell, 0x0000f6603d980f67ll, 0x00000e26fd5c8555ll,
+ 0x0000f4898d5f85bcll, 0x000010eb389fa29fll, 0x0000f2b9d6480f2cll, 0x000013aa2fdd27f1ll,
+ 0x0000f0f0f0f0f0f1ll, 0x00001663f6fac913ll, 0x0000ef2eb71fc435ll, 0x00001918a16e4633ll,
+ 0x0000ed7303b5cc0fll, 0x00001bc84240adabll, 0x0000ebbdb2a5c162ll, 0x00001e72ec117fa5ll,
+ 0x0000ea0ea0ea0ea1ll, 0x00002118b119b4f3ll, 0x0000e865ac7b7604ll, 0x000023b9a32eaa56ll,
+ 0x0000e6c2b4481cd9ll, 0x00002655d3c4f15cll, 0x0000e525982af70dll, 0x000028ed53f307eell,
+ 0x0000e38e38e38e39ll, 0x00002b803473f7adll, 0x0000e1fc780e1fc8ll, 0x00002e0e85a9de04ll,
+ 0x0000e070381c0e08ll, 0x0000309857a05e07ll, 0x0000dee95c4ca038ll, 0x0000331dba0efce1ll,
+ 0x0000dd67c8a60dd7ll, 0x0000359ebc5b69d9ll, 0x0000dbeb61eed19dll, 0x0000381b6d9bb29bll,
+ 0x0000da740da740dbll, 0x00003a93dc9864b2ll, 0x0000d901b2036407ll, 0x00003d0817ce9cd4ll,
+ 0x0000d79435e50d7all, 0x00003f782d7204d0ll, 0x0000d62b80d62b81ll, 0x000041e42b6ec0c0ll,
+ 0x0000d4c77b03531ell, 0x0000444c1f6b4c2dll, 0x0000d3680d3680d4ll, 0x000046b016ca47c1ll,
+ 0x0000d20d20d20d21ll, 0x000049101eac381cll, 0x0000d0b69fcbd259ll, 0x00004b6c43f1366all,
+ 0x0000cf6474a8819fll, 0x00004dc4933a9337ll, 0x0000ce168a772509ll, 0x0000501918ec6c11ll,
+ 0x0000cccccccccccdll, 0x00005269e12f346ell, 0x0000cb8727c065c4ll, 0x000054b6f7f1325all,
+ 0x0000ca4587e6b750ll, 0x0000570068e7ef5all, 0x0000c907da4e8712ll, 0x000059463f919deell,
+ 0x0000c7ce0c7ce0c8ll, 0x00005b8887367433ll, 0x0000c6980c6980c7ll, 0x00005dc74ae9fbecll,
+ 0x0000c565c87b5f9ell, 0x00006002958c5871ll, 0x0000c4372f855d83ll, 0x0000623a71cb82c8ll,
+ 0x0000c30c30c30c31ll, 0x0000646eea247c5cll, 0x0000c1e4bbd595f7ll, 0x000066a008e4788cll,
+ 0x0000c0c0c0c0c0c1ll, 0x000068cdd829fd81ll, 0x0000bfa02fe80bfbll, 0x00006af861e5fc7dll,
+ 0x0000be82fa0be830ll, 0x00006d1fafdce20all, 0x0000bd6910470767ll, 0x00006f43cba79e40ll,
+ 0x0000bc52640bc527ll, 0x00007164beb4a56dll, 0x0000bb3ee721a54ell, 0x000073829248e961ll,
+ 0x0000ba2e8ba2e8bbll, 0x0000759d4f80cba8ll, 0x0000b92143fa36f6ll, 0x000077b4ff5108d9ll,
+ 0x0000b81702e05c0cll, 0x000079c9aa879d53ll, 0x0000b70fbb5a19bfll, 0x00007bdb59cca388ll,
+ 0x0000b60b60b60b61ll, 0x00007dea15a32c1bll, 0x0000b509e68a9b95ll, 0x00007ff5e66a0ffell,
+ 0x0000b40b40b40b41ll, 0x000081fed45cbccbll, 0x0000b30f63528918ll, 0x00008404e793fb81ll,
+ 0x0000b21642c8590cll, 0x000086082806b1d5ll, 0x0000b11fd3b80b12ll, 0x000088089d8a9e47ll,
+ 0x0000b02c0b02c0b1ll, 0x00008a064fd50f2all, 0x0000af3addc680b0ll, 0x00008c01467b94bbll,
+ 0x0000ae4c415c9883ll, 0x00008df988f4ae80ll, 0x0000ad602b580ad7ll, 0x00008fef1e987409ll,
+ 0x0000ac7691840ac8ll, 0x000091e20ea1393ell, 0x0000ab8f69e2835all, 0x000093d2602c2e5fll,
+ 0x0000aaaaaaaaaaabll, 0x000095c01a39fbd6ll, 0x0000a9c84a47a080ll, 0x000097ab43af59f9ll,
+ 0x0000a8e83f5717c1ll, 0x00009993e355a4e5ll, 0x0000a80a80a80a81ll, 0x00009b79ffdb6c8bll,
+ 0x0000a72f0539782all, 0x00009d5d9fd5010bll, 0x0000a655c4392d7cll, 0x00009f3ec9bcfb80ll,
+ 0x0000a57eb50295fbll, 0x0000a11d83f4c355ll, 0x0000a4a9cf1d9684ll, 0x0000a2f9d4c51039ll,
+ 0x0000a3d70a3d70a4ll, 0x0000a4d3c25e68dcll, 0x0000a3065e3fae7dll, 0x0000a6ab52d99e76ll,
+ 0x0000a237c32b16d0ll, 0x0000a8808c384547ll, 0x0000a16b312ea8fdll, 0x0000aa5374652a1cll,
+ 0x0000a0a0a0a0a0a1ll, 0x0000ac241134c4e9ll, 0x00009fd809fd80a0ll, 0x0000adf26865a8a1ll,
+ 0x00009f1165e72549ll, 0x0000afbe7fa0f04dll, 0x00009e4cad23dd60ll, 0x0000b1885c7aa982ll,
+ 0x00009d89d89d89d9ll, 0x0000b35004723c46ll, 0x00009cc8e160c3fcll, 0x0000b5157cf2d078ll,
+ 0x00009c09c09c09c1ll, 0x0000b6d8cb53b0call, 0x00009b4c6f9ef03bll, 0x0000b899f4d8ab63ll,
+ 0x00009a90e7d95bc7ll, 0x0000ba58feb2703all, 0x000099d722dabde6ll, 0x0000bc15edfeed32ll,
+ 0x0000991f1a515886ll, 0x0000bdd0c7c9a817ll, 0x00009868c809868dll, 0x0000bf89910c1678ll,
+ 0x000097b425ed097cll, 0x0000c1404eadf383ll, 0x000097012e025c05ll, 0x0000c2f5058593d9ll,
+ 0x0000964fda6c0965ll, 0x0000c4a7ba58377cll, 0x000095a02568095bll, 0x0000c65871da59ddll,
+ 0x000094f2094f2095ll, 0x0000c80730b00016ll, 0x0000944580944581ll, 0x0000c9b3fb6d0559ll,
+ 0x0000939a85c4093all, 0x0000cb5ed69565afll, 0x000092f113840498ll, 0x0000cd07c69d8702ll,
+ 0x0000924924924925ll, 0x0000ceaecfea8085ll, 0x000091a2b3c4d5e7ll, 0x0000d053f6d26089ll,
+ 0x000090fdbc090fdcll, 0x0000d1f73f9c70c0ll, 0x0000905a38633e07ll, 0x0000d398ae817906ll,
+ 0x00008fb823ee08fcll, 0x0000d53847ac00a6ll, 0x00008f1779d9fdc4ll, 0x0000d6d60f388e41ll,
+ 0x00008e78356d1409ll, 0x0000d8720935e643ll, 0x00008dda5202376all, 0x0000da0c39a54804ll,
+ 0x00008d3dcb08d3ddll, 0x0000dba4a47aa996ll, 0x00008ca29c046515ll, 0x0000dd3b4d9cf24bll,
+ 0x00008c08c08c08c1ll, 0x0000ded038e633f3ll, 0x00008b70344a139cll, 0x0000e0636a23e2eell,
+ 0x00008ad8f2fba939ll, 0x0000e1f4e5170d02ll, 0x00008a42f870566all, 0x0000e384ad748f0ell,
+ 0x000089ae4089ae41ll, 0x0000e512c6e54998ll, 0x0000891ac73ae982ll, 0x0000e69f35065448ll,
+ 0x0000888888888889ll, 0x0000e829fb693044ll, 0x000087f78087f781ll, 0x0000e9b31d93f98ell,
+ 0x00008767ab5f34e5ll, 0x0000eb3a9f019750ll, 0x000086d905447a35ll, 0x0000ecc08321eb30ll,
+ 0x0000864b8a7de6d2ll, 0x0000ee44cd59ffabll, 0x000085bf37612cefll, 0x0000efc781043579ll,
+ 0x0000853408534086ll, 0x0000f148a170700all, 0x000084a9f9c8084bll, 0x0000f2c831e44116ll,
+ 0x0000842108421085ll, 0x0000f446359b1353ll, 0x0000839930523fbfll, 0x0000f5c2afc65447ll,
+ 0x000083126e978d50ll, 0x0000f73da38d9d4all, 0x0000828cbfbeb9a1ll, 0x0000f8b7140edbb1ll,
+ 0x0000820820820821ll, 0x0000fa2f045e7832ll, 0x000081848da8faf1ll, 0x0000fba577877d7dll,
+ 0x0000810204081021ll, 0x0000fd1a708bbe11ll, 0x0000808080808081ll, 0x0000fe8df263f957ll,
+ 0x0000800000000000ll, 0x0000ffff00000000ll,
+ };
+
+
+ // __LL_tbl[k] = 2^48*log2(1.0+k/2^15), for k = 0..255
+static int64_t __LL_tbl[256] = {
+ 0x0000000000000000ull, 0x00000002e2a60a00ull, 0x000000070cb64ec5ull, 0x00000009ef50ce67ull,
+ 0x0000000cd1e588fdull, 0x0000000fb4747e9cull, 0x0000001296fdaf5eull, 0x0000001579811b58ull,
+ 0x000000185bfec2a1ull, 0x0000001b3e76a552ull, 0x0000001e20e8c380ull, 0x0000002103551d43ull,
+ 0x00000023e5bbb2b2ull, 0x00000026c81c83e4ull, 0x00000029aa7790f0ull, 0x0000002c8cccd9edull,
+ 0x0000002f6f1c5ef2ull, 0x0000003251662017ull, 0x0000003533aa1d71ull, 0x0000003815e8571aull,
+ 0x0000003af820cd26ull, 0x0000003dda537faeull, 0x00000040bc806ec8ull, 0x000000439ea79a8cull,
+ 0x0000004680c90310ull, 0x0000004962e4a86cull, 0x0000004c44fa8ab6ull, 0x0000004f270aaa06ull,
+ 0x0000005209150672ull, 0x00000054eb19a013ull, 0x00000057cd1876fdull, 0x0000005aaf118b4aull,
+ 0x0000005d9104dd0full, 0x0000006072f26c64ull, 0x0000006354da3960ull, 0x0000006636bc441aull,
+ 0x0000006918988ca8ull, 0x0000006bfa6f1322ull, 0x0000006edc3fd79full, 0x00000071be0ada35ull,
+ 0x000000749fd01afdull, 0x00000077818f9a0cull, 0x0000007a6349577aull, 0x0000007d44fd535eull,
+ 0x0000008026ab8dceull, 0x00000083085406e3ull, 0x00000085e9f6beb2ull, 0x00000088cb93b552ull,
+ 0x0000008bad2aeadcull, 0x0000008e8ebc5f65ull, 0x0000009170481305ull, 0x0000009451ce05d3ull,
+ 0x00000097334e37e5ull, 0x0000009a14c8a953ull, 0x0000009cf63d5a33ull, 0x0000009fd7ac4a9dull,
+ 0x000000a2b07f3458ull, 0x000000a59a78ea6aull, 0x000000a87bd699fbull, 0x000000ab5d2e8970ull,
+ 0x000000ae3e80b8e3ull, 0x000000b11fcd2869ull, 0x000000b40113d818ull, 0x000000b6e254c80aull,
+ 0x000000b9c38ff853ull, 0x000000bca4c5690cull, 0x000000bf85f51a4aull, 0x000000c2671f0c26ull,
+ 0x000000c548433eb6ull, 0x000000c82961b211ull, 0x000000cb0a7a664dull, 0x000000cdeb8d5b82ull,
+ 0x000000d0cc9a91c8ull, 0x000000d3ada20933ull, 0x000000d68ea3c1ddull, 0x000000d96f9fbbdbull,
+ 0x000000dc5095f744ull, 0x000000df31867430ull, 0x000000e2127132b5ull, 0x000000e4f35632eaull,
+ 0x000000e7d43574e6ull, 0x000000eab50ef8c1ull, 0x000000ed95e2be90ull, 0x000000f076b0c66cull,
+ 0x000000f35779106aull, 0x000000f6383b9ca2ull, 0x000000f918f86b2aull, 0x000000fbf9af7c1aull,
+ 0x000000feda60cf88ull, 0x00000101bb0c658cull, 0x000001049bb23e3cull, 0x000001077c5259afull,
+ 0x0000010a5cecb7fcull, 0x0000010d3d81593aull, 0x000001101e103d7full, 0x00000112fe9964e4ull,
+ 0x00000115df1ccf7eull, 0x00000118bf9a7d64ull, 0x0000011ba0126eadull, 0x0000011e8084a371ull,
+ 0x0000012160f11bc6ull, 0x000001244157d7c3ull, 0x0000012721b8d77full, 0x0000012a02141b10ull,
+ 0x0000012ce269a28eull, 0x0000012fc2b96e0full, 0x00000132a3037daaull, 0x000001358347d177ull,
+ 0x000001386386698cull, 0x0000013b43bf45ffull, 0x0000013e23f266e9ull, 0x00000141041fcc5eull,
+ 0x00000143e4477678ull, 0x00000146c469654bull, 0x00000149a48598f0ull, 0x0000014c849c117cull,
+ 0x0000014f64accf08ull, 0x0000015244b7d1a9ull, 0x0000015524bd1976ull, 0x0000015804bca687ull,
+ 0x0000015ae4b678f2ull, 0x0000015dc4aa90ceull, 0x00000160a498ee31ull, 0x0000016384819134ull,
+ 0x00000166646479ecull, 0x000001694441a870ull, 0x0000016c24191cd7ull, 0x0000016df6ca19bdull,
+ 0x00000171e3b6d7aaull, 0x00000174c37d1e44ull, 0x00000177a33dab1cull, 0x0000017a82f87e49ull,
+ 0x0000017d62ad97e2ull, 0x00000180425cf7feull, 0x00000182b07f3458ull, 0x0000018601aa8c19ull,
+ 0x00000188e148c046ull, 0x0000018bc0e13b52ull, 0x0000018ea073fd52ull, 0x000001918001065dull,
+ 0x000001945f88568bull, 0x000001973f09edf2ull, 0x0000019a1e85ccaaull, 0x0000019cfdfbf2c8ull,
+ 0x0000019fdd6c6063ull, 0x000001a2bcd71593ull, 0x000001a59c3c126eull, 0x000001a87b9b570bull,
+ 0x000001ab5af4e380ull, 0x000001ae3a48b7e5ull, 0x000001b11996d450ull, 0x000001b3f8df38d9ull,
+ 0x000001b6d821e595ull, 0x000001b9b75eda9bull, 0x000001bc96961803ull, 0x000001bf75c79de3ull,
+ 0x000001c254f36c51ull, 0x000001c534198365ull, 0x000001c81339e336ull, 0x000001caf2548bd9ull,
+ 0x000001cdd1697d67ull, 0x000001d0b078b7f5ull, 0x000001d38f823b9aull, 0x000001d66e86086dull,
+ 0x000001d94d841e86ull, 0x000001dc2c7c7df9ull, 0x000001df0b6f26dfull, 0x000001e1ea5c194eull,
+ 0x000001e4c943555dull, 0x000001e7a824db23ull, 0x000001ea8700aab5ull, 0x000001ed65d6c42bull,
+ 0x000001f044a7279dull, 0x000001f32371d51full, 0x000001f60236cccaull, 0x000001f8e0f60eb3ull,
+ 0x000001fbbfaf9af3ull, 0x000001fe9e63719eull, 0x000002017d1192ccull, 0x000002045bb9fe94ull,
+ 0x000002073a5cb50dull, 0x00000209c06e6212ull, 0x0000020cf791026aull, 0x0000020fd622997cull,
+ 0x00000212b07f3458ull, 0x000002159334a8d8ull, 0x0000021871b52150ull, 0x0000021b502fe517ull,
+ 0x0000021d6a73a78full, 0x000002210d144eeeull, 0x00000223eb7df52cull, 0x00000226c9e1e713ull,
+ 0x00000229a84024bbull, 0x0000022c23679b4eull, 0x0000022f64eb83a8ull, 0x000002324338a51bull,
+ 0x00000235218012a9ull, 0x00000237ffc1cc69ull, 0x0000023a2c3b0ea4ull, 0x0000023d13ee805bull,
+ 0x0000024035e9221full, 0x00000243788faf25ull, 0x0000024656b4e735ull, 0x00000247ed646bfeull,
+ 0x0000024c12ee3d98ull, 0x0000024ef1025c1aull, 0x00000251cf10c799ull, 0x0000025492644d65ull,
+ 0x000002578b1c85eeull, 0x0000025a6919d8f0ull, 0x0000025d13ee805bull, 0x0000026025036716ull,
+ 0x0000026296453882ull, 0x00000265e0d62b53ull, 0x00000268beb701f3ull, 0x0000026b9c92265eull,
+ 0x0000026d32f798a9ull, 0x00000271583758ebull, 0x000002743601673bull, 0x0000027713c5c3b0ull,
+ 0x00000279f1846e5full, 0x0000027ccf3d6761ull, 0x0000027e6580aecbull, 0x000002828a9e44b3ull,
+ 0x0000028568462932ull, 0x00000287bdbf5255ull, 0x0000028b2384de4aull, 0x0000028d13ee805bull,
+ 0x0000029035e9221full, 0x0000029296453882ull, 0x0000029699bdfb61ull, 0x0000029902a37aabull,
+ 0x0000029c54b864c9ull, 0x0000029deabd1083ull, 0x000002a20f9c0bb5ull, 0x000002a4c7605d61ull,
+ 0x000002a7bdbf5255ull, 0x000002a96056dafcull, 0x000002ac3daf14efull, 0x000002af1b019ecaull,
+ 0x000002b296453882ull, 0x000002b5d022d80full, 0x000002b8fa471cb3ull, 0x000002ba9012e713ull,
+ 0x000002bd6d4901ccull, 0x000002c04a796cf6ull, 0x000002c327a428a6ull, 0x000002c61a5e8f4cull,
+ 0x000002c8e1e891f6ull, 0x000002cbbf023fc2ull, 0x000002ce9c163e6eull, 0x000002d179248e13ull,
+ 0x000002d4562d2ec6ull, 0x000002d73330209dull, 0x000002da102d63b0ull, 0x000002dced24f814ull,
+};
+
+
+
+
+#endif
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index a1ef53c04415..5b47736d27d9 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -20,7 +20,7 @@
#include <linux/crush/crush.h>
#include <linux/crush/hash.h>
-#include <linux/crush/mapper.h>
+#include "crush_ln_table.h"
/*
* Implement the core CRUSH mapping algorithm.
@@ -238,6 +238,102 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
return bucket->h.items[high];
}
+// compute 2^44*log2(input+1)
+uint64_t crush_ln(unsigned xin)
+{
+ unsigned x=xin, x1;
+ int iexpon, index1, index2;
+ uint64_t RH, LH, LL, xl64, result;
+
+ x++;
+
+ // normalize input
+ iexpon = 15;
+ while(!(x&0x18000)) { x<<=1; iexpon--; }
+
+ index1 = (x>>8)<<1;
+ // RH ~ 2^56/index1
+ RH = __RH_LH_tbl[index1 - 256];
+ // LH ~ 2^48 * log2(index1/256)
+ LH = __RH_LH_tbl[index1 + 1 - 256];
+
+ // RH*x ~ 2^48 * (2^15 + xf), xf<2^8
+ xl64 = (int64_t)x * RH;
+ xl64 >>= 48;
+ x1 = xl64;
+
+ result = iexpon;
+ result <<= (12 + 32);
+
+ index2 = x1 & 0xff;
+ // LL ~ 2^48*log2(1.0+index2/2^15)
+ LL = __LL_tbl[index2];
+
+ LH = LH + LL;
+
+ LH >>= (48-12 - 32);
+ result += LH;
+
+ return result;
+}
+
+
+/*
+ * straw2
+ *
+ * for reference, see:
+ *
+ * http://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables
+ *
+ */
+
+static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
+ int x, int r)
+{
+ unsigned i, high = 0;
+ unsigned u;
+ unsigned w;
+ __s64 ln, draw, high_draw = 0;
+
+ for (i = 0; i < bucket->h.size; i++) {
+ w = bucket->item_weights[i];
+ if (w) {
+ u = crush_hash32_3(bucket->h.hash, x,
+ bucket->h.items[i], r);
+ u &= 0xffff;
+
+ /*
+ * for some reason slightly less than 0x10000 produces
+ * a slightly more accurate distribution... probably a
+ * rounding effect.
+ *
+ * the natural log lookup table maps [0,0xffff]
+ * (corresponding to real numbers [1/0x10000, 1]) to
+ * [0, 0xffffffffffff] (corresponding to real numbers
+ * [-11.090355,0]).
+ */
+ ln = crush_ln(u) - 0x1000000000000ll;
+
+ /*
+ * divide by 16.16 fixed-point weight. note
+ * that the ln value is negative, so a larger
+ * weight means a larger (less negative) value
+ * for draw.
+ */
+ draw = div64_s64(ln, w);
+ } else {
+ draw = S64_MIN;
+ }
+
+ if (i == 0 || draw > high_draw) {
+ high = i;
+ high_draw = draw;
+ }
+ }
+ return bucket->h.items[high];
+}
+
+
static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
{
dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
@@ -255,12 +351,16 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
case CRUSH_BUCKET_STRAW:
return bucket_straw_choose((struct crush_bucket_straw *)in,
x, r);
+ case CRUSH_BUCKET_STRAW2:
+ return bucket_straw2_choose((struct crush_bucket_straw2 *)in,
+ x, r);
default:
dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
return in->items[0];
}
}
+
/*
* true if device is marked "out" (failed, fully offloaded)
* of the cluster
@@ -290,6 +390,7 @@ static int is_out(const struct crush_map *map,
* @type: the type of item to choose
* @out: pointer to output vector
* @outpos: our position in that vector
+ * @out_size: size of the out vector
* @tries: number of attempts to make
* @recurse_tries: number of attempts to have recursive chooseleaf make
* @local_retries: localized retries
@@ -304,6 +405,7 @@ static int crush_choose_firstn(const struct crush_map *map,
const __u32 *weight, int weight_max,
int x, int numrep, int type,
int *out, int outpos,
+ int out_size,
unsigned int tries,
unsigned int recurse_tries,
unsigned int local_retries,
@@ -322,6 +424,7 @@ static int crush_choose_firstn(const struct crush_map *map,
int item = 0;
int itemtype;
int collide, reject;
+ int count = out_size;
dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
recurse_to_leaf ? "_LEAF" : "",
@@ -329,7 +432,7 @@ static int crush_choose_firstn(const struct crush_map *map,
tries, recurse_tries, local_retries, local_fallback_retries,
parent_r);
- for (rep = outpos; rep < numrep; rep++) {
+ for (rep = outpos; rep < numrep && count > 0 ; rep++) {
/* keep trying until we get a non-out, non-colliding item */
ftotal = 0;
skip_rep = 0;
@@ -403,7 +506,7 @@ static int crush_choose_firstn(const struct crush_map *map,
map->buckets[-1-item],
weight, weight_max,
x, outpos+1, 0,
- out2, outpos,
+ out2, outpos, count,
recurse_tries, 0,
local_retries,
local_fallback_retries,
@@ -463,6 +566,7 @@ reject:
dprintk("CHOOSE got %d\n", item);
out[outpos] = item;
outpos++;
+ count--;
}
dprintk("CHOOSE returns %d\n", outpos);
@@ -654,6 +758,7 @@ int crush_do_rule(const struct crush_map *map,
__u32 step;
int i, j;
int numrep;
+ int out_size;
/*
* the original choose_total_tries value was off by one (it
* counted "retries" and not "tries"). add one.
@@ -761,6 +866,7 @@ int crush_do_rule(const struct crush_map *map,
x, numrep,
curstep->arg2,
o+osize, j,
+ result_max-osize,
choose_tries,
recurse_tries,
choose_local_retries,
@@ -770,11 +876,13 @@ int crush_do_rule(const struct crush_map *map,
c+osize,
0);
} else {
+ out_size = ((numrep < (result_max-osize)) ?
+ numrep : (result_max-osize));
crush_choose_indep(
map,
map->buckets[-1-w[i]],
weight, weight_max,
- x, numrep, numrep,
+ x, out_size, numrep,
curstep->arg2,
o+osize, j,
choose_tries,
@@ -783,7 +891,7 @@ int crush_do_rule(const struct crush_map *map,
recurse_to_leaf,
c+osize,
0);
- osize += numrep;
+ osize += out_size;
}
}
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 14d9995097cc..593dc2eabcc8 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -22,6 +22,7 @@
* .../monmap - current monmap
* .../osdc - active osd requests
* .../monc - mon client state
+ * .../client_options - libceph-only (i.e. not rbd or cephfs) options
* .../dentry_lru - dump contents of dentry lru
* .../caps - expose cap (reservation) stats
* .../bdi - symlink to ../../bdi/something
@@ -177,10 +178,24 @@ static int osdc_show(struct seq_file *s, void *pp)
return 0;
}
+static int client_options_show(struct seq_file *s, void *p)
+{
+ struct ceph_client *client = s->private;
+ int ret;
+
+ ret = ceph_print_client_options(s, client);
+ if (ret)
+ return ret;
+
+ seq_putc(s, '\n');
+ return 0;
+}
+
CEPH_DEFINE_SHOW_FUNC(monmap_show)
CEPH_DEFINE_SHOW_FUNC(osdmap_show)
CEPH_DEFINE_SHOW_FUNC(monc_show)
CEPH_DEFINE_SHOW_FUNC(osdc_show)
+CEPH_DEFINE_SHOW_FUNC(client_options_show)
int ceph_debugfs_init(void)
{
@@ -242,6 +257,14 @@ int ceph_debugfs_client_init(struct ceph_client *client)
if (!client->debugfs_osdmap)
goto out;
+ client->debugfs_options = debugfs_create_file("client_options",
+ 0600,
+ client->debugfs_dir,
+ client,
+ &client_options_show_fops);
+ if (!client->debugfs_options)
+ goto out;
+
return 0;
out:
@@ -252,6 +275,7 @@ out:
void ceph_debugfs_client_cleanup(struct ceph_client *client)
{
dout("ceph_debugfs_client_cleanup %p\n", client);
+ debugfs_remove(client->debugfs_options);
debugfs_remove(client->debugfs_osdmap);
debugfs_remove(client->debugfs_monmap);
debugfs_remove(client->osdc.debugfs_file);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index a9f4ae45b7fb..967080a9f043 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -505,8 +505,6 @@ static int ceph_tcp_connect(struct ceph_connection *con)
pr_err("connect %s error %d\n",
ceph_pr_addr(&con->peer_addr.in_addr), ret);
sock_release(sock);
- con->error_msg = "connect error";
-
return ret;
}
@@ -2145,12 +2143,10 @@ static int process_connect(struct ceph_connection *con)
* to WAIT. This shouldn't happen if we are the
* client.
*/
- pr_err("process_connect got WAIT as client\n");
con->error_msg = "protocol error, got WAIT as client";
return -1;
default:
- pr_err("connect protocol error, will retry\n");
con->error_msg = "protocol error, garbage tag during connect";
return -1;
}
@@ -2282,8 +2278,7 @@ static int read_partial_message(struct ceph_connection *con)
crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc));
if (cpu_to_le32(crc) != con->in_hdr.crc) {
- pr_err("read_partial_message bad hdr "
- " crc %u != expected %u\n",
+ pr_err("read_partial_message bad hdr crc %u != expected %u\n",
crc, con->in_hdr.crc);
return -EBADMSG;
}
@@ -2313,7 +2308,7 @@ static int read_partial_message(struct ceph_connection *con)
pr_err("read_partial_message bad seq %lld expected %lld\n",
seq, con->in_seq + 1);
con->error_msg = "bad message sequence # for incoming message";
- return -EBADMSG;
+ return -EBADE;
}
/* allocate message? */
@@ -2660,6 +2655,8 @@ more:
switch (ret) {
case -EBADMSG:
con->error_msg = "bad crc";
+ /* fall through */
+ case -EBADE:
ret = -EIO;
break;
case -EIO:
@@ -2838,7 +2835,8 @@ static void con_work(struct work_struct *work)
if (ret < 0) {
if (ret == -EAGAIN)
continue;
- con->error_msg = "socket error on read";
+ if (!con->error_msg)
+ con->error_msg = "socket error on read";
fault = true;
break;
}
@@ -2847,7 +2845,8 @@ static void con_work(struct work_struct *work)
if (ret < 0) {
if (ret == -EAGAIN)
continue;
- con->error_msg = "socket error on write";
+ if (!con->error_msg)
+ con->error_msg = "socket error on write";
fault = true;
}
@@ -2869,11 +2868,13 @@ static void con_work(struct work_struct *work)
*/
static void con_fault(struct ceph_connection *con)
{
- pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
- ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
dout("fault %p state %lu to peer %s\n",
con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
+ pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
+ ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
+ con->error_msg = NULL;
+
WARN_ON(con->state != CON_STATE_CONNECTING &&
con->state != CON_STATE_NEGOTIATING &&
con->state != CON_STATE_OPEN);
@@ -3295,8 +3296,8 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
*/
if (*skip)
return 0;
- con->error_msg = "error allocating memory for incoming message";
+ con->error_msg = "error allocating memory for incoming message";
return -ENOMEM;
}
memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index b8c3fde5b04f..15796696d64e 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -122,6 +122,22 @@ bad:
return -EINVAL;
}
+static int crush_decode_straw2_bucket(void **p, void *end,
+ struct crush_bucket_straw2 *b)
+{
+ int j;
+ dout("crush_decode_straw2_bucket %p to %p\n", *p, end);
+ b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
+ if (b->item_weights == NULL)
+ return -ENOMEM;
+ ceph_decode_need(p, end, b->h.size * sizeof(u32), bad);
+ for (j = 0; j < b->h.size; j++)
+ b->item_weights[j] = ceph_decode_32(p);
+ return 0;
+bad:
+ return -EINVAL;
+}
+
static int skip_name_map(void **p, void *end)
{
int len;
@@ -204,6 +220,9 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
case CRUSH_BUCKET_STRAW:
size = sizeof(struct crush_bucket_straw);
break;
+ case CRUSH_BUCKET_STRAW2:
+ size = sizeof(struct crush_bucket_straw2);
+ break;
default:
err = -EINVAL;
goto bad;
@@ -261,6 +280,12 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
if (err < 0)
goto bad;
break;
+ case CRUSH_BUCKET_STRAW2:
+ err = crush_decode_straw2_bucket(p, end,
+ (struct crush_bucket_straw2 *)b);
+ if (err < 0)
+ goto bad;
+ break;
}
}