summaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/addr.c2
-rw-r--r--fs/ceph/caps.c10
-rw-r--r--fs/ceph/dir.c11
-rw-r--r--fs/ceph/export.c4
-rw-r--r--fs/ceph/file.c47
-rw-r--r--fs/ceph/inode.c18
-rw-r--r--fs/ceph/ioctl.c6
-rw-r--r--fs/ceph/locks.c29
-rw-r--r--fs/ceph/mds_client.c19
-rw-r--r--fs/ceph/snap.c2
-rw-r--r--fs/ceph/xattr.c6
11 files changed, 91 insertions, 63 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 33da49dc3cc6..5a3953db8118 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -453,7 +453,7 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
int err;
struct inode *inode = page->mapping->host;
BUG_ON(!inode);
- igrab(inode);
+ ihold(inode);
err = writepage_nounlock(page, wbc);
unlock_page(page);
iput(inode);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 1f72b00447c4..f605753c8fe9 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2940,14 +2940,12 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
while (!list_empty(&mdsc->cap_dirty)) {
ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info,
i_dirty_item);
- inode = igrab(&ci->vfs_inode);
+ inode = &ci->vfs_inode;
+ ihold(inode);
dout("flush_dirty_caps %p\n", inode);
spin_unlock(&mdsc->cap_dirty_lock);
- if (inode) {
- ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH,
- NULL);
- iput(inode);
- }
+ ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, NULL);
+ iput(inode);
spin_lock(&mdsc->cap_dirty_lock);
}
spin_unlock(&mdsc->cap_dirty_lock);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 33729e822bb9..ef8f08c343e8 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -308,7 +308,8 @@ more:
req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
- req->r_inode = igrab(inode);
+ req->r_inode = inode;
+ ihold(inode);
req->r_dentry = dget(filp->f_dentry);
/* hints to request -> mds selection code */
req->r_direct_mode = USE_AUTH_MDS;
@@ -787,10 +788,12 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
err = ceph_mdsc_do_request(mdsc, dir, req);
- if (err)
+ if (err) {
d_drop(dentry);
- else if (!req->r_reply_info.head->is_dentry)
- d_instantiate(dentry, igrab(old_dentry->d_inode));
+ } else if (!req->r_reply_info.head->is_dentry) {
+ ihold(old_dentry->d_inode);
+ d_instantiate(dentry, old_dentry->d_inode);
+ }
ceph_mdsc_put_request(req);
return err;
}
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index a610d3d67488..f67b687550de 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -109,7 +109,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
err = ceph_mdsc_do_request(mdsc, NULL, req);
inode = req->r_target_inode;
if (inode)
- igrab(inode);
+ ihold(inode);
ceph_mdsc_put_request(req);
if (!inode)
return ERR_PTR(-ESTALE);
@@ -167,7 +167,7 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb,
err = ceph_mdsc_do_request(mdsc, NULL, req);
inode = req->r_target_inode;
if (inode)
- igrab(inode);
+ ihold(inode);
ceph_mdsc_put_request(req);
if (!inode)
return ERR_PTR(err ? err : -ESTALE);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 203252d88d9f..4698a5c553dc 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -191,7 +191,8 @@ int ceph_open(struct inode *inode, struct file *file)
err = PTR_ERR(req);
goto out;
}
- req->r_inode = igrab(inode);
+ req->r_inode = inode;
+ ihold(inode);
req->r_num_caps = 1;
err = ceph_mdsc_do_request(mdsc, parent_inode, req);
if (!err)
@@ -282,14 +283,13 @@ int ceph_release(struct inode *inode, struct file *file)
static int striped_read(struct inode *inode,
u64 off, u64 len,
struct page **pages, int num_pages,
- int *checkeof, bool align_to_pages,
+ int *checkeof, bool o_direct,
unsigned long buf_align)
{
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
u64 pos, this_len;
int io_align, page_align;
- int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */
int left, pages_left;
int read;
struct page **page_pos;
@@ -307,7 +307,7 @@ static int striped_read(struct inode *inode,
io_align = off & ~PAGE_MASK;
more:
- if (align_to_pages)
+ if (o_direct)
page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
else
page_align = pos & ~PAGE_MASK;
@@ -317,20 +317,19 @@ more:
ci->i_truncate_seq,
ci->i_truncate_size,
page_pos, pages_left, page_align);
- hit_stripe = this_len < left;
- was_short = ret >= 0 && ret < this_len;
if (ret == -ENOENT)
ret = 0;
+ hit_stripe = this_len < left;
+ was_short = ret >= 0 && ret < this_len;
dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read,
ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
if (ret > 0) {
- int didpages =
- ((pos & ~PAGE_CACHE_MASK) + ret) >> PAGE_CACHE_SHIFT;
+ int didpages = (page_align + ret) >> PAGE_CACHE_SHIFT;
if (read < pos - off) {
dout(" zero gap %llu to %llu\n", off + read, pos);
- ceph_zero_page_vector_range(page_off + read,
+ ceph_zero_page_vector_range(page_align + read,
pos - off - read, pages);
}
pos += ret;
@@ -345,20 +344,22 @@ more:
}
if (was_short) {
- /* was original extent fully inside i_size? */
- if (pos + left <= inode->i_size) {
- dout("zero tail\n");
- ceph_zero_page_vector_range(page_off + read, len - read,
+ /* did we bounce off eof? */
+ if (pos + left > inode->i_size)
+ *checkeof = 1;
+
+ /* zero trailing bytes (inside i_size) */
+ if (left > 0 && pos < inode->i_size) {
+ if (pos + left > inode->i_size)
+ left = inode->i_size - pos;
+
+ dout("zero tail %d\n", left);
+ ceph_zero_page_vector_range(page_align + read, left,
pages);
- read = len;
- goto out;
+ read += left;
}
-
- /* check i_size */
- *checkeof = 1;
}
-out:
if (ret >= 0)
ret = read;
dout("striped_read returns %d\n", ret);
@@ -475,9 +476,6 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
else
pos = *offset;
- io_align = pos & ~PAGE_MASK;
- buf_align = (unsigned long)data & ~PAGE_MASK;
-
ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
if (ret < 0)
return ret;
@@ -501,6 +499,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
* boundary. this isn't atomic, unfortunately. :(
*/
more:
+ io_align = pos & ~PAGE_MASK;
+ buf_align = (unsigned long)data & ~PAGE_MASK;
len = left;
if (file->f_flags & O_DIRECT) {
/* write from beginning of first page, regardless of
@@ -590,6 +590,7 @@ out:
pos += len;
written += len;
left -= len;
+ data += written;
if (left)
goto more;
@@ -658,7 +659,7 @@ out:
/* hit EOF or hole? */
if (statret == 0 && *ppos < inode->i_size) {
- dout("aio_read sync_read hit hole, reading more\n");
+ dout("aio_read sync_read hit hole, ppos %lld < size %lld, reading more\n", *ppos, inode->i_size);
read += ret;
base += ret;
len -= ret;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 70b6a4839c38..d8858e96ab18 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1101,10 +1101,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
goto done;
}
req->r_dentry = dn; /* may have spliced */
- igrab(in);
+ ihold(in);
} else if (ceph_ino(in) == vino.ino &&
ceph_snap(in) == vino.snap) {
- igrab(in);
+ ihold(in);
} else {
dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
dn, in, ceph_ino(in), ceph_snap(in),
@@ -1144,7 +1144,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
goto done;
}
req->r_dentry = dn; /* may have spliced */
- igrab(in);
+ ihold(in);
rinfo->head->is_dentry = 1; /* fool notrace handlers */
}
@@ -1328,7 +1328,7 @@ void ceph_queue_writeback(struct inode *inode)
if (queue_work(ceph_inode_to_client(inode)->wb_wq,
&ceph_inode(inode)->i_wb_work)) {
dout("ceph_queue_writeback %p\n", inode);
- igrab(inode);
+ ihold(inode);
} else {
dout("ceph_queue_writeback %p failed\n", inode);
}
@@ -1353,7 +1353,7 @@ void ceph_queue_invalidate(struct inode *inode)
if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq,
&ceph_inode(inode)->i_pg_inv_work)) {
dout("ceph_queue_invalidate %p\n", inode);
- igrab(inode);
+ ihold(inode);
} else {
dout("ceph_queue_invalidate %p failed\n", inode);
}
@@ -1477,7 +1477,7 @@ void ceph_queue_vmtruncate(struct inode *inode)
if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
&ci->i_vmtruncate_work)) {
dout("ceph_queue_vmtruncate %p\n", inode);
- igrab(inode);
+ ihold(inode);
} else {
dout("ceph_queue_vmtruncate %p failed, pending=%d\n",
inode, ci->i_truncate_pending);
@@ -1738,7 +1738,8 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
__mark_inode_dirty(inode, inode_dirty_flags);
if (mask) {
- req->r_inode = igrab(inode);
+ req->r_inode = inode;
+ ihold(inode);
req->r_inode_drop = release;
req->r_args.setattr.mask = cpu_to_le32(mask);
req->r_num_caps = 1;
@@ -1779,7 +1780,8 @@ int ceph_do_getattr(struct inode *inode, int mask)
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
- req->r_inode = igrab(inode);
+ req->r_inode = inode;
+ ihold(inode);
req->r_num_caps = 1;
req->r_args.getattr.mask = cpu_to_le32(mask);
err = ceph_mdsc_do_request(mdsc, NULL, req);
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 8888c9ba68db..ef0b5f48e13a 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -73,7 +73,8 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
- req->r_inode = igrab(inode);
+ req->r_inode = inode;
+ ihold(inode);
req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL;
req->r_args.setlayout.layout.fl_stripe_unit =
@@ -135,7 +136,8 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
if (IS_ERR(req))
return PTR_ERR(req);
- req->r_inode = igrab(inode);
+ req->r_inode = inode;
+ ihold(inode);
req->r_args.setlayout.layout.fl_stripe_unit =
cpu_to_le32(l.stripe_unit);
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 476b329867d4..80576d05d687 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -23,7 +23,8 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
- req->r_inode = igrab(inode);
+ req->r_inode = inode;
+ ihold(inode);
/* mds requires start and length rather than start and end */
if (LLONG_MAX == fl->fl_end)
@@ -32,11 +33,10 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
length = fl->fl_end - fl->fl_start + 1;
dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
- "length: %llu, wait: %d, type`: %d", (int)lock_type,
+ "length: %llu, wait: %d, type: %d", (int)lock_type,
(int)operation, (u64)fl->fl_pid, fl->fl_start,
length, wait, fl->fl_type);
-
req->r_args.filelock_change.rule = lock_type;
req->r_args.filelock_change.type = cmd;
req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
@@ -70,7 +70,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
}
ceph_mdsc_put_request(req);
dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
- "length: %llu, wait: %d, type`: %d, err code %d", (int)lock_type,
+ "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type,
(int)operation, (u64)fl->fl_pid, fl->fl_start,
length, wait, fl->fl_type, err);
return err;
@@ -109,16 +109,20 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
dout("mds locked, locking locally");
err = posix_lock_file(file, fl, NULL);
if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
- /* undo! This should only happen if the kernel detects
- * local deadlock. */
+ /* undo! This should only happen if
+ * the kernel detects local
+ * deadlock. */
ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
CEPH_LOCK_UNLOCK, 0, fl);
- dout("got %d on posix_lock_file, undid lock", err);
+ dout("got %d on posix_lock_file, undid lock",
+ err);
}
}
- } else {
- dout("mds returned error code %d", err);
+ } else if (err == -ERESTARTSYS) {
+ dout("undoing lock\n");
+ ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
+ CEPH_LOCK_UNLOCK, 0, fl);
}
return err;
}
@@ -155,8 +159,11 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
file, CEPH_LOCK_UNLOCK, 0, fl);
dout("got %d on flock_lock_file_wait, undid lock", err);
}
- } else {
- dout("mds error code %d", err);
+ } else if (err == -ERESTARTSYS) {
+ dout("undoing lock\n");
+ ceph_lock_message(CEPH_LOCK_FLOCK,
+ CEPH_MDS_OP_SETFILELOCK,
+ file, CEPH_LOCK_UNLOCK, 0, fl);
}
return err;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 79743d146be6..0c1d91756528 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1438,12 +1438,15 @@ char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
struct dentry *temp;
char *path;
int len, pos;
+ unsigned seq;
if (dentry == NULL)
return ERR_PTR(-EINVAL);
retry:
len = 0;
+ seq = read_seqbegin(&rename_lock);
+ rcu_read_lock();
for (temp = dentry; !IS_ROOT(temp);) {
struct inode *inode = temp->d_inode;
if (inode && ceph_snap(inode) == CEPH_SNAPDIR)
@@ -1455,10 +1458,12 @@ retry:
len += 1 + temp->d_name.len;
temp = temp->d_parent;
if (temp == NULL) {
+ rcu_read_unlock();
pr_err("build_path corrupt dentry %p\n", dentry);
return ERR_PTR(-EINVAL);
}
}
+ rcu_read_unlock();
if (len)
len--; /* no leading '/' */
@@ -1467,9 +1472,12 @@ retry:
return ERR_PTR(-ENOMEM);
pos = len;
path[pos] = 0; /* trailing null */
+ rcu_read_lock();
for (temp = dentry; !IS_ROOT(temp) && pos != 0; ) {
- struct inode *inode = temp->d_inode;
+ struct inode *inode;
+ spin_lock(&temp->d_lock);
+ inode = temp->d_inode;
if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
dout("build_path path+%d: %p SNAPDIR\n",
pos, temp);
@@ -1478,21 +1486,26 @@ retry:
break;
} else {
pos -= temp->d_name.len;
- if (pos < 0)
+ if (pos < 0) {
+ spin_unlock(&temp->d_lock);
break;
+ }
strncpy(path + pos, temp->d_name.name,
temp->d_name.len);
}
+ spin_unlock(&temp->d_lock);
if (pos)
path[--pos] = '/';
temp = temp->d_parent;
if (temp == NULL) {
+ rcu_read_unlock();
pr_err("build_path corrupt dentry\n");
kfree(path);
return ERR_PTR(-EINVAL);
}
}
- if (pos != 0) {
+ rcu_read_unlock();
+ if (pos != 0 || read_seqretry(&rename_lock, seq)) {
pr_err("build_path did not end path lookup where "
"expected, namelen is %d, pos is %d\n", len, pos);
/* presumably this is only possible if racing with a
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 24067d68a554..54b14de2e729 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -722,7 +722,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
ci = list_first_entry(&mdsc->snap_flush_list,
struct ceph_inode_info, i_snap_flush_item);
inode = &ci->vfs_inode;
- igrab(inode);
+ ihold(inode);
spin_unlock(&mdsc->snap_flush_lock);
spin_lock(&inode->i_lock);
__ceph_flush_snaps(ci, &session, 0);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index f2b628696180..f42d730f1b66 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -665,7 +665,8 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
err = PTR_ERR(req);
goto out;
}
- req->r_inode = igrab(inode);
+ req->r_inode = inode;
+ ihold(inode);
req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
req->r_num_caps = 1;
req->r_args.setxattr.flags = cpu_to_le32(flags);
@@ -795,7 +796,8 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
- req->r_inode = igrab(inode);
+ req->r_inode = inode;
+ ihold(inode);
req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
req->r_num_caps = 1;
req->r_path2 = kstrdup(name, GFP_NOFS);