diff options
Diffstat (limited to 'fs/ocfs2')
-rw-r--r-- | fs/ocfs2/dlmglue.c | 119 | ||||
-rw-r--r-- | fs/ocfs2/dlmglue.h | 1 | ||||
-rw-r--r-- | fs/ocfs2/file.c | 10 | ||||
-rw-r--r-- | fs/ocfs2/file.h | 2 | ||||
-rw-r--r-- | fs/ocfs2/ioctl.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/mmap.c | 44 | ||||
-rw-r--r-- | fs/ocfs2/namei.c | 3 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2_fs.h | 8 |
8 files changed, 121 insertions, 68 deletions
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 97a972efab83..68728de12864 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -788,35 +788,34 @@ static inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres, spin_unlock(&lockres->l_lock); } -static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres, - struct ocfs2_lock_holder *oh) -{ - spin_lock(&lockres->l_lock); - list_del(&oh->oh_list); - spin_unlock(&lockres->l_lock); - - put_pid(oh->oh_owner_pid); -} - -static inline int ocfs2_is_locked_by_me(struct ocfs2_lock_res *lockres) +static struct ocfs2_lock_holder * +ocfs2_pid_holder(struct ocfs2_lock_res *lockres, + struct pid *pid) { struct ocfs2_lock_holder *oh; - struct pid *pid; - /* look in the list of holders for one with the current task as owner */ spin_lock(&lockres->l_lock); - pid = task_pid(current); list_for_each_entry(oh, &lockres->l_holders, oh_list) { if (oh->oh_owner_pid == pid) { spin_unlock(&lockres->l_lock); - return 1; + return oh; } } spin_unlock(&lockres->l_lock); + return NULL; +} - return 0; +static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres, + struct ocfs2_lock_holder *oh) +{ + spin_lock(&lockres->l_lock); + list_del(&oh->oh_list); + spin_unlock(&lockres->l_lock); + + put_pid(oh->oh_owner_pid); } + static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, int level) { @@ -2610,34 +2609,93 @@ void ocfs2_inode_unlock(struct inode *inode, * * return < 0 on error, return == 0 if there's no lock holder on the stack * before this call, return == 1 if this call would be a recursive locking. + * return == -1 if this lock attempt will cause an upgrade which is forbidden. + * + * When taking lock levels into account,we face some different situations. + * + * 1. no lock is held + * In this case, just lock the inode as requested and return 0 + * + * 2. We are holding a lock + * For this situation, things diverges into several cases + * + * wanted holding what to do + * ex ex see 2.1 below + * ex pr see 2.2 below + * pr ex see 2.1 below + * pr pr see 2.1 below + * + * 2.1 lock level that is been held is compatible + * with the wanted level, so no lock action will be tacken. + * + * 2.2 Otherwise, an upgrade is needed, but it is forbidden. + * + * Reason why upgrade within a process is forbidden is that + * lock upgrade may cause dead lock. The following illustrates + * how it happens. + * + * thread on node1 thread on node2 + * ocfs2_inode_lock_tracker(ex=0) + * + * <====== ocfs2_inode_lock_tracker(ex=1) + * + * ocfs2_inode_lock_tracker(ex=1) */ int ocfs2_inode_lock_tracker(struct inode *inode, struct buffer_head **ret_bh, int ex, struct ocfs2_lock_holder *oh) { - int status; - int arg_flags = 0, has_locked; + int status = 0; struct ocfs2_lock_res *lockres; + struct ocfs2_lock_holder *tmp_oh; + struct pid *pid = task_pid(current); + lockres = &OCFS2_I(inode)->ip_inode_lockres; - has_locked = ocfs2_is_locked_by_me(lockres); - /* Just get buffer head if the cluster lock has been taken */ - if (has_locked) - arg_flags = OCFS2_META_LOCK_GETBH; + tmp_oh = ocfs2_pid_holder(lockres, pid); - if (likely(!has_locked || ret_bh)) { - status = ocfs2_inode_lock_full(inode, ret_bh, ex, arg_flags); + if (!tmp_oh) { + /* + * This corresponds to the case 1. + * We haven't got any lock before. + */ + status = ocfs2_inode_lock_full(inode, ret_bh, ex, 0); if (status < 0) { if (status != -ENOENT) mlog_errno(status); return status; } - } - if (!has_locked) + + oh->oh_ex = ex; ocfs2_add_holder(lockres, oh); + return 0; + } - return has_locked; + if (unlikely(ex && !tmp_oh->oh_ex)) { + /* + * case 2.2 upgrade may cause dead lock, forbid it. + */ + mlog(ML_ERROR, "Recursive locking is not permitted to " + "upgrade to EX level from PR level.\n"); + dump_stack(); + return -EINVAL; + } + + /* + * case 2.1 OCFS2_META_LOCK_GETBH flag make ocfs2_inode_lock_full. + * ignore the lock level and just update it. + */ + if (ret_bh) { + status = ocfs2_inode_lock_full(inode, ret_bh, ex, + OCFS2_META_LOCK_GETBH); + if (status < 0) { + if (status != -ENOENT) + mlog_errno(status); + return status; + } + } + return tmp_oh ? 1 : 0; } void ocfs2_inode_unlock_tracker(struct inode *inode, @@ -2649,12 +2707,13 @@ void ocfs2_inode_unlock_tracker(struct inode *inode, lockres = &OCFS2_I(inode)->ip_inode_lockres; /* had_lock means that the currect process already takes the cluster - * lock previously. If had_lock is 1, we have nothing to do here, and - * it will get unlocked where we got the lock. + * lock previously. + * If had_lock is 1, we have nothing to do here. + * If had_lock is 0, we will release the lock. */ if (!had_lock) { + ocfs2_inode_unlock(inode, oh->oh_ex); ocfs2_remove_holder(lockres, oh); - ocfs2_inode_unlock(inode, ex); } } diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 256e0a9067b8..4ec1c828f6e0 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -96,6 +96,7 @@ struct ocfs2_trim_fs_info { struct ocfs2_lock_holder { struct list_head oh_list; struct pid *oh_owner_pid; + int oh_ex; }; /* ocfs2_inode_lock_full() 'arg_flags' flags */ diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 6ee94bc23f5b..a2a8603d27e0 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -563,8 +563,8 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb, return ret; } -static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, - u32 clusters_to_add, int mark_unwritten) +static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start, + u32 clusters_to_add, int mark_unwritten) { int status = 0; int restart_func = 0; @@ -1035,8 +1035,8 @@ int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh, clusters_to_add -= oi->ip_clusters; if (clusters_to_add) { - ret = __ocfs2_extend_allocation(inode, oi->ip_clusters, - clusters_to_add, 0); + ret = ocfs2_extend_allocation(inode, oi->ip_clusters, + clusters_to_add, 0); if (ret) { mlog_errno(ret); goto out; @@ -1493,7 +1493,7 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode, goto next; } - ret = __ocfs2_extend_allocation(inode, cpos, alloc_size, 1); + ret = ocfs2_extend_allocation(inode, cpos, alloc_size, 1); if (ret) { if (ret != -ENOSPC) mlog_errno(ret); diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 1fdc9839cd93..7eb7f03531f6 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -65,8 +65,6 @@ int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh, u64 new_i_size, u64 zero_to); int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, loff_t zero_to); -int ocfs2_extend_allocation(struct inode *inode, u32 logical_start, - u32 clusters_to_add, int mark_unwritten); int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); int ocfs2_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags); diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index ab30c005cc4b..994726ada857 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -402,7 +402,7 @@ out_err: static void o2ffg_update_histogram(struct ocfs2_info_free_chunk_list *hist, unsigned int chunksize) { - int index; + u32 index; index = __ilog2_u32(chunksize); if (index >= OCFS2_INFO_MAX_HIST) diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index fb9a20e3d608..05220b365fb9 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -44,11 +44,11 @@ #include "ocfs2_trace.h" -static int ocfs2_fault(struct vm_fault *vmf) +static vm_fault_t ocfs2_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; sigset_t oldset; - int ret; + vm_fault_t ret; ocfs2_block_signals(&oldset); ret = filemap_fault(vmf); @@ -59,10 +59,11 @@ static int ocfs2_fault(struct vm_fault *vmf) return ret; } -static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, - struct page *page) +static vm_fault_t __ocfs2_page_mkwrite(struct file *file, + struct buffer_head *di_bh, struct page *page) { - int ret = VM_FAULT_NOPAGE; + int err; + vm_fault_t ret = VM_FAULT_NOPAGE; struct inode *inode = file_inode(file); struct address_space *mapping = inode->i_mapping; loff_t pos = page_offset(page); @@ -105,15 +106,12 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, if (page->index == last_index) len = ((size - 1) & ~PAGE_MASK) + 1; - ret = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP, + err = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP, &locked_page, &fsdata, di_bh, page); - if (ret) { - if (ret != -ENOSPC) - mlog_errno(ret); - if (ret == -ENOMEM) - ret = VM_FAULT_OOM; - else - ret = VM_FAULT_SIGBUS; + if (err) { + if (err != -ENOSPC) + mlog_errno(err); + ret = vmf_error(err); goto out; } @@ -121,20 +119,21 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, ret = VM_FAULT_NOPAGE; goto out; } - ret = ocfs2_write_end_nolock(mapping, pos, len, len, fsdata); - BUG_ON(ret != len); + err = ocfs2_write_end_nolock(mapping, pos, len, len, fsdata); + BUG_ON(err != len); ret = VM_FAULT_LOCKED; out: return ret; } -static int ocfs2_page_mkwrite(struct vm_fault *vmf) +static vm_fault_t ocfs2_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; struct inode *inode = file_inode(vmf->vma->vm_file); struct buffer_head *di_bh = NULL; sigset_t oldset; - int ret; + int err; + vm_fault_t ret; sb_start_pagefault(inode->i_sb); ocfs2_block_signals(&oldset); @@ -144,13 +143,10 @@ static int ocfs2_page_mkwrite(struct vm_fault *vmf) * node. Taking the data lock will also ensure that we don't * attempt page truncation as part of a downconvert. */ - ret = ocfs2_inode_lock(inode, &di_bh, 1); - if (ret < 0) { - mlog_errno(ret); - if (ret == -ENOMEM) - ret = VM_FAULT_OOM; - else - ret = VM_FAULT_SIGBUS; + err = ocfs2_inode_lock(inode, &di_bh, 1); + if (err < 0) { + mlog_errno(err); + ret = vmf_error(err); goto out; } diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 8dd6f703c819..b7ca84bc3df7 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -2332,8 +2332,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, struct buffer_head *orphan_dir_bh, bool dio) { - const int namelen = OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN; - char name[namelen + 1]; + char name[OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN + 1]; struct ocfs2_dinode *orphan_fe; int status = 0; struct ocfs2_dir_lookup_result lookup = { NULL, }; diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 5bb4a89f9045..7071ad0dec90 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -807,11 +807,11 @@ struct ocfs2_dir_block_trailer { * in this block. (unused) */ /*10*/ __u8 db_signature[8]; /* Signature for verification */ __le64 db_reserved2; - __le64 db_free_next; /* Next block in list (unused) */ -/*20*/ __le64 db_blkno; /* Offset on disk, in blocks */ - __le64 db_parent_dinode; /* dinode which owns me, in +/*20*/ __le64 db_free_next; /* Next block in list (unused) */ + __le64 db_blkno; /* Offset on disk, in blocks */ +/*30*/ __le64 db_parent_dinode; /* dinode which owns me, in blocks */ -/*30*/ struct ocfs2_block_check db_check; /* Error checking */ + struct ocfs2_block_check db_check; /* Error checking */ /*40*/ }; |