summaryrefslogtreecommitdiffstats
path: root/fs/nfs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-11 22:01:44 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-11 22:01:44 -0700
commit8e7757d83d07cc77ee2661e9615a2f9f4ce540cd (patch)
treeb6c15efae2c117e7fbbe56ad0aaa2859d1cd35a8 /fs/nfs
parentdd198ce7141aa8dd9ffcc9549de422fb055508de (diff)
parent1bd5d6d08ea7ed0794c8a3908383d6d6fc202cdd (diff)
downloadlinux-stable-8e7757d83d07cc77ee2661e9615a2f9f4ce540cd.tar.gz
linux-stable-8e7757d83d07cc77ee2661e9615a2f9f4ce540cd.tar.bz2
linux-stable-8e7757d83d07cc77ee2661e9615a2f9f4ce540cd.zip
Merge tag 'nfs-for-4.14-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust: "Hightlights include: Stable bugfixes: - Fix mirror allocation in the writeback code to avoid a use after free - Fix the O_DSYNC writes to use the correct byte range - Fix 2 use after free issues in the I/O code Features: - Writeback fixes to split up the inode->i_lock in order to reduce contention - RPC client receive fixes to reduce the amount of time the xprt->transport_lock is held when receiving data from a socket into am XDR buffer. - Ditto fixes to reduce contention between call side users of the rdma rb_lock, and its use in rpcrdma_reply_handler. - Re-arrange rdma stats to reduce false cacheline sharing. - Various rdma cleanups and optimisations. - Refactor the NFSv4.1 exchange id code and clean up the code. - Const-ify all instances of struct rpc_xprt_ops Bugfixes: - Fix the NFSv2 'sec=' mount option. - NFSv4.1: don't use machine credentials for CLOSE when using 'sec=sys' - Fix the NFSv3 GRANT callback when the port changes on the server. - Fix livelock issues with COMMIT - NFSv4: Use correct inode in _nfs4_opendata_to_nfs4_state() when doing and NFSv4.1 open by filehandle" * tag 'nfs-for-4.14-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (69 commits) NFS: Count the bytes of skipped subrequests in nfs_lock_and_join_requests() NFS: Don't hold the group lock when calling nfs_release_request() NFS: Remove pnfs_generic_transfer_commit_list() NFS: nfs_lock_and_join_requests and nfs_scan_commit_list can deadlock NFS: Fix 2 use after free issues in the I/O code NFS: Sync the correct byte range during synchronous writes lockd: Delete an error message for a failed memory allocation in reclaimer() NFS: remove jiffies field from access cache NFS: flush data when locking a file to ensure cache coherence for mmap. SUNRPC: remove some dead code. NFS: don't expect errors from mempool_alloc(). xprtrdma: Use xprt_pin_rqst in rpcrdma_reply_handler xprtrdma: Re-arrange struct rx_stats NFS: Fix NFSv2 security settings NFSv4.1: don't use machine credentials for CLOSE when using 'sec=sys' SUNRPC: ECONNREFUSED should cause a rebind. NFS: Remove unused parameter gfp_flags from nfs_pageio_init() NFSv4: Fix up mirror allocation SUNRPC: Add a separate spinlock to protect the RPC request receive list SUNRPC: Cleanup xs_tcp_read_common() ...
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/callback_proc.c2
-rw-r--r--fs/nfs/delegation.c2
-rw-r--r--fs/nfs/dir.c4
-rw-r--r--fs/nfs/direct.c4
-rw-r--r--fs/nfs/file.c17
-rw-r--r--fs/nfs/inode.c10
-rw-r--r--fs/nfs/internal.h1
-rw-r--r--fs/nfs/nfs4_fs.h11
-rw-r--r--fs/nfs/nfs4proc.c274
-rw-r--r--fs/nfs/pagelist.c170
-rw-r--r--fs/nfs/pnfs.c43
-rw-r--r--fs/nfs/pnfs.h2
-rw-r--r--fs/nfs/pnfs_nfs.c44
-rw-r--r--fs/nfs/read.c2
-rw-r--r--fs/nfs/super.c12
-rw-r--r--fs/nfs/write.c457
16 files changed, 490 insertions, 565 deletions
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 5427cdf04c5a..14358de173fb 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -51,7 +51,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
goto out_iput;
res->size = i_size_read(inode);
res->change_attr = delegation->change_attr;
- if (nfsi->nrequests != 0)
+ if (nfs_have_writebacks(inode))
res->change_attr++;
res->ctime = inode->i_ctime;
res->mtime = inode->i_mtime;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index d7df5e67b0c1..606dd3871f66 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -1089,7 +1089,7 @@ bool nfs4_delegation_flush_on_close(const struct inode *inode)
delegation = rcu_dereference(nfsi->delegation);
if (delegation == NULL || !(delegation->type & FMODE_WRITE))
goto out;
- if (nfsi->nrequests < delegation->pagemod_limit)
+ if (atomic_long_read(&nfsi->nrequests) < delegation->pagemod_limit)
ret = false;
out:
rcu_read_unlock();
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 3522b1249019..5ceaeb1f6fb6 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2260,7 +2260,6 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str
spin_lock(&inode->i_lock);
retry = false;
}
- res->jiffies = cache->jiffies;
res->cred = cache->cred;
res->mask = cache->mask;
list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
@@ -2296,7 +2295,6 @@ static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred,
goto out;
if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
goto out;
- res->jiffies = cache->jiffies;
res->cred = cache->cred;
res->mask = cache->mask;
err = 0;
@@ -2344,7 +2342,6 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
if (cache == NULL)
return;
RB_CLEAR_NODE(&cache->rb_node);
- cache->jiffies = set->jiffies;
cache->cred = get_rpccred(set->cred);
cache->mask = set->mask;
@@ -2432,7 +2429,6 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
cache.mask = NFS_MAY_LOOKUP | NFS_MAY_EXECUTE
| NFS_MAY_WRITE | NFS_MAY_READ;
cache.cred = cred;
- cache.jiffies = jiffies;
status = NFS_PROTO(inode)->access(inode, &cache);
if (status != 0) {
if (status == -ESTALE) {
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 6fb9fad2d1e6..d2972d537469 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -616,13 +616,13 @@ nfs_direct_write_scan_commit_list(struct inode *inode,
struct list_head *list,
struct nfs_commit_info *cinfo)
{
- spin_lock(&cinfo->inode->i_lock);
+ mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
#ifdef CONFIG_NFS_V4_1
if (cinfo->ds != NULL && cinfo->ds->nwritten != 0)
NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
#endif
nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0);
- spin_unlock(&cinfo->inode->i_lock);
+ mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
}
static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index af330c31f627..a385d1c3f146 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -631,11 +631,11 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
if (result <= 0)
goto out;
- result = generic_write_sync(iocb, result);
- if (result < 0)
- goto out;
written = result;
iocb->ki_pos += written;
+ result = generic_write_sync(iocb, written);
+ if (result < 0)
+ goto out;
/* Return error values */
if (nfs_need_check_write(file, inode)) {
@@ -744,15 +744,18 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
goto out;
/*
- * Revalidate the cache if the server has time stamps granular
- * enough to detect subsecond changes. Otherwise, clear the
- * cache to prevent missing any changes.
+ * Invalidate cache to prevent missing any changes. If
+ * the file is mapped, clear the page cache as well so
+ * those mappings will be loaded.
*
* This makes locking act as a cache coherency point.
*/
nfs_sync_mapping(filp->f_mapping);
- if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+ if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) {
nfs_zap_caches(inode);
+ if (mapping_mapped(filp->f_mapping))
+ nfs_revalidate_mapping(inode, filp->f_mapping);
+ }
out:
return status;
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 109279d6d91b..134d9f560240 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1285,7 +1285,6 @@ static bool nfs_file_has_buffered_writers(struct nfs_inode *nfsi)
static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
{
- struct nfs_inode *nfsi = NFS_I(inode);
unsigned long ret = 0;
if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE)
@@ -1315,7 +1314,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
&& (fattr->valid & NFS_ATTR_FATTR_SIZE)
&& i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size)
- && nfsi->nrequests == 0) {
+ && !nfs_have_writebacks(inode)) {
i_size_write(inode, nfs_size_to_loff_t(fattr->size));
ret |= NFS_INO_INVALID_ATTR;
}
@@ -1823,7 +1822,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (new_isize != cur_isize) {
/* Do we perhaps have any outstanding writes, or has
* the file grown beyond our last write? */
- if (nfsi->nrequests == 0 || new_isize > cur_isize) {
+ if (!nfs_have_writebacks(inode) || new_isize > cur_isize) {
i_size_write(inode, new_isize);
if (!have_writers)
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
@@ -2012,10 +2011,11 @@ static void init_once(void *foo)
INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
INIT_LIST_HEAD(&nfsi->commit_info.list);
- nfsi->nrequests = 0;
- nfsi->commit_info.ncommit = 0;
+ atomic_long_set(&nfsi->nrequests, 0);
+ atomic_long_set(&nfsi->commit_info.ncommit, 0);
atomic_set(&nfsi->commit_info.rpcs_out, 0);
init_rwsem(&nfsi->rmdir_sem);
+ mutex_init(&nfsi->commit_mutex);
nfs4_init_once(nfsi);
}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index dc456416d2be..68cc22083639 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -251,7 +251,6 @@ int nfs_iocounter_wait(struct nfs_lock_context *l_ctx);
extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *);
void nfs_pgio_header_free(struct nfs_pgio_header *);
-void nfs_pgio_data_destroy(struct nfs_pgio_header *);
int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
struct rpc_cred *cred, const struct nfs_rpc_ops *rpc_ops,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 40bd05f05e74..ac4f10b7f6c1 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -303,6 +303,17 @@ _nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
struct rpc_cred *newcred = NULL;
rpc_authflavor_t flavor;
+ if (sp4_mode == NFS_SP4_MACH_CRED_CLEANUP ||
+ sp4_mode == NFS_SP4_MACH_CRED_PNFS_CLEANUP) {
+ /* Using machine creds for cleanup operations
+ * is only relevent if the client credentials
+ * might expire. So don't bother for
+ * RPC_AUTH_UNIX. If file was only exported to
+ * sec=sys, the PUTFH would fail anyway.
+ */
+ if ((*clntp)->cl_auth->au_flavor == RPC_AUTH_UNIX)
+ return false;
+ }
if (test_bit(sp4_mode, &clp->cl_sp4_flags)) {
spin_lock(&clp->cl_lock);
if (clp->cl_machine_cred != NULL)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d90132642340..6c61e2b99635 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1659,12 +1659,52 @@ update:
return state;
}
+static struct inode *
+nfs4_opendata_get_inode(struct nfs4_opendata *data)
+{
+ struct inode *inode;
+
+ switch (data->o_arg.claim) {
+ case NFS4_OPEN_CLAIM_NULL:
+ case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+ case NFS4_OPEN_CLAIM_DELEGATE_PREV:
+ if (!(data->f_attr.valid & NFS_ATTR_FATTR))
+ return ERR_PTR(-EAGAIN);
+ inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh,
+ &data->f_attr, data->f_label);
+ break;
+ default:
+ inode = d_inode(data->dentry);
+ ihold(inode);
+ nfs_refresh_inode(inode, &data->f_attr);
+ }
+ return inode;
+}
+
static struct nfs4_state *
-_nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
+nfs4_opendata_find_nfs4_state(struct nfs4_opendata *data)
{
+ struct nfs4_state *state;
struct inode *inode;
- struct nfs4_state *state = NULL;
- int ret;
+
+ inode = nfs4_opendata_get_inode(data);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+ if (data->state != NULL && data->state->inode == inode) {
+ state = data->state;
+ atomic_inc(&state->count);
+ } else
+ state = nfs4_get_open_state(inode, data->owner);
+ iput(inode);
+ if (state == NULL)
+ state = ERR_PTR(-ENOMEM);
+ return state;
+}
+
+static struct nfs4_state *
+_nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
+{
+ struct nfs4_state *state;
if (!data->rpc_done) {
state = nfs4_try_open_cached(data);
@@ -1672,29 +1712,17 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
goto out;
}
- ret = -EAGAIN;
- if (!(data->f_attr.valid & NFS_ATTR_FATTR))
- goto err;
- inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr, data->f_label);
- ret = PTR_ERR(inode);
- if (IS_ERR(inode))
- goto err;
- ret = -ENOMEM;
- state = nfs4_get_open_state(inode, data->owner);
- if (state == NULL)
- goto err_put_inode;
+ state = nfs4_opendata_find_nfs4_state(data);
+ if (IS_ERR(state))
+ goto out;
+
if (data->o_res.delegation_type != 0)
nfs4_opendata_check_deleg(data, state);
update_open_stateid(state, &data->o_res.stateid, NULL,
data->o_arg.fmode);
- iput(inode);
out:
nfs_release_seqid(data->o_arg.seqid);
return state;
-err_put_inode:
- iput(inode);
-err:
- return ERR_PTR(ret);
}
static struct nfs4_state *
@@ -2071,7 +2099,6 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
data->o_arg.open_bitmap = &nfs4_open_noattr_bitmap[0];
case NFS4_OPEN_CLAIM_FH:
task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
- nfs_copy_fh(&data->o_res.fh, data->o_arg.fh);
}
data->timestamp = jiffies;
if (nfs4_setup_sequence(data->o_arg.server->nfs_client,
@@ -2258,7 +2285,6 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
mask = NFS4_ACCESS_READ;
cache.cred = cred;
- cache.jiffies = jiffies;
nfs_access_set_mask(&cache, opendata->o_res.access_result);
nfs_access_add_cache(state->inode, &cache);
@@ -7318,7 +7344,9 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp,
1 << (OP_DESTROY_SESSION - 32) |
1 << (OP_DESTROY_CLIENTID - 32)
};
+ unsigned long flags = 0;
unsigned int i;
+ int ret = 0;
if (sp->how == SP4_MACH_CRED) {
/* Print state protect result */
@@ -7334,7 +7362,8 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp,
for (i = 0; i < NFS4_OP_MAP_NUM_WORDS; i++) {
if (sp->enforce.u.words[i] & ~supported_enforce[i]) {
dfprintk(MOUNT, "sp4_mach_cred: disabled\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
}
@@ -7353,10 +7382,11 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp,
test_bit(OP_DESTROY_CLIENTID, sp->enforce.u.longs)) {
dfprintk(MOUNT, "sp4_mach_cred:\n");
dfprintk(MOUNT, " minimal mode enabled\n");
- set_bit(NFS_SP4_MACH_CRED_MINIMAL, &clp->cl_sp4_flags);
+ __set_bit(NFS_SP4_MACH_CRED_MINIMAL, &flags);
} else {
dfprintk(MOUNT, "sp4_mach_cred: disabled\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
if (test_bit(OP_CLOSE, sp->allow.u.longs) &&
@@ -7364,110 +7394,46 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp,
test_bit(OP_DELEGRETURN, sp->allow.u.longs) &&
test_bit(OP_LOCKU, sp->allow.u.longs)) {
dfprintk(MOUNT, " cleanup mode enabled\n");
- set_bit(NFS_SP4_MACH_CRED_CLEANUP, &clp->cl_sp4_flags);
+ __set_bit(NFS_SP4_MACH_CRED_CLEANUP, &flags);
}
if (test_bit(OP_LAYOUTRETURN, sp->allow.u.longs)) {
dfprintk(MOUNT, " pnfs cleanup mode enabled\n");
- set_bit(NFS_SP4_MACH_CRED_PNFS_CLEANUP,
- &clp->cl_sp4_flags);
+ __set_bit(NFS_SP4_MACH_CRED_PNFS_CLEANUP, &flags);
}
if (test_bit(OP_SECINFO, sp->allow.u.longs) &&
test_bit(OP_SECINFO_NO_NAME, sp->allow.u.longs)) {
dfprintk(MOUNT, " secinfo mode enabled\n");
- set_bit(NFS_SP4_MACH_CRED_SECINFO, &clp->cl_sp4_flags);
+ __set_bit(NFS_SP4_MACH_CRED_SECINFO, &flags);
}
if (test_bit(OP_TEST_STATEID, sp->allow.u.longs) &&
test_bit(OP_FREE_STATEID, sp->allow.u.longs)) {
dfprintk(MOUNT, " stateid mode enabled\n");
- set_bit(NFS_SP4_MACH_CRED_STATEID, &clp->cl_sp4_flags);
+ __set_bit(NFS_SP4_MACH_CRED_STATEID, &flags);
}
if (test_bit(OP_WRITE, sp->allow.u.longs)) {
dfprintk(MOUNT, " write mode enabled\n");
- set_bit(NFS_SP4_MACH_CRED_WRITE, &clp->cl_sp4_flags);
+ __set_bit(NFS_SP4_MACH_CRED_WRITE, &flags);
}
if (test_bit(OP_COMMIT, sp->allow.u.longs)) {
dfprintk(MOUNT, " commit mode enabled\n");
- set_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags);
+ __set_bit(NFS_SP4_MACH_CRED_COMMIT, &flags);
}
}
-
+out:
+ clp->cl_sp4_flags = flags;
return 0;
}
struct nfs41_exchange_id_data {
struct nfs41_exchange_id_res res;
struct nfs41_exchange_id_args args;
- struct rpc_xprt *xprt;
- int rpc_status;
};
-static void nfs4_exchange_id_done(struct rpc_task *task, void *data)
-{
- struct nfs41_exchange_id_data *cdata =
- (struct nfs41_exchange_id_data *)data;
- struct nfs_client *clp = cdata->args.client;
- int status = task->tk_status;
-
- trace_nfs4_exchange_id(clp, status);
-
- if (status == 0)
- status = nfs4_check_cl_exchange_flags(cdata->res.flags);
-
- if (cdata->xprt && status == 0) {
- status = nfs4_detect_session_trunking(clp, &cdata->res,
- cdata->xprt);
- goto out;
- }
-
- if (status == 0)
- status = nfs4_sp4_select_mode(clp, &cdata->res.state_protect);
-
- if (status == 0) {
- clp->cl_clientid = cdata->res.clientid;
- clp->cl_exchange_flags = cdata->res.flags;
- clp->cl_seqid = cdata->res.seqid;
- /* Client ID is not confirmed */
- if (!(cdata->res.flags & EXCHGID4_FLAG_CONFIRMED_R))
- clear_bit(NFS4_SESSION_ESTABLISHED,
- &clp->cl_session->session_state);
-
- kfree(clp->cl_serverowner);
- clp->cl_serverowner = cdata->res.server_owner;
- cdata->res.server_owner = NULL;
-
- /* use the most recent implementation id */
- kfree(clp->cl_implid);
- clp->cl_implid = cdata->res.impl_id;
- cdata->res.impl_id = NULL;
-
- if (clp->cl_serverscope != NULL &&
- !nfs41_same_server_scope(clp->cl_serverscope,
- cdata->res.server_scope)) {
- dprintk("%s: server_scope mismatch detected\n",
- __func__);
- set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state);
- kfree(clp->cl_serverscope);
- clp->cl_serverscope = NULL;
- }
-
- if (clp->cl_serverscope == NULL) {
- clp->cl_serverscope = cdata->res.server_scope;
- cdata->res.server_scope = NULL;
- }
- /* Save the EXCHANGE_ID verifier session trunk tests */
- memcpy(clp->cl_confirm.data, cdata->args.verifier.data,
- sizeof(clp->cl_confirm.data));
- }
-out:
- cdata->rpc_status = status;
- return;
-}
-
static void nfs4_exchange_id_release(void *data)
{
struct nfs41_exchange_id_data *cdata =
@@ -7481,7 +7447,6 @@ static void nfs4_exchange_id_release(void *data)
}
static const struct rpc_call_ops nfs4_exchange_id_call_ops = {
- .rpc_call_done = nfs4_exchange_id_done,
.rpc_release = nfs4_exchange_id_release,
};
@@ -7490,7 +7455,8 @@ static const struct rpc_call_ops nfs4_exchange_id_call_ops = {
*
* Wrapper for EXCHANGE_ID operation.
*/
-static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
+static struct rpc_task *
+nfs4_run_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
u32 sp4_how, struct rpc_xprt *xprt)
{
struct rpc_message msg = {
@@ -7504,17 +7470,15 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
.flags = RPC_TASK_TIMEOUT,
};
struct nfs41_exchange_id_data *calldata;
- struct rpc_task *task;
int status;
if (!atomic_inc_not_zero(&clp->cl_count))
- return -EIO;
+ return ERR_PTR(-EIO);
+ status = -ENOMEM;
calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
- if (!calldata) {
- nfs_put_client(clp);
- return -ENOMEM;
- }
+ if (!calldata)
+ goto out;
nfs4_init_boot_verifier(clp, &calldata->args.verifier);
@@ -7553,34 +7517,22 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
goto out_impl_id;
}
if (xprt) {
- calldata->xprt = xprt;
task_setup_data.rpc_xprt = xprt;
task_setup_data.flags |= RPC_TASK_SOFTCONN;
memcpy(calldata->args.verifier.data, clp->cl_confirm.data,
sizeof(calldata->args.verifier.data));
}
calldata->args.client = clp;
-#ifdef CONFIG_NFS_V4_1_MIGRATION
calldata->args.flags = EXCHGID4_FLAG_SUPP_MOVED_REFER |
- EXCHGID4_FLAG_BIND_PRINC_STATEID |
- EXCHGID4_FLAG_SUPP_MOVED_MIGR,
-#else
- calldata->args.flags = EXCHGID4_FLAG_SUPP_MOVED_REFER |
- EXCHGID4_FLAG_BIND_PRINC_STATEID,
+ EXCHGID4_FLAG_BIND_PRINC_STATEID;
+#ifdef CONFIG_NFS_V4_1_MIGRATION
+ calldata->args.flags |= EXCHGID4_FLAG_SUPP_MOVED_MIGR;
#endif
msg.rpc_argp = &calldata->args;
msg.rpc_resp = &calldata->res;
task_setup_data.callback_data = calldata;
- task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task))
- return PTR_ERR(task);
-
- status = calldata->rpc_status;
-
- rpc_put_task(task);
-out:
- return status;
+ return rpc_run_task(&task_setup_data);
out_impl_id:
kfree(calldata->res.impl_id);
@@ -7590,8 +7542,69 @@ out_server_owner:
kfree(calldata->res.server_owner);
out_calldata:
kfree(calldata);
+out:
nfs_put_client(clp);
- goto out;
+ return ERR_PTR(status);
+}
+
+/*
+ * _nfs4_proc_exchange_id()
+ *
+ * Wrapper for EXCHANGE_ID operation.
+ */
+static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
+ u32 sp4_how)
+{
+ struct rpc_task *task;
+ struct nfs41_exchange_id_args *argp;
+ struct nfs41_exchange_id_res *resp;
+ int status;
+
+ task = nfs4_run_exchange_id(clp, cred, sp4_how, NULL);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+
+ argp = task->tk_msg.rpc_argp;
+ resp = task->tk_msg.rpc_resp;
+ status = task->tk_status;
+ if (status != 0)
+ goto out;
+
+ status = nfs4_check_cl_exchange_flags(resp->flags);
+ if (status != 0)
+ goto out;
+
+ status = nfs4_sp4_select_mode(clp, &resp->state_protect);
+ if (status != 0)
+ goto out;
+
+ clp->cl_clientid = resp->clientid;
+ clp->cl_exchange_flags = resp->flags;
+ clp->cl_seqid = resp->seqid;
+ /* Client ID is not confirmed */
+ if (!(resp->flags & EXCHGID4_FLAG_CONFIRMED_R))
+ clear_bit(NFS4_SESSION_ESTABLISHED,
+ &clp->cl_session->session_state);
+
+ if (clp->cl_serverscope != NULL &&
+ !nfs41_same_server_scope(clp->cl_serverscope,
+ resp->server_scope)) {
+ dprintk("%s: server_scope mismatch detected\n",
+ __func__);
+ set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state);
+ }
+
+ swap(clp->cl_serverowner, resp->server_owner);
+ swap(clp->cl_serverscope, resp->server_scope);
+ swap(clp->cl_implid, resp->impl_id);
+
+ /* Save the EXCHANGE_ID verifier session trunk tests */
+ memcpy(clp->cl_confirm.data, argp->verifier.data,
+ sizeof(clp->cl_confirm.data));
+out:
+ trace_nfs4_exchange_id(clp, status);
+ rpc_put_task(task);
+ return status;
}
/*
@@ -7614,13 +7627,13 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
/* try SP4_MACH_CRED if krb5i/p */
if (authflavor == RPC_AUTH_GSS_KRB5I ||
authflavor == RPC_AUTH_GSS_KRB5P) {
- status = _nfs4_proc_exchange_id(clp, cred, SP4_MACH_CRED, NULL);
+ status = _nfs4_proc_exchange_id(clp, cred, SP4_MACH_CRED);
if (!status)
return 0;
}
/* try SP4_NONE */
- return _nfs4_proc_exchange_id(clp, cred, SP4_NONE, NULL);
+ return _nfs4_proc_exchange_id(clp, cred, SP4_NONE);
}
/**
@@ -7642,6 +7655,9 @@ int nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt,
void *data)
{
struct nfs4_add_xprt_data *adata = (struct nfs4_add_xprt_data *)data;
+ struct rpc_task *task;
+ int status;
+
u32 sp4_how;
dprintk("--> %s try %s\n", __func__,
@@ -7650,7 +7666,17 @@ int nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt,
sp4_how = (adata->clp->cl_sp4_flags == 0 ? SP4_NONE : SP4_MACH_CRED);
/* Test connection for session trunking. Async exchange_id call */
- return _nfs4_proc_exchange_id(adata->clp, adata->cred, sp4_how, xprt);
+ task = nfs4_run_exchange_id(adata->clp, adata->cred, sp4_how, xprt);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+
+ status = task->tk_status;
+ if (status == 0)
+ status = nfs4_detect_session_trunking(adata->clp,
+ task->tk_msg.rpc_resp, xprt);
+
+ rpc_put_task(task);
+ return status;
}
EXPORT_SYMBOL_GPL(nfs4_test_session_trunk);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index de9066a92c0d..bec120ec1967 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -134,19 +134,14 @@ EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait);
/*
* nfs_page_group_lock - lock the head of the page group
* @req - request in group that is to be locked
- * @nonblock - if true don't block waiting for lock
*
- * this lock must be held if modifying the page group list
+ * this lock must be held when traversing or modifying the page
+ * group list
*
- * return 0 on success, < 0 on error: -EDELAY if nonblocking or the
- * result from wait_on_bit_lock
- *
- * NOTE: calling with nonblock=false should always have set the
- * lock bit (see fs/buffer.c and other uses of wait_on_bit_lock
- * with TASK_UNINTERRUPTIBLE), so there is no need to check the result.
+ * return 0 on success, < 0 on error
*/
int
-nfs_page_group_lock(struct nfs_page *req, bool nonblock)
+nfs_page_group_lock(struct nfs_page *req)
{
struct nfs_page *head = req->wb_head;
@@ -155,35 +150,10 @@ nfs_page_group_lock(struct nfs_page *req, bool nonblock)
if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags))
return 0;
- if (!nonblock) {
- set_bit(PG_CONTENDED1, &head->wb_flags);
- smp_mb__after_atomic();
- return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
- TASK_UNINTERRUPTIBLE);
- }
-
- return -EAGAIN;
-}
-
-/*
- * nfs_page_group_lock_wait - wait for the lock to clear, but don't grab it
- * @req - a request in the group
- *
- * This is a blocking call to wait for the group lock to be cleared.
- */
-void
-nfs_page_group_lock_wait(struct nfs_page *req)
-{
- struct nfs_page *head = req->wb_head;
-
- WARN_ON_ONCE(head != head->wb_head);
-
- if (!test_bit(PG_HEADLOCK, &head->wb_flags))
- return;
set_bit(PG_CONTENDED1, &head->wb_flags);
smp_mb__after_atomic();
- wait_on_bit(&head->wb_flags, PG_HEADLOCK,
- TASK_UNINTERRUPTIBLE);
+ return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
+ TASK_UNINTERRUPTIBLE);
}
/*
@@ -246,7 +216,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
{
bool ret;
- nfs_page_group_lock(req, false);
+ nfs_page_group_lock(req);
ret = nfs_page_group_sync_on_bit_locked(req, bit);
nfs_page_group_unlock(req);
@@ -288,9 +258,7 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
inode = page_file_mapping(req->wb_page)->host;
set_bit(PG_INODE_REF, &req->wb_flags);
kref_get(&req->wb_kref);
- spin_lock(&inode->i_lock);
- NFS_I(inode)->nrequests++;
- spin_unlock(&inode->i_lock);
+ atomic_long_inc(&NFS_I(inode)->nrequests);
}
}
}
@@ -306,14 +274,11 @@ static void
nfs_page_group_destroy(struct kref *kref)
{
struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
+ struct nfs_page *head = req->wb_head;
struct nfs_page *tmp, *next;
- /* subrequests must release the ref on the head request */
- if (req->wb_head != req)
- nfs_release_request(req->wb_head);
-
if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN))
- return;
+ goto out;
tmp = req;
do {
@@ -324,6 +289,10 @@ nfs_page_group_destroy(struct kref *kref)
nfs_free_request(tmp);
tmp = next;
} while (tmp != req);
+out:
+ /* subrequests must release the ref on the head request */
+ if (head != req)
+ nfs_release_request(head);
}
/**
@@ -465,6 +434,7 @@ void nfs_release_request(struct nfs_page *req)
{
kref_put(&req->wb_kref, nfs_page_group_destroy);
}
+EXPORT_SYMBOL_GPL(nfs_release_request);
/**
* nfs_wait_on_request - Wait for a request to complete.
@@ -483,6 +453,7 @@ nfs_wait_on_request(struct nfs_page *req)
return wait_on_bit_io(&req->wb_flags, PG_BUSY,
TASK_UNINTERRUPTIBLE);
}
+EXPORT_SYMBOL_GPL(nfs_wait_on_request);
/*
* nfs_generic_pg_test - determine if requests can be coalesced
@@ -530,16 +501,6 @@ struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops)
}
EXPORT_SYMBOL_GPL(nfs_pgio_header_alloc);
-/*
- * nfs_pgio_header_free - Free a read or write header
- * @hdr: The header to free
- */
-void nfs_pgio_header_free(struct nfs_pgio_header *hdr)
-{
- hdr->rw_ops->rw_free_header(hdr);
-}
-EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
-
/**
* nfs_pgio_data_destroy - make @hdr suitable for reuse
*
@@ -548,14 +509,24 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
*
* @hdr: A header that has had nfs_generic_pgio called
*/
-void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr)
+static void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr)
{
if (hdr->args.context)
put_nfs_open_context(hdr->args.context);
if (hdr->page_array.pagevec != hdr->page_array.page_array)
kfree(hdr->page_array.pagevec);
}
-EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy);
+
+/*
+ * nfs_pgio_header_free - Free a read or write header
+ * @hdr: The header to free
+ */
+void nfs_pgio_header_free(struct nfs_pgio_header *hdr)
+{
+ nfs_pgio_data_destroy(hdr);
+ hdr->rw_ops->rw_free_header(hdr);
+}
+EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
/**
* nfs_pgio_rpcsetup - Set up arguments for a pageio call
@@ -669,7 +640,6 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
static void nfs_pgio_error(struct nfs_pgio_header *hdr)
{
set_bit(NFS_IOHDR_REDO, &hdr->flags);
- nfs_pgio_data_destroy(hdr);
hdr->completion_ops->completion(hdr);
}
@@ -680,7 +650,6 @@ static void nfs_pgio_error(struct nfs_pgio_header *hdr)
static void nfs_pgio_release(void *calldata)
{
struct nfs_pgio_header *hdr = calldata;
- nfs_pgio_data_destroy(hdr);
hdr->completion_ops->completion(hdr);
}
@@ -711,12 +680,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
const struct nfs_pgio_completion_ops *compl_ops,
const struct nfs_rw_ops *rw_ops,
size_t bsize,
- int io_flags,
- gfp_t gfp_flags)
+ int io_flags)
{
- struct nfs_pgio_mirror *new;
- int i;
-
desc->pg_moreio = 0;
desc->pg_inode = inode;
desc->pg_ops = pg_ops;
@@ -732,23 +697,10 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_mirror_count = 1;
desc->pg_mirror_idx = 0;
- if (pg_ops->pg_get_mirror_count) {
- /* until we have a request, we don't have an lseg and no
- * idea how many mirrors there will be */
- new = kcalloc(NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX,
- sizeof(struct nfs_pgio_mirror), gfp_flags);
- desc->pg_mirrors_dynamic = new;
- desc->pg_mirrors = new;
-
- for (i = 0; i < NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX; i++)
- nfs_pageio_mirror_init(&desc->pg_mirrors[i], bsize);
- } else {
- desc->pg_mirrors_dynamic = NULL;
- desc->pg_mirrors = desc->pg_mirrors_static;
- nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize);
- }
+ desc->pg_mirrors_dynamic = NULL;
+ desc->pg_mirrors = desc->pg_mirrors_static;
+ nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize);
}
-EXPORT_SYMBOL_GPL(nfs_pageio_init);
/**
* nfs_pgio_result - Basic pageio error handling
@@ -865,32 +817,52 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
return ret;
}
+static struct nfs_pgio_mirror *
+nfs_pageio_alloc_mirrors(struct nfs_pageio_descriptor *desc,
+ unsigned int mirror_count)
+{
+ struct nfs_pgio_mirror *ret;
+ unsigned int i;
+
+ kfree(desc->pg_mirrors_dynamic);
+ desc->pg_mirrors_dynamic = NULL;
+ if (mirror_count == 1)
+ return desc->pg_mirrors_static;
+ ret = kmalloc_array(mirror_count, sizeof(*ret), GFP_NOFS);
+ if (ret != NULL) {
+ for (i = 0; i < mirror_count; i++)
+ nfs_pageio_mirror_init(&ret[i], desc->pg_bsize);
+ desc->pg_mirrors_dynamic = ret;
+ }
+ return ret;
+}
+
/*
* nfs_pageio_setup_mirroring - determine if mirroring is to be used
* by calling the pg_get_mirror_count op
*/
-static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio,
+static void nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
- int mirror_count = 1;
-
- if (!pgio->pg_ops->pg_get_mirror_count)
- return 0;
+ unsigned int mirror_count = 1;
- mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req);
-
- if (pgio->pg_error < 0)
- return pgio->pg_error;
-
- if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX)
- return -EINVAL;
+ if (pgio->pg_ops->pg_get_mirror_count)
+ mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req);
+ if (mirror_count == pgio->pg_mirror_count || pgio->pg_error < 0)
+ return;
- if (WARN_ON_ONCE(!pgio->pg_mirrors_dynamic))
- return -EINVAL;
+ if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX) {
+ pgio->pg_error = -EINVAL;
+ return;
+ }
+ pgio->pg_mirrors = nfs_pageio_alloc_mirrors(pgio, mirror_count);
+ if (pgio->pg_mirrors == NULL) {
+ pgio->pg_error = -ENOMEM;
+ pgio->pg_mirrors = pgio->pg_mirrors_static;
+ mirror_count = 1;
+ }
pgio->pg_mirror_count = mirror_count;
-
- return 0;
}
/*
@@ -1036,7 +1008,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
unsigned int bytes_left = 0;
unsigned int offset, pgbase;
- nfs_page_group_lock(req, false);
+ nfs_page_group_lock(req);
subreq = req;
bytes_left = subreq->wb_bytes;
@@ -1058,7 +1030,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
if (mirror->pg_recoalesce)
return 0;
/* retry add_request for this subreq */
- nfs_page_group_lock(req, false);
+ nfs_page_group_lock(req);
continue;
}
@@ -1155,7 +1127,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
for (midx = 0; midx < desc->pg_mirror_count; midx++) {
if (midx) {
- nfs_page_group_lock(req, false);
+ nfs_page_group_lock(req);
/* find the last request */
for (lastreq = req->wb_head;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index c383d0913b54..7879ed8ceb76 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -529,47 +529,6 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
}
EXPORT_SYMBOL_GPL(pnfs_put_lseg);
-static void pnfs_free_lseg_async_work(struct work_struct *work)
-{
- struct pnfs_layout_segment *lseg;
- struct pnfs_layout_hdr *lo;
-
- lseg = container_of(work, struct pnfs_layout_segment, pls_work);
- lo = lseg->pls_layout;
-
- pnfs_free_lseg(lseg);
- pnfs_put_layout_hdr(lo);
-}
-
-static void pnfs_free_lseg_async(struct pnfs_layout_segment *lseg)
-{
- INIT_WORK(&lseg->pls_work, pnfs_free_lseg_async_work);
- schedule_work(&lseg->pls_work);
-}
-
-void
-pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg)
-{
- if (!lseg)
- return;
-
- assert_spin_locked(&lseg->pls_layout->plh_inode->i_lock);
-
- dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
- atomic_read(&lseg->pls_refcount),
- test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
- if (atomic_dec_and_test(&lseg->pls_refcount)) {
- struct pnfs_layout_hdr *lo = lseg->pls_layout;
- if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags))
- return;
- pnfs_layout_remove_lseg(lo, lseg);
- if (!pnfs_cache_lseg_for_layoutreturn(lo, lseg)) {
- pnfs_get_layout_hdr(lo);
- pnfs_free_lseg_async(lseg);
- }
- }
-}
-
/*
* is l2 fully contained in l1?
* start1 end1
@@ -2274,7 +2233,6 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
nfs_pageio_reset_write_mds(desc);
mirror->pg_recoalesce = 1;
}
- nfs_pgio_data_destroy(hdr);
hdr->release(hdr);
}
@@ -2398,7 +2356,6 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
nfs_pageio_reset_read_mds(desc);
mirror->pg_recoalesce = 1;
}
- nfs_pgio_data_destroy(hdr);
hdr->release(hdr);
}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 99731e3e332f..87f144f14d1e 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -67,7 +67,6 @@ struct pnfs_layout_segment {
u32 pls_seq;
unsigned long pls_flags;
struct pnfs_layout_hdr *pls_layout;
- struct work_struct pls_work;
};
enum pnfs_try_status {
@@ -230,7 +229,6 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync);
/* pnfs.c */
void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
-void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg);
void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, struct nfs_fsinfo *);
void unset_pnfs_layoutdriver(struct nfs_server *);
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 25f28fa64c57..60da59be83b6 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -83,34 +83,11 @@ pnfs_generic_clear_request_commit(struct nfs_page *req,
}
out:
nfs_request_remove_commit_list(req, cinfo);
- pnfs_put_lseg_locked(freeme);
+ pnfs_put_lseg(freeme);
}
EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit);
static int
-pnfs_generic_transfer_commit_list(struct list_head *src, struct list_head *dst,
- struct nfs_commit_info *cinfo, int max)
-{
- struct nfs_page *req, *tmp;
- int ret = 0;
-
- list_for_each_entry_safe(req, tmp, src, wb_list) {
- if (!nfs_lock_request(req))
- continue;
- kref_get(&req->wb_kref);
- if (cond_resched_lock(&cinfo->inode->i_lock))
- list_safe_reset_next(req, tmp, wb_list);
- nfs_request_remove_commit_list(req, cinfo);
- clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
- nfs_list_add_request(req, dst);
- ret++;
- if ((ret == max) && !cinfo->dreq)
- break;
- }
- return ret;
-}
-
-static int
pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
struct nfs_commit_info *cinfo,
int max)
@@ -119,15 +96,15 @@ pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
struct list_head *dst = &bucket->committing;
int ret;
- lockdep_assert_held(&cinfo->inode->i_lock);
- ret = pnfs_generic_transfer_commit_list(src, dst, cinfo, max);
+ lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
+ ret = nfs_scan_commit_list(src, dst, cinfo, max);
if (ret) {
cinfo->ds->nwritten -= ret;
cinfo->ds->ncommitting += ret;
if (bucket->clseg == NULL)
bucket->clseg = pnfs_get_lseg(bucket->wlseg);
if (list_empty(src)) {
- pnfs_put_lseg_locked(bucket->wlseg);
+ pnfs_put_lseg(bucket->wlseg);
bucket->wlseg = NULL;
}
}
@@ -142,7 +119,7 @@ int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo,
{
int i, rv = 0, cnt;
- lockdep_assert_held(&cinfo->inode->i_lock);
+ lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) {
cnt = pnfs_generic_scan_ds_commit_list(&cinfo->ds->buckets[i],
cinfo, max);
@@ -162,11 +139,10 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst,
int nwritten;
int i;
- lockdep_assert_held(&cinfo->inode->i_lock);
+ lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
restart:
for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
- nwritten = pnfs_generic_transfer_commit_list(&b->written,
- dst, cinfo, 0);
+ nwritten = nfs_scan_commit_list(&b->written, dst, cinfo, 0);
if (!nwritten)
continue;
cinfo->ds->nwritten -= nwritten;
@@ -953,12 +929,12 @@ pnfs_layout_mark_request_commit(struct nfs_page *req,
struct list_head *list;
struct pnfs_commit_bucket *buckets;
- spin_lock(&cinfo->inode->i_lock);
+ mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
buckets = cinfo->ds->buckets;
list = &buckets[ds_commit_idx].written;
if (list_empty(list)) {
if (!pnfs_is_valid_lseg(lseg)) {
- spin_unlock(&cinfo->inode->i_lock);
+ mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
cinfo->completion_ops->resched_write(cinfo, req);
return;
}
@@ -975,7 +951,7 @@ pnfs_layout_mark_request_commit(struct nfs_page *req,
cinfo->ds->nwritten++;
nfs_request_add_commit_list_locked(req, list, cinfo);
- spin_unlock(&cinfo->inode->i_lock);
+ mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
nfs_mark_page_unstable(req->wb_page, cinfo);
}
EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index a8421d9dab6a..0d42573d423d 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -68,7 +68,7 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
pg_ops = server->pnfs_curr_ld->pg_read_ops;
#endif
nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops,
- server->rsize, 0, GFP_KERNEL);
+ server->rsize, 0);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index d828ef88e7db..6b179af59b92 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1691,8 +1691,8 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args,
rpc_authflavor_t *server_authlist, unsigned int count)
{
rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR;
+ bool found_auth_null = false;
unsigned int i;
- int use_auth_null = false;
/*
* If the sec= mount option is used, the specified flavor or AUTH_NULL
@@ -1701,6 +1701,10 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args,
* AUTH_NULL has a special meaning when it's in the server list - it
* means that the server will ignore the rpc creds, so any flavor
* can be used but still use the sec= that was specified.
+ *
+ * Note also that the MNT procedure in MNTv1 does not return a list
+ * of supported security flavors. In this case, nfs_mount() fabricates
+ * a security flavor list containing just AUTH_NULL.
*/
for (i = 0; i < count; i++) {
flavor = server_authlist[i];
@@ -1709,11 +1713,11 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args,
goto out;
if (flavor == RPC_AUTH_NULL)
- use_auth_null = true;
+ found_auth_null = true;
}
- if (use_auth_null) {
- flavor = RPC_AUTH_NULL;
+ if (found_auth_null) {
+ flavor = args->auth_info.flavors[0];
goto out;
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b1af5dee5e0a..f68083db63c8 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -102,10 +102,8 @@ static struct nfs_pgio_header *nfs_writehdr_alloc(void)
{
struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
- if (p) {
- memset(p, 0, sizeof(*p));
- p->rw_mode = FMODE_WRITE;
- }
+ memset(p, 0, sizeof(*p));
+ p->rw_mode = FMODE_WRITE;
return p;
}
@@ -154,6 +152,14 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
}
+static struct nfs_page *
+nfs_page_private_request(struct page *page)
+{
+ if (!PagePrivate(page))
+ return NULL;
+ return (struct nfs_page *)page_private(page);
+}
+
/*
* nfs_page_find_head_request_locked - find head request associated with @page
*
@@ -162,21 +168,41 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
* returns matching head request with reference held, or NULL if not found.
*/
static struct nfs_page *
-nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
+nfs_page_find_private_request(struct page *page)
{
- struct nfs_page *req = NULL;
-
- if (PagePrivate(page))
- req = (struct nfs_page *)page_private(page);
- else if (unlikely(PageSwapCache(page)))
- req = nfs_page_search_commits_for_head_request_locked(nfsi,
- page);
+ struct address_space *mapping = page_file_mapping(page);
+ struct nfs_page *req;
+ if (!PagePrivate(page))
+ return NULL;
+ spin_lock(&mapping->private_lock);
+ req = nfs_page_private_request(page);
if (req) {
WARN_ON_ONCE(req->wb_head != req);
kref_get(&req->wb_kref);
}
+ spin_unlock(&mapping->private_lock);
+ return req;
+}
+static struct nfs_page *
+nfs_page_find_swap_request(struct page *page)
+{
+ struct inode *inode = page_file_mapping(page)->host;
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_page *req = NULL;
+ if (!PageSwapCache(page))
+ return NULL;
+ mutex_lock(&nfsi->commit_mutex);
+ if (PageSwapCache(page)) {
+ req = nfs_page_search_commits_for_head_request_locked(nfsi,
+ page);
+ if (req) {
+ WARN_ON_ONCE(req->wb_head != req);
+ kref_get(&req->wb_kref);
+ }
+ }
+ mutex_unlock(&nfsi->commit_mutex);
return req;
}
@@ -187,12 +213,11 @@ nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
*/
static struct nfs_page *nfs_page_find_head_request(struct page *page)
{
- struct inode *inode = page_file_mapping(page)->host;
- struct nfs_page *req = NULL;
+ struct nfs_page *req;
- spin_lock(&inode->i_lock);
- req = nfs_page_find_head_request_locked(NFS_I(inode), page);
- spin_unlock(&inode->i_lock);
+ req = nfs_page_find_private_request(page);
+ if (!req)
+ req = nfs_page_find_swap_request(page);
return req;
}
@@ -241,9 +266,6 @@ nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset)
{
struct nfs_page *req;
- WARN_ON_ONCE(head != head->wb_head);
- WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags));
-
req = head;
do {
if (page_offset >= req->wb_pgbase &&
@@ -269,20 +291,17 @@ static bool nfs_page_group_covers_page(struct nfs_page *req)
unsigned int pos = 0;
unsigned int len = nfs_page_length(req->wb_page);
- nfs_page_group_lock(req, false);
+ nfs_page_group_lock(req);
- do {
+ for (;;) {
tmp = nfs_page_group_search_locked(req->wb_head, pos);
- if (tmp) {
- /* no way this should happen */
- WARN_ON_ONCE(tmp->wb_pgbase != pos);
- pos += tmp->wb_bytes - (pos - tmp->wb_pgbase);
- }
- } while (tmp && pos < len);
+ if (!tmp)
+ break;
+ pos = tmp->wb_pgbase + tmp->wb_bytes;
+ }
nfs_page_group_unlock(req);
- WARN_ON_ONCE(pos > len);
- return pos == len;
+ return pos >= len;
}
/* We can set the PG_uptodate flag if we see that a write request
@@ -333,8 +352,11 @@ static void nfs_end_page_writeback(struct nfs_page *req)
{
struct inode *inode = page_file_mapping(req->wb_page)->host;
struct nfs_server *nfss = NFS_SERVER(inode);
+ bool is_done;
- if (!nfs_page_group_sync_on_bit(req, PG_WB_END))
+ is_done = nfs_page_group_sync_on_bit(req, PG_WB_END);
+ nfs_unlock_request(req);
+ if (!is_done)
return;
end_page_writeback(req->wb_page);
@@ -342,22 +364,6 @@ static void nfs_end_page_writeback(struct nfs_page *req)
clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
}
-
-/* nfs_page_group_clear_bits
- * @req - an nfs request
- * clears all page group related bits from @req
- */
-static void
-nfs_page_group_clear_bits(struct nfs_page *req)
-{
- clear_bit(PG_TEARDOWN, &req->wb_flags);
- clear_bit(PG_UNLOCKPAGE, &req->wb_flags);
- clear_bit(PG_UPTODATE, &req->wb_flags);
- clear_bit(PG_WB_END, &req->wb_flags);
- clear_bit(PG_REMOVE, &req->wb_flags);
-}
-
-
/*
* nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req
*
@@ -366,43 +372,24 @@ nfs_page_group_clear_bits(struct nfs_page *req)
* @inode - inode associated with request page group, must be holding inode lock
* @head - head request of page group, must be holding head lock
* @req - request that couldn't lock and needs to wait on the req bit lock
- * @nonblock - if true, don't actually wait
*
- * NOTE: this must be called holding page_group bit lock and inode spin lock
- * and BOTH will be released before returning.
+ * NOTE: this must be called holding page_group bit lock
+ * which will be released before returning.
*
* returns 0 on success, < 0 on error.
*/
-static int
-nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,
- struct nfs_page *req, bool nonblock)
- __releases(&inode->i_lock)
+static void
+nfs_unroll_locks(struct inode *inode, struct nfs_page *head,
+ struct nfs_page *req)
{
struct nfs_page *tmp;
- int ret;
/* relinquish all the locks successfully grabbed this run */
- for (tmp = head ; tmp != req; tmp = tmp->wb_this_page)
- nfs_unlock_request(tmp);
-
- WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
-
- /* grab a ref on the request that will be waited on */
- kref_get(&req->wb_kref);
-
- nfs_page_group_unlock(head);
- spin_unlock(&inode->i_lock);
-
- /* release ref from nfs_page_find_head_request_locked */
- nfs_release_request(head);
-
- if (!nonblock)
- ret = nfs_wait_on_request(req);
- else
- ret = -EAGAIN;
- nfs_release_request(req);
-
- return ret;
+ for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) {
+ if (!kref_read(&tmp->wb_kref))
+ continue;
+ nfs_unlock_and_release_request(tmp);
+ }
}
/*
@@ -417,7 +404,8 @@ nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,
*/
static void
nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
- struct nfs_page *old_head)
+ struct nfs_page *old_head,
+ struct inode *inode)
{
while (destroy_list) {
struct nfs_page *subreq = destroy_list;
@@ -428,33 +416,28 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
WARN_ON_ONCE(old_head != subreq->wb_head);
/* make sure old group is not used */
- subreq->wb_head = subreq;
subreq->wb_this_page = subreq;
- /* subreq is now totally disconnected from page group or any
- * write / commit lists. last chance to wake any waiters */
- nfs_unlock_request(subreq);
+ clear_bit(PG_REMOVE, &subreq->wb_flags);
- if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) {
- /* release ref on old head request */
- nfs_release_request(old_head);
+ /* Note: races with nfs_page_group_destroy() */
+ if (!kref_read(&subreq->wb_kref)) {
+ /* Check if we raced with nfs_page_group_destroy() */
+ if (test_and_clear_bit(PG_TEARDOWN, &subreq->wb_flags))
+ nfs_free_request(subreq);
+ continue;
+ }
- nfs_page_group_clear_bits(subreq);
+ subreq->wb_head = subreq;
- /* release the PG_INODE_REF reference */
- if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags))
- nfs_release_request(subreq);
- else
- WARN_ON_ONCE(1);
- } else {
- WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags));
- /* zombie requests have already released the last
- * reference and were waiting on the rest of the
- * group to complete. Since it's no longer part of a
- * group, simply free the request */
- nfs_page_group_clear_bits(subreq);
- nfs_free_request(subreq);
+ if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) {
+ nfs_release_request(subreq);
+ atomic_long_dec(&NFS_I(inode)->nrequests);
}
+
+ /* subreq is now totally disconnected from page group or any
+ * write / commit lists. last chance to wake any waiters */
+ nfs_unlock_and_release_request(subreq);
}
}
@@ -464,7 +447,6 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
* operations for this page.
*
* @page - the page used to lookup the "page group" of nfs_page structures
- * @nonblock - if true, don't block waiting for request locks
*
* This function joins all sub requests to the head request by first
* locking all requests in the group, cancelling any pending operations
@@ -478,7 +460,7 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
* error was encountered.
*/
static struct nfs_page *
-nfs_lock_and_join_requests(struct page *page, bool nonblock)
+nfs_lock_and_join_requests(struct page *page)
{
struct inode *inode = page_file_mapping(page)->host;
struct nfs_page *head, *subreq;
@@ -487,43 +469,63 @@ nfs_lock_and_join_requests(struct page *page, bool nonblock)
int ret;
try_again:
- total_bytes = 0;
-
- WARN_ON_ONCE(destroy_list);
-
- spin_lock(&inode->i_lock);
-
/*
* A reference is taken only on the head request which acts as a
* reference to the whole page group - the group will not be destroyed
* until the head reference is released.
*/
- head = nfs_page_find_head_request_locked(NFS_I(inode), page);
-
- if (!head) {
- spin_unlock(&inode->i_lock);
+ head = nfs_page_find_head_request(page);
+ if (!head)
return NULL;
- }
- /* holding inode lock, so always make a non-blocking call to try the
- * page group lock */
- ret = nfs_page_group_lock(head, true);
- if (ret < 0) {
- spin_unlock(&inode->i_lock);
+ /* lock the page head first in order to avoid an ABBA inefficiency */
+ if (!nfs_lock_request(head)) {
+ ret = nfs_wait_on_request(head);
+ nfs_release_request(head);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ goto try_again;
+ }
- if (!nonblock && ret == -EAGAIN) {
- nfs_page_group_lock_wait(head);
- nfs_release_request(head);
- goto try_again;
- }
+ /* Ensure that nobody removed the request before we locked it */
+ if (head != nfs_page_private_request(page) && !PageSwapCache(page)) {
+ nfs_unlock_and_release_request(head);
+ goto try_again;
+ }
- nfs_release_request(head);
+ ret = nfs_page_group_lock(head);
+ if (ret < 0) {
+ nfs_unlock_and_release_request(head);
return ERR_PTR(ret);
}
/* lock each request in the page group */
- subreq = head;
- do {
+ total_bytes = head->wb_bytes;
+ for (subreq = head->wb_this_page; subreq != head;
+ subreq = subreq->wb_this_page) {
+
+ if (!kref_get_unless_zero(&subreq->wb_kref)) {
+ if (subreq->wb_offset == head->wb_offset + total_bytes)
+ total_bytes += subreq->wb_bytes;
+ continue;
+ }
+
+ while (!nfs_lock_request(subreq)) {
+ /*
+ * Unlock page to allow nfs_page_group_sync_on_bit()
+ * to succeed
+ */
+ nfs_page_group_unlock(head);
+ ret = nfs_wait_on_request(subreq);
+ if (!ret)
+ ret = nfs_page_group_lock(head);
+ if (ret < 0) {
+ nfs_unroll_locks(inode, head, subreq);
+ nfs_release_request(subreq);
+ nfs_unlock_and_release_request(head);
+ return ERR_PTR(ret);
+ }
+ }
/*
* Subrequests are always contiguous, non overlapping
* and in order - but may be repeated (mirrored writes).
@@ -535,24 +537,12 @@ try_again:
((subreq->wb_offset + subreq->wb_bytes) >
(head->wb_offset + total_bytes)))) {
nfs_page_group_unlock(head);
- spin_unlock(&inode->i_lock);
+ nfs_unroll_locks(inode, head, subreq);
+ nfs_unlock_and_release_request(subreq);
+ nfs_unlock_and_release_request(head);
return ERR_PTR(-EIO);
}
-
- if (!nfs_lock_request(subreq)) {
- /* releases page group bit lock and
- * inode spin lock and all references */
- ret = nfs_unroll_locks_and_wait(inode, head,
- subreq, nonblock);
-
- if (ret == 0)
- goto try_again;
-
- return ERR_PTR(ret);
- }
-
- subreq = subreq->wb_this_page;
- } while (subreq != head);
+ }
/* Now that all requests are locked, make sure they aren't on any list.
* Commit list removal accounting is done after locks are dropped */
@@ -573,34 +563,30 @@ try_again:
head->wb_bytes = total_bytes;
}
- /*
- * prepare head request to be added to new pgio descriptor
- */
- nfs_page_group_clear_bits(head);
-
- /*
- * some part of the group was still on the inode list - otherwise
- * the group wouldn't be involved in async write.
- * grab a reference for the head request, iff it needs one.
- */
- if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags))
+ /* Postpone destruction of this request */
+ if (test_and_clear_bit(PG_REMOVE, &head->wb_flags)) {
+ set_bit(PG_INODE_REF, &head->wb_flags);
kref_get(&head->wb_kref);
+ atomic_long_inc(&NFS_I(inode)->nrequests);
+ }
nfs_page_group_unlock(head);
- /* drop lock to clean uprequests on destroy list */
- spin_unlock(&inode->i_lock);
+ nfs_destroy_unlinked_subrequests(destroy_list, head, inode);
- nfs_destroy_unlinked_subrequests(destroy_list, head);
+ /* Did we lose a race with nfs_inode_remove_request()? */
+ if (!(PagePrivate(page) || PageSwapCache(page))) {
+ nfs_unlock_and_release_request(head);
+ return NULL;
+ }
- /* still holds ref on head from nfs_page_find_head_request_locked
+ /* still holds ref on head from nfs_page_find_head_request
* and still has lock on head from lock loop */
return head;
}
static void nfs_write_error_remove_page(struct nfs_page *req)
{
- nfs_unlock_request(req);
nfs_end_page_writeback(req);
generic_error_remove_page(page_file_mapping(req->wb_page),
req->wb_page);
@@ -624,12 +610,12 @@ nfs_error_is_fatal_on_server(int err)
* May return an error if the user signalled nfs_wait_on_request().
*/
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
- struct page *page, bool nonblock)
+ struct page *page)
{
struct nfs_page *req;
int ret = 0;
- req = nfs_lock_and_join_requests(page, nonblock);
+ req = nfs_lock_and_join_requests(page);
if (!req)
goto out;
ret = PTR_ERR(req);
@@ -672,7 +658,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
int ret;
nfs_pageio_cond_complete(pgio, page_index(page));
- ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
+ ret = nfs_page_async_flush(pgio, page);
if (ret == -EAGAIN) {
redirty_page_for_writepage(wbc, page);
ret = 0;
@@ -759,6 +745,7 @@ out_err:
*/
static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
+ struct address_space *mapping = page_file_mapping(req->wb_page);
struct nfs_inode *nfsi = NFS_I(inode);
WARN_ON_ONCE(req->wb_this_page != req);
@@ -766,27 +753,30 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
/* Lock the request! */
nfs_lock_request(req);
- spin_lock(&inode->i_lock);
- if (!nfsi->nrequests &&
- NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
- inode->i_version++;
/*
* Swap-space should not get truncated. Hence no need to plug the race
* with invalidate/truncate.
*/
+ spin_lock(&mapping->private_lock);
+ if (!nfs_have_writebacks(inode) &&
+ NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) {
+ spin_lock(&inode->i_lock);
+ inode->i_version++;
+ spin_unlock(&inode->i_lock);
+ }
if (likely(!PageSwapCache(req->wb_page))) {
set_bit(PG_MAPPED, &req->wb_flags);
SetPagePrivate(req->wb_page);
set_page_private(req->wb_page, (unsigned long)req);
}
- nfsi->nrequests++;
+ spin_unlock(&mapping->private_lock);
+ atomic_long_inc(&nfsi->nrequests);
/* this a head request for a page group - mark it as having an
* extra reference so sub groups can follow suit.
* This flag also informs pgio layer when to bump nrequests when
* adding subrequests. */
WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
kref_get(&req->wb_kref);
- spin_unlock(&inode->i_lock);
}
/*
@@ -794,25 +784,22 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
*/
static void nfs_inode_remove_request(struct nfs_page *req)
{
- struct inode *inode = d_inode(req->wb_context->dentry);
+ struct address_space *mapping = page_file_mapping(req->wb_page);
+ struct inode *inode = mapping->host;
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_page *head;
+ atomic_long_dec(&nfsi->nrequests);
if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
head = req->wb_head;
- spin_lock(&inode->i_lock);
+ spin_lock(&mapping->private_lock);
if (likely(head->wb_page && !PageSwapCache(head->wb_page))) {
set_page_private(head->wb_page, 0);
ClearPagePrivate(head->wb_page);
clear_bit(PG_MAPPED, &head->wb_flags);
}
- nfsi->nrequests--;
- spin_unlock(&inode->i_lock);
- } else {
- spin_lock(&inode->i_lock);
- nfsi->nrequests--;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&mapping->private_lock);
}
if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
@@ -868,7 +855,8 @@ nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
* number of outstanding requests requiring a commit as well as
* the MM page stats.
*
- * The caller must hold cinfo->inode->i_lock, and the nfs_page lock.
+ * The caller must hold NFS_I(cinfo->inode)->commit_mutex, and the
+ * nfs_page lock.
*/
void
nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst,
@@ -876,7 +864,7 @@ nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst,
{
set_bit(PG_CLEAN, &req->wb_flags);
nfs_list_add_request(req, dst);
- cinfo->mds->ncommit++;
+ atomic_long_inc(&cinfo->mds->ncommit);
}
EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);
@@ -896,9 +884,9 @@ EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);
void
nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)
{
- spin_lock(&cinfo->inode->i_lock);
+ mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
- spin_unlock(&cinfo->inode->i_lock);
+ mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
if (req->wb_page)
nfs_mark_page_unstable(req->wb_page, cinfo);
}
@@ -922,7 +910,7 @@ nfs_request_remove_commit_list(struct nfs_page *req,
if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
return;
nfs_list_remove_request(req);
- cinfo->mds->ncommit--;
+ atomic_long_dec(&cinfo->mds->ncommit);
}
EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);
@@ -967,7 +955,7 @@ nfs_clear_page_commit(struct page *page)
WB_RECLAIMABLE);
}
-/* Called holding inode (/cinfo) lock */
+/* Called holding the request lock on @req */
static void
nfs_clear_request_commit(struct nfs_page *req)
{
@@ -976,9 +964,11 @@ nfs_clear_request_commit(struct nfs_page *req)
struct nfs_commit_info cinfo;
nfs_init_cinfo_from_inode(&cinfo, inode);
+ mutex_lock(&NFS_I(inode)->commit_mutex);
if (!pnfs_clear_request_commit(req, &cinfo)) {
nfs_request_remove_commit_list(req, &cinfo);
}
+ mutex_unlock(&NFS_I(inode)->commit_mutex);
nfs_clear_page_commit(req->wb_page);
}
}
@@ -1023,7 +1013,6 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
remove_req:
nfs_inode_remove_request(req);
next:
- nfs_unlock_request(req);
nfs_end_page_writeback(req);
nfs_release_request(req);
}
@@ -1035,10 +1024,10 @@ out:
unsigned long
nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
{
- return cinfo->mds->ncommit;
+ return atomic_long_read(&cinfo->mds->ncommit);
}
-/* cinfo->inode->i_lock held by caller */
+/* NFS_I(cinfo->inode)->commit_mutex held by caller */
int
nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
struct nfs_commit_info *cinfo, int max)
@@ -1046,20 +1035,37 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
struct nfs_page *req, *tmp;
int ret = 0;
+restart:
list_for_each_entry_safe(req, tmp, src, wb_list) {
- if (!nfs_lock_request(req))
- continue;
kref_get(&req->wb_kref);
- if (cond_resched_lock(&cinfo->inode->i_lock))
- list_safe_reset_next(req, tmp, wb_list);
+ if (!nfs_lock_request(req)) {
+ int status;
+
+ /* Prevent deadlock with nfs_lock_and_join_requests */
+ if (!list_empty(dst)) {
+ nfs_release_request(req);
+ continue;
+ }
+ /* Ensure we make progress to prevent livelock */
+ mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
+ status = nfs_wait_on_request(req);
+ nfs_release_request(req);
+ mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
+ if (status < 0)
+ break;
+ goto restart;
+ }
nfs_request_remove_commit_list(req, cinfo);
+ clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
nfs_list_add_request(req, dst);
ret++;
if ((ret == max) && !cinfo->dreq)
break;
+ cond_resched();
}
return ret;
}
+EXPORT_SYMBOL_GPL(nfs_scan_commit_list);
/*
* nfs_scan_commit - Scan an inode for commit requests
@@ -1076,15 +1082,17 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst,
{
int ret = 0;
- spin_lock(&cinfo->inode->i_lock);
- if (cinfo->mds->ncommit > 0) {
+ if (!atomic_long_read(&cinfo->mds->ncommit))
+ return 0;
+ mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
+ if (atomic_long_read(&cinfo->mds->ncommit) > 0) {
const int max = INT_MAX;
ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
cinfo, max);
ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
}
- spin_unlock(&cinfo->inode->i_lock);
+ mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
return ret;
}
@@ -1105,43 +1113,21 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
unsigned int end;
int error;
- if (!PagePrivate(page))
- return NULL;
-
end = offset + bytes;
- spin_lock(&inode->i_lock);
-
- for (;;) {
- req = nfs_page_find_head_request_locked(NFS_I(inode), page);
- if (req == NULL)
- goto out_unlock;
- /* should be handled by nfs_flush_incompatible */
- WARN_ON_ONCE(req->wb_head != req);
- WARN_ON_ONCE(req->wb_this_page != req);
-
- rqend = req->wb_offset + req->wb_bytes;
- /*
- * Tell the caller to flush out the request if
- * the offsets are non-contiguous.
- * Note: nfs_flush_incompatible() will already
- * have flushed out requests having wrong owners.
- */
- if (offset > rqend
- || end < req->wb_offset)
- goto out_flushme;
-
- if (nfs_lock_request(req))
- break;
+ req = nfs_lock_and_join_requests(page);
+ if (IS_ERR_OR_NULL(req))
+ return req;
- /* The request is locked, so wait and then retry */
- spin_unlock(&inode->i_lock);
- error = nfs_wait_on_request(req);
- nfs_release_request(req);
- if (error != 0)
- goto out_err;
- spin_lock(&inode->i_lock);
- }
+ rqend = req->wb_offset + req->wb_bytes;
+ /*
+ * Tell the caller to flush out the request if
+ * the offsets are non-contiguous.
+ * Note: nfs_flush_incompatible() will already
+ * have flushed out requests having wrong owners.
+ */
+ if (offset > rqend || end < req->wb_offset)
+ goto out_flushme;
/* Okay, the request matches. Update the region */
if (offset < req->wb_offset) {
@@ -1152,17 +1138,17 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
req->wb_bytes = end - req->wb_offset;
else
req->wb_bytes = rqend - req->wb_offset;
-out_unlock:
- if (req)
- nfs_clear_request_commit(req);
- spin_unlock(&inode->i_lock);
return req;
out_flushme:
- spin_unlock(&inode->i_lock);
- nfs_release_request(req);
+ /*
+ * Note: we mark the request dirty here because
+ * nfs_lock_and_join_requests() cannot preserve
+ * commit flags, so we have to replay the write.
+ */
+ nfs_mark_request_dirty(req);
+ nfs_unlock_and_release_request(req);
error = nfs_wb_page(inode, page);
-out_err:
- return ERR_PTR(error);
+ return (error < 0) ? ERR_PTR(error) : NULL;
}
/*
@@ -1227,8 +1213,6 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
l_ctx = req->wb_lock_context;
do_flush = req->wb_page != page ||
!nfs_match_open_context(req->wb_context, ctx);
- /* for now, flush if more than 1 request in page_group */
- do_flush |= req->wb_this_page != req;
if (l_ctx && flctx &&
!(list_empty_careful(&flctx->flc_posix) &&
list_empty_careful(&flctx->flc_flock))) {
@@ -1412,7 +1396,6 @@ static void nfs_redirty_request(struct nfs_page *req)
{
nfs_mark_request_dirty(req);
set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
- nfs_unlock_request(req);
nfs_end_page_writeback(req);
nfs_release_request(req);
}
@@ -1452,7 +1435,7 @@ void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
pg_ops = server->pnfs_curr_ld->pg_write_ops;
#endif
nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
- server->wsize, ioflags, GFP_NOIO);
+ server->wsize, ioflags);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
@@ -1934,7 +1917,7 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
int ret = 0;
/* no commits means nothing needs to be done */
- if (!nfsi->commit_info.ncommit)
+ if (!atomic_long_read(&nfsi->commit_info.ncommit))
return ret;
if (wbc->sync_mode == WB_SYNC_NONE) {
@@ -2015,7 +1998,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
/* blocking call to cancel all requests and join to a single (head)
* request */
- req = nfs_lock_and_join_requests(page, false);
+ req = nfs_lock_and_join_requests(page);
if (IS_ERR(req)) {
ret = PTR_ERR(req);