From c9895cb69b07a4b17d8fdae26667f9a9fba5183b Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 31 May 2011 18:48:56 -0400 Subject: NFS: pnfs IPv6 support Handle ipv6 remote addresses from GETDEVICEINFO - supports netid "tcp" for ipv4 and "tcp6" for ipv6 as rfc 5665 specifies - added ds_remotestr to avoid having to handle different AFs in every dprintk - tested against pynfs 4.1 server, submitting ipv6 support patch to pynfs - tested with IPv6 disabled, it compiles cleanly and relies on rpc_pton to refuse to accept IPv6 addresses Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 7 +- fs/nfs/nfs4filelayout.h | 5 +- fs/nfs/nfs4filelayoutdev.c | 255 ++++++++++++++++++++++++++++++++------------- 3 files changed, 191 insertions(+), 76 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index f9d03abcd04c..b690bb5ee58d 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -344,8 +344,7 @@ filelayout_read_pagelist(struct nfs_read_data *data) set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); return PNFS_NOT_ATTEMPTED; } - dprintk("%s USE DS:ip %x %hu\n", __func__, - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); + dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr); /* No multipath support. Use first DS */ data->ds_clp = ds->ds_clp; @@ -384,9 +383,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); return PNFS_NOT_ATTEMPTED; } - dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__, + dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__, data->inode->i_ino, sync, (size_t) data->args.count, offset, - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); + ds->ds_remotestr); data->write_done_cb = filelayout_write_done_cb; data->ds_clp = ds->ds_clp; diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index cebe01e3795e..6c6a817710e5 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -49,8 +49,9 @@ enum stripetype4 { /* Individual ip address */ struct nfs4_pnfs_ds { struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ - u32 ds_ip_addr; - u32 ds_port; + struct sockaddr_storage ds_addr; + size_t ds_addrlen; + char *ds_remotestr; /* human readable addr+port */ struct nfs_client *ds_clp; atomic_t ds_count; }; diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 3b7bf1377264..1a8308b90108 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -56,28 +56,56 @@ print_ds(struct nfs4_pnfs_ds *ds) printk("%s NULL device\n", __func__); return; } - printk(" ip_addr %x port %hu\n" + printk(" ds %s\n" " ref count %d\n" " client %p\n" " cl_exchange_flags %x\n", - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), + ds->ds_remotestr, atomic_read(&ds->ds_count), ds->ds_clp, ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); } /* nfs4_ds_cache_lock is held */ static struct nfs4_pnfs_ds * -_data_server_lookup_locked(u32 ip_addr, u32 port) +_data_server_lookup_locked(struct sockaddr *addr, size_t addrlen) { struct nfs4_pnfs_ds *ds; - - dprintk("_data_server_lookup: ip_addr=%x port=%hu\n", - ntohl(ip_addr), ntohs(port)); + struct sockaddr_in *a, *b; + struct sockaddr_in6 *a6, *b6; list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { - if (ds->ds_ip_addr == ip_addr && - ds->ds_port == port) { - return ds; + if (addr->sa_family != ds->ds_addr.ss_family) + continue; + + switch (addr->sa_family) { + case AF_INET: + a = (struct sockaddr_in *)addr; + b = (struct sockaddr_in *)&ds->ds_addr; + + if (a->sin_addr.s_addr == b->sin_addr.s_addr && + a->sin_port == b->sin_port) + return ds; + break; + + case AF_INET6: + a6 = (struct sockaddr_in6 *)addr; + b6 = (struct sockaddr_in6 *)&ds->ds_addr; + + /* LINKLOCAL addresses must have matching scope_id */ + if (ipv6_addr_scope(&a6->sin6_addr) == + IPV6_ADDR_SCOPE_LINKLOCAL && + a6->sin6_scope_id != b6->sin6_scope_id) + continue; + + if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && + a6->sin6_port == b6->sin6_port) + return ds; + break; + + default: + dprintk("%s: unhandled address family: %u\n", + __func__, addr->sa_family); + return NULL; } } return NULL; @@ -91,19 +119,14 @@ static int nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) { struct nfs_client *clp; - struct sockaddr_in sin; int status = 0; - dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__, - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), + dprintk("--> %s addr %s au_flavor %d\n", __func__, ds->ds_remotestr, mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = ds->ds_ip_addr; - sin.sin_port = ds->ds_port; - - clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin, - sizeof(sin), IPPROTO_TCP); + clp = nfs4_set_ds_client(mds_srv->nfs_client, + (struct sockaddr *)&ds->ds_addr, + ds->ds_addrlen, IPPROTO_TCP); if (IS_ERR(clp)) { status = PTR_ERR(clp); goto out; @@ -115,8 +138,8 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) goto out_put; } ds->ds_clp = clp; - dprintk("%s [existing] ip=%x, port=%hu\n", __func__, - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); + dprintk("%s [existing] server=%s\n", __func__, + ds->ds_remotestr); goto out; } @@ -135,8 +158,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) goto out_put; ds->ds_clp = clp; - dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr), - ntohs(ds->ds_port)); + dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); out: return status; out_put: @@ -153,6 +175,7 @@ destroy_ds(struct nfs4_pnfs_ds *ds) if (ds->ds_clp) nfs_put_client(ds->ds_clp); + kfree(ds->ds_remotestr); kfree(ds); } @@ -179,31 +202,85 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) kfree(dsaddr); } +/* + * Create a string with a human readable address and port to avoid + * complicated setup around many dprinks. + */ +static char * +nfs4_pnfs_remotestr(struct sockaddr *ds_addr, gfp_t gfp_flags) +{ + char buf[INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN]; + char *remotestr; + char *startsep = ""; + char *endsep = ""; + size_t len; + uint16_t port; + + switch (ds_addr->sa_family) { + case AF_INET: + port = ((struct sockaddr_in *)ds_addr)->sin_port; + break; + case AF_INET6: + startsep = "["; + endsep = "]"; + port = ((struct sockaddr_in6 *)ds_addr)->sin6_port; + break; + default: + dprintk("%s: Unknown address family %u\n", + __func__, ds_addr->sa_family); + return NULL; + } + + if (!rpc_ntop((struct sockaddr *)ds_addr, buf, sizeof(buf))) { + dprintk("%s: error printing addr\n", __func__); + return NULL; + } + + len = strlen(buf) + strlen(startsep) + strlen(endsep) + 1 + 5 + 1; + remotestr = kzalloc(len, gfp_flags); + + if (unlikely(!remotestr)) { + dprintk("%s: couldn't alloc remotestr\n", __func__); + return NULL; + } + + snprintf(remotestr, len, "%s%s%s:%u", + startsep, buf, endsep, ntohs(port)); + + return remotestr; +} + static struct nfs4_pnfs_ds * -nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags) +nfs4_pnfs_ds_add(struct sockaddr *addr, size_t addrlen, gfp_t gfp_flags) { - struct nfs4_pnfs_ds *tmp_ds, *ds; + struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; + char *remotestr; ds = kzalloc(sizeof(*tmp_ds), gfp_flags); if (!ds) goto out; + /* this is only used for debugging, so it's ok if its NULL */ + remotestr = nfs4_pnfs_remotestr(addr, gfp_flags); + spin_lock(&nfs4_ds_cache_lock); - tmp_ds = _data_server_lookup_locked(ip_addr, port); + tmp_ds = _data_server_lookup_locked(addr, addrlen); if (tmp_ds == NULL) { - ds->ds_ip_addr = ip_addr; - ds->ds_port = port; + memcpy(&ds->ds_addr, addr, addrlen); + ds->ds_addrlen = addrlen; + ds->ds_remotestr = remotestr; atomic_set(&ds->ds_count, 1); INIT_LIST_HEAD(&ds->ds_node); ds->ds_clp = NULL; list_add(&ds->ds_node, &nfs4_data_server_cache); - dprintk("%s add new data server ip 0x%x\n", __func__, - ds->ds_ip_addr); + dprintk("%s add new data server %s\n", __func__, + ds->ds_remotestr); } else { + kfree(remotestr); kfree(ds); atomic_inc(&tmp_ds->ds_count); - dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n", - __func__, tmp_ds->ds_ip_addr, + dprintk("%s data server %s found, inc'ed ds_count to %d\n", + __func__, tmp_ds->ds_remotestr, atomic_read(&tmp_ds->ds_count)); ds = tmp_ds; } @@ -213,18 +290,21 @@ out: } /* - * Currently only support ipv4, and one multi-path address. + * Currently only supports ipv4, ipv6 and one multi-path address. */ static struct nfs4_pnfs_ds * decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags) { struct nfs4_pnfs_ds *ds = NULL; - char *buf; - const char *ipend, *pstr; - u32 ip_addr, port; - int nlen, rlen, i; + char *buf, *portstr; + struct sockaddr_storage ss; + size_t sslen; + u32 port; + int nlen, rlen; int tmp[2]; __be32 *p; + char *netid, *match_netid; + size_t match_netid_len; /* r_netid */ p = xdr_inline_decode(streamp, 4); @@ -236,62 +316,97 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_fla if (unlikely(!p)) goto out_err; - /* Check that netid is "tcp" */ - if (nlen != 3 || memcmp((char *)p, "tcp", 3)) { - dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); + netid = kmalloc(nlen+1, gfp_flags); + if (unlikely(!netid)) goto out_err; - } - /* r_addr */ + netid[nlen] = '\0'; + memcpy(netid, p, nlen); + + /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ p = xdr_inline_decode(streamp, 4); if (unlikely(!p)) - goto out_err; + goto out_free_netid; rlen = be32_to_cpup(p); p = xdr_inline_decode(streamp, rlen); if (unlikely(!p)) - goto out_err; + goto out_free_netid; - /* ipv6 length plus port is legal */ - if (rlen > INET6_ADDRSTRLEN + 8) { + /* port is ".ABC.DEF", 8 chars max */ + if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { dprintk("%s: Invalid address, length %d\n", __func__, rlen); - goto out_err; + goto out_free_netid; } buf = kmalloc(rlen + 1, gfp_flags); if (!buf) { dprintk("%s: Not enough memory\n", __func__); - goto out_err; + goto out_free_netid; } buf[rlen] = '\0'; memcpy(buf, p, rlen); - /* replace the port dots with dashes for the in4_pton() delimiter*/ - for (i = 0; i < 2; i++) { - char *res = strrchr(buf, '.'); - if (!res) { - dprintk("%s: Failed finding expected dots in port\n", - __func__); - goto out_free; - } - *res = '-'; + /* replace port '.' with '-' */ + portstr = strrchr(buf, '.'); + if (!portstr) { + dprintk("%s: Failed finding expected dot in port\n", + __func__); + goto out_free_buf; + } + *portstr = '-'; + + /* find '.' between address and port */ + portstr = strrchr(buf, '.'); + if (!portstr) { + dprintk("%s: Failed finding expected dot between address and " + "port\n", __func__); + goto out_free_buf; } + *portstr = '\0'; - /* Currently only support ipv4 address */ - if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) { - dprintk("%s: Only ipv4 addresses supported\n", __func__); - goto out_free; + if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&ss, sizeof(ss))) { + dprintk("%s: Error parsing address %s\n", __func__, buf); + goto out_free_buf; } - /* port */ - pstr = ipend; - sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]); + portstr++; + sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); port = htons((tmp[0] << 8) | (tmp[1])); - ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags); - dprintk("%s: Decoded address and port %s\n", __func__, buf); -out_free: + switch (ss.ss_family) { + case AF_INET: + ((struct sockaddr_in *)&ss)->sin_port = port; + sslen = sizeof(struct sockaddr_in); + match_netid = "tcp"; + match_netid_len = 3; + break; + + case AF_INET6: + ((struct sockaddr_in6 *)&ss)->sin6_port = port; + sslen = sizeof(struct sockaddr_in6); + match_netid = "tcp6"; + match_netid_len = 4; + break; + + default: + dprintk("%s: unsupported address family: %u\n", + __func__, ss.ss_family); + goto out_free_buf; + } + + if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { + dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", + __func__, netid, match_netid); + goto out_free_buf; + } + + ds = nfs4_pnfs_ds_add((struct sockaddr *)&ss, sslen, gfp_flags); + dprintk("%s: Added DS %s\n", __func__, ds->ds_remotestr); +out_free_buf: kfree(buf); +out_free_netid: + kfree(netid); out_err: return ds; } @@ -591,13 +706,13 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) static void filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, - int err, u32 ds_addr) + int err, const char *ds_remotestr) { u32 *p = (u32 *)&dsaddr->id_node.deviceid; - printk(KERN_ERR "NFS: data server %x connection error %d." + printk(KERN_ERR "NFS: data server %s connection error %d." " Deviceid [%x%x%x%x] marked out of use.\n", - ds_addr, err, p[0], p[1], p[2], p[3]); + ds_remotestr, err, p[0], p[1], p[2], p[3]); spin_lock(&nfs4_ds_cache_lock); dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; @@ -628,7 +743,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) err = nfs4_ds_connect(s, ds); if (err) { filelayout_mark_devid_negative(dsaddr, err, - ntohl(ds->ds_ip_addr)); + ds->ds_remotestr); return NULL; } } -- cgit v1.2.3 From 14f9a6076f5388f3fd6341ad4b841337b28fc825 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 31 May 2011 18:48:57 -0400 Subject: NFS: Parse and store all multipath DS addresses This parses and stores all addresses associated with each data server, laying the groundwork for supporting multipath to data servers. - Skips over addresses that cannot be parsed (ie IPv6 addrs if v6 is not enabled). Only fails if none of the addresses are recognizable - Currently only uses the first address that parsed cleanly - Tested against pynfs server (modified to support multipath) Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.h | 12 +- fs/nfs/nfs4filelayoutdev.c | 363 +++++++++++++++++++++++++++++---------------- 2 files changed, 243 insertions(+), 132 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 6c6a817710e5..68cce730b800 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -47,11 +47,17 @@ enum stripetype4 { }; /* Individual ip address */ +struct nfs4_pnfs_ds_addr { + struct sockaddr_storage da_addr; + size_t da_addrlen; + struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */ + char *da_remotestr; /* human readable addr+port */ +}; + struct nfs4_pnfs_ds { struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ - struct sockaddr_storage ds_addr; - size_t ds_addrlen; - char *ds_remotestr; /* human readable addr+port */ + char *ds_remotestr; /* comma sep list of addrs */ + struct list_head ds_addrs; struct nfs_client *ds_clp; atomic_t ds_count; }; diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 1a8308b90108..610808a96f25 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -65,52 +65,103 @@ print_ds(struct nfs4_pnfs_ds *ds) ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); } -/* nfs4_ds_cache_lock is held */ -static struct nfs4_pnfs_ds * -_data_server_lookup_locked(struct sockaddr *addr, size_t addrlen) +static bool +same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) { - struct nfs4_pnfs_ds *ds; struct sockaddr_in *a, *b; struct sockaddr_in6 *a6, *b6; - list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { - if (addr->sa_family != ds->ds_addr.ss_family) - continue; - - switch (addr->sa_family) { - case AF_INET: - a = (struct sockaddr_in *)addr; - b = (struct sockaddr_in *)&ds->ds_addr; - - if (a->sin_addr.s_addr == b->sin_addr.s_addr && - a->sin_port == b->sin_port) - return ds; - break; - - case AF_INET6: - a6 = (struct sockaddr_in6 *)addr; - b6 = (struct sockaddr_in6 *)&ds->ds_addr; - - /* LINKLOCAL addresses must have matching scope_id */ - if (ipv6_addr_scope(&a6->sin6_addr) == - IPV6_ADDR_SCOPE_LINKLOCAL && - a6->sin6_scope_id != b6->sin6_scope_id) - continue; - - if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && - a6->sin6_port == b6->sin6_port) - return ds; - break; - - default: - dprintk("%s: unhandled address family: %u\n", - __func__, addr->sa_family); - return NULL; + if (addr1->sa_family != addr2->sa_family) + return false; + + switch (addr1->sa_family) { + case AF_INET: + a = (struct sockaddr_in *)addr1; + b = (struct sockaddr_in *)addr2; + + if (a->sin_addr.s_addr == b->sin_addr.s_addr && + a->sin_port == b->sin_port) + return true; + break; + + case AF_INET6: + a6 = (struct sockaddr_in6 *)addr1; + b6 = (struct sockaddr_in6 *)addr2; + + /* LINKLOCAL addresses must have matching scope_id */ + if (ipv6_addr_scope(&a6->sin6_addr) == + IPV6_ADDR_SCOPE_LINKLOCAL && + a6->sin6_scope_id != b6->sin6_scope_id) + return false; + + if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && + a6->sin6_port == b6->sin6_port) + return true; + break; + + default: + dprintk("%s: unhandled address family: %u\n", + __func__, addr1->sa_family); + return false; + } + + return false; +} + +/* + * Lookup DS by addresses. The first matching address returns true. + * nfs4_ds_cache_lock is held + */ +static struct nfs4_pnfs_ds * +_data_server_lookup_locked(struct list_head *dsaddrs) +{ + struct nfs4_pnfs_ds *ds; + struct nfs4_pnfs_ds_addr *da1, *da2; + + list_for_each_entry(da1, dsaddrs, da_node) { + list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { + list_for_each_entry(da2, &ds->ds_addrs, da_node) { + if (same_sockaddr( + (struct sockaddr *)&da1->da_addr, + (struct sockaddr *)&da2->da_addr)) + return ds; + } } } return NULL; } +/* + * Compare two lists of addresses. + */ +static bool +_data_server_match_all_addrs_locked(struct list_head *dsaddrs1, + struct list_head *dsaddrs2) +{ + struct nfs4_pnfs_ds_addr *da1, *da2; + size_t count1 = 0, + count2 = 0; + + list_for_each_entry(da1, dsaddrs1, da_node) + count1++; + + list_for_each_entry(da2, dsaddrs2, da_node) { + bool found = false; + count2++; + list_for_each_entry(da1, dsaddrs1, da_node) { + if (same_sockaddr((struct sockaddr *)&da1->da_addr, + (struct sockaddr *)&da2->da_addr)) { + found = true; + break; + } + } + if (!found) + return false; + } + + return (count1 == count2); +} + /* * Create an rpc connection to the nfs4_pnfs_ds data server * Currently only support IPv4 @@ -119,14 +170,21 @@ static int nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) { struct nfs_client *clp; + struct nfs4_pnfs_ds_addr *da; int status = 0; - dprintk("--> %s addr %s au_flavor %d\n", __func__, ds->ds_remotestr, + dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); + BUG_ON(list_empty(&ds->ds_addrs)); + + da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node); + dprintk("%s: using the first address for DS %s: %s\n", + __func__, ds->ds_remotestr, da->da_remotestr); + clp = nfs4_set_ds_client(mds_srv->nfs_client, - (struct sockaddr *)&ds->ds_addr, - ds->ds_addrlen, IPPROTO_TCP); + (struct sockaddr *)&da->da_addr, + da->da_addrlen, IPPROTO_TCP); if (IS_ERR(clp)) { status = PTR_ERR(clp); goto out; @@ -169,12 +227,24 @@ out_put: static void destroy_ds(struct nfs4_pnfs_ds *ds) { + struct nfs4_pnfs_ds_addr *da; + dprintk("--> %s\n", __func__); ifdebug(FACILITY) print_ds(ds); if (ds->ds_clp) nfs_put_client(ds->ds_clp); + + while (!list_empty(&ds->ds_addrs)) { + da = list_first_entry(&ds->ds_addrs, + struct nfs4_pnfs_ds_addr, + da_node); + list_del_init(&da->da_node); + kfree(da->da_remotestr); + kfree(da); + } + kfree(ds->ds_remotestr); kfree(ds); } @@ -207,67 +277,73 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) * complicated setup around many dprinks. */ static char * -nfs4_pnfs_remotestr(struct sockaddr *ds_addr, gfp_t gfp_flags) +nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) { - char buf[INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN]; + struct nfs4_pnfs_ds_addr *da; char *remotestr; - char *startsep = ""; - char *endsep = ""; size_t len; - uint16_t port; + char *p; - switch (ds_addr->sa_family) { - case AF_INET: - port = ((struct sockaddr_in *)ds_addr)->sin_port; - break; - case AF_INET6: - startsep = "["; - endsep = "]"; - port = ((struct sockaddr_in6 *)ds_addr)->sin6_port; - break; - default: - dprintk("%s: Unknown address family %u\n", - __func__, ds_addr->sa_family); - return NULL; + len = 3; /* '{', '}' and eol */ + list_for_each_entry(da, dsaddrs, da_node) { + len += strlen(da->da_remotestr) + 1; /* string plus comma */ } - if (!rpc_ntop((struct sockaddr *)ds_addr, buf, sizeof(buf))) { - dprintk("%s: error printing addr\n", __func__); + remotestr = kzalloc(len, gfp_flags); + if (!remotestr) return NULL; - } - len = strlen(buf) + strlen(startsep) + strlen(endsep) + 1 + 5 + 1; - remotestr = kzalloc(len, gfp_flags); + p = remotestr; + *(p++) = '{'; + len--; + list_for_each_entry(da, dsaddrs, da_node) { + size_t ll = strlen(da->da_remotestr); - if (unlikely(!remotestr)) { - dprintk("%s: couldn't alloc remotestr\n", __func__); - return NULL; - } + if (ll > len) + goto out_err; - snprintf(remotestr, len, "%s%s%s:%u", - startsep, buf, endsep, ntohs(port)); + memcpy(p, da->da_remotestr, ll); + p += ll; + len -= ll; + if (len < 1) + goto out_err; + (*p++) = ','; + len--; + } + if (len < 2) + goto out_err; + *(p++) = '}'; + *p = '\0'; return remotestr; +out_err: + kfree(remotestr); + return NULL; } static struct nfs4_pnfs_ds * -nfs4_pnfs_ds_add(struct sockaddr *addr, size_t addrlen, gfp_t gfp_flags) +nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) { struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; char *remotestr; - ds = kzalloc(sizeof(*tmp_ds), gfp_flags); + if (list_empty(dsaddrs)) { + dprintk("%s: no addresses defined\n", __func__); + goto out; + } + + ds = kzalloc(sizeof(*ds), gfp_flags); if (!ds) goto out; /* this is only used for debugging, so it's ok if its NULL */ - remotestr = nfs4_pnfs_remotestr(addr, gfp_flags); + remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); spin_lock(&nfs4_ds_cache_lock); - tmp_ds = _data_server_lookup_locked(addr, addrlen); + tmp_ds = _data_server_lookup_locked(dsaddrs); if (tmp_ds == NULL) { - memcpy(&ds->ds_addr, addr, addrlen); - ds->ds_addrlen = addrlen; + INIT_LIST_HEAD(&ds->ds_addrs); + list_splice_init(dsaddrs, &ds->ds_addrs); ds->ds_remotestr = remotestr; atomic_set(&ds->ds_count, 1); INIT_LIST_HEAD(&ds->ds_node); @@ -276,6 +352,11 @@ nfs4_pnfs_ds_add(struct sockaddr *addr, size_t addrlen, gfp_t gfp_flags) dprintk("%s add new data server %s\n", __func__, ds->ds_remotestr); } else { + if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs, + dsaddrs)) { + dprintk("%s: multipath address mismatch: %s != %s", + __func__, tmp_ds->ds_remotestr, remotestr); + } kfree(remotestr); kfree(ds); atomic_inc(&tmp_ds->ds_count); @@ -292,19 +373,20 @@ out: /* * Currently only supports ipv4, ipv6 and one multi-path address. */ -static struct nfs4_pnfs_ds * -decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags) +static struct nfs4_pnfs_ds_addr * +decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags) { - struct nfs4_pnfs_ds *ds = NULL; + struct nfs4_pnfs_ds_addr *da = NULL; char *buf, *portstr; - struct sockaddr_storage ss; - size_t sslen; u32 port; int nlen, rlen; int tmp[2]; __be32 *p; char *netid, *match_netid; - size_t match_netid_len; + size_t len, match_netid_len; + char *startsep = ""; + char *endsep = ""; + /* r_netid */ p = xdr_inline_decode(streamp, 4); @@ -365,50 +447,74 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_fla } *portstr = '\0'; - if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&ss, sizeof(ss))) { - dprintk("%s: Error parsing address %s\n", __func__, buf); + da = kzalloc(sizeof(*da), gfp_flags); + if (unlikely(!da)) goto out_free_buf; + + INIT_LIST_HEAD(&da->da_node); + + if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr, + sizeof(da->da_addr))) { + dprintk("%s: error parsing address %s\n", __func__, buf); + goto out_free_da; } portstr++; sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); port = htons((tmp[0] << 8) | (tmp[1])); - switch (ss.ss_family) { + switch (da->da_addr.ss_family) { case AF_INET: - ((struct sockaddr_in *)&ss)->sin_port = port; - sslen = sizeof(struct sockaddr_in); + ((struct sockaddr_in *)&da->da_addr)->sin_port = port; + da->da_addrlen = sizeof(struct sockaddr_in); match_netid = "tcp"; match_netid_len = 3; break; case AF_INET6: - ((struct sockaddr_in6 *)&ss)->sin6_port = port; - sslen = sizeof(struct sockaddr_in6); + ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; + da->da_addrlen = sizeof(struct sockaddr_in6); match_netid = "tcp6"; match_netid_len = 4; + startsep = "["; + endsep = "]"; break; default: dprintk("%s: unsupported address family: %u\n", - __func__, ss.ss_family); - goto out_free_buf; + __func__, da->da_addr.ss_family); + goto out_free_da; } if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", __func__, netid, match_netid); - goto out_free_buf; + goto out_free_da; } - ds = nfs4_pnfs_ds_add((struct sockaddr *)&ss, sslen, gfp_flags); - dprintk("%s: Added DS %s\n", __func__, ds->ds_remotestr); + /* save human readable address */ + len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; + da->da_remotestr = kzalloc(len, gfp_flags); + + /* NULL is ok, only used for dprintk */ + if (da->da_remotestr) + snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep, + buf, endsep, ntohs(port)); + + dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); + kfree(buf); + kfree(netid); + return da; + +out_free_da: + kfree(da); out_free_buf: + dprintk("%s: Error parsing DS addr: %s\n", __func__, buf); kfree(buf); out_free_netid: kfree(netid); out_err: - return ds; + return NULL; } /* Decode opaque device data and return the result */ @@ -425,6 +531,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) struct xdr_stream stream; struct xdr_buf buf; struct page *scratch; + struct list_head dsaddrs; + struct nfs4_pnfs_ds_addr *da; /* set up xdr stream */ scratch = alloc_page(gfp_flags); @@ -501,6 +609,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) NFS_SERVER(ino)->nfs_client, &pdev->dev_id); + INIT_LIST_HEAD(&dsaddrs); + for (i = 0; i < dsaddr->ds_num; i++) { int j; u32 mp_count; @@ -510,48 +620,43 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) goto out_err_free_deviceid; mp_count = be32_to_cpup(p); /* multipath count */ - if (mp_count > 1) { - printk(KERN_WARNING - "%s: Multipath count %d not supported, " - "skipping all greater than 1\n", __func__, - mp_count); - } for (j = 0; j < mp_count; j++) { - if (j == 0) { - dsaddr->ds_list[i] = decode_and_add_ds(&stream, - ino, gfp_flags); - if (dsaddr->ds_list[i] == NULL) - goto out_err_free_deviceid; - } else { - u32 len; - /* skip extra multipath */ - - /* read len, skip */ - p = xdr_inline_decode(&stream, 4); - if (unlikely(!p)) - goto out_err_free_deviceid; - len = be32_to_cpup(p); - - p = xdr_inline_decode(&stream, len); - if (unlikely(!p)) - goto out_err_free_deviceid; - - /* read len, skip */ - p = xdr_inline_decode(&stream, 4); - if (unlikely(!p)) - goto out_err_free_deviceid; - len = be32_to_cpup(p); - - p = xdr_inline_decode(&stream, len); - if (unlikely(!p)) - goto out_err_free_deviceid; - } + da = decode_ds_addr(&stream, gfp_flags); + if (da) + list_add_tail(&da->da_node, &dsaddrs); + } + if (list_empty(&dsaddrs)) { + dprintk("%s: no suitable DS addresses found\n", + __func__); + goto out_err_free_deviceid; + } + + dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); + if (!dsaddr->ds_list[i]) + goto out_err_drain_dsaddrs; + + /* If DS was already in cache, free ds addrs */ + while (!list_empty(&dsaddrs)) { + da = list_first_entry(&dsaddrs, + struct nfs4_pnfs_ds_addr, + da_node); + list_del_init(&da->da_node); + kfree(da->da_remotestr); + kfree(da); } } __free_page(scratch); return dsaddr; +out_err_drain_dsaddrs: + while (!list_empty(&dsaddrs)) { + da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, + da_node); + list_del_init(&da->da_node); + kfree(da->da_remotestr); + kfree(da); + } out_err_free_deviceid: nfs4_fl_free_deviceid(dsaddr); /* stripe_indicies was part of dsaddr */ -- cgit v1.2.3 From 7e574f0d3911c5cc60d4d2b57fee975c462d6cd0 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 31 May 2011 18:48:58 -0400 Subject: NFS: pnfs: loop over multipath addrs on connect Don't just use the first addr in the multipath list - instead, loop over addresses when calling nfs4_set_ds_client() (which calls connect) until it is successful. Although this is not real multipath support, it's a quick fix to handle when an MDS sends a list of addresses for a DS and some of the addr families are unsupported or misconfigured (like no routable ipv6 addr assigned). This will attempt all paths to the DS before giving up, instead of immediately falling back to the MDS. As before, an error encountered after a successful connect() will cause all i/o to fall back to the MDS. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayoutdev.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 610808a96f25..61c173b0aab3 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -169,7 +169,7 @@ _data_server_match_all_addrs_locked(struct list_head *dsaddrs1, static int nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) { - struct nfs_client *clp; + struct nfs_client *clp = ERR_PTR(-EIO); struct nfs4_pnfs_ds_addr *da; int status = 0; @@ -178,13 +178,17 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) BUG_ON(list_empty(&ds->ds_addrs)); - da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node); - dprintk("%s: using the first address for DS %s: %s\n", - __func__, ds->ds_remotestr, da->da_remotestr); + list_for_each_entry(da, &ds->ds_addrs, da_node) { + dprintk("%s: DS %s: trying address %s\n", + __func__, ds->ds_remotestr, da->da_remotestr); - clp = nfs4_set_ds_client(mds_srv->nfs_client, + clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&da->da_addr, da->da_addrlen, IPPROTO_TCP); + if (!IS_ERR(clp)) + break; + } + if (IS_ERR(clp)) { status = PTR_ERR(clp); goto out; -- cgit v1.2.3 From 78fe0f41d9937ee62817912ac8d627e06243c269 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 31 May 2011 19:05:47 -0400 Subject: NFS: use scope from exchange_id to skip reclaim can be skipped if the "eir_server_scope" from the exchange_id proc differs from previous calls. Also, in the future server_scope will be useful for determining whether client trunking is available Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 + fs/nfs/nfs4_fs.h | 3 +++ fs/nfs/nfs4proc.c | 32 ++++++++++++++++++++++++++++++++ fs/nfs/nfs4state.c | 9 ++++++++- fs/nfs/nfs4xdr.c | 8 +++++++- 5 files changed, 51 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index b3dc2b88b65b..006f8ff0a3c0 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -293,6 +293,7 @@ static void nfs_free_client(struct nfs_client *clp) nfs4_deviceid_purge_client(clp); kfree(clp->cl_hostname); + kfree(clp->server_scope); kfree(clp); dprintk("<-- nfs_free_client()\n"); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c4a69833dd0d..b47f0d4710fa 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -48,6 +48,7 @@ enum nfs4_client_state { NFS4CLNT_SESSION_RESET, NFS4CLNT_RECALL_SLOT, NFS4CLNT_LEASE_CONFIRM, + NFS4CLNT_SERVER_SCOPE_MISMATCH, }; enum nfs4_session_state { @@ -349,6 +350,8 @@ extern void nfs4_schedule_state_manager(struct nfs_client *); extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); extern void nfs41_handle_recall_slot(struct nfs_client *clp); +extern void nfs41_handle_server_scope(struct nfs_client *, + struct server_scope **); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5879b23e0c99..5f4912f72282 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4781,6 +4781,16 @@ out_inval: return -NFS4ERR_INVAL; } +static bool +nfs41_same_server_scope(struct server_scope *a, struct server_scope *b) +{ + if (a->server_scope_sz == b->server_scope_sz && + memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0) + return true; + + return false; +} + /* * nfs4_proc_exchange_id() * @@ -4823,9 +4833,31 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) init_utsname()->domainname, clp->cl_rpcclient->cl_auth->au_flavor); + res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL); + if (unlikely(!res.server_scope)) + return -ENOMEM; + status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (!status) status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); + + if (!status) { + if (clp->server_scope && + !nfs41_same_server_scope(clp->server_scope, + res.server_scope)) { + dprintk("%s: server_scope mismatch detected\n", + __func__); + set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state); + kfree(clp->server_scope); + clp->server_scope = NULL; + } + + if (!clp->server_scope) + clp->server_scope = res.server_scope; + else + kfree(res.server_scope); + } + dprintk("<-- %s status= %d\n", __func__, status); return status; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index e97dd219f84f..5d744a52b4e1 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1643,7 +1643,14 @@ static void nfs4_state_manager(struct nfs_client *clp) goto out_error; } clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); - set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); + + if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, + &clp->cl_state)) + nfs4_state_start_reclaim_nograce(clp); + else + set_bit(NFS4CLNT_RECLAIM_REBOOT, + &clp->cl_state); + pnfs_destroy_all_layouts(clp); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e6e8f3b9a1de..1555c74dd336 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4977,11 +4977,17 @@ static int decode_exchange_id(struct xdr_stream *xdr, if (unlikely(status)) return status; - /* Throw away server_scope */ + /* Save server_scope */ status = decode_opaque_inline(xdr, &dummy, &dummy_str); if (unlikely(status)) return status; + if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) + return -EIO; + + memcpy(res->server_scope->server_scope, dummy_str, dummy); + res->server_scope->server_scope_sz = dummy; + /* Throw away Implementation id array */ status = decode_opaque_inline(xdr, &dummy, &dummy_str); if (unlikely(status)) -- cgit v1.2.3 From 35dbbc99e93e57680837c17f96efe370f0535064 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Wed, 1 Jun 2011 16:32:21 -0400 Subject: NFS: fix comment We support IPv4 and IPv6 now. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayoutdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 61c173b0aab3..ed388aae9689 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -164,7 +164,7 @@ _data_server_match_all_addrs_locked(struct list_head *dsaddrs1, /* * Create an rpc connection to the nfs4_pnfs_ds data server - * Currently only support IPv4 + * Currently only supports IPv4 and IPv6 addresses */ static int nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) -- cgit v1.2.3 From 6382a44138e7aa40bf52170e7afc014443a24806 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Wed, 1 Jun 2011 16:44:44 -0400 Subject: NFS: move pnfs layouts to nfs_server structure Layouts should be tracked per nfs_server (aka superblock) instead of per struct nfs_client, which may have multiple FSIDs associated with it. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 57 +++++++++++++++++++++++++++++++------------------- fs/nfs/client.c | 4 +--- fs/nfs/pnfs.c | 13 +++++++++--- 3 files changed, 47 insertions(+), 27 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index d4d1954e9bb9..74780f9f852c 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -111,6 +111,7 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf static u32 initiate_file_draining(struct nfs_client *clp, struct cb_layoutrecallargs *args) { + struct nfs_server *server; struct pnfs_layout_hdr *lo; struct inode *ino; bool found = false; @@ -118,21 +119,28 @@ static u32 initiate_file_draining(struct nfs_client *clp, LIST_HEAD(free_me_list); spin_lock(&clp->cl_lock); - list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { - if (nfs_compare_fh(&args->cbl_fh, - &NFS_I(lo->plh_inode)->fh)) - continue; - ino = igrab(lo->plh_inode); - if (!ino) - continue; - found = true; - /* Without this, layout can be freed as soon - * as we release cl_lock. - */ - get_layout_hdr(lo); - break; + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + list_for_each_entry(lo, &server->layouts, plh_layouts) { + if (nfs_compare_fh(&args->cbl_fh, + &NFS_I(lo->plh_inode)->fh)) + continue; + ino = igrab(lo->plh_inode); + if (!ino) + continue; + found = true; + /* Without this, layout can be freed as soon + * as we release cl_lock. + */ + get_layout_hdr(lo); + break; + } + if (found) + break; } + rcu_read_unlock(); spin_unlock(&clp->cl_lock); + if (!found) return NFS4ERR_NOMATCHING_LAYOUT; @@ -154,6 +162,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, static u32 initiate_bulk_draining(struct nfs_client *clp, struct cb_layoutrecallargs *args) { + struct nfs_server *server; struct pnfs_layout_hdr *lo; struct inode *ino; u32 rv = NFS4ERR_NOMATCHING_LAYOUT; @@ -167,18 +176,24 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, }; spin_lock(&clp->cl_lock); - list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { if ((args->cbl_recall_type == RETURN_FSID) && - memcmp(&NFS_SERVER(lo->plh_inode)->fsid, - &args->cbl_fsid, sizeof(struct nfs_fsid))) - continue; - if (!igrab(lo->plh_inode)) + memcmp(&server->fsid, &args->cbl_fsid, + sizeof(struct nfs_fsid))) continue; - get_layout_hdr(lo); - BUG_ON(!list_empty(&lo->plh_bulk_recall)); - list_add(&lo->plh_bulk_recall, &recall_list); + + list_for_each_entry(lo, &server->layouts, plh_layouts) { + if (!igrab(lo->plh_inode)) + continue; + get_layout_hdr(lo); + BUG_ON(!list_empty(&lo->plh_bulk_recall)); + list_add(&lo->plh_bulk_recall, &recall_list); + } } + rcu_read_unlock(); spin_unlock(&clp->cl_lock); + list_for_each_entry_safe(lo, tmp, &recall_list, plh_bulk_recall) { ino = lo->plh_inode; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 006f8ff0a3c0..5452ada59461 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -188,9 +188,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ cred = rpc_lookup_machine_cred(); if (!IS_ERR(cred)) clp->cl_machine_cred = cred; -#if defined(CONFIG_NFS_V4_1) - INIT_LIST_HEAD(&clp->cl_layouts); -#endif nfs_fscache_get_client_cookie(clp); return clp; @@ -1063,6 +1060,7 @@ static struct nfs_server *nfs_alloc_server(void) INIT_LIST_HEAD(&server->client_link); INIT_LIST_HEAD(&server->master_link); INIT_LIST_HEAD(&server->delegations); + INIT_LIST_HEAD(&server->layouts); atomic_set(&server->active, 0); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 29c0ca7fc347..ff8200772377 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -448,11 +448,17 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) void pnfs_destroy_all_layouts(struct nfs_client *clp) { + struct nfs_server *server; struct pnfs_layout_hdr *lo; LIST_HEAD(tmp_list); spin_lock(&clp->cl_lock); - list_splice_init(&clp->cl_layouts, &tmp_list); + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + if (!list_empty(&server->layouts)) + list_splice_init(&server->layouts, &tmp_list); + } + rcu_read_unlock(); spin_unlock(&clp->cl_lock); while (!list_empty(&tmp_list)) { @@ -920,7 +926,8 @@ pnfs_update_layout(struct inode *ino, }; unsigned pg_offset; struct nfs_inode *nfsi = NFS_I(ino); - struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; + struct nfs_server *server = NFS_SERVER(ino); + struct nfs_client *clp = server->nfs_client; struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg = NULL; bool first = false; @@ -964,7 +971,7 @@ pnfs_update_layout(struct inode *ino, */ spin_lock(&clp->cl_lock); BUG_ON(!list_empty(&lo->plh_layouts)); - list_add_tail(&lo->plh_layouts, &clp->cl_layouts); + list_add_tail(&lo->plh_layouts, &server->layouts); spin_unlock(&clp->cl_lock); } -- cgit v1.2.3 From fca78d6d2c77f87d7dbee89bbe4836a44da881e2 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 2 Jun 2011 14:59:07 -0400 Subject: NFS: Add SECINFO_NO_NAME procedure If the client is using NFS v4.1, then we can use SECINFO_NO_NAME to find the secflavor for the initial mount. If the server doesn't support SECINFO_NO_NAME then I fall back on the "guess and check" method used for v4.0 mounts. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 3 ++ fs/nfs/namespace.c | 2 +- fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4proc.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++- fs/nfs/nfs4xdr.c | 68 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 157 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2a55347a2daa..fc017eadfe08 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -277,6 +277,9 @@ extern void nfs_sb_deactive(struct super_block *sb); extern char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen); extern struct vfsmount *nfs_d_automount(struct path *path); +#ifdef CONFIG_NFS_V4 +rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); +#endif /* getroot.c */ extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 1f063bacd285..8102391bb374 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -119,7 +119,7 @@ Elong: } #ifdef CONFIG_NFS_V4 -static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) +rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) { struct gss_api_mech *mech; struct xdr_netobj oid; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b47f0d4710fa..c30aed2c70f4 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -67,6 +67,8 @@ struct nfs4_minor_version_ops { int cache_reply); int (*validate_stateid)(struct nfs_delegation *, const nfs4_stateid *); + int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, + struct nfs_fsinfo *); const struct nfs4_state_recovery_ops *reboot_recovery_ops; const struct nfs4_state_recovery_ops *nograce_recovery_ops; const struct nfs4_state_maintenance_ops *state_renewal_ops; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5f4912f72282..892bff53f61d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2251,13 +2251,14 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { + int minor_version = server->nfs_client->cl_minorversion; int status = nfs4_lookup_root(server, fhandle, info); if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR)) /* * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM * by nfs4_map_errors() as this function exits. */ - status = nfs4_find_root_sec(server, fhandle, info); + status = nfs_v4_minor_ops[minor_version]->find_root_sec(server, fhandle, info); if (status == 0) status = nfs4_server_capabilities(server, fhandle); if (status == 0) @@ -5935,6 +5936,85 @@ out: rpc_put_task(task); return status; } + +static int +_nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) +{ + struct nfs41_secinfo_no_name_args args = { + .style = SECINFO_STYLE_CURRENT_FH, + }; + struct nfs4_secinfo_res res = { + .flavors = flavors, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SECINFO_NO_NAME], + .rpc_argp = &args, + .rpc_resp = &res, + }; + return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); +} + +static int +nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = _nfs41_proc_secinfo_no_name(server, fhandle, info, flavors); + switch (err) { + case 0: + case -NFS4ERR_WRONGSEC: + case -NFS4ERR_NOTSUPP: + break; + default: + err = nfs4_handle_exception(server, err, &exception); + } + } while (exception.retry); + return err; +} + +static int +nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + int err; + struct page *page; + rpc_authflavor_t flavor; + struct nfs4_secinfo_flavors *flavors; + + page = alloc_page(GFP_KERNEL); + if (!page) { + err = -ENOMEM; + goto out; + } + + flavors = page_address(page); + err = nfs41_proc_secinfo_no_name(server, fhandle, info, flavors); + + /* + * Fall back on "guess and check" method if + * the server doesn't support SECINFO_NO_NAME + */ + if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) { + err = nfs4_find_root_sec(server, fhandle, info); + goto out_freepage; + } + if (err) + goto out_freepage; + + flavor = nfs_find_best_sec(flavors); + if (err == 0) + err = nfs4_lookup_root_sec(server, fhandle, info, flavor); + +out_freepage: + put_page(page); + if (err == -EACCES) + return -EPERM; +out: + return err; +} #endif /* CONFIG_NFS_V4_1 */ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { @@ -5996,6 +6076,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .minor_version = 0, .call_sync = _nfs4_call_sync, .validate_stateid = nfs4_validate_delegation_stateid, + .find_root_sec = nfs4_find_root_sec, .reboot_recovery_ops = &nfs40_reboot_recovery_ops, .nograce_recovery_ops = &nfs40_nograce_recovery_ops, .state_renewal_ops = &nfs40_state_renewal_ops, @@ -6006,6 +6087,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, .call_sync = _nfs4_call_sync_session, .validate_stateid = nfs41_validate_delegation_stateid, + .find_root_sec = nfs41_find_root_sec, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, .state_renewal_ops = &nfs41_state_renewal_ops, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1555c74dd336..c8c069a6319b 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -343,6 +343,8 @@ static int nfs4_stat_to_errno(int); 1 /* FIXME: opaque lrf_body always empty at the moment */) #define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \ 1 + decode_stateid_maxsz) +#define encode_secinfo_no_name_maxsz (op_encode_hdr_maxsz + 1) +#define decode_secinfo_no_name_maxsz decode_secinfo_maxsz #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 @@ -772,6 +774,14 @@ static int nfs4_stat_to_errno(int); decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_layoutreturn_maxsz) +#define NFS4_enc_secinfo_no_name_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putrootfh_maxsz +\ + encode_secinfo_no_name_maxsz) +#define NFS4_dec_secinfo_no_name_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putrootfh_maxsz + \ + decode_secinfo_no_name_maxsz) const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + compound_encode_hdr_maxsz + @@ -1938,6 +1948,20 @@ encode_layoutreturn(struct xdr_stream *xdr, hdr->nops++; hdr->replen += decode_layoutreturn_maxsz; } + +static int +encode_secinfo_no_name(struct xdr_stream *xdr, + const struct nfs41_secinfo_no_name_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + p = reserve_space(xdr, 8); + *p++ = cpu_to_be32(OP_SECINFO_NO_NAME); + *p++ = cpu_to_be32(args->style); + hdr->nops++; + hdr->replen += decode_secinfo_no_name_maxsz; + return 0; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -2790,6 +2814,25 @@ static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req, encode_layoutreturn(xdr, args, &hdr); encode_nops(&hdr); } + +/* + * Encode SECINFO_NO_NAME request + */ +static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs41_secinfo_no_name_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putrootfh(xdr, &hdr); + encode_secinfo_no_name(xdr, args, &hdr); + encode_nops(&hdr); + return 0; +} #endif /* CONFIG_NFS_V4_1 */ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) @@ -6467,6 +6510,30 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, out: return status; } + +/* + * Decode SECINFO_NO_NAME response + */ +static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs4_secinfo_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putrootfh(xdr); + if (status) + goto out; + status = decode_secinfo(xdr, res); +out: + return status; +} #endif /* CONFIG_NFS_V4_1 */ /** @@ -6669,6 +6736,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(LAYOUTGET, enc_layoutget, dec_layoutget), PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), + PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), #endif /* CONFIG_NFS_V4_1 */ }; -- cgit v1.2.3 From 7d9747947ae66d8f6a9a9a023a3a5e28df6a536e Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 2 Jun 2011 14:59:08 -0400 Subject: NFS: Added TEST_STATEID call This patch adds in the xdr for doing a TEST_STATEID call with a single stateid. RFC 5661 allows multiple stateids to be tested in a single call, but only testing one keeps things simpler for now. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 28 +++++++++++++++++ fs/nfs/nfs4xdr.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 892bff53f61d..5612ba997db7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6015,6 +6015,34 @@ out_freepage: out: return err; } +static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) +{ + int status; + struct nfs41_test_stateid_args args = { + .stateid = &state->stateid, + }; + struct nfs41_test_stateid_res res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_TEST_STATEID], + .rpc_argp = &args, + .rpc_resp = &res, + }; + args.seq_args.sa_session = res.seq_res.sr_session = NULL; + status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); + return status; +} + +static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs41_test_stateid(server, state), + &exception); + } while (exception.retry); + return err; +} #endif /* CONFIG_NFS_V4_1 */ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c8c069a6319b..7d914d9e7584 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -345,6 +345,9 @@ static int nfs4_stat_to_errno(int); 1 + decode_stateid_maxsz) #define encode_secinfo_no_name_maxsz (op_encode_hdr_maxsz + 1) #define decode_secinfo_no_name_maxsz decode_secinfo_maxsz +#define encode_test_stateid_maxsz (op_encode_hdr_maxsz + 2 + \ + XDR_QUADLEN(NFS4_STATEID_SIZE)) +#define decode_test_stateid_maxsz (op_decode_hdr_maxsz + 2 + 1) #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 @@ -782,6 +785,12 @@ static int nfs4_stat_to_errno(int); decode_sequence_maxsz + \ decode_putrootfh_maxsz + \ decode_secinfo_no_name_maxsz) +#define NFS4_enc_test_stateid_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_test_stateid_maxsz) +#define NFS4_dec_test_stateid_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_test_stateid_maxsz) const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + compound_encode_hdr_maxsz + @@ -1962,6 +1971,20 @@ encode_secinfo_no_name(struct xdr_stream *xdr, hdr->replen += decode_secinfo_no_name_maxsz; return 0; } + +static void encode_test_stateid(struct xdr_stream *xdr, + struct nfs41_test_stateid_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_TEST_STATEID); + *p++ = cpu_to_be32(1); + xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); + hdr->nops++; + hdr->replen += decode_test_stateid_maxsz; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -2833,6 +2856,23 @@ static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req, encode_nops(&hdr); return 0; } + +/* + * Encode TEST_STATEID request + */ +static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs41_test_stateid_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_test_stateid(xdr, args, &hdr); + encode_nops(&hdr); +} #endif /* CONFIG_NFS_V4_1 */ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) @@ -5371,6 +5411,35 @@ out_overflow: print_overflow_msg(__func__, xdr); return -EIO; } + +static int decode_test_stateid(struct xdr_stream *xdr, + struct nfs41_test_stateid_res *res) +{ + __be32 *p; + int status; + int num_res; + + status = decode_op_hdr(xdr, OP_TEST_STATEID); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + num_res = be32_to_cpup(p++); + if (num_res != 1) + goto out; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + res->status = be32_to_cpup(p++); + return res->status; +out_overflow: + print_overflow_msg(__func__, xdr); +out: + return -EIO; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -6534,6 +6603,27 @@ static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp, out: return status; } + +/* + * Decode TEST_STATEID response + */ +static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs41_test_stateid_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_test_stateid(xdr, res); +out: + return status; +} #endif /* CONFIG_NFS_V4_1 */ /** @@ -6737,6 +6827,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), + PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), #endif /* CONFIG_NFS_V4_1 */ }; -- cgit v1.2.3 From 9aeda35fd643eba683fdb8dba8907fa796a85dda Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 2 Jun 2011 14:59:09 -0400 Subject: NFS: added FREE_STATEID call FREE_STATEID is used to tell the server that we want to free a stateid that no longer has any locks associated with it. This allows the client to reclaim locks without encountering edge conditions documented in section 8.4.3 of RFC 5661. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 30 +++++++++++++++++++++ fs/nfs/nfs4xdr.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5612ba997db7..197ad0a737f6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6043,6 +6043,36 @@ static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *stat } while (exception.retry); return err; } + +static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state) +{ + int status; + struct nfs41_free_stateid_args args = { + .stateid = &state->stateid, + }; + struct nfs41_free_stateid_res res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FREE_STATEID], + .rpc_argp = &args, + .rpc_resp = &res, + }; + + args.seq_args.sa_session = res.seq_res.sr_session = NULL; + status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); + return status; +} + +static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_free_stateid(server, state), + &exception); + } while (exception.retry); + return err; +} #endif /* CONFIG_NFS_V4_1 */ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 7d914d9e7584..c191a9baa422 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -348,6 +348,9 @@ static int nfs4_stat_to_errno(int); #define encode_test_stateid_maxsz (op_encode_hdr_maxsz + 2 + \ XDR_QUADLEN(NFS4_STATEID_SIZE)) #define decode_test_stateid_maxsz (op_decode_hdr_maxsz + 2 + 1) +#define encode_free_stateid_maxsz (op_encode_hdr_maxsz + 1 + \ + XDR_QUADLEN(NFS4_STATEID_SIZE)) +#define decode_free_stateid_maxsz (op_decode_hdr_maxsz + 1) #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 @@ -791,6 +794,12 @@ static int nfs4_stat_to_errno(int); #define NFS4_dec_test_stateid_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_test_stateid_maxsz) +#define NFS4_enc_free_stateid_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_free_stateid_maxsz) +#define NFS4_dec_free_stateid_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_free_stateid_maxsz) const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + compound_encode_hdr_maxsz + @@ -1985,6 +1994,18 @@ static void encode_test_stateid(struct xdr_stream *xdr, hdr->nops++; hdr->replen += decode_test_stateid_maxsz; } + +static void encode_free_stateid(struct xdr_stream *xdr, + struct nfs41_free_stateid_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_FREE_STATEID); + xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); + hdr->nops++; + hdr->replen += decode_free_stateid_maxsz; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -2873,6 +2894,23 @@ static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req, encode_test_stateid(xdr, args, &hdr); encode_nops(&hdr); } + +/* + * Encode FREE_STATEID request + */ +static void nfs4_xdr_enc_free_stateid(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs41_free_stateid_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_free_stateid(xdr, args, &hdr); + encode_nops(&hdr); +} #endif /* CONFIG_NFS_V4_1 */ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) @@ -5440,6 +5478,26 @@ out_overflow: out: return -EIO; } + +static int decode_free_stateid(struct xdr_stream *xdr, + struct nfs41_free_stateid_res *res) +{ + __be32 *p; + int status; + + status = decode_op_hdr(xdr, OP_FREE_STATEID); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + res->status = be32_to_cpup(p++); + return res->status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -6624,6 +6682,27 @@ static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp, out: return status; } + +/* + * Decode FREE_STATEID response + */ +static int nfs4_xdr_dec_free_stateid(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs41_free_stateid_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_free_stateid(xdr, res); +out: + return status; +} #endif /* CONFIG_NFS_V4_1 */ /** @@ -6828,6 +6907,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), + PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), #endif /* CONFIG_NFS_V4_1 */ }; -- cgit v1.2.3 From f062eb6ced3b297277b94b4da3113b1d3782e539 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 2 Jun 2011 14:59:10 -0400 Subject: NFS: test and free stateids during recovery When recovering open files and locks, the stateid should be tested against the server and freed if it is invalid. This patch adds new recovery functions for NFS v4.1. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 197ad0a737f6..325706f73eea 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -80,7 +80,10 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, struct nfs4_state *state); - +#ifdef CONFIG_NFS_V4_1 +static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *); +static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *); +#endif /* Prevent leaks of NFSv4 errors into userland */ static int nfs4_map_errors(int err) { @@ -1687,6 +1690,20 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta return ret; } +#if defined(CONFIG_NFS_V4_1) +static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + int status; + struct nfs_server *server = NFS_SERVER(state->inode); + + status = nfs41_test_stateid(server, state); + if (status == NFS_OK) + return 0; + nfs41_free_stateid(server, state); + return nfs4_open_expired(sp, state); +} +#endif + /* * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-* * fields corresponding to attributes that were used to store the verifier. @@ -4444,6 +4461,20 @@ out: return err; } +#if defined(CONFIG_NFS_V4_1) +static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request) +{ + int status; + struct nfs_server *server = NFS_SERVER(state->inode); + + status = nfs41_test_stateid(server, state); + if (status == NFS_OK) + return 0; + nfs41_free_stateid(server, state); + return nfs4_lock_expired(state, request); +} +#endif + static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { struct nfs_inode *nfsi = NFS_I(state->inode); @@ -6109,8 +6140,8 @@ struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, - .recover_open = nfs4_open_expired, - .recover_lock = nfs4_lock_expired, + .recover_open = nfs41_open_expired, + .recover_lock = nfs41_lock_expired, .establish_clid = nfs41_init_clientid, .get_clid_cred = nfs4_get_exchange_id_cred, }; -- cgit v1.2.3 From 1751c3638f2a07a8c66a803a31791bab9bd3fced Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 10 Jun 2011 13:30:23 -0400 Subject: NFS: Cleanup of the nfs_pageio code in preparation for a pnfs bugfix We need to ensure that the layouts are set up before we can decide to coalesce requests. To do so, we want to further split up the struct nfs_pageio_descriptor operations into an initialisation callback, a coalescing test callback, and a 'do i/o' callback. This patch cleans up the existing callback methods before adding the 'initialisation' callback. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 15 +++++++++++++-- fs/nfs/objlayout/objio_osd.c | 13 ++++++++++++- fs/nfs/pagelist.c | 10 ++++------ fs/nfs/pnfs.c | 24 ++++++++++++++++++++++++ fs/nfs/pnfs.h | 25 +++++++++++++------------ fs/nfs/read.c | 43 ++++++++++++++++++++++++++++++++----------- fs/nfs/write.c | 28 ++++++++++++++++++++++------ 7 files changed, 120 insertions(+), 38 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index b690bb5ee58d..46bb31b79683 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -658,7 +658,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, * return true : coalesce page * return false : don't coalesce page */ -bool +static bool filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { @@ -681,6 +681,16 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return (p_stripe == r_stripe); } +static const struct nfs_pageio_ops filelayout_pg_read_ops = { + .pg_test = filelayout_pg_test, + .pg_doio = nfs_generic_pg_readpages, +}; + +static const struct nfs_pageio_ops filelayout_pg_write_ops = { + .pg_test = filelayout_pg_test, + .pg_doio = nfs_generic_pg_writepages, +}; + static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) { return !FILELAYOUT_LSEG(lseg)->commit_through_mds; @@ -878,7 +888,8 @@ static struct pnfs_layoutdriver_type filelayout_type = { .owner = THIS_MODULE, .alloc_lseg = filelayout_alloc_lseg, .free_lseg = filelayout_free_lseg, - .pg_test = filelayout_pg_test, + .pg_read_ops = &filelayout_pg_read_ops, + .pg_write_ops = &filelayout_pg_write_ops, .mark_pnfs_commit = filelayout_mark_pnfs_commit, .choose_commit_list = filelayout_choose_commit_list, .commit_pagelist = filelayout_commit_pagelist, diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 8ff2ea3f10ef..c203fab694af 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -1007,6 +1007,16 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, OBJIO_LSEG(pgio->pg_lseg)->max_io_size; } +static const struct nfs_pageio_ops objio_pg_read_ops = { + .pg_test = objio_pg_test, + .pg_doio = nfs_generic_pg_readpages, +}; + +static const struct nfs_pageio_ops objio_pg_write_ops = { + .pg_test = objio_pg_test, + .pg_doio = nfs_generic_pg_writepages, +}; + static struct pnfs_layoutdriver_type objlayout_type = { .id = LAYOUT_OSD2_OBJECTS, .name = "LAYOUT_OSD2_OBJECTS", @@ -1020,7 +1030,8 @@ static struct pnfs_layoutdriver_type objlayout_type = { .read_pagelist = objlayout_read_pagelist, .write_pagelist = objlayout_write_pagelist, - .pg_test = objio_pg_test, + .pg_read_ops = &objio_pg_read_ops, + .pg_write_ops = &objio_pg_write_ops, .free_deviceid_node = objio_free_deviceid_node, diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 009855716286..9b8a4730f0bd 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test); */ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, - int (*doio)(struct nfs_pageio_descriptor *), + const struct nfs_pageio_ops *pg_ops, size_t bsize, int io_flags) { @@ -241,12 +241,10 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_base = 0; desc->pg_moreio = 0; desc->pg_inode = inode; - desc->pg_doio = doio; + desc->pg_ops = pg_ops; desc->pg_ioflags = io_flags; desc->pg_error = 0; desc->pg_lseg = NULL; - desc->pg_test = nfs_generic_pg_test; - pnfs_pageio_init(desc, inode); } /** @@ -276,7 +274,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, return false; if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) return false; - return pgio->pg_test(pgio, prev, req); + return pgio->pg_ops->pg_test(pgio, prev, req); } /** @@ -311,7 +309,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) { if (!list_empty(&desc->pg_list)) { - int error = desc->pg_doio(desc); + int error = desc->pg_ops->pg_doio(desc); if (error < 0) desc->pg_error = error; else diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ff8200772377..7ec46d5f05ab 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1055,6 +1055,30 @@ out_forget_reply: goto out; } +bool +pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; + + if (ld == NULL) + return false; + nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0); + return true; +} + +bool +pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; + + if (ld == NULL) + return false; + nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags); + return true; +} + bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 96bf4e6f45be..137a2bd5c8c7 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -87,7 +87,8 @@ struct pnfs_layoutdriver_type { void (*free_lseg) (struct pnfs_layout_segment *lseg); /* test for nfs page cache coalescing */ - bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); + const struct nfs_pageio_ops *pg_read_ops; + const struct nfs_pageio_ops *pg_write_ops; /* Returns true if layoutdriver wants to divert this request to * driver's commit routine. @@ -152,6 +153,10 @@ struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, loff_t pos, u64 count, enum pnfs_iomode access_type, gfp_t gfp_flags); + +bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); +bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int); + void set_pnfs_layoutdriver(struct nfs_server *, u32 id); void unset_pnfs_layoutdriver(struct nfs_server *); enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, @@ -293,15 +298,6 @@ static inline int pnfs_return_layout(struct inode *ino) return 0; } -static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, - struct inode *inode) -{ - struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; - - if (ld) - pgio->pg_test = ld->pg_test; -} - #else /* CONFIG_NFS_V4_1 */ static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) @@ -385,9 +381,14 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) { } -static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, - struct inode *inode) +static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) { + return false; +} + +static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) +{ + return false; } static inline void diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 20a7f952e244..b6d9ec9a208b 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -32,6 +32,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc); static int nfs_pagein_one(struct nfs_pageio_descriptor *desc); +static const struct nfs_pageio_ops nfs_pageio_read_ops; static const struct rpc_call_ops nfs_read_partial_ops; static const struct rpc_call_ops nfs_read_full_ops; @@ -113,6 +114,20 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) } } +static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, + struct inode *inode) +{ + nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, + NFS_SERVER(inode)->rsize, 0); +} + +static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, + struct inode *inode) +{ + if (!pnfs_pageio_init_read(pgio, inode)) + nfs_pageio_init_read_mds(pgio, inode); +} + int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, struct page *page) { @@ -131,14 +146,11 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, if (len < PAGE_CACHE_SIZE) zero_user_segment(page, len, PAGE_CACHE_SIZE); - nfs_pageio_init(&pgio, inode, NULL, 0, 0); + nfs_pageio_init_read(&pgio, inode); nfs_list_add_request(new, &pgio.pg_list); pgio.pg_count = len; - if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) - nfs_pagein_multi(&pgio); - else - nfs_pagein_one(&pgio); + nfs_pageio_complete(&pgio); return 0; } @@ -365,6 +377,20 @@ out: return ret; } +int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) +{ + if (desc->pg_bsize < PAGE_CACHE_SIZE) + return nfs_pagein_multi(desc); + return nfs_pagein_one(desc); +} +EXPORT_SYMBOL_GPL(nfs_generic_pg_readpages); + + +static const struct nfs_pageio_ops nfs_pageio_read_ops = { + .pg_test = nfs_generic_pg_test, + .pg_doio = nfs_generic_pg_readpages, +}; + /* * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). @@ -635,8 +661,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, .pgio = &pgio, }; struct inode *inode = mapping->host; - struct nfs_server *server = NFS_SERVER(inode); - size_t rsize = server->rsize; unsigned long npages; int ret = -ESTALE; @@ -664,10 +688,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (ret == 0) goto read_complete; /* all pages were read */ - if (rsize < PAGE_CACHE_SIZE) - nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); - else - nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0); + nfs_pageio_init_read(&pgio, inode); ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 727168059684..b144b5474546 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1033,15 +1033,31 @@ out: return ret; } -static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, +int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) +{ + if (desc->pg_bsize < PAGE_CACHE_SIZE) + return nfs_flush_multi(desc); + return nfs_flush_one(desc); +} +EXPORT_SYMBOL_GPL(nfs_generic_pg_writepages); + +static const struct nfs_pageio_ops nfs_pageio_write_ops = { + .pg_test = nfs_generic_pg_test, + .pg_doio = nfs_generic_pg_writepages, +}; + +static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) { - size_t wsize = NFS_SERVER(inode)->wsize; + nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, + NFS_SERVER(inode)->wsize, ioflags); +} - if (wsize < PAGE_CACHE_SIZE) - nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); - else - nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags); +static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, + struct inode *inode, int ioflags) +{ + if (!pnfs_pageio_init_write(pgio, inode, ioflags)) + nfs_pageio_init_write_mds(pgio, inode, ioflags); } /* -- cgit v1.2.3 From d8007d4dd6ff8749cc8a4063c3ec87442db76d82 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 10 Jun 2011 13:30:23 -0400 Subject: NFSv4.1: Add an initialisation callback for pNFS Ensure that we always get a layout before setting up the i/o request. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 2 ++ fs/nfs/objlayout/objio_osd.c | 2 ++ fs/nfs/pagelist.c | 2 ++ fs/nfs/pnfs.c | 57 ++++++++++++++++++++++++-------------------- fs/nfs/pnfs.h | 14 ++--------- fs/nfs/read.c | 16 ++++--------- fs/nfs/write.c | 12 +++------- 7 files changed, 46 insertions(+), 59 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 46bb31b79683..faac87f24797 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -682,11 +682,13 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, } static const struct nfs_pageio_ops filelayout_pg_read_ops = { + .pg_init = pnfs_generic_pg_init_read, .pg_test = filelayout_pg_test, .pg_doio = nfs_generic_pg_readpages, }; static const struct nfs_pageio_ops filelayout_pg_write_ops = { + .pg_init = pnfs_generic_pg_init_write, .pg_test = filelayout_pg_test, .pg_doio = nfs_generic_pg_writepages, }; diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index c203fab694af..b759aeba57b9 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -1008,11 +1008,13 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, } static const struct nfs_pageio_ops objio_pg_read_ops = { + .pg_init = pnfs_generic_pg_init_read, .pg_test = objio_pg_test, .pg_doio = nfs_generic_pg_readpages, }; static const struct nfs_pageio_ops objio_pg_write_ops = { + .pg_init = pnfs_generic_pg_init_write, .pg_test = objio_pg_test, .pg_doio = nfs_generic_pg_writepages, }; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 9b8a4730f0bd..d421e19557a5 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -295,6 +295,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, if (!nfs_can_coalesce_requests(prev, req, desc)) return 0; } else { + if (desc->pg_ops->pg_init) + desc->pg_ops->pg_init(desc, req); desc->pg_base = req->wb_pgbase; } nfs_list_remove_request(req); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 7ec46d5f05ab..69a0c3f1e462 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -911,7 +911,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, * Layout segment is retreived from the server if not cached. * The appropriate layout segment is referenced and returned to the caller. */ -struct pnfs_layout_segment * +static struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, loff_t pos, @@ -1055,6 +1055,34 @@ out_forget_reply: goto out; } +void +pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + BUG_ON(pgio->pg_lseg != NULL); + + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + req_offset(req), + req->wb_bytes, + IOMODE_READ, + GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); + +void +pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + BUG_ON(pgio->pg_lseg != NULL); + + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + req_offset(req), + req->wb_bytes, + IOMODE_RW, + GFP_NOFS); +} +EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); + bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) { @@ -1083,31 +1111,8 @@ bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { - enum pnfs_iomode access_type; - gfp_t gfp_flags; - - /* We assume that pg_ioflags == 0 iff we're reading a page */ - if (pgio->pg_ioflags == 0) { - access_type = IOMODE_READ; - gfp_flags = GFP_KERNEL; - } else { - access_type = IOMODE_RW; - gfp_flags = GFP_NOFS; - } - - if (pgio->pg_lseg == NULL) { - if (pgio->pg_count != prev->wb_bytes) - return true; - /* This is first coelesce call for a series of nfs_pages */ - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - prev->wb_context, - req_offset(prev), - pgio->pg_count, - access_type, - gfp_flags); - if (pgio->pg_lseg == NULL) - return true; - } + if (pgio->pg_lseg == NULL) + return nfs_generic_pg_test(pgio, prev, req); /* * Test if a nfs_page is fully contained in the pnfs_layout_range. diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 137a2bd5c8c7..dd54351ae6ac 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -149,10 +149,6 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ void get_layout_hdr(struct pnfs_layout_hdr *lo); void put_lseg(struct pnfs_layout_segment *lseg); -struct pnfs_layout_segment * -pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - loff_t pos, u64 count, enum pnfs_iomode access_type, - gfp_t gfp_flags); bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int); @@ -163,6 +159,8 @@ enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, const struct rpc_call_ops *, int); enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, const struct rpc_call_ops *); +void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); +void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); int pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); @@ -318,14 +316,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg) { } -static inline struct pnfs_layout_segment * -pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - loff_t pos, u64 count, enum pnfs_iomode access_type, - gfp_t gfp_flags) -{ - return NULL; -} - static inline enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *data, const struct rpc_call_ops *call_ops) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index b6d9ec9a208b..2cf728b832cd 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -147,9 +147,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, zero_user_segment(page, len, PAGE_CACHE_SIZE); nfs_pageio_init_read(&pgio, inode); - nfs_list_add_request(new, &pgio.pg_list); - pgio.pg_count = len; - + nfs_pageio_add_request(&pgio, new); nfs_pageio_complete(&pgio); return 0; } @@ -281,7 +279,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) unsigned int offset; int requests = 0; int ret = 0; - struct pnfs_layout_segment *lseg; + struct pnfs_layout_segment *lseg = desc->pg_lseg; LIST_HEAD(list); nfs_list_remove_request(req); @@ -299,10 +297,6 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) } while(nbytes != 0); atomic_set(&req->wb_complete, requests); - BUG_ON(desc->pg_lseg != NULL); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, - req_offset(req), desc->pg_count, - IOMODE_READ, GFP_KERNEL); ClearPageError(page); offset = 0; nbytes = desc->pg_count; @@ -336,6 +330,8 @@ out_bad: } SetPageError(page); nfs_readpage_release(req); + put_lseg(lseg); + desc->pg_lseg = NULL; return -ENOMEM; } @@ -364,10 +360,6 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) *pages++ = req->wb_page; } req = nfs_list_entry(data->pages.next); - if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, - req_offset(req), desc->pg_count, - IOMODE_READ, GFP_KERNEL); ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, 0, lseg); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b144b5474546..d1ae7e45886f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -916,7 +916,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) unsigned int offset; int requests = 0; int ret = 0; - struct pnfs_layout_segment *lseg; + struct pnfs_layout_segment *lseg = desc->pg_lseg; LIST_HEAD(list); nfs_list_remove_request(req); @@ -940,10 +940,6 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) } while (nbytes != 0); atomic_set(&req->wb_complete, requests); - BUG_ON(desc->pg_lseg); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, - req_offset(req), desc->pg_count, - IOMODE_RW, GFP_NOFS); ClearPageError(page); offset = 0; nbytes = desc->pg_count; @@ -976,6 +972,8 @@ out_bad: nfs_writedata_free(data); } nfs_redirty_request(req); + put_lseg(lseg); + desc->pg_lseg = NULL; return -ENOMEM; } @@ -1016,10 +1014,6 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) *pages++ = req->wb_page; } req = nfs_list_entry(data->pages.next); - if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, - req_offset(req), desc->pg_count, - IOMODE_RW, GFP_NOFS); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) -- cgit v1.2.3 From e885de1a5bc9f46ef8f934c5a7602c89d2d51e8d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 10 Jun 2011 13:30:23 -0400 Subject: NFSv4.1: Fall back to ordinary i/o through the mds if we have no layout segment Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 6 ++++++ fs/nfs/nfs4filelayout.c | 2 -- fs/nfs/objlayout/objio_osd.c | 3 --- fs/nfs/pnfs.c | 7 +++++++ fs/nfs/read.c | 2 +- fs/nfs/write.c | 2 +- 6 files changed, 15 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index fc017eadfe08..31e8b50011af 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -296,7 +296,13 @@ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, const struct rpc_call_ops *call_ops); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); +struct nfs_pageio_descriptor; +extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, + struct inode *inode); + /* write.c */ +extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, + struct inode *inode, int ioflags); extern void nfs_commit_free(struct nfs_write_data *p); extern int nfs_initiate_write(struct nfs_write_data *data, struct rpc_clnt *clnt, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index faac87f24797..d1ae5bfa6680 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -669,8 +669,6 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, !nfs_generic_pg_test(pgio, prev, req)) return false; - if (!pgio->pg_lseg) - return 1; p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT; stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index b759aeba57b9..70272d5355b2 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -1000,9 +1000,6 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, if (!pnfs_generic_pg_test(pgio, prev, req)) return false; - if (pgio->pg_lseg == NULL) - return true; - return pgio->pg_count + req->wb_bytes <= OBJIO_LSEG(pgio->pg_lseg)->max_io_size; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 69a0c3f1e462..cc807fe46953 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1066,6 +1066,10 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r req->wb_bytes, IOMODE_READ, GFP_KERNEL); + /* If no lseg, fall back to read through mds */ + if (pgio->pg_lseg == NULL) + nfs_pageio_init_read_mds(pgio, pgio->pg_inode); + } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); @@ -1080,6 +1084,9 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page * req->wb_bytes, IOMODE_RW, GFP_NOFS); + /* If no lseg, fall back to write through mds */ + if (pgio->pg_lseg == NULL) + nfs_pageio_init_write_mds(pgio, pgio->pg_inode, pgio->pg_ioflags); } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 2cf728b832cd..9b2a1d7fb760 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -114,7 +114,7 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) } } -static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, +void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode) { nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d1ae7e45886f..00940dc9ab04 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1040,7 +1040,7 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = { .pg_doio = nfs_generic_pg_writepages, }; -static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, +void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) { nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, -- cgit v1.2.3 From 47cb498e9316314e7e681f417135589195ad78a7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Jun 2011 12:18:11 -0400 Subject: NFSv4.1: Clean ups for the device id cache The fact that the global device id cache holds a reference to the nfs4_deviceid_node until it is invisible to rcu lookups implies that we can always assume that the reference count is non-zero in _find_get_deviceid. Also clean up nfs4_put_deviceid_node and the removal of the device id from the cache. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.h | 1 - fs/nfs/pnfs_dev.c | 44 +++++++++++--------------------------------- 2 files changed, 11 insertions(+), 34 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index dd54351ae6ac..1cfc96f8c45b 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -198,7 +198,6 @@ struct nfs4_deviceid_node { void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); -struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, const struct pnfs_layoutdriver_type *, diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index f0f8e1e22f6c..fb9498d91f6a 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -100,8 +100,8 @@ _find_get_deviceid(const struct pnfs_layoutdriver_type *ld, rcu_read_lock(); d = _lookup_deviceid(ld, clp, id, hash); - if (d && !atomic_inc_not_zero(&d->ref)) - d = NULL; + if (d != NULL) + atomic_inc(&d->ref); rcu_read_unlock(); return d; } @@ -115,15 +115,15 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld, EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); /* - * Unhash and put deviceid + * Remove a deviceid from cache * * @clp nfs_client associated with deviceid * @id the deviceid to unhash * * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise. */ -struct nfs4_deviceid_node * -nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, +void +nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, const struct nfs_client *clp, const struct nfs4_deviceid *id) { struct nfs4_deviceid_node *d; @@ -134,7 +134,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, rcu_read_unlock(); if (!d) { spin_unlock(&nfs4_deviceid_lock); - return NULL; + return; } hlist_del_init_rcu(&d->node); spin_unlock(&nfs4_deviceid_lock); @@ -142,28 +142,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, /* balance the initial ref set in pnfs_insert_deviceid */ if (atomic_dec_and_test(&d->ref)) - return d; - - return NULL; -} -EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid); - -/* - * Delete a deviceid from cache - * - * @clp struct nfs_client qualifying the deviceid - * @id deviceid to delete - */ -void -nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, - const struct nfs_client *clp, const struct nfs4_deviceid *id) -{ - struct nfs4_deviceid_node *d; - - d = nfs4_unhash_put_deviceid(ld, clp, id); - if (!d) - return; - d->ld->free_deviceid_node(d); + d->ld->free_deviceid_node(d); } EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); @@ -221,16 +200,15 @@ EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node); * * @d deviceid node to put * - * @ret true iff the node was deleted + * return true iff the node was deleted + * Note that since the test for d->ref == 0 is sufficient to establish + * that the node is no longer hashed in the global device id cache. */ bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) { - if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock)) + if (!atomic_dec_and_test(&d->ref)) return false; - hlist_del_init_rcu(&d->node); - spin_unlock(&nfs4_deviceid_lock); - synchronize_rcu(); d->ld->free_deviceid_node(d); return true; } -- cgit v1.2.3 From 7c24d9489fe57d67cb56c6bdad58d89806e7fd97 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 13 Jun 2011 18:22:38 -0400 Subject: NFSv4.1: File layout only supports whole file layouts Ask for whole file layouts. Until support for layout segments is fully supported in the file layout code, discard non-whole file layouts. Signed-off-by: Andy Adamson Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-- fs/nfs/pnfs.c | 6 ++++-- fs/nfs/pnfs.h | 6 ++++++ fs/nfs/read.c | 1 + fs/nfs/write.c | 1 + 5 files changed, 57 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index d1ae5bfa6680..51c19093022e 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -427,6 +427,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); + /* FIXME: remove this check when layout segment support is added */ + if (lgr->range.offset != 0 || + lgr->range.length != NFS4_MAX_UINT64) { + dprintk("%s Only whole file layouts supported. Use MDS i/o\n", + __func__); + goto out; + } + if (fl->pattern_offset > lgr->range.offset) { dprintk("%s pattern_offset %lld too large\n", __func__, fl->pattern_offset); @@ -679,14 +687,49 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return (p_stripe == r_stripe); } +void +filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) +{ + BUG_ON(pgio->pg_lseg != NULL); + + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_READ, + GFP_KERNEL); + /* If no lseg, fall back to read through mds */ + if (pgio->pg_lseg == NULL) + nfs_pageio_init_read_mds(pgio, pgio->pg_inode); +} + +void +filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) +{ + BUG_ON(pgio->pg_lseg != NULL); + + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_RW, + GFP_NOFS); + /* If no lseg, fall back to write through mds */ + if (pgio->pg_lseg == NULL) + nfs_pageio_init_write_mds(pgio, pgio->pg_inode, + pgio->pg_ioflags); +} + static const struct nfs_pageio_ops filelayout_pg_read_ops = { - .pg_init = pnfs_generic_pg_init_read, + .pg_init = filelayout_pg_init_read, .pg_test = filelayout_pg_test, .pg_doio = nfs_generic_pg_readpages, }; static const struct nfs_pageio_ops filelayout_pg_write_ops = { - .pg_init = pnfs_generic_pg_init_write, + .pg_init = filelayout_pg_init_write, .pg_test = filelayout_pg_test, .pg_doio = nfs_generic_pg_writepages, }; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index cc807fe46953..a7dc3367a857 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -911,7 +911,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, * Layout segment is retreived from the server if not cached. * The appropriate layout segment is referenced and returned to the caller. */ -static struct pnfs_layout_segment * +struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, loff_t pos, @@ -980,7 +980,8 @@ pnfs_update_layout(struct inode *ino, arg.offset -= pg_offset; arg.length += pg_offset; } - arg.length = PAGE_CACHE_ALIGN(arg.length); + if (arg.length != NFS4_MAX_UINT64) + arg.length = PAGE_CACHE_ALIGN(arg.length); lseg = send_layoutget(lo, ctx, &arg, gfp_flags); if (!lseg && first) { @@ -998,6 +999,7 @@ out_unlock: spin_unlock(&ino->i_lock); goto out; } +EXPORT_SYMBOL_GPL(pnfs_update_layout); int pnfs_layout_process(struct nfs4_layoutget *lgp) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1cfc96f8c45b..678c4c7b14d9 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -185,6 +185,12 @@ int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); int pnfs_ld_write_done(struct nfs_write_data *); int pnfs_ld_read_done(struct nfs_read_data *); +struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, + struct nfs_open_context *ctx, + loff_t pos, + u64 count, + enum pnfs_iomode iomode, + gfp_t gfp_flags); /* pnfs_dev.c */ struct nfs4_deviceid_node { diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 9b2a1d7fb760..c394662c790e 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -120,6 +120,7 @@ void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, NFS_SERVER(inode)->rsize, 0); } +EXPORT_SYMBOL_GPL(nfs_pageio_init_read_mds); static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 00940dc9ab04..7f8732e31982 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1046,6 +1046,7 @@ void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, NFS_SERVER(inode)->wsize, ioflags); } +EXPORT_SYMBOL_GPL(nfs_pageio_init_write_mds); static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) -- cgit v1.2.3 From a56aaa02b1f723e28b41d339ddff02e958d32d43 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 15 Jun 2011 11:59:10 -0400 Subject: NFSv4.1: Clean up layoutreturn Since we take a reference to it, we really ought to pass the a pointer to the layout header in the arguments instead of assuming that NFS_I(inode)->layout will forever point to the correct object. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 ++-- fs/nfs/pnfs.c | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 325706f73eea..93ef77666efc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5770,7 +5770,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) { struct nfs4_layoutreturn *lrp = calldata; struct nfs_server *server; - struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; + struct pnfs_layout_hdr *lo = lrp->args.layout; dprintk("--> %s\n", __func__); @@ -5799,7 +5799,7 @@ static void nfs4_layoutreturn_release(void *calldata) struct nfs4_layoutreturn *lrp = calldata; dprintk("--> %s\n", __func__); - put_layout_hdr(NFS_I(lrp->args.inode)->layout); + put_layout_hdr(lrp->args.layout); kfree(calldata); dprintk("<-- %s\n", __func__); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a7dc3367a857..5fc2e5d755a5 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -667,6 +667,7 @@ _pnfs_return_layout(struct inode *ino) lrp->args.stateid = stateid; lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; lrp->args.inode = ino; + lrp->args.layout = lo; lrp->clp = NFS_SERVER(ino)->nfs_client; status = nfs4_proc_layoutreturn(lrp); -- cgit v1.2.3 From c47abcf8ff4d0c56d20ce541e80d3e1c975f54b5 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 15 Jun 2011 17:52:40 -0400 Subject: NFSv4.1: do not use deviceids after MDS clientid invalidation Mark all deviceids established under an expired MDS clientid as invalid. Stop all new i/o through DS and send through the MDS. Don't use any new LAYOUTGETs that use the invalid deviceid. Purge all layouts established under the expired MDS clientid. Remove the MDS clientid deviceid and data servers reference Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 10 ++++++++++ fs/nfs/nfs4filelayout.h | 6 ++++++ fs/nfs/pnfs.c | 3 +++ fs/nfs/pnfs.h | 8 ++++++++ fs/nfs/pnfs_dev.c | 20 ++++++++++++++++++++ 5 files changed, 47 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 51c19093022e..af9bf9eed4ca 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -334,6 +334,9 @@ filelayout_read_pagelist(struct nfs_read_data *data) __func__, data->inode->i_ino, data->args.pgbase, (size_t)data->args.count, offset); + if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags)) + return PNFS_NOT_ATTEMPTED; + /* Retrieve the correct rpc_client for the byte range */ j = nfs4_fl_calc_j_index(lseg, offset); idx = nfs4_fl_calc_ds_index(lseg, j); @@ -373,6 +376,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) struct nfs_fh *fh; int status; + if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags)) + return PNFS_NOT_ATTEMPTED; + /* Retrieve the correct rpc_client for the byte range */ j = nfs4_fl_calc_j_index(lseg, offset); idx = nfs4_fl_calc_ds_index(lseg, j); @@ -456,6 +462,10 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, goto out; } else dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); + /* Found deviceid is being reaped */ + if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags)) + goto out_put; + fl->dsaddr = dsaddr; if (fl->first_stripe_index < 0 || diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 68cce730b800..2e42284253fa 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -96,6 +96,12 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg) generic_hdr); } +static inline struct nfs4_deviceid_node * +FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg) +{ + return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node; +} + extern struct nfs_fh * nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 5fc2e5d755a5..5b3cc3f4bb39 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -452,6 +452,9 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) struct pnfs_layout_hdr *lo; LIST_HEAD(tmp_list); + nfs4_deviceid_mark_client_invalid(clp); + nfs4_deviceid_purge_client(clp); + spin_lock(&clp->cl_lock); rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 678c4c7b14d9..a59736eae6ec 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -192,12 +192,20 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, enum pnfs_iomode iomode, gfp_t gfp_flags); +void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); + +/* nfs4_deviceid_flags */ +enum { + NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ +}; + /* pnfs_dev.c */ struct nfs4_deviceid_node { struct hlist_node node; struct hlist_node tmpnode; const struct pnfs_layoutdriver_type *ld; const struct nfs_client *nfs_client; + unsigned long flags; struct nfs4_deviceid deviceid; atomic_t ref; }; diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index fb9498d91f6a..6fda5228ef56 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -156,6 +156,7 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, INIT_HLIST_NODE(&d->tmpnode); d->ld = ld; d->nfs_client = nfs_client; + d->flags = 0; d->deviceid = *id; atomic_set(&d->ref, 1); } @@ -253,3 +254,22 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp) for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) _deviceid_purge_client(clp, h); } + +/* + * Stop use of all deviceids associated with an nfs_client + */ +void +nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) +{ + struct nfs4_deviceid_node *d; + struct hlist_node *n; + int i; + + rcu_read_lock(); + for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){ + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node) + if (d->nfs_client == clp) + set_bit(NFS_DEVICEID_INVALID, &d->flags); + } + rcu_read_unlock(); +} -- cgit v1.2.3 From 87ed5eb44ad9338b1617a0e78dea81d681325298 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 12 Jul 2011 13:42:01 -0400 Subject: NFS: Don't use DATA_SYNC writes If we're writing back data, and the FLUSH_STABLE flag is set, then we always want to use NFS_FILE_SYNC, since we're always in a situation where we're doing page reclaim, and so we want to free up the page as quickly as possible. If we're in the FLUSH_COND_STABLE case, then we either want to use another unstable write (if we have to do a commit anyway) or again, we want to use NFS_FILE_SYNC because we know that we have no more pages to write out. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7f8732e31982..1af4d82c4959 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -872,10 +872,14 @@ static int nfs_write_rpcsetup(struct nfs_page *req, data->args.context = get_nfs_open_context(req->wb_context); data->args.lock_context = req->wb_lock_context; data->args.stable = NFS_UNSTABLE; - if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { - data->args.stable = NFS_DATA_SYNC; - if (!nfs_need_commit(NFS_I(inode))) - data->args.stable = NFS_FILE_SYNC; + switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { + case 0: + break; + case FLUSH_COND_STABLE: + if (nfs_need_commit(NFS_I(inode))) + break; + default: + data->args.stable = NFS_FILE_SYNC; } data->res.fattr = &data->fattr; -- cgit v1.2.3 From 6e4efd568574221840ee8dd86f176dc977c1330c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 12 Jul 2011 13:42:02 -0400 Subject: NFS: Clean up nfs_read_rpcsetup and nfs_write_rpcsetup Split them up into two parts: one which sets up the struct nfs_read/write_data, the other which sets up the actual RPC call or pNFS call. Signed-off-by: Trond Myklebust --- fs/nfs/read.c | 42 +++++++++++++++++++++++++----------------- fs/nfs/write.c | 41 ++++++++++++++++++++++++++--------------- 2 files changed, 51 insertions(+), 32 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index c394662c790e..248a55425853 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -213,17 +213,14 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read); /* * Set up the NFS read request struct */ -static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, - const struct rpc_call_ops *call_ops, - unsigned int count, unsigned int offset, - struct pnfs_layout_segment *lseg) +static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, + unsigned int count, unsigned int offset) { struct inode *inode = req->wb_context->path.dentry->d_inode; data->req = req; data->inode = inode; data->cred = req->wb_context->cred; - data->lseg = get_lseg(lseg); data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; @@ -237,10 +234,21 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, data->res.count = count; data->res.eof = 0; nfs_fattr_init(&data->fattr); +} - if (data->lseg && - (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED)) - return 0; +static int nfs_do_read(struct nfs_read_data *data, + const struct rpc_call_ops *call_ops, + struct pnfs_layout_segment *lseg) +{ + struct inode *inode = data->args.context->path.dentry->d_inode; + + if (lseg) { + data->lseg = get_lseg(lseg); + if (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED) + return 0; + put_lseg(data->lseg); + data->lseg = NULL; + } return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); } @@ -292,7 +300,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) data = nfs_readdata_alloc(1); if (!data) goto out_bad; - list_add(&data->pages, &list); + list_add(&data->list, &list); requests++; nbytes -= len; } while(nbytes != 0); @@ -304,15 +312,15 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) do { int ret2; - data = list_entry(list.next, struct nfs_read_data, pages); - list_del_init(&data->pages); + data = list_entry(list.next, struct nfs_read_data, list); + list_del_init(&data->list); data->pagevec[0] = page; if (nbytes < rsize) rsize = nbytes; - ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, - rsize, offset, lseg); + nfs_read_rpcsetup(req, data, rsize, offset); + ret2 = nfs_do_read(data, &nfs_read_partial_ops, lseg); if (ret == 0) ret = ret2; offset += rsize; @@ -325,8 +333,8 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) out_bad: while (!list_empty(&list)) { - data = list_entry(list.next, struct nfs_read_data, pages); - list_del(&data->pages); + data = list_entry(list.next, struct nfs_read_data, list); + list_del(&data->list); nfs_readdata_free(data); } SetPageError(page); @@ -362,8 +370,8 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) } req = nfs_list_entry(data->pages.next); - ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, - 0, lseg); + nfs_read_rpcsetup(req, data, desc->pg_count, 0); + ret = nfs_do_read(data, &nfs_read_full_ops, lseg); out: put_lseg(lseg); desc->pg_lseg = NULL; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 1af4d82c4959..0aeb09b38e4b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -845,11 +845,9 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write); /* * Set up the argument/result storage required for the RPC call. */ -static int nfs_write_rpcsetup(struct nfs_page *req, +static void nfs_write_rpcsetup(struct nfs_page *req, struct nfs_write_data *data, - const struct rpc_call_ops *call_ops, unsigned int count, unsigned int offset, - struct pnfs_layout_segment *lseg, int how) { struct inode *inode = req->wb_context->path.dentry->d_inode; @@ -860,7 +858,6 @@ static int nfs_write_rpcsetup(struct nfs_page *req, data->req = req; data->inode = inode = req->wb_context->path.dentry->d_inode; data->cred = req->wb_context->cred; - data->lseg = get_lseg(lseg); data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; @@ -886,10 +883,22 @@ static int nfs_write_rpcsetup(struct nfs_page *req, data->res.count = count; data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); +} - if (data->lseg && - (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED)) - return 0; +static int nfs_do_write(struct nfs_write_data *data, + const struct rpc_call_ops *call_ops, + struct pnfs_layout_segment *lseg, + int how) +{ + struct inode *inode = data->args.context->path.dentry->d_inode; + + if (lseg != NULL) { + data->lseg = get_lseg(lseg); + if (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED) + return 0; + put_lseg(data->lseg); + data->lseg = NULL; + } return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); } @@ -938,7 +947,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) data = nfs_writedata_alloc(1); if (!data) goto out_bad; - list_add(&data->pages, &list); + list_add(&data->list, &list); requests++; nbytes -= len; } while (nbytes != 0); @@ -950,15 +959,16 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) do { int ret2; - data = list_entry(list.next, struct nfs_write_data, pages); - list_del_init(&data->pages); + data = list_entry(list.next, struct nfs_write_data, list); + list_del_init(&data->list); data->pagevec[0] = page; if (nbytes < wsize) wsize = nbytes; - ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, - wsize, offset, lseg, desc->pg_ioflags); + nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags); + ret2 = nfs_do_write(data, &nfs_write_partial_ops, lseg, + desc->pg_ioflags); if (ret == 0) ret = ret2; offset += wsize; @@ -971,8 +981,8 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) out_bad: while (!list_empty(&list)) { - data = list_entry(list.next, struct nfs_write_data, pages); - list_del(&data->pages); + data = list_entry(list.next, struct nfs_write_data, list); + list_del(&data->list); nfs_writedata_free(data); } nfs_redirty_request(req); @@ -1024,7 +1034,8 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) desc->pg_ioflags &= ~FLUSH_COND_STABLE; /* Set up the argument struct */ - ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags); + nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags); + ret = nfs_do_write(data, &nfs_write_full_ops, lseg, desc->pg_ioflags); out: put_lseg(lseg); /* Cleans any gotten in ->pg_test */ desc->pg_lseg = NULL; -- cgit v1.2.3 From 3b6091846d5b6113d695c79caec7cc96b62d469b Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 15 Jul 2011 03:33:42 -0400 Subject: NFS: fix return value of nfs_pagein_one/nfs_flush_one Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/read.c | 3 ++- fs/nfs/write.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 248a55425853..581534a4aed7 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -351,12 +351,13 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) struct nfs_read_data *data; struct list_head *head = &desc->pg_list; struct pnfs_layout_segment *lseg = desc->pg_lseg; - int ret = -ENOMEM; + int ret = 0; data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, desc->pg_count)); if (!data) { nfs_async_read_error(head); + ret = -ENOMEM; goto out; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0aeb09b38e4b..d9dd744588d4 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1006,7 +1006,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) struct nfs_write_data *data; struct list_head *head = &desc->pg_list; struct pnfs_layout_segment *lseg = desc->pg_lseg; - int ret; + int ret = 0; data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, desc->pg_count)); -- cgit v1.2.3 From 275acaafd45fbc8ecc3beabd6367e60b3049606a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 12 Jul 2011 13:42:02 -0400 Subject: NFS: Clean up: split out the RPC transmission from nfs_pagein_multi/one ...and do the same for nfs_flush_multi/one. Signed-off-by: Trond Myklebust --- fs/nfs/read.c | 92 +++++++++++++++++++++++++++++++-------------------------- fs/nfs/write.c | 93 ++++++++++++++++++++++++++++++++-------------------------- 2 files changed, 102 insertions(+), 83 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 581534a4aed7..0215bac35fc6 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -30,8 +30,6 @@ #define NFSDBG_FACILITY NFSDBG_PAGECACHE -static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc); -static int nfs_pagein_one(struct nfs_pageio_descriptor *desc); static const struct nfs_pageio_ops nfs_pageio_read_ops; static const struct rpc_call_ops nfs_read_partial_ops; static const struct rpc_call_ops nfs_read_full_ops; @@ -253,6 +251,27 @@ static int nfs_do_read(struct nfs_read_data *data, return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); } +static int +nfs_do_multiple_reads(struct list_head *head, + const struct rpc_call_ops *call_ops, + struct pnfs_layout_segment *lseg) +{ + struct nfs_read_data *data; + int ret = 0; + + while (!list_empty(head)) { + int ret2; + + data = list_entry(head->next, struct nfs_read_data, list); + list_del_init(&data->list); + + ret2 = nfs_do_read(data, call_ops, lseg); + if (ret == 0) + ret = ret2; + } + return ret; +} + static void nfs_async_read_error(struct list_head *head) { @@ -279,7 +298,7 @@ nfs_async_read_error(struct list_head *head) * won't see the new data until our attribute cache is updated. This is more * or less conventional NFS client behavior. */ -static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) +static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) { struct nfs_page *req = nfs_list_entry(desc->pg_list.next); struct page *page = req->wb_page; @@ -288,11 +307,10 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) unsigned int offset; int requests = 0; int ret = 0; - struct pnfs_layout_segment *lseg = desc->pg_lseg; - LIST_HEAD(list); nfs_list_remove_request(req); + offset = 0; nbytes = desc->pg_count; do { size_t len = min(nbytes,rsize); @@ -300,57 +318,33 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) data = nfs_readdata_alloc(1); if (!data) goto out_bad; - list_add(&data->list, &list); + data->pagevec[0] = page; + nfs_read_rpcsetup(req, data, len, offset); + list_add(&data->list, res); requests++; nbytes -= len; + offset += len; } while(nbytes != 0); atomic_set(&req->wb_complete, requests); - ClearPageError(page); - offset = 0; - nbytes = desc->pg_count; - do { - int ret2; - - data = list_entry(list.next, struct nfs_read_data, list); - list_del_init(&data->list); - - data->pagevec[0] = page; - - if (nbytes < rsize) - rsize = nbytes; - nfs_read_rpcsetup(req, data, rsize, offset); - ret2 = nfs_do_read(data, &nfs_read_partial_ops, lseg); - if (ret == 0) - ret = ret2; - offset += rsize; - nbytes -= rsize; - } while (nbytes != 0); - put_lseg(lseg); - desc->pg_lseg = NULL; - return ret; - out_bad: - while (!list_empty(&list)) { - data = list_entry(list.next, struct nfs_read_data, list); + while (!list_empty(res)) { + data = list_entry(res->next, struct nfs_read_data, list); list_del(&data->list); nfs_readdata_free(data); } SetPageError(page); nfs_readpage_release(req); - put_lseg(lseg); - desc->pg_lseg = NULL; return -ENOMEM; } -static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) +static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res) { struct nfs_page *req; struct page **pages; struct nfs_read_data *data; struct list_head *head = &desc->pg_list; - struct pnfs_layout_segment *lseg = desc->pg_lseg; int ret = 0; data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, @@ -372,18 +366,32 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) req = nfs_list_entry(data->pages.next); nfs_read_rpcsetup(req, data, desc->pg_count, 0); - ret = nfs_do_read(data, &nfs_read_full_ops, lseg); + list_add(&data->list, res); out: - put_lseg(lseg); - desc->pg_lseg = NULL; return ret; } int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { - if (desc->pg_bsize < PAGE_CACHE_SIZE) - return nfs_pagein_multi(desc); - return nfs_pagein_one(desc); + LIST_HEAD(head); + int ret; + + if (desc->pg_bsize < PAGE_CACHE_SIZE) { + ret = nfs_pagein_multi(desc, &head); + if (ret == 0) + ret = nfs_do_multiple_reads(&head, + &nfs_read_partial_ops, + desc->pg_lseg); + } else { + ret = nfs_pagein_one(desc, &head); + if (ret == 0) + ret = nfs_do_multiple_reads(&head, + &nfs_read_full_ops, + desc->pg_lseg); + } + put_lseg(desc->pg_lseg); + desc->pg_lseg = NULL; + return ret; } EXPORT_SYMBOL_GPL(nfs_generic_pg_readpages); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d9dd744588d4..bd4b34e5870b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -903,6 +903,27 @@ static int nfs_do_write(struct nfs_write_data *data, return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); } +static int nfs_do_multiple_writes(struct list_head *head, + const struct rpc_call_ops *call_ops, + struct pnfs_layout_segment *lseg, + int how) +{ + struct nfs_write_data *data; + int ret = 0; + + while (!list_empty(head)) { + int ret2; + + data = list_entry(head->next, struct nfs_write_data, list); + list_del_init(&data->list); + + ret2 = nfs_do_write(data, call_ops, lseg, how); + if (ret == 0) + ret = ret2; + } + return ret; +} + /* If a nfs_flush_* function fails, it should remove reqs from @head and * call this on each, which will prepare them to be retried on next * writeback using standard nfs. @@ -920,7 +941,7 @@ static void nfs_redirty_request(struct nfs_page *req) * Generate multiple small requests to write out a single * contiguous dirty area on one page. */ -static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) +static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) { struct nfs_page *req = nfs_list_entry(desc->pg_list.next); struct page *page = req->wb_page; @@ -929,8 +950,6 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) unsigned int offset; int requests = 0; int ret = 0; - struct pnfs_layout_segment *lseg = desc->pg_lseg; - LIST_HEAD(list); nfs_list_remove_request(req); @@ -940,6 +959,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) desc->pg_ioflags &= ~FLUSH_COND_STABLE; + offset = 0; nbytes = desc->pg_count; do { size_t len = min(nbytes, wsize); @@ -947,47 +967,23 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) data = nfs_writedata_alloc(1); if (!data) goto out_bad; - list_add(&data->list, &list); + data->pagevec[0] = page; + nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags); + list_add(&data->list, res); requests++; nbytes -= len; + offset += len; } while (nbytes != 0); atomic_set(&req->wb_complete, requests); - - ClearPageError(page); - offset = 0; - nbytes = desc->pg_count; - do { - int ret2; - - data = list_entry(list.next, struct nfs_write_data, list); - list_del_init(&data->list); - - data->pagevec[0] = page; - - if (nbytes < wsize) - wsize = nbytes; - nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags); - ret2 = nfs_do_write(data, &nfs_write_partial_ops, lseg, - desc->pg_ioflags); - if (ret == 0) - ret = ret2; - offset += wsize; - nbytes -= wsize; - } while (nbytes != 0); - - put_lseg(lseg); - desc->pg_lseg = NULL; return ret; out_bad: - while (!list_empty(&list)) { - data = list_entry(list.next, struct nfs_write_data, list); + while (!list_empty(res)) { + data = list_entry(res->next, struct nfs_write_data, list); list_del(&data->list); nfs_writedata_free(data); } nfs_redirty_request(req); - put_lseg(lseg); - desc->pg_lseg = NULL; return -ENOMEM; } @@ -999,13 +995,12 @@ out_bad: * This is the case if nfs_updatepage detects a conflicting request * that has been written but not committed. */ -static int nfs_flush_one(struct nfs_pageio_descriptor *desc) +static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res) { struct nfs_page *req; struct page **pages; struct nfs_write_data *data; struct list_head *head = &desc->pg_list; - struct pnfs_layout_segment *lseg = desc->pg_lseg; int ret = 0; data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, @@ -1035,18 +1030,34 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) /* Set up the argument struct */ nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags); - ret = nfs_do_write(data, &nfs_write_full_ops, lseg, desc->pg_ioflags); + list_add(&data->list, res); out: - put_lseg(lseg); /* Cleans any gotten in ->pg_test */ - desc->pg_lseg = NULL; return ret; } int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { - if (desc->pg_bsize < PAGE_CACHE_SIZE) - return nfs_flush_multi(desc); - return nfs_flush_one(desc); + LIST_HEAD(head); + int ret; + + if (desc->pg_bsize < PAGE_CACHE_SIZE) { + ret = nfs_flush_multi(desc, &head); + if (ret == 0) + ret = nfs_do_multiple_writes(&head, + &nfs_write_partial_ops, + desc->pg_lseg, + desc->pg_ioflags); + } else { + ret = nfs_flush_one(desc, &head); + if (ret == 0) + ret = nfs_do_multiple_writes(&head, + &nfs_write_full_ops, + desc->pg_lseg, + desc->pg_ioflags); + } + put_lseg(desc->pg_lseg); + desc->pg_lseg = NULL; + return ret; } EXPORT_SYMBOL_GPL(nfs_generic_pg_writepages); -- cgit v1.2.3 From 50828d7e6767a92726708bc0666e2b8b84575808 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 12 Jul 2011 13:42:02 -0400 Subject: NFS: Cache rpc_ops in struct nfs_pageio_descriptor Signed-off-by: Trond Myklebust --- fs/nfs/read.c | 19 ++++++++----------- fs/nfs/write.c | 20 +++++++------------- 2 files changed, 15 insertions(+), 24 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 0215bac35fc6..c14362fbe65f 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -327,6 +327,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head } while(nbytes != 0); atomic_set(&req->wb_complete, requests); ClearPageError(page); + desc->pg_rpc_callops = &nfs_read_partial_ops; return ret; out_bad: while (!list_empty(res)) { @@ -367,6 +368,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head * nfs_read_rpcsetup(req, data, desc->pg_count, 0); list_add(&data->list, res); + desc->pg_rpc_callops = &nfs_read_full_ops; out: return ret; } @@ -376,19 +378,14 @@ int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) LIST_HEAD(head); int ret; - if (desc->pg_bsize < PAGE_CACHE_SIZE) { + if (desc->pg_bsize < PAGE_CACHE_SIZE) ret = nfs_pagein_multi(desc, &head); - if (ret == 0) - ret = nfs_do_multiple_reads(&head, - &nfs_read_partial_ops, - desc->pg_lseg); - } else { + else ret = nfs_pagein_one(desc, &head); - if (ret == 0) - ret = nfs_do_multiple_reads(&head, - &nfs_read_full_ops, - desc->pg_lseg); - } + + if (ret == 0) + ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops, + desc->pg_lseg); put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return ret; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index bd4b34e5870b..71fbba72ace3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -975,6 +975,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head offset += len; } while (nbytes != 0); atomic_set(&req->wb_complete, requests); + desc->pg_rpc_callops = &nfs_write_partial_ops; return ret; out_bad: @@ -1031,6 +1032,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r /* Set up the argument struct */ nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags); list_add(&data->list, res); + desc->pg_rpc_callops = &nfs_write_full_ops; out: return ret; } @@ -1040,21 +1042,13 @@ int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) LIST_HEAD(head); int ret; - if (desc->pg_bsize < PAGE_CACHE_SIZE) { + if (desc->pg_bsize < PAGE_CACHE_SIZE) ret = nfs_flush_multi(desc, &head); - if (ret == 0) - ret = nfs_do_multiple_writes(&head, - &nfs_write_partial_ops, - desc->pg_lseg, - desc->pg_ioflags); - } else { + else ret = nfs_flush_one(desc, &head); - if (ret == 0) - ret = nfs_do_multiple_writes(&head, - &nfs_write_full_ops, - desc->pg_lseg, - desc->pg_ioflags); - } + if (ret == 0) + ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops, + desc->pg_lseg, desc->pg_ioflags); put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return ret; -- cgit v1.2.3 From d097971d8ab4042eaa4bff98698ae9cc55942327 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 12 Jul 2011 13:42:02 -0400 Subject: NFS: Use the nfs_pageio_descriptor->pg_bsize in the read/write request Instead of looking up the rsize and wsize, the routines that generate the RPC requests should really be using the pg_bsize, since that is what we use when deciding whether or not to coalesce write requests... Signed-off-by: Trond Myklebust --- fs/nfs/read.c | 2 +- fs/nfs/write.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index c14362fbe65f..75088f58183b 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -303,7 +303,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head struct nfs_page *req = nfs_list_entry(desc->pg_list.next); struct page *page = req->wb_page; struct nfs_read_data *data; - size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes; + size_t rsize = desc->pg_bsize, nbytes; unsigned int offset; int requests = 0; int ret = 0; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 71fbba72ace3..c88a1ab42c39 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -946,7 +946,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head struct nfs_page *req = nfs_list_entry(desc->pg_list.next); struct page *page = req->wb_page; struct nfs_write_data *data; - size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes; + size_t wsize = desc->pg_bsize, nbytes; unsigned int offset; int requests = 0; int ret = 0; -- cgit v1.2.3 From d9156f9f364897e93bdd98b4ad22138de18f7c24 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 12 Jul 2011 13:42:02 -0400 Subject: NFS: Allow the nfs_pageio_descriptor to signal that a re-coalesce is needed If an attempt to do pNFS fails, and we have to fall back to writing through the MDS, then we may want to re-coalesce the requests that we already have since the block size for the MDS read/writes may be different to that of the DS read/writes. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d421e19557a5..7139dbf8784e 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -240,6 +240,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_bsize = bsize; desc->pg_base = 0; desc->pg_moreio = 0; + desc->pg_recoalesce = 0; desc->pg_inode = inode; desc->pg_ops = pg_ops; desc->pg_ioflags = io_flags; @@ -331,7 +332,7 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) * Returns true if the request 'req' was successfully coalesced into the * existing list of pages 'desc'. */ -int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, +static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { while (!nfs_pageio_do_add_request(desc, req)) { @@ -340,17 +341,67 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, if (desc->pg_error < 0) return 0; desc->pg_moreio = 0; + if (desc->pg_recoalesce) + return 0; } return 1; } +static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) +{ + LIST_HEAD(head); + + do { + list_splice_init(&desc->pg_list, &head); + desc->pg_bytes_written -= desc->pg_count; + desc->pg_count = 0; + desc->pg_base = 0; + desc->pg_recoalesce = 0; + + while (!list_empty(&head)) { + struct nfs_page *req; + + req = list_first_entry(&head, struct nfs_page, wb_list); + nfs_list_remove_request(req); + if (__nfs_pageio_add_request(desc, req)) + continue; + if (desc->pg_error < 0) + return 0; + break; + } + } while (desc->pg_recoalesce); + return 1; +} + +int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, + struct nfs_page *req) +{ + int ret; + + do { + ret = __nfs_pageio_add_request(desc, req); + if (ret) + break; + if (desc->pg_error < 0) + break; + ret = nfs_do_recoalesce(desc); + } while (ret); + return ret; +} + /** * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor * @desc: pointer to io descriptor */ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) { - nfs_pageio_doio(desc); + for (;;) { + nfs_pageio_doio(desc); + if (!desc->pg_recoalesce) + break; + if (!nfs_do_recoalesce(desc)) + break; + } } /** @@ -369,7 +420,7 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) if (!list_empty(&desc->pg_list)) { struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); if (index != prev->wb_index + 1) - nfs_pageio_doio(desc); + nfs_pageio_complete(desc); } } -- cgit v1.2.3 From 493292ddc78d18ee2ad2d5c24c2b7dd6a24641d2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 13 Jul 2011 15:58:28 -0400 Subject: NFS: Move the pnfs read code into pnfs.c ...and ensure that we recoalese to take into account differences in block sizes when falling back to read through the MDS. Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 6 ++++- fs/nfs/nfs4filelayout.c | 2 +- fs/nfs/objlayout/objio_osd.c | 2 +- fs/nfs/pnfs.c | 57 ++++++++++++++++++++++++++++++++++++++++++-- fs/nfs/pnfs.h | 10 +------- fs/nfs/read.c | 46 ++++++++++++++++------------------- 6 files changed, 83 insertions(+), 40 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 31e8b50011af..d74d7dea9173 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -291,14 +291,18 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *, extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); #endif +struct nfs_pageio_descriptor; /* read.c */ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, const struct rpc_call_ops *call_ops); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); +extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, + struct list_head *head); -struct nfs_pageio_descriptor; extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode); +extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); +extern void nfs_readdata_release(struct nfs_read_data *rdata); /* write.c */ extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index af9bf9eed4ca..fc556d6b90f1 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -735,7 +735,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, static const struct nfs_pageio_ops filelayout_pg_read_ops = { .pg_init = filelayout_pg_init_read, .pg_test = filelayout_pg_test, - .pg_doio = nfs_generic_pg_readpages, + .pg_doio = pnfs_generic_pg_readpages, }; static const struct nfs_pageio_ops filelayout_pg_write_ops = { diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 70272d5355b2..add62894f364 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -1007,7 +1007,7 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, static const struct nfs_pageio_ops objio_pg_read_ops = { .pg_init = pnfs_generic_pg_init_read, .pg_test = objio_pg_test, - .pg_doio = nfs_generic_pg_readpages, + .pg_doio = pnfs_generic_pg_readpages, }; static const struct nfs_pageio_ops objio_pg_write_ops = { diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 5b3cc3f4bb39..9eca5a8cdbdd 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -28,6 +28,7 @@ */ #include +#include #include "internal.h" #include "pnfs.h" #include "iostat.h" @@ -1216,18 +1217,32 @@ pnfs_ld_read_done(struct nfs_read_data *data) } EXPORT_SYMBOL_GPL(pnfs_ld_read_done); +static void +pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, + struct nfs_read_data *data) +{ + list_splice_tail_init(&data->pages, &desc->pg_list); + if (data->req && list_empty(&data->req->wb_list)) + nfs_list_add_request(data->req, &desc->pg_list); + nfs_pageio_reset_read_mds(desc); + desc->pg_recoalesce = 1; + nfs_readdata_release(data); +} + /* * Call the appropriate parallel I/O subsystem read function. */ -enum pnfs_try_status +static enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *rdata, - const struct rpc_call_ops *call_ops) + const struct rpc_call_ops *call_ops, + struct pnfs_layout_segment *lseg) { struct inode *inode = rdata->inode; struct nfs_server *nfss = NFS_SERVER(inode); enum pnfs_try_status trypnfs; rdata->mds_ops = call_ops; + rdata->lseg = get_lseg(lseg); dprintk("%s: Reading ino:%lu %u@%llu\n", __func__, inode->i_ino, rdata->args.count, rdata->args.offset); @@ -1243,6 +1258,44 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, return trypnfs; } +static void +pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head) +{ + struct nfs_read_data *data; + const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; + struct pnfs_layout_segment *lseg = desc->pg_lseg; + + desc->pg_lseg = NULL; + while (!list_empty(head)) { + enum pnfs_try_status trypnfs; + + data = list_entry(head->next, struct nfs_read_data, list); + list_del_init(&data->list); + + trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); + if (trypnfs == PNFS_NOT_ATTEMPTED) + pnfs_read_through_mds(desc, data); + } + put_lseg(lseg); +} + +int +pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) +{ + LIST_HEAD(head); + int ret; + + ret = nfs_generic_pagein(desc, &head); + if (ret != 0) { + put_lseg(desc->pg_lseg); + desc->pg_lseg = NULL; + return ret; + } + pnfs_do_multiple_reads(desc, &head); + return 0; +} +EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); + /* * Currently there is only one (whole file) write lseg. */ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index a59736eae6ec..c40ffa52c1ab 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -157,9 +157,8 @@ void set_pnfs_layoutdriver(struct nfs_server *, u32 id); void unset_pnfs_layoutdriver(struct nfs_server *); enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, const struct rpc_call_ops *, int); -enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, - const struct rpc_call_ops *); void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); +int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); int pnfs_layout_process(struct nfs4_layoutget *lgp); @@ -329,13 +328,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg) { } -static inline enum pnfs_try_status -pnfs_try_to_read_data(struct nfs_read_data *data, - const struct rpc_call_ops *call_ops) -{ - return PNFS_NOT_ATTEMPTED; -} - static inline enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *data, const struct rpc_call_ops *call_ops, int how) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 75088f58183b..d2f53ddd8260 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -67,7 +67,7 @@ void nfs_readdata_free(struct nfs_read_data *p) mempool_free(p, nfs_rdata_mempool); } -static void nfs_readdata_release(struct nfs_read_data *rdata) +void nfs_readdata_release(struct nfs_read_data *rdata) { put_lseg(rdata->lseg); put_nfs_open_context(rdata->args.context); @@ -120,6 +120,12 @@ void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, } EXPORT_SYMBOL_GPL(nfs_pageio_init_read_mds); +void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) +{ + pgio->pg_ops = &nfs_pageio_read_ops; + pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; +} + static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) { @@ -235,26 +241,16 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, } static int nfs_do_read(struct nfs_read_data *data, - const struct rpc_call_ops *call_ops, - struct pnfs_layout_segment *lseg) + const struct rpc_call_ops *call_ops) { struct inode *inode = data->args.context->path.dentry->d_inode; - if (lseg) { - data->lseg = get_lseg(lseg); - if (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED) - return 0; - put_lseg(data->lseg); - data->lseg = NULL; - } - return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); } static int nfs_do_multiple_reads(struct list_head *head, - const struct rpc_call_ops *call_ops, - struct pnfs_layout_segment *lseg) + const struct rpc_call_ops *call_ops) { struct nfs_read_data *data; int ret = 0; @@ -265,7 +261,7 @@ nfs_do_multiple_reads(struct list_head *head, data = list_entry(head->next, struct nfs_read_data, list); list_del_init(&data->list); - ret2 = nfs_do_read(data, call_ops, lseg); + ret2 = nfs_do_read(data, call_ops); if (ret == 0) ret = ret2; } @@ -373,25 +369,23 @@ out: return ret; } -int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) +int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head) +{ + if (desc->pg_bsize < PAGE_CACHE_SIZE) + return nfs_pagein_multi(desc, head); + return nfs_pagein_one(desc, head); +} + +static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { LIST_HEAD(head); int ret; - if (desc->pg_bsize < PAGE_CACHE_SIZE) - ret = nfs_pagein_multi(desc, &head); - else - ret = nfs_pagein_one(desc, &head); - + ret = nfs_generic_pagein(desc, &head); if (ret == 0) - ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops, - desc->pg_lseg); - put_lseg(desc->pg_lseg); - desc->pg_lseg = NULL; + ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops); return ret; } -EXPORT_SYMBOL_GPL(nfs_generic_pg_readpages); - static const struct nfs_pageio_ops nfs_pageio_read_ops = { .pg_test = nfs_generic_pg_test, -- cgit v1.2.3 From dce81290eed64d24493989bb7a08f9e20495e184 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 13 Jul 2011 15:59:19 -0400 Subject: NFS: Move the pnfs write code into pnfs.c ...and ensure that we recoalese to take into account differences in differences in block sizes when falling back to write through the MDS. Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 4 ++++ fs/nfs/nfs4filelayout.c | 2 +- fs/nfs/objlayout/objio_osd.c | 2 +- fs/nfs/pnfs.c | 57 ++++++++++++++++++++++++++++++++++++++++++-- fs/nfs/pnfs.h | 10 +------- fs/nfs/write.c | 39 ++++++++++++++---------------- 6 files changed, 80 insertions(+), 34 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d74d7dea9173..09e20de31901 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -305,8 +305,12 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_readdata_release(struct nfs_read_data *rdata); /* write.c */ +extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, + struct list_head *head); extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags); +extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); +extern void nfs_writedata_release(struct nfs_write_data *wdata); extern void nfs_commit_free(struct nfs_write_data *p); extern int nfs_initiate_write(struct nfs_write_data *data, struct rpc_clnt *clnt, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index fc556d6b90f1..fbc5b42d1970 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -741,7 +741,7 @@ static const struct nfs_pageio_ops filelayout_pg_read_ops = { static const struct nfs_pageio_ops filelayout_pg_write_ops = { .pg_init = filelayout_pg_init_write, .pg_test = filelayout_pg_test, - .pg_doio = nfs_generic_pg_writepages, + .pg_doio = pnfs_generic_pg_writepages, }; static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index add62894f364..7d49bb160b46 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -1013,7 +1013,7 @@ static const struct nfs_pageio_ops objio_pg_read_ops = { static const struct nfs_pageio_ops objio_pg_write_ops = { .pg_init = pnfs_generic_pg_init_write, .pg_test = objio_pg_test, - .pg_doio = nfs_generic_pg_writepages, + .pg_doio = pnfs_generic_pg_writepages, }; static struct pnfs_layoutdriver_type objlayout_type = { diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 9eca5a8cdbdd..93c73299588d 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1170,15 +1170,30 @@ pnfs_ld_write_done(struct nfs_write_data *data) } EXPORT_SYMBOL_GPL(pnfs_ld_write_done); -enum pnfs_try_status +static void +pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, + struct nfs_write_data *data) +{ + list_splice_tail_init(&data->pages, &desc->pg_list); + if (data->req && list_empty(&data->req->wb_list)) + nfs_list_add_request(data->req, &desc->pg_list); + nfs_pageio_reset_write_mds(desc); + desc->pg_recoalesce = 1; + nfs_writedata_release(data); +} + +static enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *wdata, - const struct rpc_call_ops *call_ops, int how) + const struct rpc_call_ops *call_ops, + struct pnfs_layout_segment *lseg, + int how) { struct inode *inode = wdata->inode; enum pnfs_try_status trypnfs; struct nfs_server *nfss = NFS_SERVER(inode); wdata->mds_ops = call_ops; + wdata->lseg = get_lseg(lseg); dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, inode->i_ino, wdata->args.count, wdata->args.offset, how); @@ -1194,6 +1209,44 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, return trypnfs; } +static void +pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how) +{ + struct nfs_write_data *data; + const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; + struct pnfs_layout_segment *lseg = desc->pg_lseg; + + desc->pg_lseg = NULL; + while (!list_empty(head)) { + enum pnfs_try_status trypnfs; + + data = list_entry(head->next, struct nfs_write_data, list); + list_del_init(&data->list); + + trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); + if (trypnfs == PNFS_NOT_ATTEMPTED) + pnfs_write_through_mds(desc, data); + } + put_lseg(lseg); +} + +int +pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) +{ + LIST_HEAD(head); + int ret; + + ret = nfs_generic_flush(desc, &head); + if (ret != 0) { + put_lseg(desc->pg_lseg); + desc->pg_lseg = NULL; + return ret; + } + pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags); + return 0; +} +EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); + /* * Called by non rpc-based layout drivers */ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index c40ffa52c1ab..078670dfbe04 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -155,11 +155,10 @@ bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int) void set_pnfs_layoutdriver(struct nfs_server *, u32 id); void unset_pnfs_layoutdriver(struct nfs_server *); -enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, - const struct rpc_call_ops *, int); void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); +int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); int pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); @@ -328,13 +327,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg) { } -static inline enum pnfs_try_status -pnfs_try_to_write_data(struct nfs_write_data *data, - const struct rpc_call_ops *call_ops, int how) -{ - return PNFS_NOT_ATTEMPTED; -} - static inline int pnfs_return_layout(struct inode *ino) { return 0; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index c88a1ab42c39..cabe5f6611b9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -97,7 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p) mempool_free(p, nfs_wdata_mempool); } -static void nfs_writedata_release(struct nfs_write_data *wdata) +void nfs_writedata_release(struct nfs_write_data *wdata) { put_lseg(wdata->lseg); put_nfs_open_context(wdata->args.context); @@ -887,25 +887,15 @@ static void nfs_write_rpcsetup(struct nfs_page *req, static int nfs_do_write(struct nfs_write_data *data, const struct rpc_call_ops *call_ops, - struct pnfs_layout_segment *lseg, int how) { struct inode *inode = data->args.context->path.dentry->d_inode; - if (lseg != NULL) { - data->lseg = get_lseg(lseg); - if (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED) - return 0; - put_lseg(data->lseg); - data->lseg = NULL; - } - return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); } static int nfs_do_multiple_writes(struct list_head *head, const struct rpc_call_ops *call_ops, - struct pnfs_layout_segment *lseg, int how) { struct nfs_write_data *data; @@ -917,7 +907,7 @@ static int nfs_do_multiple_writes(struct list_head *head, data = list_entry(head->next, struct nfs_write_data, list); list_del_init(&data->list); - ret2 = nfs_do_write(data, call_ops, lseg, how); + ret2 = nfs_do_write(data, call_ops, how); if (ret == 0) ret = ret2; } @@ -1037,23 +1027,24 @@ out: return ret; } -int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) +int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head) +{ + if (desc->pg_bsize < PAGE_CACHE_SIZE) + return nfs_flush_multi(desc, head); + return nfs_flush_one(desc, head); +} + +static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { LIST_HEAD(head); int ret; - if (desc->pg_bsize < PAGE_CACHE_SIZE) - ret = nfs_flush_multi(desc, &head); - else - ret = nfs_flush_one(desc, &head); + ret = nfs_generic_flush(desc, &head); if (ret == 0) ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops, - desc->pg_lseg, desc->pg_ioflags); - put_lseg(desc->pg_lseg); - desc->pg_lseg = NULL; + desc->pg_ioflags); return ret; } -EXPORT_SYMBOL_GPL(nfs_generic_pg_writepages); static const struct nfs_pageio_ops nfs_pageio_write_ops = { .pg_test = nfs_generic_pg_test, @@ -1068,6 +1059,12 @@ void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, } EXPORT_SYMBOL_GPL(nfs_pageio_init_write_mds); +void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) +{ + pgio->pg_ops = &nfs_pageio_write_ops; + pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; +} + static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) { -- cgit v1.2.3 From 1f9453578f059d2651aa6c6b16756627fc9f2a74 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 13 Jul 2011 15:59:57 -0400 Subject: NFS: Clean up - simplify the switch to read/write-through-MDS Use nfs_pageio_reset_read_mds and nfs_pageio_reset_write_mds instead of completely reinitialising the struct nfs_pageio_descriptor. Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 4 ---- fs/nfs/nfs4filelayout.c | 5 ++--- fs/nfs/pnfs.c | 4 ++-- fs/nfs/read.c | 4 ++-- fs/nfs/write.c | 4 ++-- 5 files changed, 8 insertions(+), 13 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 09e20de31901..ab12913dd473 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -299,16 +299,12 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head); -extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, - struct inode *inode); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_readdata_release(struct nfs_read_data *rdata); /* write.c */ extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head); -extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_writedata_release(struct nfs_write_data *wdata); extern void nfs_commit_free(struct nfs_write_data *p); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index fbc5b42d1970..f0b37e155ce3 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -711,7 +711,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, GFP_KERNEL); /* If no lseg, fall back to read through mds */ if (pgio->pg_lseg == NULL) - nfs_pageio_init_read_mds(pgio, pgio->pg_inode); + nfs_pageio_reset_read_mds(pgio); } void @@ -728,8 +728,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, GFP_NOFS); /* If no lseg, fall back to write through mds */ if (pgio->pg_lseg == NULL) - nfs_pageio_init_write_mds(pgio, pgio->pg_inode, - pgio->pg_ioflags); + nfs_pageio_reset_write_mds(pgio); } static const struct nfs_pageio_ops filelayout_pg_read_ops = { diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 93c73299588d..38e5508555c6 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1075,7 +1075,7 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r GFP_KERNEL); /* If no lseg, fall back to read through mds */ if (pgio->pg_lseg == NULL) - nfs_pageio_init_read_mds(pgio, pgio->pg_inode); + nfs_pageio_reset_read_mds(pgio); } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); @@ -1093,7 +1093,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page * GFP_NOFS); /* If no lseg, fall back to write through mds */ if (pgio->pg_lseg == NULL) - nfs_pageio_init_write_mds(pgio, pgio->pg_inode, pgio->pg_ioflags); + nfs_pageio_reset_write_mds(pgio); } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index d2f53ddd8260..7cba2280e2b8 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -112,19 +112,19 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) } } -void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, +static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode) { nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, NFS_SERVER(inode)->rsize, 0); } -EXPORT_SYMBOL_GPL(nfs_pageio_init_read_mds); void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) { pgio->pg_ops = &nfs_pageio_read_ops; pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; } +EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index cabe5f6611b9..9fba5270b1a8 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1051,19 +1051,19 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = { .pg_doio = nfs_generic_pg_writepages, }; -void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, +static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) { nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, NFS_SERVER(inode)->wsize, ioflags); } -EXPORT_SYMBOL_GPL(nfs_pageio_init_write_mds); void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) { pgio->pg_ops = &nfs_pageio_write_ops; pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; } +EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) -- cgit v1.2.3 From 9e00abc3c20904fd6a5d888bb7023925799ec8a5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 13 Jul 2011 19:20:49 -0400 Subject: SUNRPC: sunrpc should not explicitly depend on NFS config options Change explicit references to CONFIG_NFS_V4_1 to implicit ones Get rid of the unnecessary defines in backchannel_rqst.c and bc_svc.c: the Makefile takes care of those dependency. Signed-off-by: Trond Myklebust --- fs/nfs/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 81515545ba75..2cde5d954750 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -77,6 +77,7 @@ config NFS_V4 config NFS_V4_1 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" depends on NFS_FS && NFS_V4 && EXPERIMENTAL + select SUNRPC_BACKCHANNEL select PNFS_FILE_LAYOUT help This option enables support for minor version 1 of the NFSv4 protocol -- cgit v1.2.3 From 94b134ac8e9965309e70684b504c53bca36338b4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 13 Jul 2011 19:26:49 -0400 Subject: NFS: Convert nfs4_set_ds_client to EXPORT_SYMBOL_GPL This is not part of an external ABI... Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 5452ada59461..19ea7d9c75e6 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1463,7 +1463,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, dprintk("<-- %s %p\n", __func__, clp); return clp; } -EXPORT_SYMBOL(nfs4_set_ds_client); +EXPORT_SYMBOL_GPL(nfs4_set_ds_client); /* * Session has been established, and the client marked ready. -- cgit v1.2.3 From 674e405b8b3310702fd43d314f5f432ec2cb9980 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 15 Jul 2011 19:09:08 -0400 Subject: nfs: document nfsv4 sillyrename issues Somebody working on this code asked what the deal was with NFSv4, since this comment notes that it's v2/v3's statelessness that requires sillyrename. Shouldn't hurt to document the answer. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/unlink.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 8d6864c2a5fa..981298ce5124 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -501,6 +501,14 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, * and only performs the unlink once the last reference to it is put. * * The final cleanup is done during dentry_iput. + * + * (Note: NFSv4 is stateful, and has opens, so in theory an NFSv4 server + * could take responsibility for keeping open files referenced. The server + * would also need to ensure that opened-but-deleted files were kept over + * reboots. However, we may not assume a server does so. (RFC 5661 + * does provide an OPEN4_RESULT_PRESERVE_UNLINKED flag that a server can + * use to advertise that it does this; some day we may take advantage of + * it.)) */ int nfs_sillyrename(struct inode *dir, struct dentry *dentry) -- cgit v1.2.3 From f85ef69ce08bc2209858135328335f668ba35bdb Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 15 Jul 2011 19:18:42 -0400 Subject: pnfs: simplify pnfs files module autoloading Embed the necessary alias into the module rather than waiting for someone to add it to /etc/modprobe.conf Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 2 ++ fs/nfs/objlayout/objio_osd.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index f0b37e155ce3..be93a622872c 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -964,5 +964,7 @@ static void __exit nfs4filelayout_exit(void) pnfs_unregister_layoutdriver(&filelayout_type); } +MODULE_ALIAS("nfs-layouttype4-1"); + module_init(nfs4filelayout_init); module_exit(nfs4filelayout_exit); diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 7d49bb160b46..9383ca7245bc 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -1065,5 +1065,7 @@ objlayout_exit(void) __func__); } +MODULE_ALIAS("nfs-layouttype4-2"); + module_init(objlayout_init); module_exit(objlayout_exit); -- cgit v1.2.3 From 73ca1001ed6881b476e8252adcd0eede1ea368ea Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 18 Jul 2011 11:26:30 -0400 Subject: nfs: don't use d_move in nfs_async_rename_done If the task that initiated the sillyrename ends up being killed by a fatal signal, then it will eventually return back to userspace and end up releasing the i_mutex. d_move however needs to be done while holding the i_mutex. Instead of using d_move here, just unhash the old and new dentries to prevent them from being found by lookups. With this change though, the dentries are now incorrect post-rename and do not reflect the actual name of the file on the server. I'm proceeding under the assumption that since they are unhashed that this isn't really a problem. In order for the sillydelete to still work though, the dname must be copied earlier when setting up the sillydelete info, and the name must be recopied if the sillydelete info has to be moved to a new dentry. Reported-by: Al Viro Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/unlink.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 981298ce5124..b2fbbde58e44 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -147,7 +147,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n alias = d_lookup(parent, &data->args.name); if (alias != NULL) { - int ret = 0; + int ret; void *devname_garbage = NULL; /* @@ -155,14 +155,16 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n * the sillyrename information to the aliased dentry. */ nfs_free_dname(data); + ret = nfs_copy_dname(alias, data); spin_lock(&alias->d_lock); - if (alias->d_inode != NULL && + if (ret == 0 && alias->d_inode != NULL && !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { devname_garbage = alias->d_fsdata; alias->d_fsdata = data; alias->d_flags |= DCACHE_NFSFS_RENAMED; ret = 1; - } + } else + ret = 0; spin_unlock(&alias->d_lock); nfs_dec_sillycount(dir); dput(alias); @@ -171,8 +173,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n * point dentry is definitely not a root, so we won't need * that anymore. */ - if (devname_garbage) - kfree(devname_garbage); + kfree(devname_garbage); return ret; } data->dir = igrab(dir); @@ -204,8 +205,6 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) if (parent == NULL) goto out_free; dir = parent->d_inode; - if (nfs_copy_dname(dentry, data) != 0) - goto out_dput; /* Non-exclusive lock protects against concurrent lookup() calls */ spin_lock(&dir->i_lock); if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) { @@ -366,6 +365,8 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) struct nfs_renamedata *data = calldata; struct inode *old_dir = data->old_dir; struct inode *new_dir = data->new_dir; + struct dentry *old_dentry = data->old_dentry; + struct dentry *new_dentry = data->new_dentry; if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client); @@ -373,12 +374,12 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) } if (task->tk_status != 0) { - nfs_cancel_async_unlink(data->old_dentry); + nfs_cancel_async_unlink(old_dentry); return; } - nfs_set_verifier(data->old_dentry, nfs_save_change_attribute(old_dir)); - d_move(data->old_dentry, data->new_dentry); + d_drop(old_dentry); + d_drop(new_dentry); } /** @@ -568,6 +569,14 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) if (error) goto out_dput; + /* populate unlinkdata with the right dname */ + error = nfs_copy_dname(sdentry, + (struct nfs_unlinkdata *)dentry->d_fsdata); + if (error) { + nfs_cancel_async_unlink(dentry); + goto out_dput; + } + /* run the rename task, undo unlink if it fails */ task = nfs_async_rename(dir, dir, dentry, sdentry); if (IS_ERR(task)) { -- cgit v1.2.3 From ed1e6211a0a134ff23592c6f057af982ad5dab52 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 25 Jul 2011 15:37:29 -0400 Subject: NFSv4: Don't use the delegation->inode in nfs_mark_return_delegation() nfs_mark_return_delegation() is usually called without any locking, and so it is not safe to dereference delegation->inode. Since the inode is only used to discover the nfs_client anyway, it makes more sense to have the callers pass a valid pointer to the nfs_server as a parameter. Reported-by: Ian Kent Cc: stable@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index dd25c2aec375..321a66bc3846 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -398,12 +398,11 @@ int nfs_inode_return_delegation(struct inode *inode) return err; } -static void nfs_mark_return_delegation(struct nfs_delegation *delegation) +static void nfs_mark_return_delegation(struct nfs_server *server, + struct nfs_delegation *delegation) { - struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client; - set_bit(NFS_DELEGATION_RETURN, &delegation->flags); - set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); + set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); } /** @@ -441,7 +440,7 @@ static void nfs_mark_return_all_delegation_types(struct nfs_server *server, if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE)) continue; if (delegation->type & flags) - nfs_mark_return_delegation(delegation); + nfs_mark_return_delegation(server, delegation); } } @@ -508,7 +507,7 @@ static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server) list_for_each_entry_rcu(delegation, &server->delegations, super_list) { if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) continue; - nfs_mark_return_delegation(delegation); + nfs_mark_return_delegation(server, delegation); } } @@ -539,7 +538,8 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp) int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_client *clp = server->nfs_client; struct nfs_delegation *delegation; rcu_read_lock(); @@ -549,7 +549,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, rcu_read_unlock(); return -ENOENT; } - nfs_mark_return_delegation(delegation); + nfs_mark_return_delegation(server, delegation); rcu_read_unlock(); nfs_delegation_run_state_manager(clp); -- cgit v1.2.3