summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorTrond Myklebust <Trond.Myklebust@netapp.com>2009-04-01 13:28:15 -0400
committerTrond Myklebust <Trond.Myklebust@netapp.com>2009-04-01 13:28:15 -0400
commitcc85906110e26fe8537c3bdbc08a74ae8110030b (patch)
tree891813098ede3dba4d5ff3b83b1f7b491367ad2f /net
parentc09bca786ff941ed17c5f381c4eca5b106808c51 (diff)
parentc69da774b28e01e062e0a3aba7509f2dcfd2a11a (diff)
downloadlinux-cc85906110e26fe8537c3bdbc08a74ae8110030b.tar.gz
linux-cc85906110e26fe8537c3bdbc08a74ae8110030b.tar.bz2
linux-cc85906110e26fe8537c3bdbc08a74ae8110030b.zip
Merge branch 'devel' into for-linus
Diffstat (limited to 'net')
-rw-r--r--net/sunrpc/Kconfig22
-rw-r--r--net/sunrpc/clnt.c48
-rw-r--r--net/sunrpc/rpcb_clnt.c103
-rw-r--r--net/sunrpc/svc.c158
-rw-r--r--net/sunrpc/svc_xprt.c31
-rw-r--r--net/sunrpc/svcsock.c40
-rw-r--r--net/sunrpc/xprt.c89
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c26
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c8
-rw-r--r--net/sunrpc/xprtsock.c363
10 files changed, 505 insertions, 383 deletions
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 5592883e1e4a..afd91c78ce8e 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -17,28 +17,6 @@ config SUNRPC_XPRT_RDMA
If unsure, say N.
-config SUNRPC_REGISTER_V4
- bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)"
- depends on SUNRPC && EXPERIMENTAL
- default n
- help
- Sun added support for registering RPC services at an IPv6
- address by creating two new versions of the rpcbind protocol
- (RFC 1833).
-
- This option enables support in the kernel RPC server for
- registering kernel RPC services via version 4 of the rpcbind
- protocol. If you enable this option, you must run a portmapper
- daemon that supports rpcbind protocol version 4.
-
- Serving NFS over IPv6 from knfsd (the kernel's NFS server)
- requires that you enable this option and use a portmapper that
- supports rpcbind version 4.
-
- If unsure, say N to get traditional behavior (register kernel
- RPC services using only rpcbind version 2). Distributions
- using the legacy Linux portmapper daemon must say N here.
-
config RPCSEC_GSS_KRB5
tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
depends on SUNRPC && EXPERIMENTAL
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 836f15c0c4a3..5abab094441f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1032,27 +1032,20 @@ call_connect_status(struct rpc_task *task)
dprint_status(task);
task->tk_status = 0;
- if (status >= 0) {
+ if (status >= 0 || status == -EAGAIN) {
clnt->cl_stats->netreconn++;
task->tk_action = call_transmit;
return;
}
- /* Something failed: remote service port may have changed */
- rpc_force_rebind(clnt);
-
switch (status) {
- case -ENOTCONN:
- case -EAGAIN:
- task->tk_action = call_bind;
- if (!RPC_IS_SOFT(task))
- return;
/* if soft mounted, test if we've timed out */
case -ETIMEDOUT:
task->tk_action = call_timeout;
- return;
+ break;
+ default:
+ rpc_exit(task, -EIO);
}
- rpc_exit(task, -EIO);
}
/*
@@ -1105,14 +1098,26 @@ static void
call_transmit_status(struct rpc_task *task)
{
task->tk_action = call_status;
- /*
- * Special case: if we've been waiting on the socket's write_space()
- * callback, then don't call xprt_end_transmit().
- */
- if (task->tk_status == -EAGAIN)
- return;
- xprt_end_transmit(task);
- rpc_task_force_reencode(task);
+ switch (task->tk_status) {
+ case -EAGAIN:
+ break;
+ default:
+ xprt_end_transmit(task);
+ /*
+ * Special cases: if we've been waiting on the
+ * socket's write_space() callback, or if the
+ * socket just returned a connection error,
+ * then hold onto the transport lock.
+ */
+ case -ECONNREFUSED:
+ case -ECONNRESET:
+ case -ENOTCONN:
+ case -EHOSTDOWN:
+ case -EHOSTUNREACH:
+ case -ENETUNREACH:
+ case -EPIPE:
+ rpc_task_force_reencode(task);
+ }
}
/*
@@ -1152,9 +1157,12 @@ call_status(struct rpc_task *task)
xprt_conditional_disconnect(task->tk_xprt,
req->rq_connect_cookie);
break;
+ case -ECONNRESET:
case -ECONNREFUSED:
- case -ENOTCONN:
rpc_force_rebind(clnt);
+ rpc_delay(task, 3*HZ);
+ case -EPIPE:
+ case -ENOTCONN:
task->tk_action = call_bind;
break;
case -EAGAIN:
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 03ae007641e4..beee6da33035 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -63,9 +63,16 @@ enum {
* r_owner
*
* The "owner" is allowed to unset a service in the rpcbind database.
- * We always use the following (arbitrary) fixed string.
+ *
+ * For AF_LOCAL SET/UNSET requests, rpcbind treats this string as a
+ * UID which it maps to a local user name via a password lookup.
+ * In all other cases it is ignored.
+ *
+ * For SET/UNSET requests, user space provides a value, even for
+ * network requests, and GETADDR uses an empty string. We follow
+ * those precedents here.
*/
-#define RPCB_OWNER_STRING "rpcb"
+#define RPCB_OWNER_STRING "0"
#define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING)
static void rpcb_getport_done(struct rpc_task *, void *);
@@ -124,12 +131,6 @@ static const struct sockaddr_in rpcb_inaddr_loopback = {
.sin_port = htons(RPCBIND_PORT),
};
-static const struct sockaddr_in6 rpcb_in6addr_loopback = {
- .sin6_family = AF_INET6,
- .sin6_addr = IN6ADDR_LOOPBACK_INIT,
- .sin6_port = htons(RPCBIND_PORT),
-};
-
static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr,
size_t addrlen, u32 version)
{
@@ -176,9 +177,10 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
return rpc_create(&args);
}
-static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
- u32 version, struct rpc_message *msg)
+static int rpcb_register_call(const u32 version, struct rpc_message *msg)
{
+ struct sockaddr *addr = (struct sockaddr *)&rpcb_inaddr_loopback;
+ size_t addrlen = sizeof(rpcb_inaddr_loopback);
struct rpc_clnt *rpcb_clnt;
int result, error = 0;
@@ -192,7 +194,7 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
error = PTR_ERR(rpcb_clnt);
if (error < 0) {
- printk(KERN_WARNING "RPC: failed to contact local rpcbind "
+ dprintk("RPC: failed to contact local rpcbind "
"server (errno %d).\n", -error);
return error;
}
@@ -254,25 +256,23 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port)
if (port)
msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET];
- return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
- sizeof(rpcb_inaddr_loopback),
- RPCBVERS_2, &msg);
+ return rpcb_register_call(RPCBVERS_2, &msg);
}
/*
* Fill in AF_INET family-specific arguments to register
*/
-static int rpcb_register_netid4(struct sockaddr_in *address_to_register,
- struct rpc_message *msg)
+static int rpcb_register_inet4(const struct sockaddr *sap,
+ struct rpc_message *msg)
{
+ const struct sockaddr_in *sin = (const struct sockaddr_in *)sap;
struct rpcbind_args *map = msg->rpc_argp;
- unsigned short port = ntohs(address_to_register->sin_port);
+ unsigned short port = ntohs(sin->sin_port);
char buf[32];
/* Construct AF_INET universal address */
snprintf(buf, sizeof(buf), "%pI4.%u.%u",
- &address_to_register->sin_addr.s_addr,
- port >> 8, port & 0xff);
+ &sin->sin_addr.s_addr, port >> 8, port & 0xff);
map->r_addr = buf;
dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with "
@@ -284,29 +284,27 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register,
if (port)
msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
- return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
- sizeof(rpcb_inaddr_loopback),
- RPCBVERS_4, msg);
+ return rpcb_register_call(RPCBVERS_4, msg);
}
/*
* Fill in AF_INET6 family-specific arguments to register
*/
-static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
- struct rpc_message *msg)
+static int rpcb_register_inet6(const struct sockaddr *sap,
+ struct rpc_message *msg)
{
+ const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap;
struct rpcbind_args *map = msg->rpc_argp;
- unsigned short port = ntohs(address_to_register->sin6_port);
+ unsigned short port = ntohs(sin6->sin6_port);
char buf[64];
/* Construct AF_INET6 universal address */
- if (ipv6_addr_any(&address_to_register->sin6_addr))
+ if (ipv6_addr_any(&sin6->sin6_addr))
snprintf(buf, sizeof(buf), "::.%u.%u",
port >> 8, port & 0xff);
else
snprintf(buf, sizeof(buf), "%pI6.%u.%u",
- &address_to_register->sin6_addr,
- port >> 8, port & 0xff);
+ &sin6->sin6_addr, port >> 8, port & 0xff);
map->r_addr = buf;
dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with "
@@ -318,9 +316,21 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
if (port)
msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
- return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback,
- sizeof(rpcb_in6addr_loopback),
- RPCBVERS_4, msg);
+ return rpcb_register_call(RPCBVERS_4, msg);
+}
+
+static int rpcb_unregister_all_protofamilies(struct rpc_message *msg)
+{
+ struct rpcbind_args *map = msg->rpc_argp;
+
+ dprintk("RPC: unregistering [%u, %u, '%s'] with "
+ "local rpcbind\n",
+ map->r_prog, map->r_vers, map->r_netid);
+
+ map->r_addr = "";
+ msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
+
+ return rpcb_register_call(RPCBVERS_4, msg);
}
/**
@@ -340,10 +350,11 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
* invoke this function once for each [program, version, address,
* netid] tuple they wish to advertise.
*
- * Callers may also unregister RPC services that are no longer
- * available by setting the port number in the passed-in address
- * to zero. Callers pass a netid of "" to unregister all
- * transport netids associated with [program, version, address].
+ * Callers may also unregister RPC services that are registered at a
+ * specific address by setting the port number in @address to zero.
+ * They may unregister all registered protocol families at once for
+ * a service by passing a NULL @address argument. If @netid is ""
+ * then all netids for [program, version, address] are unregistered.
*
* This function uses rpcbind protocol version 4 to contact the
* local rpcbind daemon. The local rpcbind daemon must support
@@ -378,13 +389,14 @@ int rpcb_v4_register(const u32 program, const u32 version,
.rpc_argp = &map,
};
+ if (address == NULL)
+ return rpcb_unregister_all_protofamilies(&msg);
+
switch (address->sa_family) {
case AF_INET:
- return rpcb_register_netid4((struct sockaddr_in *)address,
- &msg);
+ return rpcb_register_inet4(address, &msg);
case AF_INET6:
- return rpcb_register_netid6((struct sockaddr_in6 *)address,
- &msg);
+ return rpcb_register_inet6(address, &msg);
}
return -EAFNOSUPPORT;
@@ -579,7 +591,7 @@ void rpcb_getport_async(struct rpc_task *task)
map->r_xprt = xprt_get(xprt);
map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR);
- map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */
+ map->r_owner = "";
map->r_status = -EIO;
child = rpcb_call_async(rpcb_clnt, map, proc);
@@ -703,11 +715,16 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
*portp = 0;
addr_len = ntohl(*p++);
+ if (addr_len == 0) {
+ dprintk("RPC: rpcb_decode_getaddr: "
+ "service is not registered\n");
+ return 0;
+ }
+
/*
- * Simple sanity check. The smallest possible universal
- * address is an IPv4 address string containing 11 bytes.
+ * Simple sanity check.
*/
- if (addr_len < 11 || addr_len > RPCBIND_MAXUADDRLEN)
+ if (addr_len > RPCBIND_MAXUADDRLEN)
goto out_err;
/*
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index bb507e2bb94d..9f2f2412a2f3 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -359,7 +359,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu)
*/
static struct svc_serv *
__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
- sa_family_t family, void (*shutdown)(struct svc_serv *serv))
+ void (*shutdown)(struct svc_serv *serv))
{
struct svc_serv *serv;
unsigned int vers;
@@ -368,7 +368,6 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
return NULL;
- serv->sv_family = family;
serv->sv_name = prog->pg_name;
serv->sv_program = prog;
serv->sv_nrthreads = 1;
@@ -427,21 +426,21 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
struct svc_serv *
svc_create(struct svc_program *prog, unsigned int bufsize,
- sa_family_t family, void (*shutdown)(struct svc_serv *serv))
+ void (*shutdown)(struct svc_serv *serv))
{
- return __svc_create(prog, bufsize, /*npools*/1, family, shutdown);
+ return __svc_create(prog, bufsize, /*npools*/1, shutdown);
}
EXPORT_SYMBOL_GPL(svc_create);
struct svc_serv *
svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
- sa_family_t family, void (*shutdown)(struct svc_serv *serv),
+ void (*shutdown)(struct svc_serv *serv),
svc_thread_fn func, struct module *mod)
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
- serv = __svc_create(prog, bufsize, npools, family, shutdown);
+ serv = __svc_create(prog, bufsize, npools, shutdown);
if (serv != NULL) {
serv->sv_function = func;
@@ -719,8 +718,6 @@ svc_exit_thread(struct svc_rqst *rqstp)
}
EXPORT_SYMBOL_GPL(svc_exit_thread);
-#ifdef CONFIG_SUNRPC_REGISTER_V4
-
/*
* Register an "inet" protocol family netid with the local
* rpcbind daemon via an rpcbind v4 SET request.
@@ -735,12 +732,13 @@ static int __svc_rpcb_register4(const u32 program, const u32 version,
const unsigned short protocol,
const unsigned short port)
{
- struct sockaddr_in sin = {
+ const struct sockaddr_in sin = {
.sin_family = AF_INET,
.sin_addr.s_addr = htonl(INADDR_ANY),
.sin_port = htons(port),
};
- char *netid;
+ const char *netid;
+ int error;
switch (protocol) {
case IPPROTO_UDP:
@@ -750,13 +748,23 @@ static int __svc_rpcb_register4(const u32 program, const u32 version,
netid = RPCBIND_NETID_TCP;
break;
default:
- return -EPROTONOSUPPORT;
+ return -ENOPROTOOPT;
}
- return rpcb_v4_register(program, version,
- (struct sockaddr *)&sin, netid);
+ error = rpcb_v4_register(program, version,
+ (const struct sockaddr *)&sin, netid);
+
+ /*
+ * User space didn't support rpcbind v4, so retry this
+ * registration request with the legacy rpcbind v2 protocol.
+ */
+ if (error == -EPROTONOSUPPORT)
+ error = rpcb_register(program, version, protocol, port);
+
+ return error;
}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
/*
* Register an "inet6" protocol family netid with the local
* rpcbind daemon via an rpcbind v4 SET request.
@@ -771,12 +779,13 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
const unsigned short protocol,
const unsigned short port)
{
- struct sockaddr_in6 sin6 = {
+ const struct sockaddr_in6 sin6 = {
.sin6_family = AF_INET6,
.sin6_addr = IN6ADDR_ANY_INIT,
.sin6_port = htons(port),
};
- char *netid;
+ const char *netid;
+ int error;
switch (protocol) {
case IPPROTO_UDP:
@@ -786,12 +795,22 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
netid = RPCBIND_NETID_TCP6;
break;
default:
- return -EPROTONOSUPPORT;
+ return -ENOPROTOOPT;
}
- return rpcb_v4_register(program, version,
- (struct sockaddr *)&sin6, netid);
+ error = rpcb_v4_register(program, version,
+ (const struct sockaddr *)&sin6, netid);
+
+ /*
+ * User space didn't support rpcbind version 4, so we won't
+ * use a PF_INET6 listener.
+ */
+ if (error == -EPROTONOSUPPORT)
+ error = -EAFNOSUPPORT;
+
+ return error;
}
+#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
/*
* Register a kernel RPC service via rpcbind version 4.
@@ -799,69 +818,43 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
* Returns zero on success; a negative errno value is returned
* if any error occurs.
*/
-static int __svc_register(const u32 program, const u32 version,
- const sa_family_t family,
+static int __svc_register(const char *progname,
+ const u32 program, const u32 version,
+ const int family,
const unsigned short protocol,
const unsigned short port)
{
- int error;
+ int error = -EAFNOSUPPORT;
switch (family) {
- case AF_INET:
- return __svc_rpcb_register4(program, version,
+ case PF_INET:
+ error = __svc_rpcb_register4(program, version,
protocol, port);
- case AF_INET6:
+ break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ case PF_INET6:
error = __svc_rpcb_register6(program, version,
protocol, port);
- if (error < 0)
- return error;
-
- /*
- * Work around bug in some versions of Linux rpcbind
- * which don't allow registration of both inet and
- * inet6 netids.
- *
- * Error return ignored for now.
- */
- __svc_rpcb_register4(program, version,
- protocol, port);
- return 0;
+#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
}
- return -EAFNOSUPPORT;
-}
-
-#else /* CONFIG_SUNRPC_REGISTER_V4 */
-
-/*
- * Register a kernel RPC service via rpcbind version 2.
- *
- * Returns zero on success; a negative errno value is returned
- * if any error occurs.
- */
-static int __svc_register(const u32 program, const u32 version,
- sa_family_t family,
- const unsigned short protocol,
- const unsigned short port)
-{
- if (family != AF_INET)
- return -EAFNOSUPPORT;
-
- return rpcb_register(program, version, protocol, port);
+ if (error < 0)
+ printk(KERN_WARNING "svc: failed to register %sv%u RPC "
+ "service (errno %d).\n", progname, version, -error);
+ return error;
}
-#endif /* CONFIG_SUNRPC_REGISTER_V4 */
-
/**
* svc_register - register an RPC service with the local portmapper
* @serv: svc_serv struct for the service to register
+ * @family: protocol family of service's listener socket
* @proto: transport protocol number to advertise
* @port: port to advertise
*
- * Service is registered for any address in serv's address family
+ * Service is registered for any address in the passed-in protocol family
*/
-int svc_register(const struct svc_serv *serv, const unsigned short proto,
- const unsigned short port)
+int svc_register(const struct svc_serv *serv, const int family,
+ const unsigned short proto, const unsigned short port)
{
struct svc_program *progp;
unsigned int i;
@@ -879,15 +872,15 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto,
i,
proto == IPPROTO_UDP? "udp" : "tcp",
port,
- serv->sv_family,
+ family,
progp->pg_vers[i]->vs_hidden?
" (but not telling portmap)" : "");
if (progp->pg_vers[i]->vs_hidden)
continue;
- error = __svc_register(progp->pg_prog, i,
- serv->sv_family, proto, port);
+ error = __svc_register(progp->pg_name, progp->pg_prog,
+ i, family, proto, port);
if (error < 0)
break;
}
@@ -896,38 +889,31 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto,
return error;
}
-#ifdef CONFIG_SUNRPC_REGISTER_V4
-
+/*
+ * If user space is running rpcbind, it should take the v4 UNSET
+ * and clear everything for this [program, version]. If user space
+ * is running portmap, it will reject the v4 UNSET, but won't have
+ * any "inet6" entries anyway. So a PMAP_UNSET should be sufficient
+ * in this case to clear all existing entries for [program, version].
+ */
static void __svc_unregister(const u32 program, const u32 version,
const char *progname)
{
- struct sockaddr_in6 sin6 = {
- .sin6_family = AF_INET6,
- .sin6_addr = IN6ADDR_ANY_INIT,
- .sin6_port = 0,
- };
int error;
- error = rpcb_v4_register(program, version,
- (struct sockaddr *)&sin6, "");
- dprintk("svc: %s(%sv%u), error %d\n",
- __func__, progname, version, error);
-}
-
-#else /* CONFIG_SUNRPC_REGISTER_V4 */
+ error = rpcb_v4_register(program, version, NULL, "");
-static void __svc_unregister(const u32 program, const u32 version,
- const char *progname)
-{
- int error;
+ /*
+ * User space didn't support rpcbind v4, so retry this
+ * request with the legacy rpcbind v2 protocol.
+ */
+ if (error == -EPROTONOSUPPORT)
+ error = rpcb_register(program, version, 0, 0);
- error = rpcb_register(program, version, 0, 0);
dprintk("svc: %s(%sv%u), error %d\n",
__func__, progname, version, error);
}
-#endif /* CONFIG_SUNRPC_REGISTER_V4 */
-
/*
* All netids, bind addresses and ports registered for [program, version]
* are removed from the local rpcbind database (if the service is not
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index e588df5d6b34..2819ee093f36 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -161,7 +161,9 @@ EXPORT_SYMBOL_GPL(svc_xprt_init);
static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
struct svc_serv *serv,
- unsigned short port, int flags)
+ const int family,
+ const unsigned short port,
+ int flags)
{
struct sockaddr_in sin = {
.sin_family = AF_INET,
@@ -176,12 +178,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
struct sockaddr *sap;
size_t len;
- switch (serv->sv_family) {
- case AF_INET:
+ switch (family) {
+ case PF_INET:
sap = (struct sockaddr *)&sin;
len = sizeof(sin);
break;
- case AF_INET6:
+ case PF_INET6:
sap = (struct sockaddr *)&sin6;
len = sizeof(sin6);
break;
@@ -192,7 +194,8 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
return xcl->xcl_ops->xpo_create(serv, sap, len, flags);
}
-int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
+int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+ const int family, const unsigned short port,
int flags)
{
struct svc_xprt_class *xcl;
@@ -209,7 +212,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
goto err;
spin_unlock(&svc_xprt_class_lock);
- newxprt = __svc_xpo_create(xcl, serv, port, flags);
+ newxprt = __svc_xpo_create(xcl, serv, family, port, flags);
if (IS_ERR(newxprt)) {
module_put(xcl->xcl_owner);
return PTR_ERR(newxprt);
@@ -1033,7 +1036,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
return dr;
}
-/*
+/**
+ * svc_find_xprt - find an RPC transport instance
+ * @serv: pointer to svc_serv to search
+ * @xcl_name: C string containing transport's class name
+ * @af: Address family of transport's local address
+ * @port: transport's IP port number
+ *
* Return the transport instance pointer for the endpoint accepting
* connections/peer traffic from the specified transport class,
* address family and port.
@@ -1042,14 +1051,14 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
* wild-card, and will result in matching the first transport in the
* service's list that has a matching class name.
*/
-struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
- int af, int port)
+struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
+ const sa_family_t af, const unsigned short port)
{
struct svc_xprt *xprt;
struct svc_xprt *found = NULL;
/* Sanity check the args */
- if (!serv || !xcl_name)
+ if (serv == NULL || xcl_name == NULL)
return found;
spin_lock_bh(&serv->sv_lock);
@@ -1058,7 +1067,7 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
continue;
if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family)
continue;
- if (port && port != svc_xprt_local_port(xprt))
+ if (port != 0 && port != svc_xprt_local_port(xprt))
continue;
found = xprt;
svc_xprt_get(xprt);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 5763e6460fea..9d504234af4a 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1110,7 +1110,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
struct svc_sock *svsk;
struct sock *inet;
int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
- int val;
dprintk("svc: svc_setup_socket %p\n", sock);
if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
@@ -1122,7 +1121,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
/* Register socket with portmapper */
if (*errp >= 0 && pmap_register)
- *errp = svc_register(serv, inet->sk_protocol,
+ *errp = svc_register(serv, inet->sk_family, inet->sk_protocol,
ntohs(inet_sk(inet)->sport));
if (*errp < 0) {
@@ -1143,18 +1142,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
else
svc_tcp_init(svsk, serv);
- /*
- * We start one listener per sv_serv. We want AF_INET
- * requests to be automatically shunted to our AF_INET6
- * listener using a mapped IPv4 address. Make sure
- * no-one starts an equivalent IPv4 listener, which
- * would steal our incoming connections.
- */
- val = 0;
- if (serv->sv_family == AF_INET6)
- kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
- (char *)&val, sizeof(val));
-
dprintk("svc: svc_setup_socket created %p (inet %p)\n",
svsk, svsk->sk_sk);
@@ -1222,6 +1209,8 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
struct sockaddr_storage addr;
struct sockaddr *newsin = (struct sockaddr *)&addr;
int newlen;
+ int family;
+ int val;
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
dprintk("svc: svc_create_socket(%s, %d, %s)\n",
@@ -1233,14 +1222,35 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
"sockets supported\n");
return ERR_PTR(-EINVAL);
}
+
type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
+ switch (sin->sa_family) {
+ case AF_INET6:
+ family = PF_INET6;
+ break;
+ case AF_INET:
+ family = PF_INET;
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
- error = sock_create_kern(sin->sa_family, type, protocol, &sock);
+ error = sock_create_kern(family, type, protocol, &sock);
if (error < 0)
return ERR_PTR(error);
svc_reclassify_socket(sock);
+ /*
+ * If this is an PF_INET6 listener, we want to avoid
+ * getting requests from IPv4 remotes. Those should
+ * be shunted to a PF_INET listener via rpcbind.
+ */
+ val = 1;
+ if (family == PF_INET6)
+ kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
+ (char *)&val, sizeof(val));
+
if (type == SOCK_STREAM)
sock->sk->sk_reuse = 1; /* allow address reuse */
error = kernel_bind(sock, sin, len);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 62098d101a1f..a0bfe53f1621 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -152,6 +152,37 @@ out:
EXPORT_SYMBOL_GPL(xprt_unregister_transport);
/**
+ * xprt_load_transport - load a transport implementation
+ * @transport_name: transport to load
+ *
+ * Returns:
+ * 0: transport successfully loaded
+ * -ENOENT: transport module not available
+ */
+int xprt_load_transport(const char *transport_name)
+{
+ struct xprt_class *t;
+ char module_name[sizeof t->name + 5];
+ int result;
+
+ result = 0;
+ spin_lock(&xprt_list_lock);
+ list_for_each_entry(t, &xprt_list, list) {
+ if (strcmp(t->name, transport_name) == 0) {
+ spin_unlock(&xprt_list_lock);
+ goto out;
+ }
+ }
+ spin_unlock(&xprt_list_lock);
+ strcpy(module_name, "xprt");
+ strncat(module_name, transport_name, sizeof t->name);
+ result = request_module(module_name);
+out:
+ return result;
+}
+EXPORT_SYMBOL_GPL(xprt_load_transport);
+
+/**
* xprt_reserve_xprt - serialize write access to transports
* @task: task that is requesting access to the transport
*
@@ -580,7 +611,7 @@ void xprt_disconnect_done(struct rpc_xprt *xprt)
dprintk("RPC: disconnected transport %p\n", xprt);
spin_lock_bh(&xprt->transport_lock);
xprt_clear_connected(xprt);
- xprt_wake_pending_tasks(xprt, -ENOTCONN);
+ xprt_wake_pending_tasks(xprt, -EAGAIN);
spin_unlock_bh(&xprt->transport_lock);
}
EXPORT_SYMBOL_GPL(xprt_disconnect_done);
@@ -598,7 +629,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
/* Try to schedule an autoclose RPC call */
if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
queue_work(rpciod_workqueue, &xprt->task_cleanup);
- xprt_wake_pending_tasks(xprt, -ENOTCONN);
+ xprt_wake_pending_tasks(xprt, -EAGAIN);
spin_unlock_bh(&xprt->transport_lock);
}
@@ -625,7 +656,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie)
/* Try to schedule an autoclose RPC call */
if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
queue_work(rpciod_workqueue, &xprt->task_cleanup);
- xprt_wake_pending_tasks(xprt, -ENOTCONN);
+ xprt_wake_pending_tasks(xprt, -EAGAIN);
out:
spin_unlock_bh(&xprt->transport_lock);
}
@@ -695,9 +726,8 @@ static void xprt_connect_status(struct rpc_task *task)
}
switch (task->tk_status) {
- case -ENOTCONN:
- dprintk("RPC: %5u xprt_connect_status: connection broken\n",
- task->tk_pid);
+ case -EAGAIN:
+ dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid);
break;
case -ETIMEDOUT:
dprintk("RPC: %5u xprt_connect_status: connect attempt timed "
@@ -818,15 +848,8 @@ int xprt_prepare_transmit(struct rpc_task *task)
err = req->rq_received;
goto out_unlock;
}
- if (!xprt->ops->reserve_xprt(task)) {
+ if (!xprt->ops->reserve_xprt(task))
err = -EAGAIN;
- goto out_unlock;
- }
-
- if (!xprt_connected(xprt)) {
- err = -ENOTCONN;
- goto out_unlock;
- }
out_unlock:
spin_unlock_bh(&xprt->transport_lock);
return err;
@@ -870,32 +893,26 @@ void xprt_transmit(struct rpc_task *task)
req->rq_connect_cookie = xprt->connect_cookie;
req->rq_xtime = jiffies;
status = xprt->ops->send_request(task);
- if (status == 0) {
- dprintk("RPC: %5u xmit complete\n", task->tk_pid);
- spin_lock_bh(&xprt->transport_lock);
+ if (status != 0) {
+ task->tk_status = status;
+ return;
+ }
- xprt->ops->set_retrans_timeout(task);
+ dprintk("RPC: %5u xmit complete\n", task->tk_pid);
+ spin_lock_bh(&xprt->transport_lock);
- xprt->stat.sends++;
- xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
- xprt->stat.bklog_u += xprt->backlog.qlen;
+ xprt->ops->set_retrans_timeout(task);
- /* Don't race with disconnect */
- if (!xprt_connected(xprt))
- task->tk_status = -ENOTCONN;
- else if (!req->rq_received)
- rpc_sleep_on(&xprt->pending, task, xprt_timer);
- spin_unlock_bh(&xprt->transport_lock);
- return;
- }
+ xprt->stat.sends++;
+ xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
+ xprt->stat.bklog_u += xprt->backlog.qlen;
- /* Note: at this point, task->tk_sleeping has not yet been set,
- * hence there is no danger of the waking up task being put on
- * schedq, and being picked up by a parallel run of rpciod().
- */
- task->tk_status = status;
- if (status == -ECONNREFUSED)
- rpc_sleep_on(&xprt->sending, task, NULL);
+ /* Don't race with disconnect */
+ if (!xprt_connected(xprt))
+ task->tk_status = -ENOTCONN;
+ else if (!req->rq_received)
+ rpc_sleep_on(&xprt->pending, task, xprt_timer);
+ spin_unlock_bh(&xprt->transport_lock);
}
static inline void do_xprt_reserve(struct rpc_task *task)
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 14106d26bb95..e5e28d1946a4 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -310,6 +310,19 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
__func__, pad, destp, rqst->rq_slen, curlen);
copy_len = rqst->rq_snd_buf.page_len;
+
+ if (rqst->rq_snd_buf.tail[0].iov_len) {
+ curlen = rqst->rq_snd_buf.tail[0].iov_len;
+ if (destp + copy_len != rqst->rq_snd_buf.tail[0].iov_base) {
+ memmove(destp + copy_len,
+ rqst->rq_snd_buf.tail[0].iov_base, curlen);
+ r_xprt->rx_stats.pullup_copy_count += curlen;
+ }
+ dprintk("RPC: %s: tail destp 0x%p len %d\n",
+ __func__, destp + copy_len, curlen);
+ rqst->rq_svec[0].iov_len += curlen;
+ }
+
r_xprt->rx_stats.pullup_copy_count += copy_len;
npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT;
for (i = 0; copy_len && i < npages; i++) {
@@ -332,17 +345,6 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
destp += curlen;
copy_len -= curlen;
}
- if (rqst->rq_snd_buf.tail[0].iov_len) {
- curlen = rqst->rq_snd_buf.tail[0].iov_len;
- if (destp != rqst->rq_snd_buf.tail[0].iov_base) {
- memcpy(destp,
- rqst->rq_snd_buf.tail[0].iov_base, curlen);
- r_xprt->rx_stats.pullup_copy_count += curlen;
- }
- dprintk("RPC: %s: tail destp 0x%p len %d curlen %d\n",
- __func__, destp, copy_len, curlen);
- rqst->rq_svec[0].iov_len += curlen;
- }
/* header now contains entire send message */
return pad;
}
@@ -656,7 +658,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
if (curlen > rqst->rq_rcv_buf.tail[0].iov_len)
curlen = rqst->rq_rcv_buf.tail[0].iov_len;
if (rqst->rq_rcv_buf.tail[0].iov_base != srcp)
- memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen);
+ memmove(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen);
dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n",
__func__, srcp, copy_len, curlen);
rqst->rq_rcv_buf.tail[0].iov_len = curlen;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index a3334e3b73cc..6c26a675435a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -191,7 +191,6 @@ static int map_xdr(struct svcxprt_rdma *xprt,
struct xdr_buf *xdr,
struct svc_rdma_req_map *vec)
{
- int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
int sge_no;
u32 sge_bytes;
u32 page_bytes;
@@ -235,7 +234,11 @@ static int map_xdr(struct svcxprt_rdma *xprt,
sge_no++;
}
- BUG_ON(sge_no > sge_max);
+ dprintk("svcrdma: map_xdr: sge_no %d page_no %d "
+ "page_base %u page_len %u head_len %zu tail_len %zu\n",
+ sge_no, page_no, xdr->page_base, xdr->page_len,
+ xdr->head[0].iov_len, xdr->tail[0].iov_len);
+
vec->count = sge_no;
return 0;
}
@@ -579,7 +582,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->sge[page_no+1].length = 0;
}
BUG_ON(sge_no > rdma->sc_max_sge);
- BUG_ON(sge_no > ctxt->count);
memset(&send_wr, 0, sizeof send_wr);
ctxt->wr_op = IB_WR_SEND;
send_wr.wr_id = (unsigned long)ctxt;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 568330eebbfe..d40ff50887aa 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -49,6 +49,9 @@ unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
+#define XS_TCP_LINGER_TO (15U * HZ)
+static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
+
/*
* We can register our own files under /proc/sys/sunrpc by
* calling register_sysctl_table() again. The files in that
@@ -117,6 +120,14 @@ static ctl_table xs_tunables_table[] = {
.extra2 = &xprt_max_resvport_limit
},
{
+ .procname = "tcp_fin_timeout",
+ .data = &xs_tcp_fin_timeout,
+ .maxlen = sizeof(xs_tcp_fin_timeout),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ .strategy = sysctl_jiffies
+ },
+ {
.ctl_name = 0,
},
};
@@ -521,11 +532,12 @@ static void xs_nospace_callback(struct rpc_task *task)
* @task: task to put to sleep
*
*/
-static void xs_nospace(struct rpc_task *task)
+static int xs_nospace(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ int ret = 0;
dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
task->tk_pid, req->rq_slen - req->rq_bytes_sent,
@@ -537,6 +549,7 @@ static void xs_nospace(struct rpc_task *task)
/* Don't race with disconnect */
if (xprt_connected(xprt)) {
if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
+ ret = -EAGAIN;
/*
* Notify TCP that we're limited by the application
* window size
@@ -548,10 +561,11 @@ static void xs_nospace(struct rpc_task *task)
}
} else {
clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
- task->tk_status = -ENOTCONN;
+ ret = -ENOTCONN;
}
spin_unlock_bh(&xprt->transport_lock);
+ return ret;
}
/**
@@ -594,6 +608,8 @@ static int xs_udp_send_request(struct rpc_task *task)
/* Still some bytes left; set up for a retry later. */
status = -EAGAIN;
}
+ if (!transport->sock)
+ goto out;
switch (status) {
case -ENOTSOCK:
@@ -601,21 +617,19 @@ static int xs_udp_send_request(struct rpc_task *task)
/* Should we call xs_close() here? */
break;
case -EAGAIN:
- xs_nospace(task);
+ status = xs_nospace(task);
break;
+ default:
+ dprintk("RPC: sendmsg returned unrecognized error %d\n",
+ -status);
case -ENETUNREACH:
case -EPIPE:
case -ECONNREFUSED:
/* When the server has died, an ICMP port unreachable message
* prompts ECONNREFUSED. */
clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
- break;
- default:
- clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
- dprintk("RPC: sendmsg returned unrecognized error %d\n",
- -status);
}
-
+out:
return status;
}
@@ -697,6 +711,8 @@ static int xs_tcp_send_request(struct rpc_task *task)
status = -EAGAIN;
break;
}
+ if (!transport->sock)
+ goto out;
switch (status) {
case -ENOTSOCK:
@@ -704,23 +720,19 @@ static int xs_tcp_send_request(struct rpc_task *task)
/* Should we call xs_close() here? */
break;
case -EAGAIN:
- xs_nospace(task);
+ status = xs_nospace(task);
break;
+ default:
+ dprintk("RPC: sendmsg returned unrecognized error %d\n",
+ -status);
case -ECONNRESET:
+ case -EPIPE:
xs_tcp_shutdown(xprt);
case -ECONNREFUSED:
case -ENOTCONN:
- case -EPIPE:
- status = -ENOTCONN;
- clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
- break;
- default:
- dprintk("RPC: sendmsg returned unrecognized error %d\n",
- -status);
clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
- xs_tcp_shutdown(xprt);
}
-
+out:
return status;
}
@@ -767,23 +779,13 @@ static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *s
sk->sk_error_report = transport->old_error_report;
}
-/**
- * xs_close - close a socket
- * @xprt: transport
- *
- * This is used when all requests are complete; ie, no DRC state remains
- * on the server we want to save.
- */
-static void xs_close(struct rpc_xprt *xprt)
+static void xs_reset_transport(struct sock_xprt *transport)
{
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct socket *sock = transport->sock;
struct sock *sk = transport->inet;
- if (!sk)
- goto clear_close_wait;
-
- dprintk("RPC: xs_close xprt %p\n", xprt);
+ if (sk == NULL)
+ return;
write_lock_bh(&sk->sk_callback_lock);
transport->inet = NULL;
@@ -797,8 +799,25 @@ static void xs_close(struct rpc_xprt *xprt)
sk->sk_no_check = 0;
sock_release(sock);
-clear_close_wait:
+}
+
+/**
+ * xs_close - close a socket
+ * @xprt: transport
+ *
+ * This is used when all requests are complete; ie, no DRC state remains
+ * on the server we want to save.
+ */
+static void xs_close(struct rpc_xprt *xprt)
+{
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+
+ dprintk("RPC: xs_close xprt %p\n", xprt);
+
+ xs_reset_transport(transport);
+
smp_mb__before_clear_bit();
+ clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
clear_bit(XPRT_CLOSING, &xprt->state);
smp_mb__after_clear_bit();
@@ -1126,6 +1145,47 @@ out:
read_unlock(&sk->sk_callback_lock);
}
+/*
+ * Do the equivalent of linger/linger2 handling for dealing with
+ * broken servers that don't close the socket in a timely
+ * fashion
+ */
+static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt,
+ unsigned long timeout)
+{
+ struct sock_xprt *transport;
+
+ if (xprt_test_and_set_connecting(xprt))
+ return;
+ set_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+ transport = container_of(xprt, struct sock_xprt, xprt);
+ queue_delayed_work(rpciod_workqueue, &transport->connect_worker,
+ timeout);
+}
+
+static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
+{
+ struct sock_xprt *transport;
+
+ transport = container_of(xprt, struct sock_xprt, xprt);
+
+ if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) ||
+ !cancel_delayed_work(&transport->connect_worker))
+ return;
+ clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+ xprt_clear_connecting(xprt);
+}
+
+static void xs_sock_mark_closed(struct rpc_xprt *xprt)
+{
+ smp_mb__before_clear_bit();
+ clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ clear_bit(XPRT_CLOSING, &xprt->state);
+ smp_mb__after_clear_bit();
+ /* Mark transport as closed and wake up all pending tasks */
+ xprt_disconnect_done(xprt);
+}
+
/**
* xs_tcp_state_change - callback to handle TCP socket state changes
* @sk: socket whose state has changed
@@ -1158,7 +1218,7 @@ static void xs_tcp_state_change(struct sock *sk)
transport->tcp_flags =
TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
- xprt_wake_pending_tasks(xprt, 0);
+ xprt_wake_pending_tasks(xprt, -EAGAIN);
}
spin_unlock_bh(&xprt->transport_lock);
break;
@@ -1171,10 +1231,10 @@ static void xs_tcp_state_change(struct sock *sk)
clear_bit(XPRT_CONNECTED, &xprt->state);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
smp_mb__after_clear_bit();
+ xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
break;
case TCP_CLOSE_WAIT:
/* The server initiated a shutdown of the socket */
- set_bit(XPRT_CLOSING, &xprt->state);
xprt_force_disconnect(xprt);
case TCP_SYN_SENT:
xprt->connect_cookie++;
@@ -1187,40 +1247,35 @@ static void xs_tcp_state_change(struct sock *sk)
xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
break;
case TCP_LAST_ACK:
+ set_bit(XPRT_CLOSING, &xprt->state);
+ xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
smp_mb__before_clear_bit();
clear_bit(XPRT_CONNECTED, &xprt->state);
smp_mb__after_clear_bit();
break;
case TCP_CLOSE:
- smp_mb__before_clear_bit();
- clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
- clear_bit(XPRT_CLOSING, &xprt->state);
- smp_mb__after_clear_bit();
- /* Mark transport as closed and wake up all pending tasks */
- xprt_disconnect_done(xprt);
+ xs_tcp_cancel_linger_timeout(xprt);
+ xs_sock_mark_closed(xprt);
}
out:
read_unlock(&sk->sk_callback_lock);
}
/**
- * xs_tcp_error_report - callback mainly for catching RST events
+ * xs_error_report - callback mainly for catching socket errors
* @sk: socket
*/
-static void xs_tcp_error_report(struct sock *sk)
+static void xs_error_report(struct sock *sk)
{
struct rpc_xprt *xprt;
read_lock(&sk->sk_callback_lock);
- if (sk->sk_err != ECONNRESET || sk->sk_state != TCP_ESTABLISHED)
- goto out;
if (!(xprt = xprt_from_sock(sk)))
goto out;
dprintk("RPC: %s client %p...\n"
"RPC: error %d\n",
__func__, xprt, sk->sk_err);
-
- xprt_force_disconnect(xprt);
+ xprt_wake_pending_tasks(xprt, -EAGAIN);
out:
read_unlock(&sk->sk_callback_lock);
}
@@ -1494,6 +1549,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_user_data = xprt;
sk->sk_data_ready = xs_udp_data_ready;
sk->sk_write_space = xs_udp_write_space;
+ sk->sk_error_report = xs_error_report;
sk->sk_no_check = UDP_CSUM_NORCV;
sk->sk_allocation = GFP_ATOMIC;
@@ -1526,9 +1582,10 @@ static void xs_udp_connect_worker4(struct work_struct *work)
goto out;
/* Start by resetting any existing state */
- xs_close(xprt);
+ xs_reset_transport(transport);
- if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
+ err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ if (err < 0) {
dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
goto out;
}
@@ -1545,8 +1602,8 @@ static void xs_udp_connect_worker4(struct work_struct *work)
xs_udp_finish_connecting(xprt, sock);
status = 0;
out:
- xprt_wake_pending_tasks(xprt, status);
xprt_clear_connecting(xprt);
+ xprt_wake_pending_tasks(xprt, status);
}
/**
@@ -1567,9 +1624,10 @@ static void xs_udp_connect_worker6(struct work_struct *work)
goto out;
/* Start by resetting any existing state */
- xs_close(xprt);
+ xs_reset_transport(transport);
- if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
+ err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ if (err < 0) {
dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
goto out;
}
@@ -1586,18 +1644,17 @@ static void xs_udp_connect_worker6(struct work_struct *work)
xs_udp_finish_connecting(xprt, sock);
status = 0;
out:
- xprt_wake_pending_tasks(xprt, status);
xprt_clear_connecting(xprt);
+ xprt_wake_pending_tasks(xprt, status);
}
/*
* We need to preserve the port number so the reply cache on the server can
* find our cached RPC replies when we get around to reconnecting.
*/
-static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
+static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
{
int result;
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct sockaddr any;
dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt);
@@ -1609,11 +1666,24 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
memset(&any, 0, sizeof(any));
any.sa_family = AF_UNSPEC;
result = kernel_connect(transport->sock, &any, sizeof(any), 0);
- if (result)
+ if (!result)
+ xs_sock_mark_closed(xprt);
+ else
dprintk("RPC: AF_UNSPEC connect return code %d\n",
result);
}
+static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
+{
+ unsigned int state = transport->inet->sk_state;
+
+ if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED)
+ return;
+ if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT))
+ return;
+ xs_abort_connection(xprt, transport);
+}
+
static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -1629,7 +1699,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_data_ready = xs_tcp_data_ready;
sk->sk_state_change = xs_tcp_state_change;
sk->sk_write_space = xs_tcp_write_space;
- sk->sk_error_report = xs_tcp_error_report;
+ sk->sk_error_report = xs_error_report;
sk->sk_allocation = GFP_ATOMIC;
/* socket options */
@@ -1657,37 +1727,42 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
}
/**
- * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
- * @work: RPC transport to connect
+ * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint
+ * @xprt: RPC transport to connect
+ * @transport: socket transport to connect
+ * @create_sock: function to create a socket of the correct type
*
* Invoked by a work queue tasklet.
*/
-static void xs_tcp_connect_worker4(struct work_struct *work)
+static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
+ struct sock_xprt *transport,
+ struct socket *(*create_sock)(struct rpc_xprt *,
+ struct sock_xprt *))
{
- struct sock_xprt *transport =
- container_of(work, struct sock_xprt, connect_worker.work);
- struct rpc_xprt *xprt = &transport->xprt;
struct socket *sock = transport->sock;
- int err, status = -EIO;
+ int status = -EIO;
if (xprt->shutdown)
goto out;
if (!sock) {
- /* start from scratch */
- if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
- dprintk("RPC: can't create TCP transport socket (%d).\n", -err);
+ clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+ sock = create_sock(xprt, transport);
+ if (IS_ERR(sock)) {
+ status = PTR_ERR(sock);
goto out;
}
- xs_reclassify_socket4(sock);
+ } else {
+ int abort_and_exit;
- if (xs_bind4(transport, sock) < 0) {
- sock_release(sock);
- goto out;
- }
- } else
+ abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
+ &xprt->state);
/* "close" the socket, preserving the local port */
- xs_tcp_reuse_connection(xprt);
+ xs_tcp_reuse_connection(xprt, transport);
+
+ if (abort_and_exit)
+ goto out_eagain;
+ }
dprintk("RPC: worker connecting xprt %p to address: %s\n",
xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
@@ -1696,83 +1771,104 @@ static void xs_tcp_connect_worker4(struct work_struct *work)
dprintk("RPC: %p connect status %d connected %d sock state %d\n",
xprt, -status, xprt_connected(xprt),
sock->sk->sk_state);
- if (status < 0) {
- switch (status) {
- case -EINPROGRESS:
- case -EALREADY:
- goto out_clear;
- case -ECONNREFUSED:
- case -ECONNRESET:
- /* retry with existing socket, after a delay */
- break;
- default:
- /* get rid of existing socket, and retry */
- xs_tcp_shutdown(xprt);
- }
+ switch (status) {
+ case -ECONNREFUSED:
+ case -ECONNRESET:
+ case -ENETUNREACH:
+ /* retry with existing socket, after a delay */
+ case 0:
+ case -EINPROGRESS:
+ case -EALREADY:
+ xprt_clear_connecting(xprt);
+ return;
}
+ /* get rid of existing socket, and retry */
+ xs_tcp_shutdown(xprt);
+ printk("%s: connect returned unhandled error %d\n",
+ __func__, status);
+out_eagain:
+ status = -EAGAIN;
out:
- xprt_wake_pending_tasks(xprt, status);
-out_clear:
xprt_clear_connecting(xprt);
+ xprt_wake_pending_tasks(xprt, status);
+}
+
+static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt,
+ struct sock_xprt *transport)
+{
+ struct socket *sock;
+ int err;
+
+ /* start from scratch */
+ err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+ if (err < 0) {
+ dprintk("RPC: can't create TCP transport socket (%d).\n",
+ -err);
+ goto out_err;
+ }
+ xs_reclassify_socket4(sock);
+
+ if (xs_bind4(transport, sock) < 0) {
+ sock_release(sock);
+ goto out_err;
+ }
+ return sock;
+out_err:
+ return ERR_PTR(-EIO);
}
/**
- * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
+ * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
* @work: RPC transport to connect
*
* Invoked by a work queue tasklet.
*/
-static void xs_tcp_connect_worker6(struct work_struct *work)
+static void xs_tcp_connect_worker4(struct work_struct *work)
{
struct sock_xprt *transport =
container_of(work, struct sock_xprt, connect_worker.work);
struct rpc_xprt *xprt = &transport->xprt;
- struct socket *sock = transport->sock;
- int err, status = -EIO;
- if (xprt->shutdown)
- goto out;
+ xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4);
+}
- if (!sock) {
- /* start from scratch */
- if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
- dprintk("RPC: can't create TCP transport socket (%d).\n", -err);
- goto out;
- }
- xs_reclassify_socket6(sock);
+static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt,
+ struct sock_xprt *transport)
+{
+ struct socket *sock;
+ int err;
+
+ /* start from scratch */
+ err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock);
+ if (err < 0) {
+ dprintk("RPC: can't create TCP transport socket (%d).\n",
+ -err);
+ goto out_err;
+ }
+ xs_reclassify_socket6(sock);
- if (xs_bind6(transport, sock) < 0) {
- sock_release(sock);
- goto out;
- }
- } else
- /* "close" the socket, preserving the local port */
- xs_tcp_reuse_connection(xprt);
+ if (xs_bind6(transport, sock) < 0) {
+ sock_release(sock);
+ goto out_err;
+ }
+ return sock;
+out_err:
+ return ERR_PTR(-EIO);
+}
- dprintk("RPC: worker connecting xprt %p to address: %s\n",
- xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+/**
+ * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
+ * @work: RPC transport to connect
+ *
+ * Invoked by a work queue tasklet.
+ */
+static void xs_tcp_connect_worker6(struct work_struct *work)
+{
+ struct sock_xprt *transport =
+ container_of(work, struct sock_xprt, connect_worker.work);
+ struct rpc_xprt *xprt = &transport->xprt;
- status = xs_tcp_finish_connecting(xprt, sock);
- dprintk("RPC: %p connect status %d connected %d sock state %d\n",
- xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
- if (status < 0) {
- switch (status) {
- case -EINPROGRESS:
- case -EALREADY:
- goto out_clear;
- case -ECONNREFUSED:
- case -ECONNRESET:
- /* retry with existing socket, after a delay */
- break;
- default:
- /* get rid of existing socket, and retry */
- xs_tcp_shutdown(xprt);
- }
- }
-out:
- xprt_wake_pending_tasks(xprt, status);
-out_clear:
- xprt_clear_connecting(xprt);
+ xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6);
}
/**
@@ -1817,9 +1913,6 @@ static void xs_tcp_connect(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_xprt;
- /* Initiate graceful shutdown of the socket if not already done */
- if (test_bit(XPRT_CONNECTED, &xprt->state))
- xs_tcp_shutdown(xprt);
/* Exit if we need to wait for socket shutdown to complete */
if (test_bit(XPRT_CLOSING, &xprt->state))
return;