From 3197d309f5fb042499b2c4c8f2fcb67372df5201 Mon Sep 17 00:00:00 2001 From: Tom Talpey Date: Thu, 9 Oct 2008 15:00:20 -0400 Subject: RPC/RDMA: support FRMR client memory registration. Configure, detect and use "fastreg" support from IB/iWARP verbs layer to perform RPC/RDMA memory registration. Make FRMR the default memreg mode (will fall back if not supported by the selected RDMA adapter). This allows full and optimal operation over the cxgb3 adapter, and others. Signed-off-by: Tom Talpey Acked-by: Tom Tucker Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/transport.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'net/sunrpc/xprtrdma/transport.c') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index a564c1a39ec5..89970b0a4cc9 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -70,11 +70,7 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_inline_write_padding; -#if !RPCRDMA_PERSISTENT_REGISTRATION -static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */ -#else -static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL; -#endif +static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; #ifdef RPC_DEBUG -- cgit v1.2.3 From 575448bd36208f99fe0dd554a43518d798966740 Mon Sep 17 00:00:00 2001 From: Tom Talpey Date: Thu, 9 Oct 2008 15:00:40 -0400 Subject: RPC/RDMA: suppress retransmit on RPC/RDMA clients. An RPC/RDMA client cannot retransmit on an unbroken connection, doing so violates its flow control with the server. Signed-off-by: Tom Talpey Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/transport.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'net/sunrpc/xprtrdma/transport.c') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 89970b0a4cc9..0aefc6485385 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -587,6 +587,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) } dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); out: + req->rl_connect_cookie = 0; /* our reserved value */ return req->rl_xdr_buf; outfail: @@ -690,13 +691,20 @@ xprt_rdma_send_request(struct rpc_task *task) req->rl_reply->rr_xprt = xprt; } - if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) { - xprt_disconnect_done(xprt); - return -ENOTCONN; /* implies disconnect */ - } + /* Must suppress retransmit to maintain credits */ + if (req->rl_connect_cookie == xprt->connect_cookie) + goto drop_connection; + req->rl_connect_cookie = xprt->connect_cookie; + + if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) + goto drop_connection; rqst->rq_bytes_sent = 0; return 0; + +drop_connection: + xprt_disconnect_done(xprt); + return -ENOTCONN; /* implies disconnect */ } static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) -- cgit v1.2.3 From ad0e9e01da4ece70ff524b49c77c5e850d5dd53e Mon Sep 17 00:00:00 2001 From: Tom Talpey Date: Thu, 9 Oct 2008 15:00:50 -0400 Subject: RPC/RDMA: maintain the RPC task bytes-sent statistic. Signed-off-by: Tom Talpey Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/transport.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/sunrpc/xprtrdma/transport.c') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 0aefc6485385..ec6d1e7a1941 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -699,6 +699,7 @@ xprt_rdma_send_request(struct rpc_task *task) if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) goto drop_connection; + task->tk_bytes_sent += rqst->rq_snd_buf.len; rqst->rq_bytes_sent = 0; return 0; -- cgit v1.2.3 From 9191ca3b381b15b9a88785a8ae2fa4db8e553b0c Mon Sep 17 00:00:00 2001 From: Tom Talpey Date: Thu, 9 Oct 2008 15:01:11 -0400 Subject: RPC/RDMA: adhere to protocol for unpadded client trailing write chunks. The RPC/RDMA protocol allows clients and servers to avoid RDMA operations for data which is purely the result of XDR padding. On the client, automatically insert the necessary padding for such server replies, and optionally don't marshal such chunks. Signed-off-by: Tom Talpey Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/transport.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net/sunrpc/xprtrdma/transport.c') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index ec6d1e7a1941..c7d2380bb5e3 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -71,6 +71,7 @@ static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_inline_write_padding; static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; + int xprt_rdma_pad_optimize = 0; #ifdef RPC_DEBUG @@ -135,6 +136,14 @@ static ctl_table xr_tunables_table[] = { .extra1 = &min_memreg, .extra2 = &max_memreg, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "rdma_pad_optimize", + .data = &xprt_rdma_pad_optimize, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = 0, }, -- cgit v1.2.3 From b3cd8d45a764e6edb06e7bd386faf99a879569b8 Mon Sep 17 00:00:00 2001 From: Tom Talpey Date: Thu, 9 Oct 2008 15:02:02 -0400 Subject: RPC/RDMA: optionally emit useful transport info upon connect/disconnect. Signed-off-by: Tom Talpey Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sunrpc/xprtrdma/transport.c') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index c7d2380bb5e3..c2da680273c5 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -784,7 +784,7 @@ static void __exit xprt_rdma_cleanup(void) { int rc; - dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); + dprintk(KERN_INFO "RPCRDMA Module Removed, deregister RPC RDMA transport\n"); #ifdef RPC_DEBUG if (sunrpc_table_header) { unregister_sysctl_table(sunrpc_table_header); -- cgit v1.2.3 From 08ca0dce1eafa419059ac4cad9ed522af7052526 Mon Sep 17 00:00:00 2001 From: Tom Talpey Date: Fri, 10 Oct 2008 11:32:34 -0400 Subject: RPC/RDMA: correct the reconnect timer backoff The RPC/RDMA code had a constant 5-second reconnect backoff, and always performed it, even when re-establishing a connection to a server after the RPC layer closed it due to being idle. Make it an geometric backoff (up to 30 seconds), and don't delay idle reconnect. Signed-off-by: Tom Talpey Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/transport.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net/sunrpc/xprtrdma/transport.c') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index c2da680273c5..9839c3d94145 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -463,6 +463,8 @@ xprt_rdma_close(struct rpc_xprt *xprt) struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); dprintk("RPC: %s: closing\n", __func__); + if (r_xprt->rx_ep.rep_connected > 0) + xprt->reestablish_timeout = 0; xprt_disconnect_done(xprt); (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); } @@ -490,6 +492,11 @@ xprt_rdma_connect(struct rpc_task *task) /* Reconnect */ schedule_delayed_work(&r_xprt->rdma_connect, xprt->reestablish_timeout); + xprt->reestablish_timeout <<= 1; + if (xprt->reestablish_timeout > (30 * HZ)) + xprt->reestablish_timeout = (30 * HZ); + else if (xprt->reestablish_timeout < (5 * HZ)) + xprt->reestablish_timeout = (5 * HZ); } else { schedule_delayed_work(&r_xprt->rdma_connect, 0); if (!RPC_IS_ASYNC(task)) -- cgit v1.2.3