summaryrefslogtreecommitdiffstats
path: root/include/trace
diff options
context:
space:
mode:
authorChuck Lever <chuck.lever@oracle.com>2018-05-04 15:35:20 -0400
committerAnna Schumaker <Anna.Schumaker@Netapp.com>2018-05-07 09:20:04 -0400
commit7c8d9e7c8863905951d4eaa7a8d277150f3a37f7 (patch)
tree16017a18ac2150272b7b5dd4be6fe1cd5d230aeb /include/trace
parent0e0b854cfb3302b1907e9d3a927469b95710238f (diff)
downloadlinux-stable-7c8d9e7c8863905951d4eaa7a8d277150f3a37f7.tar.gz
linux-stable-7c8d9e7c8863905951d4eaa7a8d277150f3a37f7.tar.bz2
linux-stable-7c8d9e7c8863905951d4eaa7a8d277150f3a37f7.zip
xprtrdma: Move Receive posting to Receive handler
Receive completion and Reply handling are done by a BOUND workqueue, meaning they run on only one CPU. Posting receives is currently done in the send_request path, which on large systems is typically done on a different CPU than the one handling Receive completions. This results in movement of Receive-related cachelines between the sending and receiving CPUs. More importantly, it means that currently Receives are posted while the transport's write lock is held, which is unnecessary and costly. Finally, allocation of Receive buffers is performed on-demand in the Receive completion handler. This helps guarantee that they are allocated on the same NUMA node as the CPU that handles Receive completions. Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Diffstat (limited to 'include/trace')
-rw-r--r--include/trace/events/rpcrdma.h40
1 files changed, 35 insertions, 5 deletions
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 99c0049e51a5..ad27e192cdf8 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -546,6 +546,39 @@ TRACE_EVENT(xprtrdma_post_recv,
)
);
+TRACE_EVENT(xprtrdma_post_recvs,
+ TP_PROTO(
+ const struct rpcrdma_xprt *r_xprt,
+ unsigned int count,
+ int status
+ ),
+
+ TP_ARGS(r_xprt, count, status),
+
+ TP_STRUCT__entry(
+ __field(const void *, r_xprt)
+ __field(unsigned int, count)
+ __field(int, status)
+ __field(int, posted)
+ __string(addr, rpcrdma_addrstr(r_xprt))
+ __string(port, rpcrdma_portstr(r_xprt))
+ ),
+
+ TP_fast_assign(
+ __entry->r_xprt = r_xprt;
+ __entry->count = count;
+ __entry->status = status;
+ __entry->posted = r_xprt->rx_buf.rb_posted_receives;
+ __assign_str(addr, rpcrdma_addrstr(r_xprt));
+ __assign_str(port, rpcrdma_portstr(r_xprt));
+ ),
+
+ TP_printk("peer=[%s]:%s r_xprt=%p: %u new recvs, %d active (rc %d)",
+ __get_str(addr), __get_str(port), __entry->r_xprt,
+ __entry->count, __entry->posted, __entry->status
+ )
+);
+
/**
** Completion events
**/
@@ -800,7 +833,6 @@ TRACE_EVENT(xprtrdma_allocate,
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(const void *, req)
- __field(const void *, rep)
__field(size_t, callsize)
__field(size_t, rcvsize)
),
@@ -809,15 +841,13 @@ TRACE_EVENT(xprtrdma_allocate,
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
__entry->req = req;
- __entry->rep = req ? req->rl_reply : NULL;
__entry->callsize = task->tk_rqstp->rq_callsize;
__entry->rcvsize = task->tk_rqstp->rq_rcvsize;
),
- TP_printk("task:%u@%u req=%p rep=%p (%zu, %zu)",
+ TP_printk("task:%u@%u req=%p (%zu, %zu)",
__entry->task_id, __entry->client_id,
- __entry->req, __entry->rep,
- __entry->callsize, __entry->rcvsize
+ __entry->req, __entry->callsize, __entry->rcvsize
)
);