summaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2014-11-17 23:06:20 -0800
committerDavid S. Miller <davem@davemloft.net>2014-11-19 14:57:01 -0500
commit355a901e6cf1b2b763ec85caa2a9f04fbcc4ab4a (patch)
treee91e6de26d24dd3105aa85b678d809e7ffd7749d /net/ipv4/tcp_output.c
parentbaeababb5b85d5c4e6c917efe2a1504179438d3b (diff)
downloadlinux-stable-355a901e6cf1b2b763ec85caa2a9f04fbcc4ab4a.tar.gz
linux-stable-355a901e6cf1b2b763ec85caa2a9f04fbcc4ab4a.tar.bz2
linux-stable-355a901e6cf1b2b763ec85caa2a9f04fbcc4ab4a.zip
tcp: make connect() mem charging friendly
While working on sk_forward_alloc problems reported by Denys Fedoryshchenko, we found that tcp connect() (and fastopen) do not call sk_wmem_schedule() for SYN packet (and/or SYN/DATA packet), so sk_forward_alloc is negative while connect is in progress. We can fix this by calling regular sk_stream_alloc_skb() both for the SYN packet (in tcp_connect()) and the syn_data packet in tcp_send_syn_data() Then, tcp_send_syn_data() can avoid copying syn_data as we simply can manipulate syn_data->cb[] to remove SYN flag (and increment seq) Instead of open coding memcpy_fromiovecend(), simply use this helper. This leaves in socket write queue clean fast clone skbs. This was tested against our fastopen packetdrill tests. Reported-by: Denys Fedoryshchenko <nuclearcat@nuclearcat.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Acked-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--net/ipv4/tcp_output.c68
1 files changed, 28 insertions, 40 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index eb73a1dccf56..f5bd4bd3f7e6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3011,9 +3011,9 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_request *fo = tp->fastopen_req;
- int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen;
- struct sk_buff *syn_data = NULL, *data;
+ int syn_loss = 0, space, err = 0;
unsigned long last_syn_loss = 0;
+ struct sk_buff *syn_data;
tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */
tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
@@ -3044,48 +3044,40 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
/* limit to order-0 allocations */
space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
- syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space,
- sk->sk_allocation);
- if (syn_data == NULL)
+ syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation);
+ if (!syn_data)
goto fallback;
+ syn_data->ip_summed = CHECKSUM_PARTIAL;
+ memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
+ if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space),
+ fo->data->msg_iov, 0, space))) {
+ kfree_skb(syn_data);
+ goto fallback;
+ }
- for (i = 0; i < iovlen && syn_data->len < space; ++i) {
- struct iovec *iov = &fo->data->msg_iov[i];
- unsigned char __user *from = iov->iov_base;
- int len = iov->iov_len;
+ /* No more data pending in inet_wait_for_connect() */
+ if (space == fo->size)
+ fo->data = NULL;
+ fo->copied = space;
- if (syn_data->len + len > space)
- len = space - syn_data->len;
- else if (i + 1 == iovlen)
- /* No more data pending in inet_wait_for_connect() */
- fo->data = NULL;
+ tcp_connect_queue_skb(sk, syn_data);
- if (skb_add_data(syn_data, from, len))
- goto fallback;
- }
+ err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
- /* Queue a data-only packet after the regular SYN for retransmission */
- data = pskb_copy(syn_data, sk->sk_allocation);
- if (data == NULL)
- goto fallback;
- TCP_SKB_CB(data)->seq++;
- TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN;
- TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH);
- tcp_connect_queue_skb(sk, data);
- fo->copied = data->len;
-
- /* syn_data is about to be sent, we need to take current time stamps
- * for the packets that are in write queue : SYN packet and DATA
- */
- skb_mstamp_get(&syn->skb_mstamp);
- data->skb_mstamp = syn->skb_mstamp;
+ syn->skb_mstamp = syn_data->skb_mstamp;
- if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {
+ /* Now full SYN+DATA was cloned and sent (or not),
+ * remove the SYN from the original skb (syn_data)
+ * we keep in write queue in case of a retransmit, as we
+ * also have the SYN packet (with no data) in the same queue.
+ */
+ TCP_SKB_CB(syn_data)->seq++;
+ TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
+ if (!err) {
tp->syn_data = (fo->copied > 0);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
goto done;
}
- syn_data = NULL;
fallback:
/* Send a regular SYN with Fast Open cookie request option */
@@ -3094,7 +3086,6 @@ fallback:
err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
if (err)
tp->syn_fastopen = 0;
- kfree_skb(syn_data);
done:
fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */
return err;
@@ -3114,13 +3105,10 @@ int tcp_connect(struct sock *sk)
return 0;
}
- buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
- if (unlikely(buff == NULL))
+ buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
+ if (unlikely(!buff))
return -ENOBUFS;
- /* Reserve space for headers. */
- skb_reserve(buff, MAX_TCP_HEADER);
-
tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
tp->retrans_stamp = tcp_time_stamp;
tcp_connect_queue_skb(sk, buff);