diff options
Diffstat (limited to 'net/sctp')
-rw-r--r-- | net/sctp/Kconfig | 12 | ||||
-rw-r--r-- | net/sctp/Makefile | 3 | ||||
-rw-r--r-- | net/sctp/associola.c | 13 | ||||
-rw-r--r-- | net/sctp/chunk.c | 4 | ||||
-rw-r--r-- | net/sctp/endpointola.c | 2 | ||||
-rw-r--r-- | net/sctp/ipv6.c | 27 | ||||
-rw-r--r-- | net/sctp/output.c | 27 | ||||
-rw-r--r-- | net/sctp/outqueue.c | 94 | ||||
-rw-r--r-- | net/sctp/probe.c | 214 | ||||
-rw-r--r-- | net/sctp/protocol.c | 9 | ||||
-rw-r--r-- | net/sctp/sm_make_chunk.c | 24 | ||||
-rw-r--r-- | net/sctp/sm_sideeffect.c | 8 | ||||
-rw-r--r-- | net/sctp/socket.c | 39 | ||||
-rw-r--r-- | net/sctp/transport.c | 61 |
14 files changed, 355 insertions, 182 deletions
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 58b3e882a187..126b014eb79b 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -37,6 +37,18 @@ menuconfig IP_SCTP if IP_SCTP +config NET_SCTPPROBE + tristate "SCTP: Association probing" + depends on PROC_FS && KPROBES + ---help--- + This module allows for capturing the changes to SCTP association + state in response to incoming packets. It is used for debugging + SCTP congestion control algorithms. If you don't understand + what was just said, you don't need it: say N. + + To compile this code as a module, choose M here: the + module will be called sctp_probe. + config SCTP_DBG_MSG bool "SCTP: Debug messages" help diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 6b794734380a..5c30b7a873df 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -3,6 +3,7 @@ # obj-$(CONFIG_IP_SCTP) += sctp.o +obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ protocol.o endpointola.o associola.o \ @@ -11,6 +12,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ tsnmap.o bind_addr.o socket.o primitive.o \ output.o input.o debug.o ssnmap.o auth.o +sctp_probe-y := probe.o + sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o sctp-$(CONFIG_PROC_FS) += proc.o sctp-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 99c93ee98ad9..3912420cedcc 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -87,9 +87,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Retrieve the SCTP per socket area. */ sp = sctp_sk((struct sock *)sk); - /* Init all variables to a known value. */ - memset(asoc, 0, sizeof(struct sctp_association)); - /* Discarding const is appropriate here. */ asoc->ep = (struct sctp_endpoint *)ep; sctp_endpoint_hold(asoc->ep); @@ -762,7 +759,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, asoc->peer.retran_path = peer; } - if (asoc->peer.active_path == asoc->peer.retran_path) { + if (asoc->peer.active_path == asoc->peer.retran_path && + peer->state != SCTP_UNCONFIRMED) { asoc->peer.retran_path = peer; } @@ -1320,12 +1318,13 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) /* Keep track of the next transport in case * we don't find any active transport. */ - if (!next) + if (t->state != SCTP_UNCONFIRMED && !next) next = t; } } - asoc->peer.retran_path = t; + if (t) + asoc->peer.retran_path = t; SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association" " %p addr: ", @@ -1485,7 +1484,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned len) if (asoc->rwnd >= len) { asoc->rwnd -= len; if (over) { - asoc->rwnd_press = asoc->rwnd; + asoc->rwnd_press += asoc->rwnd; asoc->rwnd = 0; } } else { diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 3eab6db59a37..476caaf100ed 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -58,9 +58,9 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg) msg->send_failed = 0; msg->send_error = 0; msg->can_abandon = 0; + msg->can_delay = 1; msg->expires_at = 0; INIT_LIST_HEAD(&msg->chunks); - msg->msg_size = 0; } /* Allocate and initialize datamsg. */ @@ -157,7 +157,6 @@ static void sctp_datamsg_assign(struct sctp_datamsg *msg, struct sctp_chunk *chu { sctp_datamsg_hold(msg); chunk->msg = msg; - msg->msg_size += chunk->skb->len; } @@ -247,6 +246,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, if (msg_len >= first_len) { msg_len -= first_len; whole = 1; + msg->can_delay = 0; } /* How many full sized? How many bytes leftover? */ diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 7ec09ba03a1c..e10acc01c75f 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -70,8 +70,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, struct sctp_shared_key *null_key; int err; - memset(ep, 0, sizeof(struct sctp_endpoint)); - ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp); if (!ep->digest) return NULL; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 9fb5d37c37ad..732689140fb8 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -232,7 +232,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) if (!(transport->param_flags & SPP_PMTUD_ENABLE)) skb->local_df = 1; - return ip6_xmit(sk, skb, &fl, np->opt, 0); + return ip6_xmit(sk, skb, &fl, np->opt); } /* Returns the dst cache entry for the given source and destination ip @@ -277,20 +277,7 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc, static inline int sctp_v6_addr_match_len(union sctp_addr *s1, union sctp_addr *s2) { - struct in6_addr *a1 = &s1->v6.sin6_addr; - struct in6_addr *a2 = &s2->v6.sin6_addr; - int i, j; - - for (i = 0; i < 4 ; i++) { - __be32 a1xora2; - - a1xora2 = a1->s6_addr32[i] ^ a2->s6_addr32[i]; - - if ((j = fls(ntohl(a1xora2)))) - return (i * 32 + 32 - j); - } - - return (i*32); + return ipv6_addr_diff(&s1->v6.sin6_addr, &s2->v6.sin6_addr); } /* Fills in the source address(saddr) based on the destination address(daddr) @@ -372,13 +359,13 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, } read_lock_bh(&in6_dev->lock); - for (ifp = in6_dev->addr_list; ifp; ifp = ifp->if_next) { + list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { /* Add the address to the local list. */ addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC); if (addr) { addr->a.v6.sin6_family = AF_INET6; addr->a.v6.sin6_port = 0; - addr->a.v6.sin6_addr = ifp->addr; + ipv6_addr_copy(&addr->a.v6.sin6_addr, &ifp->addr); addr->a.v6.sin6_scope_id = dev->ifindex; addr->valid = 1; INIT_LIST_HEAD(&addr->list); @@ -419,7 +406,7 @@ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk) { addr->v6.sin6_family = AF_INET6; addr->v6.sin6_port = 0; - addr->v6.sin6_addr = inet6_sk(sk)->rcv_saddr; + ipv6_addr_copy(&addr->v6.sin6_addr, &inet6_sk(sk)->rcv_saddr); } /* Initialize sk->sk_rcv_saddr from sctp_addr. */ @@ -432,7 +419,7 @@ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk) inet6_sk(sk)->rcv_saddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; } else { - inet6_sk(sk)->rcv_saddr = addr->v6.sin6_addr; + ipv6_addr_copy(&inet6_sk(sk)->rcv_saddr, &addr->v6.sin6_addr); } } @@ -445,7 +432,7 @@ static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk) inet6_sk(sk)->daddr.s6_addr32[2] = htonl(0x0000ffff); inet6_sk(sk)->daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; } else { - inet6_sk(sk)->daddr = addr->v6.sin6_addr; + ipv6_addr_copy(&inet6_sk(sk)->daddr, &addr->v6.sin6_addr); } } diff --git a/net/sctp/output.c b/net/sctp/output.c index fad261d41ec2..a646681f5acd 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -429,24 +429,17 @@ int sctp_packet_transmit(struct sctp_packet *packet) list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { list_del_init(&chunk->list); if (sctp_chunk_is_data(chunk)) { + /* 6.3.1 C4) When data is in flight and when allowed + * by rule C5, a new RTT measurement MUST be made each + * round trip. Furthermore, new RTT measurements + * SHOULD be made no more than once per round-trip + * for a given destination transport address. + */ - if (!chunk->resent) { - - /* 6.3.1 C4) When data is in flight and when allowed - * by rule C5, a new RTT measurement MUST be made each - * round trip. Furthermore, new RTT measurements - * SHOULD be made no more than once per round-trip - * for a given destination transport address. - */ - - if (!tp->rto_pending) { - chunk->rtt_in_progress = 1; - tp->rto_pending = 1; - } + if (!tp->rto_pending) { + chunk->rtt_in_progress = 1; + tp->rto_pending = 1; } - - chunk->resent = 1; - has_data = 1; } @@ -681,7 +674,7 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, * Don't delay large message writes that may have been * fragmeneted into small peices. */ - if ((len < max) && (chunk->msg->msg_size < max)) { + if ((len < max) && chunk->msg->can_delay) { retval = SCTP_XMIT_NAGLE_DELAY; goto finish; } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index abfc0b8dee74..5d057178ce0c 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -62,7 +62,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, struct list_head *transmitted_queue, struct sctp_transport *transport, struct sctp_sackhdr *sack, - __u32 highest_new_tsn); + __u32 *highest_new_tsn); static void sctp_mark_missing(struct sctp_outq *q, struct list_head *transmitted_queue, @@ -308,7 +308,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) /* If it is data, queue it up, otherwise, send it * immediately. */ - if (SCTP_CID_DATA == chunk->chunk_hdr->type) { + if (sctp_chunk_is_data(chunk)) { /* Is it OK to queue data chunks? */ /* From 9. Termination of Association * @@ -598,11 +598,23 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, if (fast_rtx && !chunk->fast_retransmit) continue; +redo: /* Attempt to append this chunk to the packet. */ status = sctp_packet_append_chunk(pkt, chunk); switch (status) { case SCTP_XMIT_PMTU_FULL: + if (!pkt->has_data && !pkt->has_cookie_echo) { + /* If this packet did not contain DATA then + * retransmission did not happen, so do it + * again. We'll ignore the error here since + * control chunks are already freed so there + * is nothing we can do. + */ + sctp_packet_transmit(pkt); + goto redo; + } + /* Send this packet. */ error = sctp_packet_transmit(pkt); @@ -647,14 +659,6 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, if (chunk->fast_retransmit == SCTP_NEED_FRTX) chunk->fast_retransmit = SCTP_DONT_FRTX; - /* Force start T3-rtx timer when fast retransmitting - * the earliest outstanding TSN - */ - if (!timer && fast_rtx && - ntohl(chunk->subh.data_hdr->tsn) == - asoc->ctsn_ack_point + 1) - timer = 2; - q->empty = 0; break; } @@ -854,6 +858,12 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) if (status != SCTP_XMIT_OK) { /* put the chunk back */ list_add(&chunk->list, &q->control_chunk_list); + } else if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) { + /* PR-SCTP C5) If a FORWARD TSN is sent, the + * sender MUST assure that at least one T3-rtx + * timer is running. + */ + sctp_transport_reset_timers(transport); } break; @@ -906,8 +916,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) rtx_timeout, &start_timer); if (start_timer) - sctp_transport_reset_timers(transport, - start_timer-1); + sctp_transport_reset_timers(transport); /* This can happen on COOKIE-ECHO resend. Only * one chunk can get bundled with a COOKIE-ECHO. @@ -1040,7 +1049,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) list_add_tail(&chunk->transmitted_list, &transport->transmitted); - sctp_transport_reset_timers(transport, 0); + sctp_transport_reset_timers(transport); q->empty = 0; @@ -1100,32 +1109,6 @@ static void sctp_sack_update_unack_data(struct sctp_association *assoc, assoc->unack_data = unack_data; } -/* Return the highest new tsn that is acknowledged by the given SACK chunk. */ -static __u32 sctp_highest_new_tsn(struct sctp_sackhdr *sack, - struct sctp_association *asoc) -{ - struct sctp_transport *transport; - struct sctp_chunk *chunk; - __u32 highest_new_tsn, tsn; - struct list_head *transport_list = &asoc->peer.transport_addr_list; - - highest_new_tsn = ntohl(sack->cum_tsn_ack); - - list_for_each_entry(transport, transport_list, transports) { - list_for_each_entry(chunk, &transport->transmitted, - transmitted_list) { - tsn = ntohl(chunk->subh.data_hdr->tsn); - - if (!chunk->tsn_gap_acked && - TSN_lt(highest_new_tsn, tsn) && - sctp_acked(sack, tsn)) - highest_new_tsn = tsn; - } - } - - return highest_new_tsn; -} - /* This is where we REALLY process a SACK. * * Process the SACK against the outqueue. Mostly, this just frees @@ -1145,6 +1128,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) struct sctp_transport *primary = asoc->peer.primary_path; int count_of_newacks = 0; int gap_ack_blocks; + u8 accum_moved = 0; /* Grab the association's destination address list. */ transport_list = &asoc->peer.transport_addr_list; @@ -1193,18 +1177,15 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) if (gap_ack_blocks) highest_tsn += ntohs(frags[gap_ack_blocks - 1].gab.end); - if (TSN_lt(asoc->highest_sacked, highest_tsn)) { - highest_new_tsn = highest_tsn; + if (TSN_lt(asoc->highest_sacked, highest_tsn)) asoc->highest_sacked = highest_tsn; - } else { - highest_new_tsn = sctp_highest_new_tsn(sack, asoc); - } + highest_new_tsn = sack_ctsn; /* Run through the retransmit queue. Credit bytes received * and free those chunks that we can. */ - sctp_check_transmitted(q, &q->retransmit, NULL, sack, highest_new_tsn); + sctp_check_transmitted(q, &q->retransmit, NULL, sack, &highest_new_tsn); /* Run through the transmitted queue. * Credit bytes received and free those chunks which we can. @@ -1213,7 +1194,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) */ list_for_each_entry(transport, transport_list, transports) { sctp_check_transmitted(q, &transport->transmitted, - transport, sack, highest_new_tsn); + transport, sack, &highest_new_tsn); /* * SFR-CACC algorithm: * C) Let count_of_newacks be the number of @@ -1223,16 +1204,22 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) count_of_newacks ++; } + /* Move the Cumulative TSN Ack Point if appropriate. */ + if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn)) { + asoc->ctsn_ack_point = sack_ctsn; + accum_moved = 1; + } + if (gap_ack_blocks) { + + if (asoc->fast_recovery && accum_moved) + highest_new_tsn = highest_tsn; + list_for_each_entry(transport, transport_list, transports) sctp_mark_missing(q, &transport->transmitted, transport, highest_new_tsn, count_of_newacks); } - /* Move the Cumulative TSN Ack Point if appropriate. */ - if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn)) - asoc->ctsn_ack_point = sack_ctsn; - /* Update unack_data field in the assoc. */ sctp_sack_update_unack_data(asoc, sack); @@ -1315,7 +1302,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, struct list_head *transmitted_queue, struct sctp_transport *transport, struct sctp_sackhdr *sack, - __u32 highest_new_tsn_in_sack) + __u32 *highest_new_tsn_in_sack) { struct list_head *lchunk; struct sctp_chunk *tchunk; @@ -1387,7 +1374,6 @@ static void sctp_check_transmitted(struct sctp_outq *q, * instance). */ if (!tchunk->tsn_gap_acked && - !tchunk->resent && tchunk->rtt_in_progress) { tchunk->rtt_in_progress = 0; rtt = jiffies - tchunk->sent_at; @@ -1404,6 +1390,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, */ if (!tchunk->tsn_gap_acked) { tchunk->tsn_gap_acked = 1; + *highest_new_tsn_in_sack = tsn; bytes_acked += sctp_data_size(tchunk); if (!tchunk->transport) migrate_bytes += sctp_data_size(tchunk); @@ -1677,7 +1664,8 @@ static void sctp_mark_missing(struct sctp_outq *q, struct sctp_chunk *chunk; __u32 tsn; char do_fast_retransmit = 0; - struct sctp_transport *primary = q->asoc->peer.primary_path; + struct sctp_association *asoc = q->asoc; + struct sctp_transport *primary = asoc->peer.primary_path; list_for_each_entry(chunk, transmitted_queue, transmitted_list) { diff --git a/net/sctp/probe.c b/net/sctp/probe.c new file mode 100644 index 000000000000..db3a42b8b349 --- /dev/null +++ b/net/sctp/probe.c @@ -0,0 +1,214 @@ +/* + * sctp_probe - Observe the SCTP flow with kprobes. + * + * The idea for this came from Werner Almesberger's umlsim + * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org> + * + * Modified for SCTP from Stephen Hemminger's code + * Copyright (C) 2010, Wei Yongjun <yjwei@cn.fujitsu.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/socket.h> +#include <linux/sctp.h> +#include <linux/proc_fs.h> +#include <linux/vmalloc.h> +#include <linux/module.h> +#include <linux/kfifo.h> +#include <linux/time.h> +#include <net/net_namespace.h> + +#include <net/sctp/sctp.h> +#include <net/sctp/sm.h> + +MODULE_AUTHOR("Wei Yongjun <yjwei@cn.fujitsu.com>"); +MODULE_DESCRIPTION("SCTP snooper"); +MODULE_LICENSE("GPL"); + +static int port __read_mostly = 0; +MODULE_PARM_DESC(port, "Port to match (0=all)"); +module_param(port, int, 0); + +static int bufsize __read_mostly = 64 * 1024; +MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); +module_param(bufsize, int, 0); + +static int full __read_mostly = 1; +MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)"); +module_param(full, int, 0); + +static const char procname[] = "sctpprobe"; + +static struct { + struct kfifo fifo; + spinlock_t lock; + wait_queue_head_t wait; + struct timespec tstart; +} sctpw; + +static void printl(const char *fmt, ...) +{ + va_list args; + int len; + char tbuf[256]; + + va_start(args, fmt); + len = vscnprintf(tbuf, sizeof(tbuf), fmt, args); + va_end(args); + + kfifo_in_locked(&sctpw.fifo, tbuf, len, &sctpw.lock); + wake_up(&sctpw.wait); +} + +static int sctpprobe_open(struct inode *inode, struct file *file) +{ + kfifo_reset(&sctpw.fifo); + getnstimeofday(&sctpw.tstart); + + return 0; +} + +static ssize_t sctpprobe_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + int error = 0, cnt = 0; + unsigned char *tbuf; + + if (!buf) + return -EINVAL; + + if (len == 0) + return 0; + + tbuf = vmalloc(len); + if (!tbuf) + return -ENOMEM; + + error = wait_event_interruptible(sctpw.wait, + kfifo_len(&sctpw.fifo) != 0); + if (error) + goto out_free; + + cnt = kfifo_out_locked(&sctpw.fifo, tbuf, len, &sctpw.lock); + error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0; + +out_free: + vfree(tbuf); + + return error ? error : cnt; +} + +static const struct file_operations sctpprobe_fops = { + .owner = THIS_MODULE, + .open = sctpprobe_open, + .read = sctpprobe_read, +}; + +sctp_disposition_t jsctp_sf_eat_sack(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands) +{ + struct sctp_transport *sp; + static __u32 lcwnd = 0; + struct timespec now; + + sp = asoc->peer.primary_path; + + if ((full || sp->cwnd != lcwnd) && + (!port || asoc->peer.port == port || + ep->base.bind_addr.port == port)) { + lcwnd = sp->cwnd; + + getnstimeofday(&now); + now = timespec_sub(now, sctpw.tstart); + + printl("%lu.%06lu ", (unsigned long) now.tv_sec, + (unsigned long) now.tv_nsec / NSEC_PER_USEC); + + printl("%p %5d %5d %5d %8d %5d ", asoc, + ep->base.bind_addr.port, asoc->peer.port, + asoc->pathmtu, asoc->peer.rwnd, asoc->unack_data); + + list_for_each_entry(sp, &asoc->peer.transport_addr_list, + transports) { + if (sp == asoc->peer.primary_path) + printl("*"); + + if (sp->ipaddr.sa.sa_family == AF_INET) + printl("%pI4 ", &sp->ipaddr.v4.sin_addr); + else + printl("%pI6 ", &sp->ipaddr.v6.sin6_addr); + + printl("%2u %8u %8u %8u %8u %8u ", + sp->state, sp->cwnd, sp->ssthresh, + sp->flight_size, sp->partial_bytes_acked, + sp->pathmtu); + } + printl("\n"); + } + + jprobe_return(); + return 0; +} + +static struct jprobe sctp_recv_probe = { + .kp = { + .symbol_name = "sctp_sf_eat_sack_6_2", + }, + .entry = jsctp_sf_eat_sack, +}; + +static __init int sctpprobe_init(void) +{ + int ret = -ENOMEM; + + init_waitqueue_head(&sctpw.wait); + spin_lock_init(&sctpw.lock); + if (kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL)) + return ret; + + if (!proc_net_fops_create(&init_net, procname, S_IRUSR, + &sctpprobe_fops)) + goto free_kfifo; + + ret = register_jprobe(&sctp_recv_probe); + if (ret) + goto remove_proc; + + pr_info("SCTP probe registered (port=%d)\n", port); + + return 0; + +remove_proc: + proc_net_remove(&init_net, procname); +free_kfifo: + kfifo_free(&sctpw.fifo); + return ret; +} + +static __exit void sctpprobe_exit(void) +{ + kfifo_free(&sctpw.fifo); + proc_net_remove(&init_net, procname); + unregister_jprobe(&sctp_recv_probe); +} + +module_init(sctpprobe_init); +module_exit(sctpprobe_exit); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index a56f98e82f92..182749867c72 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -474,13 +474,17 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, memset(&fl, 0x0, sizeof(struct flowi)); fl.fl4_dst = daddr->v4.sin_addr.s_addr; + fl.fl_ip_dport = daddr->v4.sin_port; fl.proto = IPPROTO_SCTP; if (asoc) { fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk); fl.oif = asoc->base.sk->sk_bound_dev_if; + fl.fl_ip_sport = htons(asoc->base.bind_addr.port); } - if (saddr) + if (saddr) { fl.fl4_src = saddr->v4.sin_addr.s_addr; + fl.fl_ip_sport = saddr->v4.sin_port; + } SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ", __func__, &fl.fl4_dst, &fl.fl4_src); @@ -528,6 +532,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, if ((laddr->state == SCTP_ADDR_SRC) && (AF_INET == laddr->a.sa.sa_family)) { fl.fl4_src = laddr->a.v4.sin_addr.s_addr; + fl.fl_ip_sport = laddr->a.v4.sin_port; if (!ip_route_output_key(&init_net, &rt, &fl)) { dst = &rt->u.dst; goto out_unlock; @@ -854,7 +859,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, IP_PMTUDISC_DO : IP_PMTUDISC_DONT; SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); - return ip_queue_xmit(skb, 0); + return ip_queue_xmit(skb); } static struct sctp_af sctp_af_inet; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 30c1767186b8..d8261f3d7715 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -445,10 +445,17 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, if (!retval) goto nomem_chunk; - /* Per the advice in RFC 2960 6.4, send this reply to - * the source of the INIT packet. + /* RFC 2960 6.4 Multi-homed SCTP Endpoints + * + * An endpoint SHOULD transmit reply chunks (e.g., SACK, + * HEARTBEAT ACK, * etc.) to the same destination transport + * address from which it received the DATA or control chunk + * to which it is replying. + * + * [INIT ACK back to where the INIT came from.] */ retval->transport = chunk->transport; + retval->subh.init_hdr = sctp_addto_chunk(retval, sizeof(initack), &initack); retval->param_hdr.v = sctp_addto_chunk(retval, addrs_len, addrs.v); @@ -487,18 +494,6 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, /* We need to remove the const qualifier at this point. */ retval->asoc = (struct sctp_association *) asoc; - /* RFC 2960 6.4 Multi-homed SCTP Endpoints - * - * An endpoint SHOULD transmit reply chunks (e.g., SACK, - * HEARTBEAT ACK, * etc.) to the same destination transport - * address from which it received the DATA or control chunk - * to which it is replying. - * - * [INIT ACK back to where the INIT came from.] - */ - if (chunk) - retval->transport = chunk->transport; - nomem_chunk: kfree(cookie); nomem_cookie: @@ -1254,7 +1249,6 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, INIT_LIST_HEAD(&retval->list); retval->skb = skb; retval->asoc = (struct sctp_association *)asoc; - retval->resent = 0; retval->has_tsn = 0; retval->has_ssn = 0; retval->rtt_in_progress = 0; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index eb1f42f45fdd..22e670200449 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -732,11 +732,15 @@ static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds, { struct sctp_transport *t; - t = sctp_assoc_choose_alter_transport(asoc, + if (chunk->transport) + t = chunk->transport; + else { + t = sctp_assoc_choose_alter_transport(asoc, asoc->shutdown_last_sent_to); + chunk->transport = t; + } asoc->shutdown_last_sent_to = t; asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto; - chunk->transport = t; } /* Helper function to change the state of an association. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 44a1ab03a3f0..ba1add0b13c3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3720,9 +3720,6 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) SCTP_DBG_OBJCNT_INC(sock); - /* Set socket backlog limit. */ - sk->sk_backlog.limit = sysctl_sctp_rmem[1]; - local_bh_disable(); percpu_counter_inc(&sctp_sockets_allocated); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -4387,7 +4384,7 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, transports) { memcpy(&temp, &from->ipaddr, sizeof(temp)); sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); - addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len; + addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; if (space_left < addrlen) return -ENOMEM; if (copy_to_user(to, &temp, addrlen)) @@ -5482,7 +5479,6 @@ pp_found: */ int reuse = sk->sk_reuse; struct sock *sk2; - struct hlist_node *node; SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n"); if (pp->fastreuse && sk->sk_reuse && @@ -5703,7 +5699,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) struct sctp_sock *sp = sctp_sk(sk); unsigned int mask; - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); /* A TCP-style listening socket becomes readable when the accept queue * is not empty. @@ -5944,7 +5940,7 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p) int error; DEFINE_WAIT(wait); - prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); /* Socket errors? */ error = sock_error(sk); @@ -5981,14 +5977,14 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p) sctp_lock_sock(sk); ready: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return 0; interrupted: error = sock_intr_errno(*timeo_p); out: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); *err = error; return error; } @@ -6062,14 +6058,14 @@ static void __sctp_write_space(struct sctp_association *asoc) wake_up_interruptible(&asoc->wait); if (sctp_writeable(sk)) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); /* Note that we try to include the Async I/O support * here by modeling from the current TCP/UDP code. * We have not tested with it yet. */ - if (sock->fasync_list && + if (sock->wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); @@ -6191,12 +6187,15 @@ do_nonblock: void sctp_data_ready(struct sock *sk, int len) { - read_lock_bh(&sk->sk_callback_lock); - if (sk_has_sleeper(sk)) - wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); - read_unlock_bh(&sk->sk_callback_lock); + rcu_read_unlock(); } /* If socket sndbuf has changed, wake up all per association waiters. */ @@ -6307,7 +6306,7 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo) for (;;) { - prepare_to_wait_exclusive(sk->sk_sleep, &wait, + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (list_empty(&ep->asocs)) { @@ -6333,7 +6332,7 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo) break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return err; } @@ -6343,7 +6342,7 @@ static void sctp_wait_for_close(struct sock *sk, long timeout) DEFINE_WAIT(wait); do { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (list_empty(&sctp_sk(sk)->ep->asocs)) break; sctp_release_sock(sk); @@ -6351,7 +6350,7 @@ static void sctp_wait_for_close(struct sock *sk, long timeout) sctp_lock_sock(sk); } while (!signal_pending(current) && timeout); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); } static void sctp_skb_set_owner_r_frag(struct sk_buff *skb, struct sock *sk) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 4a368038d46f..d67501f92ca3 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -64,9 +64,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, /* Copy in the address. */ peer->ipaddr = *addr; peer->af_specific = sctp_get_af_specific(addr->sa.sa_family); - peer->asoc = NULL; - - peer->dst = NULL; memset(&peer->saddr, 0, sizeof(union sctp_addr)); /* From 6.3.1 RTO Calculation: @@ -76,34 +73,21 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, * parameter 'RTO.Initial'. */ peer->rto = msecs_to_jiffies(sctp_rto_initial); - peer->rtt = 0; - peer->rttvar = 0; - peer->srtt = 0; - peer->rto_pending = 0; - peer->hb_sent = 0; - peer->fast_recovery = 0; peer->last_time_heard = jiffies; peer->last_time_ecne_reduced = jiffies; - peer->init_sent_count = 0; - peer->param_flags = SPP_HB_DISABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; - peer->hbinterval = 0; /* Initialize the default path max_retrans. */ peer->pathmaxrxt = sctp_max_retrans_path; - peer->error_count = 0; INIT_LIST_HEAD(&peer->transmitted); INIT_LIST_HEAD(&peer->send_ready); INIT_LIST_HEAD(&peer->transports); - peer->T3_rtx_timer.expires = 0; - peer->hb_timer.expires = 0; - setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, (unsigned long)peer); setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event, @@ -115,15 +99,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); atomic_set(&peer->refcnt, 1); - peer->dead = 0; - - peer->malloced = 0; - - /* Initialize the state information for SFR-CACC */ - peer->cacc.changeover_active = 0; - peer->cacc.cycling_changeover = 0; - peer->cacc.next_tsn_at_change = 0; - peer->cacc.cacc_saw_newack = 0; return peer; } @@ -197,7 +172,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport) /* Start T3_rtx timer if it is not already running and update the heartbeat * timer. This routine is called every time a DATA chunk is sent. */ -void sctp_transport_reset_timers(struct sctp_transport *transport, int force) +void sctp_transport_reset_timers(struct sctp_transport *transport) { /* RFC 2960 6.3.2 Retransmission Timer Rules * @@ -207,7 +182,7 @@ void sctp_transport_reset_timers(struct sctp_transport *transport, int force) * address. */ - if (force || !timer_pending(&transport->T3_rtx_timer)) + if (!timer_pending(&transport->T3_rtx_timer)) if (!mod_timer(&transport->T3_rtx_timer, jiffies + transport->rto)) sctp_transport_hold(transport); @@ -405,15 +380,16 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) void sctp_transport_raise_cwnd(struct sctp_transport *transport, __u32 sack_ctsn, __u32 bytes_acked) { + struct sctp_association *asoc = transport->asoc; __u32 cwnd, ssthresh, flight_size, pba, pmtu; cwnd = transport->cwnd; flight_size = transport->flight_size; /* See if we need to exit Fast Recovery first */ - if (transport->fast_recovery && - TSN_lte(transport->fast_recovery_exit, sack_ctsn)) - transport->fast_recovery = 0; + if (asoc->fast_recovery && + TSN_lte(asoc->fast_recovery_exit, sack_ctsn)) + asoc->fast_recovery = 0; /* The appropriate cwnd increase algorithm is performed if, and only * if the cumulative TSN whould advanced and the congestion window is @@ -442,7 +418,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, * 2) the destination's path MTU. This upper bound protects * against the ACK-Splitting attack outlined in [SAVAGE99]. */ - if (transport->fast_recovery) + if (asoc->fast_recovery) return; if (bytes_acked > pmtu) @@ -493,6 +469,8 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, void sctp_transport_lower_cwnd(struct sctp_transport *transport, sctp_lower_cwnd_t reason) { + struct sctp_association *asoc = transport->asoc; + switch (reason) { case SCTP_LOWER_CWND_T3_RTX: /* RFC 2960 Section 7.2.3, sctpimpguide @@ -503,11 +481,11 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * partial_bytes_acked = 0 */ transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); - transport->cwnd = transport->asoc->pathmtu; + 4*asoc->pathmtu); + transport->cwnd = asoc->pathmtu; - /* T3-rtx also clears fast recovery on the transport */ - transport->fast_recovery = 0; + /* T3-rtx also clears fast recovery */ + asoc->fast_recovery = 0; break; case SCTP_LOWER_CWND_FAST_RTX: @@ -523,15 +501,15 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * cwnd = ssthresh * partial_bytes_acked = 0 */ - if (transport->fast_recovery) + if (asoc->fast_recovery) return; /* Mark Fast recovery */ - transport->fast_recovery = 1; - transport->fast_recovery_exit = transport->asoc->next_tsn - 1; + asoc->fast_recovery = 1; + asoc->fast_recovery_exit = asoc->next_tsn - 1; transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); + 4*asoc->pathmtu); transport->cwnd = transport->ssthresh; break; @@ -551,7 +529,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, if (time_after(jiffies, transport->last_time_ecne_reduced + transport->rtt)) { transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); + 4*asoc->pathmtu); transport->cwnd = transport->ssthresh; transport->last_time_ecne_reduced = jiffies; } @@ -567,7 +545,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * interval. */ transport->cwnd = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); + 4*asoc->pathmtu); break; } @@ -652,7 +630,6 @@ void sctp_transport_reset(struct sctp_transport *t) t->error_count = 0; t->rto_pending = 0; t->hb_sent = 0; - t->fast_recovery = 0; /* Initialize the state information for SFR-CACC */ t->cacc.changeover_active = 0; |