From 3fcde74b3877756f4b4725a883d0b48696c0d369 Mon Sep 17 00:00:00 2001 From: Kirill Korotaev Date: Fri, 1 Sep 2006 01:34:10 -0700 Subject: [NEIGH]: neigh_table_clear() doesn't free stats neigh_table_clear() doesn't free tbl->stats. Found by Alexey Kuznetsov. Though Alexey considers this leak minor for mainstream, I still believe that cleanup code should not forget to free some of the resources :) At least, this is critical for OpenVZ with virtualized neighbour tables. Signed-Off-By: Kirill Korotaev Signed-off-by: David S. Miller --- net/core/neighbour.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5130d2efdbbe..fe2113f54e2b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1432,6 +1432,9 @@ int neigh_table_clear(struct neigh_table *tbl) kfree(tbl->phash_buckets); tbl->phash_buckets = NULL; + free_percpu(tbl->stats); + tbl->stats = NULL; + return 0; } -- cgit v1.2.3 From b3a8a40da5751525936c88f60bbc6a007f9eee37 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 13 Sep 2006 19:51:02 -0700 Subject: [TCP]: Turn ABC off. Turn Appropriate Byte Count off by default because it unfairly penalizes applications that do small writes. Add better documentation to describe what it is so users will understand why they might want to turn it on. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 111ff39a08c5..159fa3f1ba67 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -89,7 +89,7 @@ int sysctl_tcp_frto; int sysctl_tcp_nometrics_save; int sysctl_tcp_moderate_rcvbuf = 1; -int sysctl_tcp_abc = 1; +int sysctl_tcp_abc; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. 
*/ -- cgit v1.2.3 From 70e76b768bac81149c8c189e781c3f130ea2eba5 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 13 Sep 2006 19:57:54 -0700 Subject: [IPVS]: auto-help for ip_vs_ftp Fill in a help message for the ports option to ip_vs_ftp Signed-Off-By: Simon Horman Signed-off-by: David S. Miller --- net/ipv4/ipvs/ip_vs_ftp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c index a19a33ceb811..347a66c05569 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/ipv4/ipvs/ip_vs_ftp.c @@ -46,6 +46,7 @@ */ static int ports[IP_VS_APP_MAX_PORTS] = {21, 0}; module_param_array(ports, int, NULL, 0); +MODULE_PARM_DESC(ports, "Ports to monitor for FTP control commands"); /* * Debug level -- cgit v1.2.3 From 3f5af5b353ca36aca4f8a46e3da2172f669dbbbc Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 13 Sep 2006 19:58:44 -0700 Subject: [IPVS]: Make sure ip_vs_ftp ports are valid I'm not entirely sure what happens in the case of an invalid port, at best it'll be silently ignored. This patch ignores them a little more verbosely. Signed-Off-By: Simon Horman Signed-off-by: David S. Miller --- net/ipv4/ipvs/ip_vs_ftp.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c index 347a66c05569..f0c553c038de 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/ipv4/ipvs/ip_vs_ftp.c @@ -373,6 +373,12 @@ static int __init ip_vs_ftp_init(void) for (i=0; i<IP_VS_APP_MAX_PORTS; i++) { if (!ports[i]) continue; + if (ports[i] < 0 || ports[i] > 0xffff) { + IP_VS_WARNING("ip_vs_ftp: Ignoring invalid " + "configuration port[%d] = %d\n", + i, ports[i]); + continue; + } ret = register_ip_vs_app_inc(app, app->protocol, ports[i]); if (ret) break; -- cgit v1.2.3 From b552216ff1340a056aa0e12da68203902ae81d2c Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 13 Sep 2006 19:59:23 -0700 Subject: [IPVS]: remove the debug option go ip_vs_ftp This patch makes the debugging behaviour of this code more consistent with the rest of IPVS. 
Signed-Off-By: Simon Horman Signed-off-by: David S. Miller --- net/ipv4/ipvs/ip_vs_ftp.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c index f0c553c038de..37fafb1fbcff 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/ipv4/ipvs/ip_vs_ftp.c @@ -48,14 +48,6 @@ static int ports[IP_VS_APP_MAX_PORTS] = {21, 0}; module_param_array(ports, int, NULL, 0); MODULE_PARM_DESC(ports, "Ports to monitor for FTP control commands"); -/* - * Debug level - */ -#ifdef CONFIG_IP_VS_DEBUG -static int debug=0; -module_param(debug, int, 0); -#endif - /* Dummy variable */ static int ip_vs_ftp_pasv; @@ -178,7 +170,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, &start, &end) != 1) return 1; - IP_VS_DBG(1-debug, "PASV response (%u.%u.%u.%u:%d) -> " + IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> " "%u.%u.%u.%u:%d detected\n", NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr), 0); @@ -281,7 +273,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, while (data <= data_limit - 6) { if (strnicmp(data, "PASV\r\n", 6) == 0) { /* Passive mode on */ - IP_VS_DBG(1-debug, "got PASV at %zd of %zd\n", + IP_VS_DBG(7, "got PASV at %zd of %zd\n", data - data_start, data_limit - data_start); cp->app_data = &ip_vs_ftp_pasv; @@ -303,7 +295,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, &start, &end) != 1) return 1; - IP_VS_DBG(1-debug, "PORT %u.%u.%u.%u:%d detected\n", + IP_VS_DBG(7, "PORT %u.%u.%u.%u:%d detected\n", NIPQUAD(to), ntohs(port)); /* Passive mode off */ @@ -312,7 +304,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, /* * Now update or create a connection entry for it */ - IP_VS_DBG(1-debug, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n", + IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n", ip_vs_proto_name(iph->protocol), NIPQUAD(to), ntohs(port), NIPQUAD(cp->vaddr), 0); @@ 
-382,8 +374,8 @@ static int __init ip_vs_ftp_init(void) ret = register_ip_vs_app_inc(app, app->protocol, ports[i]); if (ret) break; - IP_VS_DBG(1-debug, "%s: loaded support on port[%d] = %d\n", - app->name, i, ports[i]); + IP_VS_INFO("%s: loaded support on port[%d] = %d\n", + app->name, i, ports[i]); } if (ret) -- cgit v1.2.3 From e012d51cbc41c5e603d7850c82acb0dad9e450dd Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 13 Sep 2006 20:01:28 -0700 Subject: [IPV6]: Fix tclass setting for raw sockets. np->cork.tclass is used only in cork'ed context. Otherwise, np->tclass should be used. Bug#7096 reported by Remi Denis-Courmont . Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 4 ++-- net/ipv6/raw.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 3d6e9a351150..356a8a7ef22a 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -401,7 +401,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (hlimit < 0) hlimit = ipv6_get_hoplimit(dst->dev); - tclass = np->cork.tclass; + tclass = np->tclass; if (tclass < 0) tclass = 0; @@ -497,7 +497,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (hlimit < 0) hlimit = ipv6_get_hoplimit(dst->dev); - tclass = np->cork.tclass; + tclass = np->tclass; if (tclass < 0) tclass = 0; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index d57e61ce4a7d..15b862d8acab 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -781,7 +781,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, } if (tclass < 0) { - tclass = np->cork.tclass; + tclass = np->tclass; if (tclass < 0) tclass = 0; } -- cgit v1.2.3 From d0ee011f7290b51974e4d058cf4455a5ef91177d Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Wed, 13 Sep 2006 20:08:07 -0700 Subject: [IPV6]: Accept -1 for IPV6_TCLASS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch should add 
support for -1 as "default" IPv6 traffic class, as specified in IETF RFC3542 §6.5. Within the kernel, it seems tclass < 0 is already handled, but setsockopt, getsockopt and recvmsg calls won't accept it from userland. Signed-off-by: Remi Denis-Courmont Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 2 +- net/ipv6/ipv6_sockglue.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 99a6eb23378b..3b55b4c8e2d1 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -696,7 +696,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, } tc = *(int *)CMSG_DATA(cmsg); - if (tc < 0 || tc > 0xff) + if (tc < -1 || tc > 0xff) goto exit_f; err = 0; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 43327264e69c..a5eaaf693abf 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -362,7 +362,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; case IPV6_TCLASS: - if (val < 0 || val > 0xff) + if (val < -1 || val > 0xff) goto e_inval; np->tclass = val; retv = 0; @@ -947,6 +947,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, case IPV6_TCLASS: val = np->tclass; + if (val < 0) + val = 0; break; case IPV6_RECVTCLASS: -- cgit v1.2.3 From 485c2967d622449f4bbfae305a6fc4e185b5b094 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 13 Sep 2006 20:12:40 -0700 Subject: [BRIDGE]: random extra bytes on STP TCN packet We seem to send 3 extra bytes in a TCN, which will be whatever happens to be on the stack. Thanks to Aji_Srinivas@emc.com for seeing. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/bridge/br_stp_bpdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index a7ba0cce0b46..068d8afbf0a7 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -121,7 +121,7 @@ void br_send_tcn_bpdu(struct net_bridge_port *p) buf[1] = 0; buf[2] = 0; buf[3] = BPDU_TYPE_TCN; - br_send_bpdu(p, buf, 7); + br_send_bpdu(p, buf, 4); } /* -- cgit v1.2.3 From 3795da47e82ee0596174f52817d6d3ecd91f68ea Mon Sep 17 00:00:00 2001 From: Wong Hoi Sing Edison Date: Wed, 13 Sep 2006 20:30:30 -0700 Subject: [TCP] tcp-lp: bug fix for oops in 2.6.18-rc6 Sorry that the patch submitted yesterday still contains a small bug. This version has already been tested for hours with BT connections. The oops is now difficult to reproduce. Signed-off-by: Wong Hoi Sing Edison Signed-off-by: David S. Miller --- net/ipv4/tcp_lp.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 1f977b6ee9a1..48f28d617ce6 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -3,13 +3,8 @@ * * TCP Low Priority is a distributed algorithm whose goal is to utilize only * the excess network bandwidth as compared to the ``fair share`` of - * bandwidth as targeted by TCP. Available from: - * http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf + * bandwidth as targeted by TCP. * - * Original Author: - * Aleksandar Kuzmanovic - * - * See http://www-ece.rice.edu/networks/TCP-LP/ for their implementation. * As of 2.6.13, Linux supports pluggable congestion control algorithms. * Due to the limitation of the API, we take the following changes from * the original TCP-LP implementation: @@ -24,11 +19,20 @@ * o OWD is handled in relative format, where local time stamp will in * tcp_time_stamp format. 
* - * Port from 2.4.19 to 2.6.16 as module by: - * Wong Hoi Sing Edison - * Hung Hing Lun + * Original Author: + * Aleksandar Kuzmanovic + * Available from: + * http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf + * Original implementation for 2.4.19: + * http://www-ece.rice.edu/networks/TCP-LP/ * - * Version: $Id: tcp_lp.c,v 1.22 2006-05-02 18:18:19 hswong3i Exp $ + * 2.6.x module Authors: + * Wong Hoi Sing, Edison + * Hung Hing Lun, Mike + * SourceForge project page: + * http://tcp-lp-mod.sourceforge.net/ + * + * Version: $Id: tcp_lp.c,v 1.24 2006/09/05 20:22:53 hswong3i Exp $ */ #include @@ -153,16 +157,19 @@ static u32 tcp_lp_remote_hz_estimator(struct sock *sk) if (m < 0) m = -m; - if (rhz != 0) { + if (rhz > 0) { m -= rhz >> 6; /* m is now error in remote HZ est */ rhz += m; /* 63/64 old + 1/64 new */ } else rhz = m << 6; + out: /* record time for successful remote HZ calc */ - lp->flag |= LP_VALID_RHZ; + if (rhz > 0) + lp->flag |= LP_VALID_RHZ; + else + lp->flag &= ~LP_VALID_RHZ; - out: /* record reference time stamp */ lp->remote_ref_time = tp->rx_opt.rcv_tsval; lp->local_ref_time = tp->rx_opt.rcv_tsecr; @@ -333,6 +340,6 @@ static void __exit tcp_lp_unregister(void) module_init(tcp_lp_register); module_exit(tcp_lp_unregister); -MODULE_AUTHOR("Wong Hoi Sing Edison, Hung Hing Lun"); +MODULE_AUTHOR("Wong Hoi Sing Edison, Hung Hing Lun Mike"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("TCP Low Priority"); -- cgit v1.2.3 From 080f22c0dc544e498e57ad281a9de063fa839957 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 13 Sep 2006 21:13:54 -0700 Subject: [NET]: Mark frame diverter for future removal. The code for frame diverter is unmaintained and has bitrotted. The number of users is very small and the code has lots of problems. If anyone is using it, they maybe exposing themselves to bad packet attacks. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index c6cec5aa5486..4959a4e1e0fe 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -177,7 +177,7 @@ source "net/lapb/Kconfig" config NET_DIVERT bool "Frame Diverter (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on EXPERIMENTAL && BROKEN ---help--- The Frame Diverter allows you to divert packets from the network, that are not aimed at the interface receiving it (in -- cgit v1.2.3 From cbe21d8fefca605b90e34f0f019505e599712f9b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 17 Sep 2006 23:59:57 -0700 Subject: [PACKET]: Don't truncate non-linear skbs with mmaped IO Non-linear skbs are truncated to their linear part with mmaped IO. Fix by using skb_copy_bits instead of memcpy. Signed-off-by: Patrick McHardy Acked-by: Alexey Kuznetsov Signed-off-by: David S. Miller --- net/packet/af_packet.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f9cef3671593..4172a5235916 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -626,8 +626,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe if ((int)snaplen < 0) snaplen = 0; } - if (snaplen > skb->len-skb->data_len) - snaplen = skb->len-skb->data_len; spin_lock(&sk->sk_receive_queue.lock); h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head); @@ -644,7 +642,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe status &= ~TP_STATUS_LOSING; spin_unlock(&sk->sk_receive_queue.lock); - memcpy((u8*)h + macoff, skb->data, snaplen); + skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen); h->tp_len = skb->len; h->tp_snaplen = snaplen; -- cgit v1.2.3 From d7811e623dd4be3e3bdba2d6330f7f15541ee45f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 18 Sep 2006 00:22:30 -0700 Subject: [NET]: Drop tx lock in dev_watchdog_up Fix 
lockdep warning with GRE, iptables and Speedtouch ADSL, PPP over ATM. On Sat, Sep 02, 2006 at 08:39:28PM +0000, Krzysztof Halasa wrote: > > ======================================================= > [ INFO: possible circular locking dependency detected ] > ------------------------------------------------------- > swapper/0 is trying to acquire lock: > (&dev->queue_lock){-+..}, at: [] dev_queue_xmit+0x56/0x290 > > but task is already holding lock: > (&dev->_xmit_lock){-+..}, at: [] dev_queue_xmit+0x224/0x290 > > which lock already depends on the new lock. This turns out to be a genuine bug. The queue lock and xmit lock are intentionally taken out of order. Two things are supposed to prevent dead-locks from occurring: 1) When we hold the queue_lock we're supposed to only do try_lock on the tx_lock. 2) We always drop the queue_lock after taking the tx_lock and before doing anything else. > > the existing dependency chain (in reverse order) is: > > -> #1 (&dev->_xmit_lock){-+..}: > [] lock_acquire+0x76/0xa0 > [] _spin_lock_bh+0x31/0x40 > [] dev_activate+0x69/0x120 This path obviously breaks assumption 1) and therefore can lead to ABBA dead-locks. I've looked at the history and there seems to be no reason for the lock to be held at all in dev_watchdog_up. The lock appeared on day one and even there it was unnecessary. In fact, people added __dev_watchdog_up precisely in order to get around the tx lock there. The function dev_watchdog_up is already serialised by rtnl_lock since its only caller dev_activate is always called under it. So here is a simple patch to remove the tx lock from dev_watchdog_up. In 2.6.19 we can eliminate the unnecessary __dev_watchdog_up and replace it with dev_watchdog_up. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/sched/sch_generic.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 0834c2ee9174..6f9151899795 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -238,9 +238,7 @@ void __netdev_watchdog_up(struct net_device *dev) static void dev_watchdog_up(struct net_device *dev) { - netif_tx_lock_bh(dev); __netdev_watchdog_up(dev); - netif_tx_unlock_bh(dev); } static void dev_watchdog_down(struct net_device *dev) -- cgit v1.2.3 From fe26109a9dfd9327fdbe630fc819e1b7450986b2 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 18 Sep 2006 06:37:58 -0700 Subject: [ATM] CLIP: Do not refer freed skbuff in clip_mkip(). In clip_mkip(), skb->dev is dereferenced after clip_push(), which frees up skb. Advisory: AD_LAB-06009 (). Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/atm/clip.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/atm/clip.c b/net/atm/clip.c index 7ce7bfe3fbad..7af2c411da82 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -500,9 +500,11 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout) } else { unsigned int len = skb->len; + skb_get(skb); clip_push(vcc, skb); PRIV(skb->dev)->stats.rx_packets--; PRIV(skb->dev)->stats.rx_bytes -= len; + kfree_skb(skb); } return 0; } -- cgit v1.2.3