From ef222013fc8c1a2fcba5c7ab169be8ffcb778ec4 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 11 Jul 2007 06:42:04 +0200 Subject: [Bluetooth] Add hci_recv_fragment() helper function Most drivers must handle fragmented HCI data packets and events. This patch adds a generic function for their reassembly to the Bluetooth core layer and thus allows to shrink the complexity of the drivers. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index c0fc39620f36..7c78744ec0fd 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -109,6 +109,7 @@ struct hci_dev { struct sk_buff_head cmd_q; struct sk_buff *sent_cmd; + struct sk_buff *reassembly[3]; struct semaphore req_lock; wait_queue_head_t req_wait_q; @@ -437,6 +438,8 @@ static inline int hci_recv_frame(struct sk_buff *skb) return 0; } +int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count); + int hci_register_sysfs(struct hci_dev *hdev); void hci_unregister_sysfs(struct hci_dev *hdev); void hci_conn_add_sysfs(struct hci_conn *conn); -- cgit v1.2.3 From e06e7c615877026544ad7f8b309d1a3706410383 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Jun 2007 17:22:39 -0700 Subject: [IPV4]: The scheduled removal of multipath cached routing support. With help from Chris Wedgwood. Signed-off-by: David S. Miller --- include/linux/Kbuild | 1 - include/linux/ip_mp_alg.h | 22 ----------- include/linux/rtnetlink.h | 2 +- include/net/dst.h | 1 - include/net/ip_fib.h | 16 -------- include/net/ip_mp_alg.h | 96 ----------------------------------------------- include/net/route.h | 1 - 7 files changed, 1 insertion(+), 138 deletions(-) delete mode 100644 include/linux/ip_mp_alg.h delete mode 100644 include/net/ip_mp_alg.h (limited to 'include') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index afae306b177c..d94451682761 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -91,7 +91,6 @@ header-y += in6.h header-y += in_route.h header-y += ioctl.h header-y += ipmi_msgdefs.h -header-y += ip_mp_alg.h header-y += ipsec.h header-y += ipx.h header-y += irda.h diff --git a/include/linux/ip_mp_alg.h b/include/linux/ip_mp_alg.h deleted file mode 100644 index e234e2008f5d..000000000000 --- a/include/linux/ip_mp_alg.h +++ /dev/null @@ -1,22 +0,0 @@ -/* ip_mp_alg.h: IPV4 multipath algorithm support, user-visible values. - * - * Copyright (C) 2004, 2005 Einar Lueck - * Copyright (C) 2005 David S. Miller - */ - -#ifndef _LINUX_IP_MP_ALG_H -#define _LINUX_IP_MP_ALG_H - -enum ip_mp_alg { - IP_MP_ALG_NONE, - IP_MP_ALG_RR, - IP_MP_ALG_DRR, - IP_MP_ALG_RANDOM, - IP_MP_ALG_WRANDOM, - __IP_MP_ALG_MAX -}; - -#define IP_MP_ALG_MAX (__IP_MP_ALG_MAX - 1) - -#endif /* _LINUX_IP_MP_ALG_H */ - diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 1fae30af91f3..612785848532 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -261,7 +261,7 @@ enum rtattr_type_t RTA_FLOW, RTA_CACHEINFO, RTA_SESSION, - RTA_MP_ALGO, + RTA_MP_ALGO, /* no longer used */ RTA_TABLE, __RTA_MAX }; diff --git a/include/net/dst.h b/include/net/dst.h index 82270f9332db..e9ff4a4caef9 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -47,7 +47,6 @@ struct dst_entry #define DST_NOXFRM 2 #define DST_NOPOLICY 4 #define DST_NOHASH 8 -#define DST_BALANCED 0x10 unsigned long expires; unsigned short header_len; /* more space at head required */ diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 69252cbe05b0..8cadc77c7df4 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -39,7 +39,6 @@ struct fib_config { int fc_mx_len; int fc_mp_len; u32 fc_flow; - u32 fc_mp_alg; u32 fc_nlflags; struct nl_info fc_nlinfo; }; @@ -85,9 +84,6 @@ struct fib_info { int fib_nhs; #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_power; -#endif -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - u32 fib_mp_alg; #endif struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev @@ -103,10 +99,6 @@ struct fib_result { unsigned char nh_sel; unsigned char type; unsigned char scope; -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - __be32 network; - __be32 netmask; -#endif struct fib_info *fi; #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rule *r; @@ -145,14 +137,6 @@ struct fib_result_nl { #define FIB_RES_DEV(res) (FIB_RES_NH(res).nh_dev) #define FIB_RES_OIF(res) (FIB_RES_NH(res).nh_oif) -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED -#define FIB_RES_NETWORK(res) ((res).network) -#define FIB_RES_NETMASK(res) ((res).netmask) -#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ -#define FIB_RES_NETWORK(res) (0) -#define FIB_RES_NETMASK(res) (0) -#endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */ - struct fib_table { struct hlist_node tb_hlist; u32 tb_id; diff --git a/include/net/ip_mp_alg.h b/include/net/ip_mp_alg.h deleted file mode 100644 index 25b56571e54b..000000000000 --- a/include/net/ip_mp_alg.h +++ /dev/null @@ -1,96 +0,0 @@ -/* ip_mp_alg.h: IPV4 multipath algorithm support. - * - * Copyright (C) 2004, 2005 Einar Lueck - * Copyright (C) 2005 David S. Miller - */ - -#ifndef _NET_IP_MP_ALG_H -#define _NET_IP_MP_ALG_H - -#include -#include -#include - -struct fib_nh; - -struct ip_mp_alg_ops { - void (*mp_alg_select_route)(const struct flowi *flp, - struct rtable *rth, struct rtable **rp); - void (*mp_alg_flush)(void); - void (*mp_alg_set_nhinfo)(__be32 network, __be32 netmask, - unsigned char prefixlen, - const struct fib_nh *nh); - void (*mp_alg_remove)(struct rtable *rth); -}; - -extern int multipath_alg_register(struct ip_mp_alg_ops *, enum ip_mp_alg); -extern void multipath_alg_unregister(struct ip_mp_alg_ops *, enum ip_mp_alg); - -extern struct ip_mp_alg_ops *ip_mp_alg_table[]; - -static inline int multipath_select_route(const struct flowi *flp, - struct rtable *rth, - struct rtable **rp) -{ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg]; - - /* mp_alg_select_route _MUST_ be implemented */ - if (ops && (rth->u.dst.flags & DST_BALANCED)) { - ops->mp_alg_select_route(flp, rth, rp); - return 1; - } -#endif - return 0; -} - -static inline void multipath_flush(void) -{ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - int i; - - for (i = IP_MP_ALG_NONE; i <= IP_MP_ALG_MAX; i++) { - struct ip_mp_alg_ops *ops = ip_mp_alg_table[i]; - - if (ops && ops->mp_alg_flush) - ops->mp_alg_flush(); - } -#endif -} - -static inline void multipath_set_nhinfo(struct rtable *rth, - __be32 network, __be32 netmask, - unsigned char prefixlen, - const struct fib_nh *nh) -{ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg]; - - if (ops && ops->mp_alg_set_nhinfo) - ops->mp_alg_set_nhinfo(network, netmask, prefixlen, nh); -#endif -} - -static inline void multipath_remove(struct rtable *rth) -{ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg]; - - if (ops && ops->mp_alg_remove && - (rth->u.dst.flags & DST_BALANCED)) - ops->mp_alg_remove(rth); -#endif -} - -static inline int multipath_comparekeys(const struct flowi *flp1, - const struct flowi *flp2) -{ - return flp1->fl4_dst == flp2->fl4_dst && - flp1->fl4_src == flp2->fl4_src && - flp1->oif == flp2->oif && - flp1->mark == flp2->mark && - !((flp1->fl4_tos ^ flp2->fl4_tos) & - (IPTOS_RT_MASK | RTO_ONLINK)); -} - -#endif /* _NET_IP_MP_ALG_H */ diff --git a/include/net/route.h b/include/net/route.h index 749e4dfe5ff3..188b8936bb27 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -62,7 +62,6 @@ struct rtable unsigned rt_flags; __u16 rt_type; - __u16 rt_multipath_alg; __be32 rt_dst; /* Path destination */ __be32 rt_src; /* Path source */ -- cgit v1.2.3 From 05646c91109bfd129361d57dc5d98464ab6f6578 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Sun, 10 Jun 2007 17:25:24 -0700 Subject: [TIPC]: Optimize stream send routine to avoid fragmentation This patch enhances TIPC's stream socket send routine so that it avoids transmitting data in chunks that require fragmentation and reassembly, thereby improving performance at both the sending and receiving ends of the connection. The "maximum packet size" hint that records MTU info allows the socket to decide how big a chunk it should send; in the event that the hint has become stale, fragmentation may still occur, but the data will be passed correctly and the hint will be updated in time for the following send. Note: The 66060 byte pseudo-MTU used for intra-node connections requires the send routine to perform an additional check to ensure it does not exceed TIPC"s limit of 66000 bytes of user data per chunk. Signed-off-by: Allan Stephens Signed-off-by: Jon Paul Maloy Signed-off-by: David S. Miller --- include/net/tipc/tipc_port.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/tipc/tipc_port.h b/include/net/tipc/tipc_port.h index 333bba6dc522..cfc4ba46de8f 100644 --- a/include/net/tipc/tipc_port.h +++ b/include/net/tipc/tipc_port.h @@ -1,8 +1,8 @@ /* * include/net/tipc/tipc_port.h: Include file for privileged access to TIPC ports * - * Copyright (c) 1994-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 1994-2007, Ericsson AB + * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -55,6 +55,7 @@ * @conn_unacked: number of unacknowledged messages received from peer port * @published: non-zero if port has one or more associated names * @congested: non-zero if cannot send because of link or port congestion + * @max_pkt: maximum packet size "hint" used when building messages sent by port * @ref: unique reference to port in TIPC object registry * @phdr: preformatted message header used when sending messages */ @@ -68,6 +69,7 @@ struct tipc_port { u32 conn_unacked; int published; u32 congested; + u32 max_pkt; u32 ref; struct tipc_msg phdr; }; -- cgit v1.2.3 From 8de0a15483b357d0f0b821330ec84d1660cadc4e Mon Sep 17 00:00:00 2001 From: Ville Tervo Date: Wed, 11 Jul 2007 09:23:41 +0200 Subject: [Bluetooth] Keep rfcomm_dev on the list until it is freed This patch changes the RFCOMM TTY release process so that the TTY is kept on the list until it is really freed. A new device flag is used to keep track of released TTYs. Signed-off-by: Ville Tervo Signed-off-by: Marcel Holtmann --- include/net/bluetooth/rfcomm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 3c563f02907c..25aa575db807 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -323,6 +323,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc #define RFCOMM_RELEASE_ONHUP 1 #define RFCOMM_HANGUP_NOW 2 #define RFCOMM_TTY_ATTACHED 3 +#define RFCOMM_TTY_RELEASED 4 struct rfcomm_dev_req { s16 dev_id; -- cgit v1.2.3 From 8c7b7faaa630fef7f68d8728cee1cce398cc9697 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2007 22:08:12 -0700 Subject: [NET]: Kill eth_copy_and_sum(). It hasn't "summed" anything in over 7 years, and it's just a straight mempcy ala skb_copy_to_linear_data() so just get rid of it. Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 071c67abed86..f48eb89efd0f 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -40,12 +40,6 @@ extern int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh); extern struct net_device *alloc_etherdev(int sizeof_priv); -static inline void eth_copy_and_sum (struct sk_buff *dest, - const unsigned char *src, - int len, int base) -{ - memcpy (dest->data, src, len); -} /** * is_zero_ether_addr - Determine if give Ethernet address is all zeros. -- cgit v1.2.3 From 6472ce6096bf27d85a1f2580964a36f290bd60a9 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:03:21 -0700 Subject: [NET]: Mark struct net_device * argument to netdev_priv const Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3a70f553b28f..94cc77cd3aa3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -546,7 +546,7 @@ struct net_device #define NETDEV_ALIGN 32 #define NETDEV_ALIGN_CONST (NETDEV_ALIGN - 1) -static inline void *netdev_priv(struct net_device *dev) +static inline void *netdev_priv(const struct net_device *dev) { return (char *)dev + ((sizeof(struct net_device) + NETDEV_ALIGN_CONST) -- cgit v1.2.3 From 38f7b870d4a6a5d3ec21557e849620cb7d032965 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:03:51 -0700 Subject: [RTNETLINK]: Link creation API Add rtnetlink API for creating, changing and deleting software devices. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_link.h | 13 +++++++++++ include/linux/netdevice.h | 3 +++ include/net/rtnetlink.h | 58 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+) (limited to 'include') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 604c2434f71c..3144babd2357 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -76,6 +76,8 @@ enum #define IFLA_WEIGHT IFLA_WEIGHT IFLA_OPERSTATE, IFLA_LINKMODE, + IFLA_LINKINFO, +#define IFLA_LINKINFO IFLA_LINKINFO __IFLA_MAX }; @@ -140,4 +142,15 @@ struct ifla_cacheinfo __u32 retrans_time; }; +enum +{ + IFLA_INFO_UNSPEC, + IFLA_INFO_KIND, + IFLA_INFO_DATA, + IFLA_INFO_XSTATS, + __IFLA_INFO_MAX, +}; + +#define IFLA_INFO_MAX (__IFLA_INFO_MAX - 1) + #endif /* _LINUX_IF_LINK_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 94cc77cd3aa3..e7913ee5581c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -540,6 +540,9 @@ struct net_device struct device dev; /* space for optional statistics and wireless sysfs groups */ struct attribute_group *sysfs_groups[3]; + + /* rtnetlink link ops */ + const struct rtnl_link_ops *rtnl_link_ops; }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 3b3d4745618d..3861c05cdf0f 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -22,4 +22,62 @@ static inline int rtnl_msg_family(struct nlmsghdr *nlh) return AF_UNSPEC; } +/** + * struct rtnl_link_ops - rtnetlink link operations + * + * @list: Used internally + * @kind: Identifier + * @maxtype: Highest device specific netlink attribute number + * @policy: Netlink policy for device specific attribute validation + * @validate: Optional validation function for netlink/changelink parameters + * @priv_size: sizeof net_device private space + * @setup: net_device setup function + * @newlink: Function for configuring and registering a new device + * @changelink: Function for changing parameters of an existing device + * @dellink: Function to remove a device + * @get_size: Function to calculate required room for dumping device + * specific netlink attributes + * @fill_info: Function to dump device specific netlink attributes + * @get_xstats_size: Function to calculate required room for dumping devic + * specific statistics + * @fill_xstats: Function to dump device specific statistics + */ +struct rtnl_link_ops { + struct list_head list; + + const char *kind; + + size_t priv_size; + void (*setup)(struct net_device *dev); + + int maxtype; + const struct nla_policy *policy; + int (*validate)(struct nlattr *tb[], + struct nlattr *data[]); + + int (*newlink)(struct net_device *dev, + struct nlattr *tb[], + struct nlattr *data[]); + int (*changelink)(struct net_device *dev, + struct nlattr *tb[], + struct nlattr *data[]); + void (*dellink)(struct net_device *dev); + + size_t (*get_size)(const struct net_device *dev); + int (*fill_info)(struct sk_buff *skb, + const struct net_device *dev); + + size_t (*get_xstats_size)(const struct net_device *dev); + int (*fill_xstats)(struct sk_buff *skb, + const struct net_device *dev); +}; + +extern int __rtnl_link_register(struct rtnl_link_ops *ops); +extern void __rtnl_link_unregister(struct rtnl_link_ops *ops); + +extern int rtnl_link_register(struct rtnl_link_ops *ops); +extern void rtnl_link_unregister(struct rtnl_link_ops *ops); + +#define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind) + #endif -- cgit v1.2.3 From 734423cf38021966a5d3bd5f5c6aaecaf32fb4ac Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:07:07 -0700 Subject: [VLAN]: Use 32 bit value for skb->priority mapping skb->priority has only 32 bits and even VLAN uses 32 bit values in its API. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 81e9bc93569b..aeddb49193f9 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -99,7 +99,7 @@ static inline void vlan_group_set_device(struct vlan_group *vg, int vlan_id, } struct vlan_priority_tci_mapping { - unsigned long priority; + u32 priority; unsigned short vlan_qos; /* This should be shifted when first set, so we only do it * at provisioning time. * ((skb->priority << 13) & 0xE000) @@ -112,7 +112,7 @@ struct vlan_dev_info { /** This will be the mapping that correlates skb->priority to * 3 bits of VLAN QOS tags... */ - unsigned long ingress_priority_map[8]; + u32 ingress_priority_map[8]; struct vlan_priority_tci_mapping *egress_priority_map[16]; /* hash table */ unsigned short vlan_id; /* The VLAN Identifier for this interface. */ -- cgit v1.2.3 From b020cb488586f982f40eb257a32e92a4de710d65 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:07:22 -0700 Subject: [VLAN]: Keep track of number of QoS mappings Keep track of the number of configured ingress/egress QoS mappings to avoid iteration while calculating the netlink attribute size. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index aeddb49193f9..b46d4225f74e 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -112,7 +112,10 @@ struct vlan_dev_info { /** This will be the mapping that correlates skb->priority to * 3 bits of VLAN QOS tags... */ + unsigned int nr_ingress_mappings; u32 ingress_priority_map[8]; + + unsigned int nr_egress_mappings; struct vlan_priority_tci_mapping *egress_priority_map[16]; /* hash table */ unsigned short vlan_id; /* The VLAN Identifier for this interface. */ -- cgit v1.2.3 From a4bf3af4ac46802436d352ef409cee4fe80445b3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:07:37 -0700 Subject: [VLAN]: Introduce symbolic constants for flag values Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index b46d4225f74e..c7912876a210 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -398,6 +398,10 @@ enum vlan_ioctl_cmds { GET_VLAN_VID_CMD /* Get the VID of this VLAN (specified by name) */ }; +enum vlan_flags { + VLAN_FLAG_REORDER_HDR = 0x1, +}; + enum vlan_name_types { VLAN_NAME_TYPE_PLUS_VID, /* Name will look like: vlan0005 */ VLAN_NAME_TYPE_RAW_PLUS_VID, /* name will look like: eth1.0005 */ -- cgit v1.2.3 From 07b5b17e157b7018d0ca40ca0d1581a23096fb45 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:07:54 -0700 Subject: [VLAN]: Use rtnl_link API Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_link.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 3144babd2357..422084d18ce1 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -153,4 +153,38 @@ enum #define IFLA_INFO_MAX (__IFLA_INFO_MAX - 1) +/* VLAN section */ + +enum +{ + IFLA_VLAN_UNSPEC, + IFLA_VLAN_ID, + IFLA_VLAN_FLAGS, + IFLA_VLAN_EGRESS_QOS, + IFLA_VLAN_INGRESS_QOS, + __IFLA_VLAN_MAX, +}; + +#define IFLA_VLAN_MAX (__IFLA_VLAN_MAX - 1) + +struct ifla_vlan_flags { + __u32 flags; + __u32 mask; +}; + +enum +{ + IFLA_VLAN_QOS_UNSPEC, + IFLA_VLAN_QOS_MAPPING, + __IFLA_VLAN_QOS_MAX +}; + +#define IFLA_VLAN_QOS_MAX (__IFLA_VLAN_QOS_MAX - 1) + +struct ifla_vlan_qos_mapping +{ + __u32 from; + __u32 to; +}; + #endif /* _LINUX_IF_LINK_H */ -- cgit v1.2.3 From f1c91da44728fba24927e44056a56e507c11cf7b Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sat, 16 Jun 2007 12:38:51 -0300 Subject: [KTIME]: Introduce ktime_us_delta This provides a reusable time difference function which returns the difference in microseconds, as often used in the DCCP code. Commiter note: renamed ktime_delta to ktime_us_delta and put it in ktime.h. Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/ktime.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 2b139f66027f..923665958f90 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -279,6 +279,11 @@ static inline s64 ktime_to_us(const ktime_t kt) return (s64) tv.tv_sec * USEC_PER_SEC + tv.tv_usec; } +static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier) +{ + return ktime_to_us(ktime_sub(later, earlier)); +} + /* * The resolution of the clocks. The resolution value is returned in * the clock_getres() system call to give application programmers an -- cgit v1.2.3 From 1e180f726a58089d15637b5495fecbad8c50c833 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 16 Jun 2007 12:39:38 -0300 Subject: [KTIME]: Introduce ktime_add_us Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/ktime.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 923665958f90..dae7143644fe 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -284,6 +284,11 @@ static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier) return ktime_to_us(ktime_sub(later, earlier)); } +static inline ktime_t ktime_add_us(const ktime_t kt, const u64 usec) +{ + return ktime_add_ns(kt, usec * 1000); +} + /* * The resolution of the clocks. The resolution value is returned in * the clock_getres() system call to give application programmers an -- cgit v1.2.3 From 334a8132d9950f769f390f0f35c233d099688e7a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 25 Jun 2007 04:35:20 -0700 Subject: [SKBUFF]: Keep track of writable header len of headerless clones Currently NAT (and others) that want to modify cloned skbs copy them, even if in the vast majority of cases its not necessary because the skb is a clone made by TCP and the portion NAT wants to modify is actually writable because TCP release the header reference before cloning. The problem is that there is no clean way for NAT to find out how long the writable header area is, so this patch introduces skb->hdr_len to hold this length. When a headerless skb is cloned skb->hdr_len is set to the current headroom, for regular clones it is copied from the original. A new function skb_clone_writable(skb, len) returns whether the skb is writable up to len bytes from skb->data. To avoid enlarging the skb the mac_len field is reduced to 16 bit and the new hdr_len field is put in the remaining 16 bit. I've done a few rough benchmarks of NAT (not with this exact patch, but a very similar one). As expected it saves huge amounts of system time in case of sendfile, bringing it down to basically the same amount as without NAT, with sendmsg it only helps on loopback, probably because of the large MTU. Transmit a 1GB file using sendfile/sendmsg over eth0/lo with and without NAT: - sendfile eth0, no NAT: sys 0m0.388s - sendfile eth0, NAT: sys 0m1.835s - sendfile eth0: NAT + path: sys 0m0.370s (~ -80%) - sendfile lo, no NAT: sys 0m0.258s - sendfile lo, NAT: sys 0m2.609s - sendfile lo, NAT + patch: sys 0m0.260s (~ -90%) - sendmsg eth0, no NAT: sys 0m2.508s - sendmsg eth0, NAT: sys 0m2.539s - sendmsg eth0, NAT + patch: sys 0m2.445s (no change) - sendmsg lo, no NAT: sys 0m2.151s - sendmsg lo, NAT: sys 0m3.557s - sendmsg lo, NAT + patch: sys 0m2.159s (~ -40%) I expect other users can see a similar performance improvement, packet mangling iptables targets, ipip and ip_gre come to mind .. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/skbuff.h | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6f0b2f7d0010..881fe80f01d0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -147,8 +147,8 @@ struct skb_shared_info { /* We divide dataref into two halves. The higher 16 bits hold references * to the payload part of skb->data. The lower 16 bits hold references to - * the entire skb->data. It is up to the users of the skb to agree on - * where the payload starts. + * the entire skb->data. A clone of a headerless skb holds the length of + * the header in skb->hdr_len. * * All users must obey the rule that the skb->data reference count must be * greater than or equal to the payload reference count. @@ -206,6 +206,7 @@ typedef unsigned char *sk_buff_data_t; * @len: Length of actual data * @data_len: Data length * @mac_len: Length of link layer header + * @hdr_len: writable header length of cloned skb * @csum: Checksum (must include start/offset pair) * @csum_start: Offset from skb->head where checksumming should start * @csum_offset: Offset from csum_start where checksum should be stored @@ -260,8 +261,9 @@ struct sk_buff { char cb[48]; unsigned int len, - data_len, - mac_len; + data_len; + __u16 mac_len, + hdr_len; union { __wsum csum; struct { @@ -1321,6 +1323,20 @@ static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev, return __netdev_alloc_skb(dev, length, GFP_ATOMIC); } +/** + * skb_clone_writable - is the header of a clone writable + * @skb: buffer to check + * @len: length up to which to write + * + * Returns true if modifying the header part of the cloned buffer + * does not requires the data to be copied. + */ +static inline int skb_clone_writable(struct sk_buff *skb, int len) +{ + return !skb_header_cloned(skb) && + skb_headroom(skb) + len <= skb->hdr_len; +} + /** * skb_cow - copy header of skb when it is required * @skb: buffer to cow -- cgit v1.2.3 From 1092cb219774a82b1f16781aec7b8d4ec727c981 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 25 Jun 2007 13:49:35 -0700 Subject: [NETLINK]: attr: add nested compat attribute type Add a nested compat attribute type that can be used to convert attributes that contain a structure to nested attributes in a backwards compatible way. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netlink.h | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index 7b510a9edb91..d7b824be5422 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -118,6 +118,9 @@ * Nested Attributes Construction: * nla_nest_start(skb, type) start a nested attribute * nla_nest_end(skb, nla) finalize a nested attribute + * nla_nest_compat_start(skb, type, start a nested compat attribute + * len, data) + * nla_nest_compat_end(skb, type) finalize a nested compat attribute * nla_nest_cancel(skb, nla) cancel nested attribute construction * * Attribute Length Calculations: @@ -152,6 +155,7 @@ * nla_find_nested() find attribute in nested attributes * nla_parse() parse and validate stream of attrs * nla_parse_nested() parse nested attribuets + * nla_parse_nested_compat() parse nested compat attributes * nla_for_each_attr() loop over all attributes * nla_for_each_nested() loop over the nested attributes *========================================================================= @@ -170,6 +174,7 @@ enum { NLA_FLAG, NLA_MSECS, NLA_NESTED, + NLA_NESTED_COMPAT, NLA_NUL_STRING, NLA_BINARY, __NLA_TYPE_MAX, @@ -190,6 +195,7 @@ enum { * NLA_NUL_STRING Maximum length of string (excluding NUL) * NLA_FLAG Unused * NLA_BINARY Maximum length of attribute payload + * NLA_NESTED_COMPAT Exact length of structure payload * All other Exact length of attribute payload * * Example: @@ -733,6 +739,39 @@ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype, { return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy); } + +/** + * nla_parse_nested_compat - parse nested compat attributes + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @nla: attribute containing the nested attributes + * @data: pointer to point to contained structure + * @len: length of contained structure + * @policy: validation policy + * + * Parse a nested compat attribute. The compat attribute contains a structure + * and optionally a set of nested attributes. On success the data pointer + * points to the nested data and tb contains the parsed attributes + * (see nla_parse). + */ +static inline int __nla_parse_nested_compat(struct nlattr *tb[], int maxtype, + struct nlattr *nla, + const struct nla_policy *policy, + int len) +{ + if (nla_len(nla) < len) + return -1; + if (nla_len(nla) >= NLA_ALIGN(len) + sizeof(struct nlattr)) + return nla_parse_nested(tb, maxtype, + nla_data(nla) + NLA_ALIGN(len), + policy); + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + return 0; +} + +#define nla_parse_nested_compat(tb, maxtype, nla, policy, data, len) \ +({ data = nla_len(nla) >= len ? nla_data(nla) : NULL; \ + __nla_parse_nested_compat(tb, maxtype, nla, policy, len); }) /** * nla_put_u8 - Add a u16 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to @@ -964,6 +1003,51 @@ static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start) return skb->len; } +/** + * nla_nest_compat_start - Start a new level of nested compat attributes + * @skb: socket buffer to add attributes to + * @attrtype: attribute type of container + * @attrlen: length of structure + * @data: pointer to structure + * + * Start a nested compat attribute that contains both a structure and + * a set of nested attributes. + * + * Returns the container attribute + */ +static inline struct nlattr *nla_nest_compat_start(struct sk_buff *skb, + int attrtype, int attrlen, + const void *data) +{ + struct nlattr *start = (struct nlattr *)skb_tail_pointer(skb); + + if (nla_put(skb, attrtype, attrlen, data) < 0) + return NULL; + if (nla_nest_start(skb, attrtype) == NULL) { + nlmsg_trim(skb, start); + return NULL; + } + return start; +} + +/** + * nla_nest_compat_end - Finalize nesting of compat attributes + * @skb: socket buffer the attribtues are stored in + * @start: container attribute + * + * Corrects the container attribute header to include the all + * appeneded attributes. + * + * Returns the total data length of the skb. + */ +static inline int nla_nest_compat_end(struct sk_buff *skb, struct nlattr *start) +{ + struct nlattr *nest = (void *)start + NLMSG_ALIGN(start->nla_len); + + start->nla_len = skb_tail_pointer(skb) - (unsigned char *)start; + return nla_nest_end(skb, nest); +} + /** * nla_nest_cancel - Cancel nesting of attributes * @skb: socket buffer the message is stored in -- cgit v1.2.3 From afdc3238ec948531205f5c5f77d2de7bae519c71 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 25 Jun 2007 14:30:16 -0700 Subject: [RTNETLINK]: Add nested compat attribute Add a nested compat attribute type that can be used to convert attributes that contain a structure to nested attributes in a backwards compatible way. The attribute looks like this: struct { [ compat contents ] struct rtattr { .rta_len = total size, .rta_type = type, } rta; struct old_structure struct; [ nested top-level attribute ] struct rtattr { .rta_len = nest size, .rta_type = type, } nest_attr; [ optional 0 .. n nested attributes ] struct rtattr { .rta_len = private attribute len, .rta_type = private attribute typ, } nested_attr; struct nested_data data; }; Since both userspace and kernel deal correctly with attributes that are larger than expected old versions will just parse the compat part and ignore the rest. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 612785848532..6731e7f4cc0f 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -570,6 +570,8 @@ static __inline__ int rtattr_strcmp(const struct rtattr *rta, const char *str) } extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len); +extern int rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, + struct rtattr *rta, void **data, int len); #define rtattr_parse_nested(tb, max, rta) \ rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta))) @@ -638,6 +640,18 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi ({ (start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \ (skb)->len; }) +#define RTA_NEST_COMPAT(skb, type, attrlen, data) \ +({ struct rtattr *__start = (struct rtattr *)skb_tail_pointer(skb); \ + RTA_PUT(skb, type, attrlen, data); \ + RTA_NEST(skb, type); \ + __start; }) + +#define RTA_NEST_COMPAT_END(skb, start) \ +({ struct rtattr *__nest = (void *)(start) + NLMSG_ALIGN((start)->rta_len); \ + (start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \ + RTA_NEST_END(skb, __nest); \ + (skb)->len; }) + #define RTA_NEST_CANCEL(skb, start) \ ({ if (start) \ skb_trim(skb, (unsigned char *) (start) - (skb)->data); \ -- cgit v1.2.3 From 2371baa4bdab3268b32009926f75e7a5d3a41506 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 26 Jun 2007 03:23:44 -0700 Subject: [RTNETLINK]: Fix rtnetlink compat attribute patch Sent the wrong patch previously. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 6731e7f4cc0f..c91476ce314a 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -570,12 +570,16 @@ static __inline__ int rtattr_strcmp(const struct rtattr *rta, const char *str) } extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len); -extern int rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, - struct rtattr *rta, void **data, int len); +extern int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, + struct rtattr *rta, int len); #define rtattr_parse_nested(tb, max, rta) \ rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta))) +#define rtattr_parse_nested_compat(tb, max, rta, data, len) \ +({ data = RTA_PAYLOAD(rta) >= len ? RTA_DATA(rta) : NULL; \ + __rtattr_parse_nested_compat(tb, max, rta, len); }) + extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo); extern int rtnl_unicast(struct sk_buff *skb, u32 pid); extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, -- cgit v1.2.3 From 136ebf08b46f839e2dc9db34322b654e5d9b9936 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Tue, 26 Jun 2007 23:51:41 -0700 Subject: [IPV6] MIP6: Kill unnecessary ifdefs. Kill unnecessary CONFIG_IPV6_MIP6. o It is redundant for RAW socket to keep MH out with the config then it can handle any protocol. o Clean-up at AH. Signed-off-by: Masahide NAKAMURA Signed-off-by: David S. Miller --- include/net/flow.h | 4 ---- include/net/xfrm.h | 2 -- 2 files changed, 6 deletions(-) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index f3cc1f812619..af59fa5cc1f8 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -67,20 +67,16 @@ struct flowi { __be32 spi; -#ifdef CONFIG_IPV6_MIP6 struct { __u8 type; } mht; -#endif } uli_u; #define fl_ip_sport uli_u.ports.sport #define fl_ip_dport uli_u.ports.dport #define fl_icmp_type uli_u.icmpt.type #define fl_icmp_code uli_u.icmpt.code #define fl_ipsec_spi uli_u.spi -#ifdef CONFIG_IPV6_MIP6 #define fl_mh_type uli_u.mht.type -#endif __u32 secid; /* used by xfrm; see secid.txt */ } __attribute__((__aligned__(BITS_PER_LONG/8))); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 311f25af5e1a..7720c1182bb4 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -509,11 +509,9 @@ __be16 xfrm_flowi_sport(struct flowi *fl) case IPPROTO_ICMPV6: port = htons(fl->fl_icmp_type); break; -#ifdef CONFIG_IPV6_MIP6 case IPPROTO_MH: port = htons(fl->fl_mh_type); break; -#endif default: port = 0; /*XXX*/ } -- cgit v1.2.3 From 59fbb3a61e02deaeaa4fb50792217921f3002d64 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Tue, 26 Jun 2007 23:56:32 -0700 Subject: [IPV6] MIP6: Loadable module support for MIPv6. This patch makes MIPv6 loadable module named "mip6". Here is a modprobe.conf(5) example to load it automatically when user application uses XFRM state for MIPv6: alias xfrm-type-10-43 mip6 alias xfrm-type-10-60 mip6 Some MIPv6 feature is not included by this modular, however, it should not be affected to other features like either IPsec or IPv6 with and without the patch. We may discuss XFRM, MH (RAW socket) and ancillary data/sockopt separately for future work. Loadable features: * MH receiving check (to send ICMP error back) * RO header parsing and building (i.e. RH2 and HAO in DSTOPTS) * XFRM policy/state database handling for RO These are NOT covered as loadable: * Home Address flags and its rule on source address selection * XFRM sub policy (depends on its own kernel option) * XFRM functions to receive RO as IPv6 extension header * MH sending/receiving through raw socket if user application opens it (since raw socket allows to do so) * RH2 sending as ancillary data * RH2 operation with setsockopt(2) Signed-off-by: Masahide NAKAMURA Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 +- include/net/addrconf.h | 2 +- include/net/mip6.h | 4 ---- include/net/rawv6.h | 9 +++++++++ 4 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 648bd1f0912d..213b63be3c8f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -247,7 +247,7 @@ struct inet6_skb_parm { __u16 lastopt; __u32 nhoff; __u16 flags; -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) __u16 dsthao; #endif diff --git a/include/net/addrconf.h b/include/net/addrconf.h index f3531d0bcd05..33b593e17441 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -61,7 +61,7 @@ extern int addrconf_set_dstaddr(void __user *arg); extern int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict); -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) extern int ipv6_chk_home_addr(struct in6_addr *addr); #endif extern struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, diff --git a/include/net/mip6.h b/include/net/mip6.h index 68263c6d9996..63272610a24a 100644 --- a/include/net/mip6.h +++ b/include/net/mip6.h @@ -54,8 +54,4 @@ struct ip6_mh { #define IP6_MH_TYPE_BERROR 7 /* Binding Error */ #define IP6_MH_TYPE_MAX IP6_MH_TYPE_BERROR -extern int mip6_init(void); -extern void mip6_fini(void); -extern int mip6_mh_filter(struct sock *sk, struct sk_buff *skb); - #endif diff --git a/include/net/rawv6.h b/include/net/rawv6.h index af8960878ef4..a5819891d525 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -3,6 +3,8 @@ #ifdef __KERNEL__ +#include + #define RAWV6_HTABLE_SIZE MAX_INET_PROTOS extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; extern rwlock_t raw_v6_lock; @@ -23,6 +25,13 @@ extern void rawv6_err(struct sock *sk, int type, int code, int offset, __be32 info); +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +int rawv6_mh_filter_register(int (*filter)(struct sock *sock, + struct sk_buff *skb)); +int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock, + struct sk_buff *skb)); +#endif + #endif #endif -- cgit v1.2.3 From d3d6dd3adaaad71eae20902ed81808a66a40a5b9 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Tue, 26 Jun 2007 23:57:49 -0700 Subject: [XFRM]: Add module alias for transformation type. It is clean-up for XFRM type modules and adds aliases with its protocol: ESP, AH, IPCOMP, IPIP and IPv6 for IPsec ROUTING and DSTOPTS for MIPv6 It is almost the same thing as XFRM mode alias, but it is added new defines XFRM_PROTO_XXX for preprocessing since some protocols are defined as enum. Signed-off-by: Masahide NAKAMURA Acked-by: Ingo Oeser Signed-off-by: David S. Miller --- include/net/xfrm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 7720c1182bb4..ee3827f053d7 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -19,9 +19,19 @@ #include #include +#define XFRM_PROTO_ESP 50 +#define XFRM_PROTO_AH 51 +#define XFRM_PROTO_COMP 108 +#define XFRM_PROTO_IPIP 4 +#define XFRM_PROTO_IPV6 41 +#define XFRM_PROTO_ROUTING IPPROTO_ROUTING +#define XFRM_PROTO_DSTOPTS IPPROTO_DSTOPTS + #define XFRM_ALIGN8(len) (((len) + 7) & ~7) #define MODULE_ALIAS_XFRM_MODE(family, encap) \ MODULE_ALIAS("xfrm-mode-" __stringify(family) "-" __stringify(encap)) +#define MODULE_ALIAS_XFRM_TYPE(family, proto) \ + MODULE_ALIAS("xfrm-type-" __stringify(family) "-" __stringify(proto)) extern struct sock *xfrm_nl; extern u32 sysctl_xfrm_aevent_etime; -- cgit v1.2.3 From d212f87b068c9d72065ef579d85b5ee6b8b59381 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 27 Jun 2007 00:47:37 -0700 Subject: [NET]: IPV6 checksum offloading in network devices The existing model for checksum offload does not correctly handle devices that can offload IPV4 and IPV6 only. The NETIF_F_HW_CSUM flag implies device can do any arbitrary protocol. This patch: * adds NETIF_F_IPV6_CSUM for those devices * fixes bnx2 and tg3 devices that need it * add NETIF_F_IPV6_CSUM to ipv6 output (incl GSO) * fixes assumptions about NETIF_F_ALL_CSUM in nat * adjusts bridge union of checksumming computation Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e7913ee5581c..7a8f22fb4eee 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -314,9 +314,10 @@ struct net_device /* Net device features */ unsigned long features; #define NETIF_F_SG 1 /* Scatter/gather IO. */ -#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */ +#define NETIF_F_IP_CSUM 2 /* Can checksum TCP/UDP over IPv4. */ #define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ #define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */ +#define NETIF_F_IPV6_CSUM 16 /* Can checksum TCP/UDP over IPV6 */ #define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */ #define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */ #define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */ @@ -338,8 +339,11 @@ struct net_device /* List of features with software fallbacks. */ #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6) + #define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM) -#define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM) +#define NETIF_F_V4_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM) +#define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM) +#define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) struct net_device *next_sched; -- cgit v1.2.3 From bf742482d7a647c5c6f03f78eb35a862e159ecf5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 27 Jun 2007 01:26:19 -0700 Subject: [NET]: dev: introduce generic net_device address lists Introduce struct dev_addr_list and list maintenance functions based on dev_mc_list and the related functions. This will be used by follow-up patches for both multicast and secondary unicast addresses. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7a8f22fb4eee..aa389c77aa3e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -177,6 +177,14 @@ struct netif_rx_stats DECLARE_PER_CPU(struct netif_rx_stats, netdev_rx_stat); +struct dev_addr_list +{ + struct dev_addr_list *next; + u8 da_addr[MAX_ADDR_LEN]; + u8 da_addrlen; + int da_users; + int da_gusers; +}; /* * We tag multicasts with these structures. @@ -1008,6 +1016,9 @@ extern void dev_mc_upload(struct net_device *dev); extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all); extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly); extern void dev_mc_discard(struct net_device *dev); +extern int __dev_addr_delete(struct dev_addr_list **list, void *addr, int alen, int all); +extern int __dev_addr_add(struct dev_addr_list **list, void *addr, int alen, int newonly); +extern void __dev_addr_discard(struct dev_addr_list **list); extern void dev_set_promiscuity(struct net_device *dev, int inc); extern void dev_set_allmulti(struct net_device *dev, int inc); extern void netdev_state_change(struct net_device *dev); -- cgit v1.2.3 From 3fba5a8b1e3df2384b90493538161e83cf15dd5f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 27 Jun 2007 01:26:58 -0700 Subject: [NET]: dev_mcast: switch to generic net_device address lists Use generic net_device address lists for multicast list handling. Some defines are used to keep drivers working. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index aa389c77aa3e..9e114e77e54d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -189,15 +189,12 @@ struct dev_addr_list /* * We tag multicasts with these structures. */ - -struct dev_mc_list -{ - struct dev_mc_list *next; - __u8 dmi_addr[MAX_ADDR_LEN]; - unsigned char dmi_addrlen; - int dmi_users; - int dmi_gusers; -}; + +#define dev_mc_list dev_addr_list +#define dmi_addr da_addr +#define dmi_addrlen da_addrlen +#define dmi_users da_users +#define dmi_gusers da_gusers struct hh_cache { @@ -400,7 +397,7 @@ struct net_device unsigned char addr_len; /* hardware address length */ unsigned short dev_id; /* for shared network cards */ - struct dev_mc_list *mc_list; /* Multicast mac addresses */ + struct dev_addr_list *mc_list; /* Multicast mac addresses */ int mc_count; /* Number of installed mcasts */ int promiscuity; int allmulti; -- cgit v1.2.3 From 4417da668c0021903464f92db278ddae348e0299 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 27 Jun 2007 01:28:10 -0700 Subject: [NET]: dev: secondary unicast address support Add support for configuring secondary unicast addresses on network devices. To support this devices capable of filtering multiple unicast addresses need to change their set_multicast_list function to configure unicast filters as well and assign it to dev->set_rx_mode instead of dev->set_multicast_list. Other devices are put into promiscous mode when secondary unicast addresses are present. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9e114e77e54d..2c0cc19edfb2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -397,6 +397,9 @@ struct net_device unsigned char addr_len; /* hardware address length */ unsigned short dev_id; /* for shared network cards */ + struct dev_addr_list *uc_list; /* Secondary unicast mac addresses */ + int uc_count; /* Number of installed ucasts */ + int uc_promisc; struct dev_addr_list *mc_list; /* Multicast mac addresses */ int mc_count; /* Number of installed mcasts */ int promiscuity; @@ -502,6 +505,8 @@ struct net_device void *saddr, unsigned len); int (*rebuild_header)(struct sk_buff *skb); +#define HAVE_SET_RX_MODE + void (*set_rx_mode)(struct net_device *dev); #define HAVE_MULTICAST void (*set_multicast_list)(struct net_device *dev); #define HAVE_SET_MAC_ADDR @@ -1008,8 +1013,11 @@ extern struct net_device *alloc_netdev(int sizeof_priv, const char *name, void (*setup)(struct net_device *)); extern int register_netdev(struct net_device *dev); extern void unregister_netdev(struct net_device *dev); -/* Functions used for multicast support */ -extern void dev_mc_upload(struct net_device *dev); +/* Functions used for secondary unicast and multicast support */ +extern void dev_set_rx_mode(struct net_device *dev); +extern void __dev_set_rx_mode(struct net_device *dev); +extern int dev_unicast_delete(struct net_device *dev, void *addr, int alen); +extern int dev_unicast_add(struct net_device *dev, void *addr, int alen); extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all); extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly); extern void dev_mc_discard(struct net_device *dev); -- cgit v1.2.3 From 342f0234c71b40da785dd6a7ce1dd481ecbfdb81 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 27 Jun 2007 15:37:46 -0700 Subject: [UDP]: Introduce UDP encapsulation type for L2TP This patch adds a new UDP_ENCAP_L2TPINUDP encapsulation type for UDP sockets. When a UDP socket's encap_type is UDP_ENCAP_L2TPINUDP, the skb is delivered to a function pointed to by the udp_sock's encap_rcv funcptr. If the skb isn't wanted by L2TP, it returns >0, which causes it to be passed through to UDP. Include padding to put the new encap_rcv field on a 4-byte boundary. Previously, the only user of UDP encap sockets was ESP, so when CONFIG_XFRM was not defined, some of the encap code was compiled out. This patch changes that. As a result, udp_encap_rcv() will now do a little more work when CONFIG_XFRM is not defined. Signed-off-by: James Chapman Signed-off-by: David S. Miller --- include/linux/udp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/udp.h b/include/linux/udp.h index 6de445c31a64..8ec703f462da 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -42,6 +42,7 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb) /* UDP encapsulation types */ #define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ #define UDP_ENCAP_ESPINUDP 2 /* draft-ietf-ipsec-udp-encaps-06 */ +#define UDP_ENCAP_L2TPINUDP 3 /* rfc2661 */ #ifdef __KERNEL__ #include @@ -70,6 +71,11 @@ struct udp_sock { #define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ #define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ + __u8 unused[3]; + /* + * For encapsulation sockets. + */ + int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); }; static inline struct udp_sock *udp_sk(const struct sock *sk) -- cgit v1.2.3 From cf14a4d06742d59ecb2d837a3f53bb24d1ff9acb Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 27 Jun 2007 15:43:43 -0700 Subject: [L2TP]: Changes to existing ppp and socket kernel headers for L2TP Add struct sockaddr_pppol2tp to carry L2TP-specific address information for the PPPoX (PPPoL2TP) socket. Unfortunately we can't use the union inside struct sockaddr_pppox because the L2TP-specific data is larger than the current size of the union and we must preserve the size of struct sockaddr_pppox for binary compatibility. Also add a PPPIOCGL2TPSTATS ioctl to allow userspace to obtain L2TP counters and state from the kernel. Add new if_pppol2tp.h header. [ Modified to use aligned_u64 in statistics structure -DaveM ] Signed-off-by: James Chapman Signed-off-by: David S. Miller --- include/linux/Kbuild | 1 + include/linux/if_ppp.h | 16 +++++++++++ include/linux/if_pppol2tp.h | 69 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/if_pppox.h | 16 +++++++++-- include/linux/socket.h | 1 + 5 files changed, 101 insertions(+), 2 deletions(-) create mode 100644 include/linux/if_pppol2tp.h (limited to 'include') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index d94451682761..127d2d192b5a 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -225,6 +225,7 @@ unifdef-y += if_fddi.h unifdef-y += if_frad.h unifdef-y += if_ltalk.h unifdef-y += if_link.h +unifdef-y += if_pppol2tp.h unifdef-y += if_pppox.h unifdef-y += if_shaper.h unifdef-y += if_tr.h diff --git a/include/linux/if_ppp.h b/include/linux/if_ppp.h index 768372f07caa..0f2f70d4e48c 100644 --- a/include/linux/if_ppp.h +++ b/include/linux/if_ppp.h @@ -110,6 +110,21 @@ struct ifpppcstatsreq { struct ppp_comp_stats stats; }; +/* For PPPIOCGL2TPSTATS */ +struct pppol2tp_ioc_stats { + __u16 tunnel_id; /* redundant */ + __u16 session_id; /* if zero, get tunnel stats */ + __u32 using_ipsec:1; /* valid only for session_id == 0 */ + aligned_u64 tx_packets; + aligned_u64 tx_bytes; + aligned_u64 tx_errors; + aligned_u64 rx_packets; + aligned_u64 rx_bytes; + aligned_u64 rx_seq_discards; + aligned_u64 rx_oos_packets; + aligned_u64 rx_errors; +}; + #define ifr__name b.ifr_ifrn.ifrn_name #define stats_ptr b.ifr_ifru.ifru_data @@ -146,6 +161,7 @@ struct ifpppcstatsreq { #define PPPIOCDISCONN _IO('t', 57) /* disconnect channel */ #define PPPIOCATTCHAN _IOW('t', 56, int) /* attach to ppp channel */ #define PPPIOCGCHAN _IOR('t', 55, int) /* get ppp channel number */ +#define PPPIOCGL2TPSTATS _IOR('t', 54, struct pppol2tp_ioc_stats) #define SIOCGPPPSTATS (SIOCDEVPRIVATE + 0) #define SIOCGPPPVER (SIOCDEVPRIVATE + 1) /* NEVER change this!! */ diff --git a/include/linux/if_pppol2tp.h b/include/linux/if_pppol2tp.h new file mode 100644 index 000000000000..516203b6fdeb --- /dev/null +++ b/include/linux/if_pppol2tp.h @@ -0,0 +1,69 @@ +/*************************************************************************** + * Linux PPP over L2TP (PPPoL2TP) Socket Implementation (RFC 2661) + * + * This file supplies definitions required by the PPP over L2TP driver + * (pppol2tp.c). All version information wrt this file is located in pppol2tp.c + * + * License: + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#ifndef __LINUX_IF_PPPOL2TP_H +#define __LINUX_IF_PPPOL2TP_H + +#include + +#ifdef __KERNEL__ +#include +#endif + +/* Structure used to connect() the socket to a particular tunnel UDP + * socket. + */ +struct pppol2tp_addr +{ + pid_t pid; /* pid that owns the fd. + * 0 => current */ + int fd; /* FD of UDP socket to use */ + + struct sockaddr_in addr; /* IP address and port to send to */ + + __be16 s_tunnel, s_session; /* For matching incoming packets */ + __be16 d_tunnel, d_session; /* For sending outgoing packets */ +}; + +/* Socket options: + * DEBUG - bitmask of debug message categories + * SENDSEQ - 0 => don't send packets with sequence numbers + * 1 => send packets with sequence numbers + * RECVSEQ - 0 => receive packet sequence numbers are optional + * 1 => drop receive packets without sequence numbers + * LNSMODE - 0 => act as LAC. + * 1 => act as LNS. + * REORDERTO - reorder timeout (in millisecs). If 0, don't try to reorder. + */ +enum { + PPPOL2TP_SO_DEBUG = 1, + PPPOL2TP_SO_RECVSEQ = 2, + PPPOL2TP_SO_SENDSEQ = 3, + PPPOL2TP_SO_LNSMODE = 4, + PPPOL2TP_SO_REORDERTO = 5, +}; + +/* Debug message categories for the DEBUG socket option */ +enum { + PPPOL2TP_MSG_DEBUG = (1 << 0), /* verbose debug (if + * compiled in) */ + PPPOL2TP_MSG_CONTROL = (1 << 1), /* userspace - kernel + * interface */ + PPPOL2TP_MSG_SEQ = (1 << 2), /* sequence numbers */ + PPPOL2TP_MSG_DATA = (1 << 3), /* data packets */ +}; + + + +#endif diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 6f987be60fe2..25652545ba6e 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -27,6 +27,7 @@ #include #include #endif /* __KERNEL__ */ +#include /* For user-space programs to pick up these definitions * which they wouldn't get otherwise without defining __KERNEL__ @@ -50,8 +51,9 @@ struct pppoe_addr{ * Protocols supported by AF_PPPOX */ #define PX_PROTO_OE 0 /* Currently just PPPoE */ -#define PX_MAX_PROTO 1 - +#define PX_PROTO_OL2TP 1 /* Now L2TP also */ +#define PX_MAX_PROTO 2 + struct sockaddr_pppox { sa_family_t sa_family; /* address family, AF_PPPOX */ unsigned int sa_protocol; /* protocol identifier */ @@ -60,6 +62,16 @@ struct sockaddr_pppox { }sa_addr; }__attribute__ ((packed)); +/* The use of the above union isn't viable because the size of this + * struct must stay fixed over time -- applications use sizeof(struct + * sockaddr_pppox) to fill it. We use a protocol specific sockaddr + * type instead. + */ +struct sockaddr_pppol2tp { + sa_family_t sa_family; /* address family, AF_PPPOX */ + unsigned int sa_protocol; /* protocol identifier */ + struct pppol2tp_addr pppol2tp; +}__attribute__ ((packed)); /********************************************************************* * diff --git a/include/linux/socket.h b/include/linux/socket.h index 6e7c9483a6a6..fe195c97a89d 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -287,6 +287,7 @@ struct ucred { #define SOL_NETLINK 270 #define SOL_TIPC 271 #define SOL_RXRPC 272 +#define SOL_PPPOL2TP 273 /* IPX options */ #define IPX_TYPE 1 -- cgit v1.2.3 From f25f4e44808f0f6c9875d94ef1c41ef86c288eb2 Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Fri, 6 Jul 2007 13:36:20 -0700 Subject: [CORE] Stack changes to add multiqueue hardware support API Add the multiqueue hardware device support API to the core network stack. Allow drivers to allocate multiple queues and manage them at the netdev level if they choose to do so. Added a new field to sk_buff, namely queue_mapping, for drivers to know which tx_ring to select based on OS classification of the flow. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 3 +- include/linux/netdevice.h | 80 ++++++++++++++++++++++++++++++++++++++++++--- include/linux/skbuff.h | 25 ++++++++++++-- 3 files changed, 99 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index f48eb89efd0f..6cdb97365e47 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -39,7 +39,8 @@ extern void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev extern int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh); -extern struct net_device *alloc_etherdev(int sizeof_priv); +extern struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count); +#define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1) /** * is_zero_ether_addr - Determine if give Ethernet address is all zeros. diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2c0cc19edfb2..9817821729c4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -108,6 +108,14 @@ struct wireless_dev; #define MAX_HEADER (LL_MAX_HEADER + 48) #endif +struct net_device_subqueue +{ + /* Give a control state for each queue. This struct may contain + * per-queue locks in the future. + */ + unsigned long state; +}; + /* * Network device statistics. Akin to the 2.0 ether stats but * with byte counters. @@ -331,6 +339,7 @@ struct net_device #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ #define NETIF_F_GSO 2048 /* Enable software GSO. */ #define NETIF_F_LLTX 4096 /* LockLess TX */ +#define NETIF_F_MULTI_QUEUE 16384 /* Has multiple TX/RX queues */ /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 @@ -557,6 +566,10 @@ struct net_device /* rtnetlink link ops */ const struct rtnl_link_ops *rtnl_link_ops; + + /* The TX queue control structures */ + unsigned int egress_subqueue_count; + struct net_device_subqueue egress_subqueue[0]; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -565,9 +578,7 @@ struct net_device static inline void *netdev_priv(const struct net_device *dev) { - return (char *)dev + ((sizeof(struct net_device) - + NETDEV_ALIGN_CONST) - & ~NETDEV_ALIGN_CONST); + return dev->priv; } #define SET_MODULE_OWNER(dev) do { } while (0) @@ -719,6 +730,62 @@ static inline int netif_running(const struct net_device *dev) return test_bit(__LINK_STATE_START, &dev->state); } +/* + * Routines to manage the subqueues on a device. We only need start + * stop, and a check if it's stopped. All other device management is + * done at the overall netdevice level. + * Also test the device if we're multiqueue. + */ +static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + clear_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state); +#endif +} + +static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE +#ifdef CONFIG_NETPOLL_TRAP + if (netpoll_trap()) + return; +#endif + set_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state); +#endif +} + +static inline int netif_subqueue_stopped(const struct net_device *dev, + u16 queue_index) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + return test_bit(__LINK_STATE_XOFF, + &dev->egress_subqueue[queue_index].state); +#else + return 0; +#endif +} + +static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE +#ifdef CONFIG_NETPOLL_TRAP + if (netpoll_trap()) + return; +#endif + if (test_and_clear_bit(__LINK_STATE_XOFF, + &dev->egress_subqueue[queue_index].state)) + __netif_schedule(dev); +#endif +} + +static inline int netif_is_multiqueue(const struct net_device *dev) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + return (!!(NETIF_F_MULTI_QUEUE & dev->features)); +#else + return 0; +#endif +} /* Use this variant when it is known for sure that it * is executing from interrupt context. @@ -1009,8 +1076,11 @@ static inline void netif_tx_disable(struct net_device *dev) extern void ether_setup(struct net_device *dev); /* Support for loadable net-drivers */ -extern struct net_device *alloc_netdev(int sizeof_priv, const char *name, - void (*setup)(struct net_device *)); +extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, + void (*setup)(struct net_device *), + unsigned int queue_count); +#define alloc_netdev(sizeof_priv, name, setup) \ + alloc_netdev_mq(sizeof_priv, name, setup, 1) extern int register_netdev(struct net_device *dev); extern void unregister_netdev(struct net_device *dev); /* Functions used for secondary unicast and multicast support */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 881fe80f01d0..2d6a14f5f2f1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -196,7 +196,6 @@ typedef unsigned char *sk_buff_data_t; * @sk: Socket we are owned by * @tstamp: Time we arrived * @dev: Device we arrived on/are leaving by - * @iif: ifindex of device we arrived on * @transport_header: Transport layer header * @network_header: Network layer header * @mac_header: Link layer header @@ -231,6 +230,8 @@ typedef unsigned char *sk_buff_data_t; * @nfctinfo: Relationship of this skb to the connection * @nfct_reasm: netfilter conntrack re-assembly pointer * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c + * @iif: ifindex of device we arrived on + * @queue_mapping: Queue mapping for multiqueue devices * @tc_index: Traffic control index * @tc_verd: traffic control verdict * @dma_cookie: a cookie to one of several possible DMA operations @@ -246,8 +247,6 @@ struct sk_buff { struct sock *sk; ktime_t tstamp; struct net_device *dev; - int iif; - /* 4 byte hole on 64 bit*/ struct dst_entry *dst; struct sec_path *sp; @@ -290,12 +289,18 @@ struct sk_buff { #ifdef CONFIG_BRIDGE_NETFILTER struct nf_bridge_info *nf_bridge; #endif + + int iif; + __u16 queue_mapping; + #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ #ifdef CONFIG_NET_CLS_ACT __u16 tc_verd; /* traffic control verdict */ #endif #endif + /* 2 byte hole */ + #ifdef CONFIG_NET_DMA dma_cookie_t dma_cookie; #endif @@ -1725,6 +1730,20 @@ static inline void skb_init_secmark(struct sk_buff *skb) { } #endif +static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + skb->queue_mapping = queue_mapping; +#endif +} + +static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_buff *from) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + to->queue_mapping = from->queue_mapping; +#endif +} + static inline int skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; -- cgit v1.2.3 From d62733c8e437fdb58325617c4b3331769ba82d70 Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Thu, 28 Jun 2007 21:04:31 -0700 Subject: [SCHED]: Qdisc changes and sch_rr added for multiqueue Add the new sch_rr qdisc for multiqueue network device support. Allow sch_prio and sch_rr to be compiled with or without multiqueue hardware support. sch_rr is part of sch_prio, and is referenced from MODULE_ALIAS. This was done since sch_prio and sch_rr only differ in their dequeue routine. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index d10f35338507..268c51599eb8 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -101,6 +101,15 @@ struct tc_prio_qopt __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ }; +enum +{ + TCA_PRIO_UNSPEC, + TCA_PRIO_MQ, + __TCA_PRIO_MAX +}; + +#define TCA_PRIO_MAX (__TCA_PRIO_MAX - 1) + /* TBF section */ struct tc_tbf_qopt -- cgit v1.2.3 From 61cbc2fca6335be52788773b21efdc52a2750924 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 30 Jun 2007 13:35:52 -0700 Subject: [NET]: Fix secondary unicast/multicast address count maintenance When a reference to an existing address is increased or decreased without hitting zero, the address count is incorrectly adjusted. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9817821729c4..8590d685d935 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1091,8 +1091,8 @@ extern int dev_unicast_add(struct net_device *dev, void *addr, int alen); extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all); extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly); extern void dev_mc_discard(struct net_device *dev); -extern int __dev_addr_delete(struct dev_addr_list **list, void *addr, int alen, int all); -extern int __dev_addr_add(struct dev_addr_list **list, void *addr, int alen, int newonly); +extern int __dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all); +extern int __dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly); extern void __dev_addr_discard(struct dev_addr_list **list); extern void dev_set_promiscuity(struct net_device *dev, int inc); extern void dev_set_allmulti(struct net_device *dev, int inc); -- cgit v1.2.3 From 628529b6ee334fedc8d25ce56205bb99566572b9 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 2 Jul 2007 22:41:14 -0700 Subject: [XFRM] Introduce standalone SAD lookup This allows other in-kernel functions to do SAD lookups. The only known user at the moment is pktgen. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/xfrm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index ee3827f053d7..d3a898b18fb4 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -928,6 +928,10 @@ extern struct xfrm_state *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, unsigned short family); +extern struct xfrm_state * xfrm_stateonly_find(xfrm_address_t *daddr, + xfrm_address_t *saddr, + unsigned short family, + u8 mode, u8 proto, u32 reqid); extern int xfrm_state_check_expire(struct xfrm_state *x); extern void xfrm_state_insert(struct xfrm_state *x); extern int xfrm_state_add(struct xfrm_state *x); -- cgit v1.2.3 From 4bdf39911e7a887c4499161422423cbaf16684e8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 2 Jul 2007 22:47:37 -0700 Subject: [NET_SCHED]: Remove unnecessary stats_lock pointers Remove stats_lock pointers from qdisc-internal structures, in all cases it points to dev->queue_lock. The only case where it is necessary is for top-level qdiscs, where it might also point to dev->ingress_lock in case of the ingress qdisc. Also remove it from actions completely, it always points to the actions internal lock. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/act_api.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 8b06c2f3657f..2f0273feabd3 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -19,7 +19,6 @@ struct tcf_common { struct gnet_stats_basic tcfc_bstats; struct gnet_stats_queue tcfc_qstats; struct gnet_stats_rate_est tcfc_rate_est; - spinlock_t *tcfc_stats_lock; spinlock_t tcfc_lock; }; #define tcf_next common.tcfc_next @@ -32,7 +31,6 @@ struct tcf_common { #define tcf_bstats common.tcfc_bstats #define tcf_qstats common.tcfc_qstats #define tcf_rate_est common.tcfc_rate_est -#define tcf_stats_lock common.tcfc_stats_lock #define tcf_lock common.tcfc_lock struct tcf_police { -- cgit v1.2.3 From 8c644623fe7e41f59fe97cdf666cba3cb7ced7d8 Mon Sep 17 00:00:00 2001 From: Guido Guenther Date: Mon, 2 Jul 2007 22:50:25 -0700 Subject: [NET]: Allow group ownership of TUN/TAP devices. Introduce a new syscall TUNSETGROUP for group ownership setting of tap devices. The user now is allowed to send packages if either his euid or his egid matches the one specified via tunctl (via -u or -g respecitvely). If both, gid and uid, are set via tunctl, both have to match. Signed-off-by: Guido Guenther Signed-off-by: Jeff Dike Signed-off-by: David S. Miller --- include/linux/if_tun.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 88aef7b86ef4..42eb6945b93e 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -36,6 +36,7 @@ struct tun_struct { unsigned long flags; int attached; uid_t owner; + gid_t group; wait_queue_head_t read_wait; struct sk_buff_head readq; @@ -78,6 +79,7 @@ struct tun_struct { #define TUNSETPERSIST _IOW('T', 203, int) #define TUNSETOWNER _IOW('T', 204, int) #define TUNSETLINK _IOW('T', 205, int) +#define TUNSETGROUP _IOW('T', 206, int) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 -- cgit v1.2.3 From 89da1ecf5483e6aa29b456a15ad6d05a6797c5a5 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 2 Jul 2007 22:54:18 -0700 Subject: [IrDA]: Netlink layer. First IrDA configuration netlink layer implementation. Currently, we only support the set/get mode commands. Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller --- include/linux/irda.h | 27 +++++++++++++++++++++++++++ include/net/irda/irda.h | 3 +++ include/net/irda/irlap.h | 2 ++ 3 files changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/irda.h b/include/linux/irda.h index 945ba3110874..35911bd4dbe8 100644 --- a/include/linux/irda.h +++ b/include/linux/irda.h @@ -216,6 +216,33 @@ struct if_irda_req { #define ifr_dtr ifr_ifru.ifru_line.dtr #define ifr_rts ifr_ifru.ifru_line.rts + +/* IrDA netlink definitions */ +#define IRDA_NL_NAME "irda" +#define IRDA_NL_VERSION 1 + +enum irda_nl_commands { + IRDA_NL_CMD_UNSPEC, + IRDA_NL_CMD_SET_MODE, + IRDA_NL_CMD_GET_MODE, + + __IRDA_NL_CMD_AFTER_LAST +}; +#define IRDA_NL_CMD_MAX (__IRDA_NL_CMD_AFTER_LAST - 1) + +enum nl80211_attrs { + IRDA_NL_ATTR_UNSPEC, + IRDA_NL_ATTR_IFNAME, + IRDA_NL_ATTR_MODE, + + __IRDA_NL_ATTR_AFTER_LAST +}; +#define IRDA_NL_ATTR_MAX (__IRDA_NL_ATTR_AFTER_LAST - 1) + +/* IrDA modes */ +#define IRDA_MODE_PRIMARY 0x1 +#define IRDA_MODE_SECONDARY 0x2 + #endif /* KERNEL_IRDA_H */ diff --git a/include/net/irda/irda.h b/include/net/irda/irda.h index 36bee441aa56..08387553b57e 100644 --- a/include/net/irda/irda.h +++ b/include/net/irda/irda.h @@ -125,6 +125,9 @@ extern void irda_sysctl_unregister(void); extern int irsock_init(void); extern void irsock_cleanup(void); +extern int irda_nl_register(void); +extern void irda_nl_unregister(void); + extern int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev); diff --git a/include/net/irda/irlap.h b/include/net/irda/irlap.h index a3d370efb903..9d0c78ea92f5 100644 --- a/include/net/irda/irlap.h +++ b/include/net/irda/irlap.h @@ -208,6 +208,8 @@ struct irlap_cb { int xbofs_delay; /* Nr of XBOF's used to MTT */ int bofs_count; /* Negotiated extra BOFs */ int next_bofs; /* Negotiated extra BOFs after next frame */ + + int mode; /* IrLAP mode (primary, secondary or monitor) */ }; /* -- cgit v1.2.3 From 411725280bd0058ebb83c0e32133b7a94902c3a6 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 2 Jul 2007 22:55:31 -0700 Subject: [IrDA]: Monitor mode. Through the IrDA netlink set mode command, we switch to IrDA monitor mode, where one IrLAP instance receives all the packets on the media, without ever responding to them. Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller --- include/linux/irda.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/irda.h b/include/linux/irda.h index 35911bd4dbe8..8e3735714c1c 100644 --- a/include/linux/irda.h +++ b/include/linux/irda.h @@ -242,6 +242,7 @@ enum nl80211_attrs { /* IrDA modes */ #define IRDA_MODE_PRIMARY 0x1 #define IRDA_MODE_SECONDARY 0x2 +#define IRDA_MODE_MONITOR 0x4 #endif /* KERNEL_IRDA_H */ -- cgit v1.2.3 From 067b207b281db5e3f03f8d244286c20f61aa2343 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Thu, 5 Jul 2007 17:08:05 -0700 Subject: [UDP]: Cleanup UDP encapsulation code This cleanup fell out after adding L2TP support where a new encap_rcv funcptr was added to struct udp_sock. Have XFRM use the new encap_rcv funcptr, which allows us to move the XFRM encap code from udp.c into xfrm4_input.c. Make xfrm4_rcv_encap() static since it is no longer called externally. Signed-off-by: James Chapman Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/xfrm.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d3a898b18fb4..ae959e950174 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1003,7 +1003,7 @@ extern int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr); #ifdef CONFIG_XFRM -extern int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type); +extern int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); extern int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen); extern int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsigned short family); #else @@ -1012,12 +1012,13 @@ static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optv return -ENOPROTOOPT; } -static inline int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) +static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) { /* should not happen */ kfree_skb(skb); return 0; } + static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsigned short family) { return -EINVAL; -- cgit v1.2.3 From 7bfe24611671ec76b44281e582b38535e21f01a9 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:14:23 -0700 Subject: [NETFILTER]: ip6_tables: fix explanation of valid upper protocol number This explains the allowed upper protocol numbers. IP6T_F_NOPROTO was introduced to use 0 as Hop-by-Hop option header, not wildcard. But that seemed to be forgotten. 0 has been used as wildcard since 2002-08-23. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv6/ip6_tables.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 4686f8342cbd..9a720f05888f 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -44,8 +44,14 @@ struct ip6t_ip6 { char iniface[IFNAMSIZ], outiface[IFNAMSIZ]; unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ]; - /* ARGH, HopByHop uses 0, so can't do 0 = ANY, - instead IP6T_F_NOPROTO must be set */ + /* Upper protocol number + * - The allowed value is 0 (any) or protocol number of last parsable + * header, which is 50 (ESP), 59 (No Next Header), 135 (MH), or + * the non IPv6 extension headers. + * - The protocol numbers of IPv6 extension headers except of ESP and + * MH do not match any packets. + * - You also need to set IP6T_FLAGS_PROTO to "flags" to check protocol. + */ u_int16_t proto; /* TOS to match iff flags & IP6T_F_TOS */ u_int8_t tos; -- cgit v1.2.3 From cff533ac12494fa002e2c46acc94d670e5f636a2 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 7 Jul 2007 22:15:12 -0700 Subject: [NETFILTER]: x_tables: switch hotdrop to bool Switch the "hotdrop" variables to boolean Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 7e733a6ba4f6..b8577d18d10d 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -148,7 +148,7 @@ struct xt_match const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop); + bool *hotdrop); /* Called when user tries to insert an entry of this type. */ /* Should return true or false. */ -- cgit v1.2.3 From 1d93a9cbad608f6398ba6c5b588c504ccd35a2ca Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 7 Jul 2007 22:15:35 -0700 Subject: [NETFILTER]: x_tables: switch xt_match->match to bool Switch the return type of match functions to boolean Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index b8577d18d10d..304fce356a43 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -141,14 +141,14 @@ struct xt_match /* Arguments changed since 2.6.9, as this must now handle non-linear skb, using skb_header_pointer and skb_ip_make_writable. */ - int (*match)(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop); + bool (*match)(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + bool *hotdrop); /* Called when user tries to insert an entry of this type. */ /* Should return true or false. */ -- cgit v1.2.3 From ccb79bdce71f2c04cfa9bfcbaf4d37e2f963d684 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 7 Jul 2007 22:16:00 -0700 Subject: [NETFILTER]: x_tables: switch xt_match->checkentry to bool Switch the return type of match functions to boolean Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 304fce356a43..5130dd60a2fc 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -152,11 +152,11 @@ struct xt_match /* Called when user tries to insert an entry of this type. */ /* Should return true or false. */ - int (*checkentry)(const char *tablename, - const void *ip, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask); + bool (*checkentry)(const char *tablename, + const void *ip, + const struct xt_match *match, + void *matchinfo, + unsigned int hook_mask); /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_match *match, void *matchinfo); -- cgit v1.2.3 From e1931b784a8de324abf310fa3b5e3f25d3988233 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 7 Jul 2007 22:16:26 -0700 Subject: [NETFILTER]: x_tables: switch xt_target->checkentry to bool Switch the return type of target checkentry functions to boolean. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 5130dd60a2fc..64f425a855bb 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -202,11 +202,11 @@ struct xt_target hook_mask is a bitmask of hooks from which it can be called. */ /* Should return true or false. */ - int (*checkentry)(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask); + bool (*checkentry)(const char *tablename, + const void *entry, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask); /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_target *target, void *targinfo); -- cgit v1.2.3 From 1b50b8a371e90a5e110f466e4ac02cf6b5f681de Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 7 Jul 2007 22:20:36 -0700 Subject: [NETFILTER]: Add u32 match Along comes... xt_u32, a revamped ipt_u32 from POM-NG, Plus: * 2007-06-02: added ipv6 support * 2007-06-05: uses kmalloc for the big buffer * 2007-06-05: added inversion * 2007-06-20: use skb_copy_bits() and get rid of the big buffer and lock (suggested by Pablo Neira Ayuso) Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/xt_u32.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 include/linux/netfilter/xt_u32.h (limited to 'include') diff --git a/include/linux/netfilter/xt_u32.h b/include/linux/netfilter/xt_u32.h new file mode 100644 index 000000000000..9947f56cdbdd --- /dev/null +++ b/include/linux/netfilter/xt_u32.h @@ -0,0 +1,40 @@ +#ifndef _XT_U32_H +#define _XT_U32_H 1 + +enum xt_u32_ops { + XT_U32_AND, + XT_U32_LEFTSH, + XT_U32_RIGHTSH, + XT_U32_AT, +}; + +struct xt_u32_location_element { + u_int32_t number; + u_int8_t nextop; +}; + +struct xt_u32_value_element { + u_int32_t min; + u_int32_t max; +}; + +/* + * Any way to allow for an arbitrary number of elements? + * For now, I settle with a limit of 10 each. + */ +#define XT_U32_MAXSIZE 10 + +struct xt_u32_test { + struct xt_u32_location_element location[XT_U32_MAXSIZE+1]; + struct xt_u32_value_element value[XT_U32_MAXSIZE+1]; + u_int8_t nnums; + u_int8_t nvalues; +}; + +struct xt_u32 { + struct xt_u32_test tests[XT_U32_MAXSIZE+1]; + u_int8_t ntests; + u_int8_t invert; +}; + +#endif /* _XT_U32_H */ -- cgit v1.2.3 From ba9dda3ab5a865542e69dfe01edb2436857c9420 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 7 Jul 2007 22:21:23 -0700 Subject: [NETFILTER]: x_tables: add TRACE target The TRACE target can be used to follow IP and IPv6 packets through the ruleset. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick NcHardy Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2d6a14f5f2f1..625d73b07ab7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -227,6 +227,7 @@ typedef unsigned char *sk_buff_data_t; * @mark: Generic packet mark * @nfct: Associated connection, if any * @ipvs_property: skbuff is owned by ipvs + * @nf_trace: netfilter packet trace flag * @nfctinfo: Relationship of this skb to the connection * @nfct_reasm: netfilter conntrack re-assembly pointer * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c @@ -278,7 +279,8 @@ struct sk_buff { nfctinfo:3; __u8 pkt_type:3, fclone:2, - ipvs_property:1; + ipvs_property:1, + nf_trace:1; __be16 protocol; void (*destructor)(struct sk_buff *skb); -- cgit v1.2.3 From 4ba887790ce2015e8c464809c0be902fb813ad15 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:22:33 -0700 Subject: [NETFILTER]: nf_nat: move NAT declarations from nf_conntrack_ipv4.h to nf_nat.h Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 23 +---------------------- include/net/netfilter/nf_conntrack.h | 1 + include/net/netfilter/nf_nat.h | 18 ++++++++++++++++++ include/net/netfilter/nf_nat_core.h | 1 + 4 files changed, 21 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 1401ccc051c4..3ed4e14970c5 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -9,29 +9,8 @@ #ifndef _NF_CONNTRACK_IPV4_H #define _NF_CONNTRACK_IPV4_H -#ifdef CONFIG_NF_NAT_NEEDED -#include -#include - -/* per conntrack: nat application helper private data */ -union nf_conntrack_nat_help { - /* insert nat helper private data here */ - struct nf_nat_pptp nat_pptp_info; -}; - -struct nf_conn_nat { - struct nf_nat_info info; - union nf_conntrack_nat_help help; -#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ - defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) - int masq_index; -#endif -}; -#endif /* CONFIG_NF_NAT_NEEDED */ - /* Returns new sk_buff, or NULL */ -struct sk_buff * -nf_ct_ipv4_ct_gather_frags(struct sk_buff *skb); +struct sk_buff *nf_ct_ipv4_ct_gather_frags(struct sk_buff *skb); extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 4732432f8eb0..12a0e793cc0b 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -280,6 +280,7 @@ nf_conntrack_unregister_cache(u_int32_t features); * nat: nf_conn .. nf_conn_nat, nf_conn .. nf_conn_nat .. nf_conn help */ #ifdef CONFIG_NF_NAT_NEEDED +#include static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) { unsigned int offset = sizeof(struct nf_conn); diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index bc57dd7b9b5c..47d3dc107a6a 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -51,6 +51,7 @@ struct nf_nat_multi_range_compat #ifdef __KERNEL__ #include +#include /* The structure embedded in the conntrack structure. */ struct nf_nat_info @@ -59,6 +60,23 @@ struct nf_nat_info struct nf_nat_seq seq[IP_CT_DIR_MAX]; }; +/* per conntrack: nat application helper private data */ +union nf_conntrack_nat_help +{ + /* insert nat helper private data here */ + struct nf_nat_pptp nat_pptp_info; +}; + +struct nf_conn_nat +{ + struct nf_nat_info info; + union nf_conntrack_nat_help help; +#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ + defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) + int masq_index; +#endif +}; + struct nf_conn; /* Set up the info structure to map into this range. */ diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h index 9778ffa93440..c3cd127ba4bb 100644 --- a/include/net/netfilter/nf_nat_core.h +++ b/include/net/netfilter/nf_nat_core.h @@ -2,6 +2,7 @@ #define _NF_NAT_CORE_H #include #include +#include /* This header used to share core functionality between the standalone NAT module, and the compatibility layer's use of NAT for masquerading. */ -- cgit v1.2.3 From ecfab2c9fe5597221c2b30dec48634a2361a0d08 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:23:21 -0700 Subject: [NETFILTER]: nf_conntrack: introduce extension infrastructure Old space allocator of conntrack had problems about extensibility. - It required slab cache per combination of extensions. - It expected what extensions would be assigned, but it was impossible to expect that completely, then we allocated bigger memory object than really required. - It needed to search helper twice due to lock issue. Now basic informations of a connection are stored in 'struct nf_conn'. And a storage for extension (helper, NAT) is allocated by kmalloc. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 3 ++ include/net/netfilter/nf_conntrack_extend.h | 80 +++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 include/net/netfilter/nf_conntrack_extend.h (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 12a0e793cc0b..c31382d3ef11 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -131,6 +131,9 @@ struct nf_conn /* Storage reserved for other modules: */ union nf_conntrack_proto proto; + /* Extensions */ + struct nf_ct_ext *ext; + /* features dynamically at the end: helper, nat (both optional) */ char data[0]; }; diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h new file mode 100644 index 000000000000..8a988d136465 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -0,0 +1,80 @@ +#ifndef _NF_CONNTRACK_EXTEND_H +#define _NF_CONNTRACK_EXTEND_H + +#include + +enum nf_ct_ext_id +{ + NF_CT_EXT_NUM, +}; + +/* Extensions: optional stuff which isn't permanently in struct. */ +struct nf_ct_ext { + u8 offset[NF_CT_EXT_NUM]; + u8 len; + u8 real_len; + char data[0]; +}; + +static inline int nf_ct_ext_exist(const struct nf_conn *ct, u8 id) +{ + return (ct->ext && ct->ext->offset[id]); +} + +static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id) +{ + if (!nf_ct_ext_exist(ct, id)) + return NULL; + + return (void *)ct->ext + ct->ext->offset[id]; +} +#define nf_ct_ext_find(ext, id) \ + ((id##_TYPE *)__nf_ct_ext_find((ext), (id))) + +/* Destroy all relationships */ +extern void __nf_ct_ext_destroy(struct nf_conn *ct); +static inline void nf_ct_ext_destroy(struct nf_conn *ct) +{ + if (ct->ext) + __nf_ct_ext_destroy(ct); +} + +/* Free operation. If you want to free a object referred from private area, + * please implement __nf_ct_ext_free() and call it. + */ +static inline void nf_ct_ext_free(struct nf_conn *ct) +{ + if (ct->ext) + kfree(ct->ext); +} + +/* Add this type, returns pointer to data or NULL. */ +void * +__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp); +#define nf_ct_ext_add(ct, id, gfp) \ + ((id##_TYPE *)__nf_ct_ext_add((ct), (id), (gfp))) + +#define NF_CT_EXT_F_PREALLOC 0x0001 + +struct nf_ct_ext_type +{ + /* Destroys relationships (can be NULL). */ + void (*destroy)(struct nf_conn *ct); + /* Called when realloacted (can be NULL). + Contents has already been moved. */ + void (*move)(struct nf_conn *ct, void *old); + + enum nf_ct_ext_id id; + + unsigned int flags; + + /* Length and min alignment. */ + u8 len; + u8 align; + /* initial size of nf_ct_ext. */ + u8 alloc_size; +}; + +int nf_ct_extend_register(struct nf_ct_ext_type *type); +void nf_ct_extend_unregister(struct nf_ct_ext_type *type); +#endif /* _NF_CONNTRACK_EXTEND_H */ -- cgit v1.2.3 From ceceae1b1555a9afcb8dacf90df5fa1f20fd5466 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:23:42 -0700 Subject: [NETFILTER]: nf_conntrack: use extension infrastructure for helper Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 26 -------------------------- include/net/netfilter/nf_conntrack_core.h | 3 +++ include/net/netfilter/nf_conntrack_extend.h | 3 +++ include/net/netfilter/nf_conntrack_helper.h | 5 +++++ 4 files changed, 11 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index c31382d3ef11..f1e0fee9aa9c 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -294,32 +294,6 @@ static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) offset = ALIGN(offset, __alignof__(struct nf_conn_nat)); return (struct nf_conn_nat *) ((void *)ct + offset); } - -static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct) -{ - unsigned int offset = sizeof(struct nf_conn); - - if (!(ct->features & NF_CT_F_HELP)) - return NULL; - if (ct->features & NF_CT_F_NAT) { - offset = ALIGN(offset, __alignof__(struct nf_conn_nat)); - offset += sizeof(struct nf_conn_nat); - } - - offset = ALIGN(offset, __alignof__(struct nf_conn_help)); - return (struct nf_conn_help *) ((void *)ct + offset); -} -#else /* No NAT */ -static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct) -{ - unsigned int offset = sizeof(struct nf_conn); - - if (!(ct->features & NF_CT_F_HELP)) - return NULL; - - offset = ALIGN(offset, __alignof__(struct nf_conn_help)); - return (struct nf_conn_help *) ((void *)ct + offset); -} #endif /* CONFIG_NF_NAT_NEEDED */ #endif /* __KERNEL__ */ #endif /* _NF_CONNTRACK_H */ diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 9fb906688ffa..3bf7d05ea64d 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -30,6 +30,9 @@ extern void nf_conntrack_cleanup(void); extern int nf_conntrack_proto_init(void); extern void nf_conntrack_proto_fini(void); +extern int nf_conntrack_helper_init(void); +extern void nf_conntrack_helper_fini(void); + struct nf_conntrack_l3proto; extern struct nf_conntrack_l3proto *nf_ct_find_l3proto(u_int16_t pf); /* Like above, but you already have conntrack read lock. */ diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 8a988d136465..05357dc5d2d2 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -5,9 +5,12 @@ enum nf_ct_ext_id { + NF_CT_EXT_HELPER, NF_CT_EXT_NUM, }; +#define NF_CT_EXT_HELPER_TYPE struct nf_conn_help + /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { u8 offset[NF_CT_EXT_NUM]; diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 8c72ac9f0ab8..b43a75ba44ac 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -10,6 +10,7 @@ #ifndef _NF_CONNTRACK_HELPER_H #define _NF_CONNTRACK_HELPER_H #include +#include struct module; @@ -52,4 +53,8 @@ extern void nf_ct_helper_put(struct nf_conntrack_helper *helper); extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); +static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct) +{ + return nf_ct_ext_find(ct, NF_CT_EXT_HELPER); +} #endif /*_NF_CONNTRACK_HELPER_H*/ -- cgit v1.2.3 From e54cbc1f91dea4f98b6209e693d3b5eae46321bd Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:24:04 -0700 Subject: [NETFILTER]: nf_nat: add reference to conntrack from entry of bysource list I will split 'struct nf_nat_info' out from conntrack. So I cannot use 'offsetof' to get the pointer to conntrack from it. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_nat.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 47d3dc107a6a..575dc8ac48dc 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -53,11 +53,14 @@ struct nf_nat_multi_range_compat #include #include +struct nf_conn; + /* The structure embedded in the conntrack structure. */ struct nf_nat_info { struct list_head bysource; struct nf_nat_seq seq[IP_CT_DIR_MAX]; + struct nf_conn *ct; }; /* per conntrack: nat application helper private data */ @@ -77,8 +80,6 @@ struct nf_conn_nat #endif }; -struct nf_conn; - /* Set up the info structure to map into this range. */ extern unsigned int nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, -- cgit v1.2.3 From 2d59e5ca8c7113ad91452f0f9259a4b55ee90323 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:24:28 -0700 Subject: [NETFILTER]: nf_nat: use extension infrastructure Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 17 ----------------- include/net/netfilter/nf_conntrack_extend.h | 2 ++ include/net/netfilter/nf_nat.h | 6 ++++++ 3 files changed, 8 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index f1e0fee9aa9c..b2083d386f33 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -278,22 +278,5 @@ nf_conntrack_register_cache(u_int32_t features, const char *name, size_t size); extern void nf_conntrack_unregister_cache(u_int32_t features); -/* valid combinations: - * basic: nf_conn, nf_conn .. nf_conn_help - * nat: nf_conn .. nf_conn_nat, nf_conn .. nf_conn_nat .. nf_conn help - */ -#ifdef CONFIG_NF_NAT_NEEDED -#include -static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) -{ - unsigned int offset = sizeof(struct nf_conn); - - if (!(ct->features & NF_CT_F_NAT)) - return NULL; - - offset = ALIGN(offset, __alignof__(struct nf_conn_nat)); - return (struct nf_conn_nat *) ((void *)ct + offset); -} -#endif /* CONFIG_NF_NAT_NEEDED */ #endif /* __KERNEL__ */ #endif /* _NF_CONNTRACK_H */ diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 05357dc5d2d2..73b5711faf32 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -6,10 +6,12 @@ enum nf_ct_ext_id { NF_CT_EXT_HELPER, + NF_CT_EXT_NAT, NF_CT_EXT_NUM, }; #define NF_CT_EXT_HELPER_TYPE struct nf_conn_help +#define NF_CT_EXT_NAT_TYPE struct nf_conn_nat /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 575dc8ac48dc..0425e28c6a78 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -52,6 +52,7 @@ struct nf_nat_multi_range_compat #ifdef __KERNEL__ #include #include +#include struct nf_conn; @@ -89,6 +90,11 @@ extern unsigned int nf_nat_setup_info(struct nf_conn *ct, extern int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack); +static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) +{ + return nf_ct_ext_find(ct, NF_CT_EXT_NAT); +} + extern int nf_nat_module_is_loaded; #else /* !__KERNEL__: iptables wants this to compile. */ -- cgit v1.2.3 From ff09b7493c8f433d3ffd6a31ad58d190f82ef0c5 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:25:28 -0700 Subject: [NETFILTER]: nf_nat: remove unused nf_nat_module_is_loaded Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_nat.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 0425e28c6a78..0541eed5008f 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -95,8 +95,6 @@ static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) return nf_ct_ext_find(ct, NF_CT_EXT_NAT); } -extern int nf_nat_module_is_loaded; - #else /* !__KERNEL__: iptables wants this to compile. */ #define nf_nat_multi_range nf_nat_multi_range_compat #endif /*__KERNEL__*/ -- cgit v1.2.3 From dacd2a1a5cf621288833aa3c6e815b86a1536538 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:25:51 -0700 Subject: [NETFILTER]: nf_conntrack: remove old memory allocator of conntrack Now memory space for help and NAT are allocated by extension infrastructure. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 14 -------------- include/net/netfilter/nf_conntrack_l3proto.h | 2 -- 2 files changed, 16 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index b2083d386f33..71386e5c4bb7 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -117,9 +117,6 @@ struct nf_conn /* Unique ID that identifies this conntrack*/ unsigned int id; - /* features - nat, helper, ... used by allocating system */ - u_int32_t features; - #if defined(CONFIG_NF_CONNTRACK_MARK) u_int32_t mark; #endif @@ -133,9 +130,6 @@ struct nf_conn /* Extensions */ struct nf_ct_ext *ext; - - /* features dynamically at the end: helper, nat (both optional) */ - char data[0]; }; static inline struct nf_conn * @@ -265,14 +259,6 @@ do { \ local_bh_enable(); \ } while (0) -/* no helper, no nat */ -#define NF_CT_F_BASIC 0 -/* for helper */ -#define NF_CT_F_HELP 1 -/* for nat. */ -#define NF_CT_F_NAT 2 -#define NF_CT_F_NUM 4 - extern int nf_conntrack_register_cache(u_int32_t features, const char *name, size_t size); extern void diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 96a58d8e1d3f..890752d7f673 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -64,8 +64,6 @@ struct nf_conntrack_l3proto int (*prepare)(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, u_int8_t *protonum); - u_int32_t (*get_features)(const struct nf_conntrack_tuple *tuple); - int (*tuple_to_nfattr)(struct sk_buff *skb, const struct nf_conntrack_tuple *t); -- cgit v1.2.3 From d8a0509a696de60296a66ba4fe4f9eaade497103 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:26:16 -0700 Subject: [NETFILTER]: nf_nat: kill global 'destroy' operation This kills the global 'destroy' operation which was used by NAT. Instead it uses the extension infrastructure so that multiple extensions can register own operations. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 71386e5c4bb7..ef4a403878a3 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -213,9 +213,6 @@ extern void nf_conntrack_tcp_update(struct sk_buff *skb, struct nf_conn *conntrack, int dir); -/* Call me when a conntrack is destroyed. */ -extern void (*nf_conntrack_destroyed)(struct nf_conn *conntrack); - /* Fake conntrack entry for untracked connections */ extern struct nf_conn nf_conntrack_untracked; -- cgit v1.2.3 From b6b84d4a94e95727a4c65841eea23ac60c6aa329 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:26:35 -0700 Subject: [NETFILTER]: nf_nat: merge nf_conn and nf_nat_info Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_nat.h | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 0541eed5008f..d0e5e436dc1b 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -54,16 +54,6 @@ struct nf_nat_multi_range_compat #include #include -struct nf_conn; - -/* The structure embedded in the conntrack structure. */ -struct nf_nat_info -{ - struct list_head bysource; - struct nf_nat_seq seq[IP_CT_DIR_MAX]; - struct nf_conn *ct; -}; - /* per conntrack: nat application helper private data */ union nf_conntrack_nat_help { @@ -71,9 +61,14 @@ union nf_conntrack_nat_help struct nf_nat_pptp nat_pptp_info; }; +struct nf_conn; + +/* The structure embedded in the conntrack structure. */ struct nf_conn_nat { - struct nf_nat_info info; + struct list_head bysource; + struct nf_nat_seq seq[IP_CT_DIR_MAX]; + struct nf_conn *ct; union nf_conntrack_nat_help help; #if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) -- cgit v1.2.3 From f205c5e0c28aa7e0fb6eaaa66e97928f9d9e6994 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:28:14 -0700 Subject: [NETFILTER]: nf_conntrack: use hlists for conntrack hash Convert conntrack hash to hlists to reduce its size and cache footprint. Since the default hashsize to max. entries ratio sucks (1:16), this patch doesn't reduce the amount of memory used for the hash by default, but instead uses a better ratio of 1:8, which results in the same max. entries value. One thing worth noting is early_drop. It really should use LRU, so it now has to iterate over the entire chain to find the last unconfirmed entry. Since chains shouldn't be very long and the entire operation is very rare this shouldn't be a problem. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_core.h | 4 ++-- include/net/netfilter/nf_conntrack_tuple.h | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 3bf7d05ea64d..6351948654b3 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -84,9 +84,9 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *proto); -extern struct list_head *nf_conntrack_hash; +extern struct hlist_head *nf_conntrack_hash; extern struct list_head nf_conntrack_expect_list; extern rwlock_t nf_conntrack_lock ; -extern struct list_head unconfirmed; +extern struct hlist_head unconfirmed; #endif /* _NF_CONNTRACK_CORE_H */ diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index 5d72b16e876f..d02ce876b4ca 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -125,8 +125,7 @@ DEBUGP("tuple %p: %u %u " NIP6_FMT " %hu -> " NIP6_FMT " %hu\n", \ /* Connections have two entries in the hash table: one for each way */ struct nf_conntrack_tuple_hash { - struct list_head list; - + struct hlist_node hnode; struct nf_conntrack_tuple tuple; }; -- cgit v1.2.3 From 330f7db5e578e1e298ba3a41748e5ea333a64a2b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:28:42 -0700 Subject: [NETFILTER]: nf_conntrack: remove 'ignore_conntrack' argument from nf_conntrack_find_get All callers pass NULL, this also doesn't seem very useful for modules. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_core.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 6351948654b3..2fa3a1bbc7f2 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -58,8 +58,7 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, /* Find a connection corresponding to a tuple. */ extern struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple, - const struct nf_conn *ignored_conntrack); +nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple); extern int __nf_conntrack_confirm(struct sk_buff **pskb); -- cgit v1.2.3 From ac565e5fc104fe1842a87f2206fcfb7b6dda903d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:30:08 -0700 Subject: [NETFILTER]: nf_conntrack: export hash allocation/destruction functions Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index ef4a403878a3..8f2cbb965f3d 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -172,6 +172,10 @@ static inline void nf_ct_put(struct nf_conn *ct) extern int nf_ct_l3proto_try_module_get(unsigned short l3proto); extern void nf_ct_l3proto_module_put(unsigned short l3proto); +extern struct hlist_head *nf_ct_alloc_hashtable(int *sizep, int *vmalloced); +extern void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, + int size); + extern struct nf_conntrack_tuple_hash * __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack); -- cgit v1.2.3 From 53aba5979e1d964c0234816eda2316f1c2e7946d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:30:27 -0700 Subject: [NETFILTER]: nf_nat: use hlists for bysource hash Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_nat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index d0e5e436dc1b..6ae52f7c9f55 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -66,7 +66,7 @@ struct nf_conn; /* The structure embedded in the conntrack structure. */ struct nf_conn_nat { - struct list_head bysource; + struct hlist_node bysource; struct nf_nat_seq seq[IP_CT_DIR_MAX]; struct nf_conn *ct; union nf_conntrack_nat_help help; -- cgit v1.2.3 From 6823645d608541c2c69e8a99454936e058c294e0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:30:49 -0700 Subject: [NETFILTER]: nf_conntrack_expect: function naming unification Currently there is a wild mix of nf_conntrack_expect_, nf_ct_exp_, expect_, exp_, ... Consistently use nf_ct_ as prefix for exported functions. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_core.h | 2 +- include/net/netfilter/nf_conntrack_ecache.h | 17 ++++++++--------- include/net/netfilter/nf_conntrack_expect.h | 28 ++++++++++++++-------------- 3 files changed, 23 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 2fa3a1bbc7f2..a18f79c80db8 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -84,7 +84,7 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l4proto *proto); extern struct hlist_head *nf_conntrack_hash; -extern struct list_head nf_conntrack_expect_list; +extern struct list_head nf_ct_expect_list; extern rwlock_t nf_conntrack_lock ; extern struct hlist_head unconfirmed; diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 811c9073c532..f0b9078235c9 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -49,15 +49,15 @@ static inline void nf_conntrack_event(enum ip_conntrack_events event, atomic_notifier_call_chain(&nf_conntrack_chain, event, ct); } -extern struct atomic_notifier_head nf_conntrack_expect_chain; -extern int nf_conntrack_expect_register_notifier(struct notifier_block *nb); -extern int nf_conntrack_expect_unregister_notifier(struct notifier_block *nb); +extern struct atomic_notifier_head nf_ct_expect_chain; +extern int nf_ct_expect_register_notifier(struct notifier_block *nb); +extern int nf_ct_expect_unregister_notifier(struct notifier_block *nb); static inline void -nf_conntrack_expect_event(enum ip_conntrack_expect_events event, - struct nf_conntrack_expect *exp) +nf_ct_expect_event(enum ip_conntrack_expect_events event, + struct nf_conntrack_expect *exp) { - atomic_notifier_call_chain(&nf_conntrack_expect_chain, event, exp); + atomic_notifier_call_chain(&nf_ct_expect_chain, event, exp); } #else /* CONFIG_NF_CONNTRACK_EVENTS */ @@ -67,9 +67,8 @@ static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, static inline void nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) {} static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {} -static inline void -nf_conntrack_expect_event(enum ip_conntrack_expect_events event, - struct nf_conntrack_expect *exp) {} +static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event, + struct nf_conntrack_expect *exp) {} static inline void nf_ct_event_cache_flush(void) {} #endif /* CONFIG_NF_CONNTRACK_EVENTS */ diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 173c7c1eff23..c0b1d1fb23e1 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -6,8 +6,8 @@ #define _NF_CONNTRACK_EXPECT_H #include -extern struct list_head nf_conntrack_expect_list; -extern struct kmem_cache *nf_conntrack_expect_cachep; +extern struct list_head nf_ct_expect_list; +extern struct kmem_cache *nf_ct_expect_cachep; extern const struct file_operations exp_file_ops; struct nf_conntrack_expect @@ -54,27 +54,27 @@ struct nf_conntrack_expect struct nf_conntrack_expect * -__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple); +__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -nf_conntrack_expect_find_get(const struct nf_conntrack_tuple *tuple); +nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -find_expectation(const struct nf_conntrack_tuple *tuple); +nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple); void nf_ct_unlink_expect(struct nf_conntrack_expect *exp); void nf_ct_remove_expectations(struct nf_conn *ct); -void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp); +void nf_ct_unexpect_related(struct nf_conntrack_expect *exp); /* Allocate space for an expectation: this is mandatory before calling - nf_conntrack_expect_related. You will have to call put afterwards. */ -struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me); -void nf_conntrack_expect_init(struct nf_conntrack_expect *, int, - union nf_conntrack_address *, - union nf_conntrack_address *, - u_int8_t, __be16 *, __be16 *); -void nf_conntrack_expect_put(struct nf_conntrack_expect *exp); -int nf_conntrack_expect_related(struct nf_conntrack_expect *expect); + nf_ct_expect_related. You will have to call put afterwards. */ +struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me); +void nf_ct_expect_init(struct nf_conntrack_expect *, int, + union nf_conntrack_address *, + union nf_conntrack_address *, + u_int8_t, __be16 *, __be16 *); +void nf_ct_expect_put(struct nf_conntrack_expect *exp); +int nf_ct_expect_related(struct nf_conntrack_expect *expect); #endif /*_NF_CONNTRACK_EXPECT_H*/ -- cgit v1.2.3 From d4156e8cd93f5772483928aaf4960120caebd789 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:31:32 -0700 Subject: [NETFILTER]: nf_conntrack: reduce masks to a subset of tuples Since conntrack currently allows to use masks for every bit of both helper and expectation tuples, we can't hash them and have to keep them on two global lists that are searched for every new connection. This patch removes the never used ability to use masks for the destination part of the expectation tuple and completely removes masks from helpers since the only reasonable choice is a full match on l3num, protonum and src.u.all. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_expect.h | 3 +- include/net/netfilter/nf_conntrack_helper.h | 5 +-- include/net/netfilter/nf_conntrack_tuple.h | 65 +++++++++++++++++++---------- 3 files changed, 47 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index c0b1d1fb23e1..13643f7f7422 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -16,7 +16,8 @@ struct nf_conntrack_expect struct list_head list; /* We expect this tuple, with the following mask */ - struct nf_conntrack_tuple tuple, mask; + struct nf_conntrack_tuple tuple; + struct nf_conntrack_tuple_mask mask; /* Function to call after setup and insertion */ void (*expectfn)(struct nf_conn *new, diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index b43a75ba44ac..d62e6f093af4 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -24,10 +24,9 @@ struct nf_conntrack_helper * expected connections */ unsigned int timeout; /* timeout for expecteds */ - /* Mask of things we will help (compared against server response) */ + /* Tuple of things we will help (compared against server response) */ struct nf_conntrack_tuple tuple; - struct nf_conntrack_tuple mask; - + /* Function to call when data passes; return verdict, or -1 to invalidate. */ int (*help)(struct sk_buff **pskb, diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index d02ce876b4ca..99934ab538e6 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -100,6 +100,14 @@ struct nf_conntrack_tuple } dst; }; +struct nf_conntrack_tuple_mask +{ + struct { + union nf_conntrack_address u3; + union nf_conntrack_man_proto u; + } src; +}; + /* This is optimized opposed to a memset of the whole structure. Everything we * really care about is the source/destination unions */ #define NF_CT_TUPLE_U_BLANK(tuple) \ @@ -161,31 +169,44 @@ static inline int nf_ct_tuple_equal(const struct nf_conntrack_tuple *t1, return nf_ct_tuple_src_equal(t1, t2) && nf_ct_tuple_dst_equal(t1, t2); } +static inline int nf_ct_tuple_mask_equal(const struct nf_conntrack_tuple_mask *m1, + const struct nf_conntrack_tuple_mask *m2) +{ + return (m1->src.u3.all[0] == m2->src.u3.all[0] && + m1->src.u3.all[1] == m2->src.u3.all[1] && + m1->src.u3.all[2] == m2->src.u3.all[2] && + m1->src.u3.all[3] == m2->src.u3.all[3] && + m1->src.u.all == m2->src.u.all); +} + +static inline int nf_ct_tuple_src_mask_cmp(const struct nf_conntrack_tuple *t1, + const struct nf_conntrack_tuple *t2, + const struct nf_conntrack_tuple_mask *mask) +{ + int count; + + for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) { + if ((t1->src.u3.all[count] ^ t2->src.u3.all[count]) & + mask->src.u3.all[count]) + return 0; + } + + if ((t1->src.u.all ^ t2->src.u.all) & mask->src.u.all) + return 0; + + if (t1->src.l3num != t2->src.l3num || + t1->dst.protonum != t2->dst.protonum) + return 0; + + return 1; +} + static inline int nf_ct_tuple_mask_cmp(const struct nf_conntrack_tuple *t, const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_tuple *mask) + const struct nf_conntrack_tuple_mask *mask) { - int count = 0; - - for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ - if ((t->src.u3.all[count] ^ tuple->src.u3.all[count]) & - mask->src.u3.all[count]) - return 0; - } - - for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ - if ((t->dst.u3.all[count] ^ tuple->dst.u3.all[count]) & - mask->dst.u3.all[count]) - return 0; - } - - if ((t->src.u.all ^ tuple->src.u.all) & mask->src.u.all || - (t->dst.u.all ^ tuple->dst.u.all) & mask->dst.u.all || - (t->src.l3num ^ tuple->src.l3num) & mask->src.l3num || - (t->dst.protonum ^ tuple->dst.protonum) & mask->dst.protonum) - return 0; - - return 1; + return nf_ct_tuple_src_mask_cmp(t, tuple, mask) && + nf_ct_tuple_dst_equal(t, tuple); } #endif /* _NF_CONNTRACK_TUPLE_H */ -- cgit v1.2.3 From e9c1b084e17ca225b6be731b819308ee0f9e04b8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:32:53 -0700 Subject: [NETFILTER]: nf_conntrack: move expectaton related init code to nf_conntrack_expect.c Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_expect.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 13643f7f7422..cf6a619664e8 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -7,8 +7,6 @@ #include extern struct list_head nf_ct_expect_list; -extern struct kmem_cache *nf_ct_expect_cachep; -extern const struct file_operations exp_file_ops; struct nf_conntrack_expect { @@ -53,6 +51,8 @@ struct nf_conntrack_expect #define NF_CT_EXPECT_PERMANENT 0x1 +int nf_conntrack_expect_init(void); +void nf_conntrack_expect_fini(void); struct nf_conntrack_expect * __nf_ct_expect_find(const struct nf_conntrack_tuple *tuple); -- cgit v1.2.3 From a71c085562bcc99e8b711cab4222bff1f6e955da Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:33:47 -0700 Subject: [NETFILTER]: nf_conntrack: use hashtable for expectations Currently all expectations are kept on a global list that - needs to be searched for every new conncetion - needs to be walked for evicting expectations when a master connection has reached its limit - needs to be walked on connection destruction for connections that have open expectations This is obviously not good, especially when considering helpers like H.323 that register *lots* of expectations and can set up permanent expectations, but it also allows for an easy DoS against firewalls using connection tracking helpers. Use a hashtable for expectations to avoid incurring the search overhead for every new connection. The default hash size is 1/256 of the conntrack hash table size, this can be overriden using a module parameter. This patch only introduces the hash table for expectation lookups and keeps other users to reduce the noise, the following patches will get rid of it completely. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_core.h | 1 - include/net/netfilter/nf_conntrack_expect.h | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index a18f79c80db8..4056f5f08da1 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -84,7 +84,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l4proto *proto); extern struct hlist_head *nf_conntrack_hash; -extern struct list_head nf_ct_expect_list; extern rwlock_t nf_conntrack_lock ; extern struct hlist_head unconfirmed; diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index cf6a619664e8..424d4bdb9848 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -7,12 +7,17 @@ #include extern struct list_head nf_ct_expect_list; +extern struct hlist_head *nf_ct_expect_hash; +extern unsigned int nf_ct_expect_hsize; struct nf_conntrack_expect { /* Internal linked list (global expectation list) */ struct list_head list; + /* Hash member */ + struct hlist_node hnode; + /* We expect this tuple, with the following mask */ struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_mask mask; -- cgit v1.2.3 From b560580a13b180bc1e3cad7ffbc93388cc39be5d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:35:56 -0700 Subject: [NETFILTER]: nf_conntrack_expect: maintain per conntrack expectation list This patch brings back the per-conntrack expectation list that was removed around 2.6.10 to avoid walking all expectations on expectation eviction and conntrack destruction. As these were the last users of the global expectation list, this patch also kills that. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 2 ++ include/net/netfilter/nf_conntrack_expect.h | 5 ++--- include/net/netfilter/nf_conntrack_helper.h | 2 ++ 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 8f2cbb965f3d..d4f02eb0c66c 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -82,6 +82,8 @@ struct nf_conn_help { union nf_conntrack_help help; + struct hlist_head expectations; + /* Current number of expected connections */ unsigned int expecting; }; diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 424d4bdb9848..9d5af4e22c4f 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -6,14 +6,13 @@ #define _NF_CONNTRACK_EXPECT_H #include -extern struct list_head nf_ct_expect_list; extern struct hlist_head *nf_ct_expect_hash; extern unsigned int nf_ct_expect_hsize; struct nf_conntrack_expect { - /* Internal linked list (global expectation list) */ - struct list_head list; + /* Conntrack expectation list member */ + struct hlist_node lnode; /* Hash member */ struct hlist_node hnode; diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index d62e6f093af4..2c0e2e0fb7ff 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -52,6 +52,8 @@ extern void nf_ct_helper_put(struct nf_conntrack_helper *helper); extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); +extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp); + static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct) { return nf_ct_ext_find(ct, NF_CT_EXT_HELPER); -- cgit v1.2.3 From f264a7df08d50bb4a23be6a9aa06940e497ac1c4 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:36:24 -0700 Subject: [NETFILTER]: nf_conntrack_expect: introduce nf_conntrack_expect_max sysct As a last step of preventing DoS by creating lots of expectations, this patch introduces a global maximum and a sysctl to control it. The default is initialized to 4 * the expectation hash table size, which results in 1/64 of the default maxmimum of conntracks. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_expect.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 9d5af4e22c4f..cae1a0dce365 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -8,6 +8,7 @@ extern struct hlist_head *nf_ct_expect_hash; extern unsigned int nf_ct_expect_hsize; +extern unsigned int nf_ct_expect_max; struct nf_conntrack_expect { -- cgit v1.2.3 From b8a7fe6c10511fce10b20efa163123f4041f2550 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:36:46 -0700 Subject: [NETFILTER]: nf_conntrack_helper: use hashtable for conntrack helpers Eliminate the last global list searched for every new connection. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_helper.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 2c0e2e0fb7ff..d04f99964d94 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -15,8 +15,8 @@ struct module; struct nf_conntrack_helper -{ - struct list_head list; /* Internal use. */ +{ + struct hlist_node hnode; /* Internal use. */ const char *name; /* name of the module */ struct module *me; /* pointer to self */ -- cgit v1.2.3 From d3c3f4243e135b3d8c41d98be0cb2f54a4141abf Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:38:30 -0700 Subject: [NETFILTER]: ipt_CLUSTERIP: add compat code Adjust structure size and don't expect pointers passed in from userspace to be valid. Also replace an enum in an ABI structure by a fixed size type. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ipt_CLUSTERIP.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h index d9bceedfb3dc..daf50be22c9d 100644 --- a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h +++ b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h @@ -18,13 +18,13 @@ struct clusterip_config; struct ipt_clusterip_tgt_info { u_int32_t flags; - + /* only relevant for new ones */ u_int8_t clustermac[6]; u_int16_t num_total_nodes; u_int16_t num_local_nodes; u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; - enum clusterip_hashmode hash_mode; + u_int32_t hash_mode; u_int32_t hash_initval; struct clusterip_config *config; -- cgit v1.2.3 From 0d53778e81ac7af266dac8a20cc328328c327112 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:39:38 -0700 Subject: [NETFILTER]: Convert DEBUGP to pr_debug Convert DEBUGP to pr_debug and fix lots of non-compiling debug statements. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_pptp.h | 2 ++ include/net/netfilter/nf_conntrack_tuple.h | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_pptp.h b/include/linux/netfilter/nf_conntrack_pptp.h index 9d8144a488cd..c93061f33144 100644 --- a/include/linux/netfilter/nf_conntrack_pptp.h +++ b/include/linux/netfilter/nf_conntrack_pptp.h @@ -4,6 +4,8 @@ #include +extern const char *pptp_msg_name[]; + /* state of the control session */ enum pptp_ctrlsess_state { PPTP_SESSION_NONE, /* no session present */ diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index 99934ab538e6..040dae5f0c9e 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -120,11 +120,11 @@ struct nf_conntrack_tuple_mask #ifdef __KERNEL__ -#define NF_CT_DUMP_TUPLE(tp) \ -DEBUGP("tuple %p: %u %u " NIP6_FMT " %hu -> " NIP6_FMT " %hu\n", \ - (tp), (tp)->src.l3num, (tp)->dst.protonum, \ - NIP6(*(struct in6_addr *)(tp)->src.u3.all), ntohs((tp)->src.u.all), \ - NIP6(*(struct in6_addr *)(tp)->dst.u3.all), ntohs((tp)->dst.u.all)) +#define NF_CT_DUMP_TUPLE(tp) \ +pr_debug("tuple %p: %u %u " NIP6_FMT " %hu -> " NIP6_FMT " %hu\n", \ + (tp), (tp)->src.l3num, (tp)->dst.protonum, \ + NIP6(*(struct in6_addr *)(tp)->src.u3.all), ntohs((tp)->src.u.all), \ + NIP6(*(struct in6_addr *)(tp)->dst.u3.all), ntohs((tp)->dst.u.all)) /* If we're the first tuple, it's the original dir. */ #define NF_CT_DIRECTION(h) \ -- cgit v1.2.3 From ce7663d84a87bb4e1743f62950bf7dceed723a13 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:40:08 -0700 Subject: [NETFILTER]: nfnetlink_queue: don't unregister handler of other subsystem The queue handlers registered by ip[6]_queue.ko at initialization should not be unregistered according to requests from userland program using nfnetlink_queue. If we allow that, there is no way to register the handlers of built-in ip[6]_queue again. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 10b5c6275706..0eed0b7ab2df 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -275,7 +275,8 @@ struct nf_queue_handler { }; extern int nf_register_queue_handler(int pf, struct nf_queue_handler *qh); -extern int nf_unregister_queue_handler(int pf); +extern int nf_unregister_queue_handler(int pf, + struct nf_queue_handler *qh); extern void nf_unregister_queue_handlers(struct nf_queue_handler *qh); extern void nf_reinject(struct sk_buff *skb, struct nf_info *info, -- cgit v1.2.3 From 4839c52b01ca91be1c62761e08fb3deb3881e857 Mon Sep 17 00:00:00 2001 From: Philippe De Muyter Date: Mon, 9 Jul 2007 15:32:57 -0700 Subject: [IPV4]: Make ip_tos2prio const. Signed-off-by: Philippe De Muyter Signed-off-by: David S. Miller --- include/net/route.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index 188b8936bb27..f7ce6259f86f 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -135,7 +135,7 @@ static inline void ip_rt_put(struct rtable * rt) #define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3) -extern __u8 ip_tos2prio[16]; +extern const __u8 ip_tos2prio[16]; static inline char rt_tos2priority(u8 tos) { -- cgit v1.2.3 From 5b7f990927fe87ad3bec762a33c0e72bcbf6841e Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 11 Jul 2007 09:51:55 +0200 Subject: [Bluetooth] Add basics to better support and handle eSCO links To better support and handle eSCO links in the future a bunch of constants needs to be added and some basic routines need to be updated. This is the initial step. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 18 ++++++++++++++++-- include/net/bluetooth/hci_core.h | 2 ++ 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 93ce272a5d27..ebfb96b41106 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -107,14 +107,14 @@ enum { #define HCI_IDLE_TIMEOUT (6000) /* 6 seconds */ #define HCI_INIT_TIMEOUT (10000) /* 10 seconds */ -/* HCI Packet types */ +/* HCI data types */ #define HCI_COMMAND_PKT 0x01 #define HCI_ACLDATA_PKT 0x02 #define HCI_SCODATA_PKT 0x03 #define HCI_EVENT_PKT 0x04 #define HCI_VENDOR_PKT 0xff -/* HCI Packet types */ +/* HCI packet types */ #define HCI_DM1 0x0008 #define HCI_DM3 0x0400 #define HCI_DM5 0x4000 @@ -129,6 +129,14 @@ enum { #define SCO_PTYPE_MASK (HCI_HV1 | HCI_HV2 | HCI_HV3) #define ACL_PTYPE_MASK (~SCO_PTYPE_MASK) +/* eSCO packet types */ +#define ESCO_HV1 0x0001 +#define ESCO_HV2 0x0002 +#define ESCO_HV3 0x0004 +#define ESCO_EV3 0x0008 +#define ESCO_EV4 0x0010 +#define ESCO_EV5 0x0020 + /* ACL flags */ #define ACL_CONT 0x01 #define ACL_START 0x02 @@ -138,6 +146,7 @@ enum { /* Baseband links */ #define SCO_LINK 0x00 #define ACL_LINK 0x01 +#define ESCO_LINK 0x02 /* LMP features */ #define LMP_3SLOT 0x01 @@ -162,6 +171,11 @@ enum { #define LMP_PSCHEME 0x02 #define LMP_PCONTROL 0x04 +#define LMP_ESCO 0x80 + +#define LMP_EV4 0x01 +#define LMP_EV5 0x02 + #define LMP_SNIFF_SUBR 0x02 /* Connection modes */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 7c78744ec0fd..8f67c8a7169b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -78,6 +78,7 @@ struct hci_dev { __u16 voice_setting; __u16 pkt_type; + __u16 esco_type; __u16 link_policy; __u16 link_mode; @@ -452,6 +453,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define lmp_encrypt_capable(dev) ((dev)->features[0] & LMP_ENCRYPT) #define lmp_sniff_capable(dev) ((dev)->features[0] & LMP_SNIFF) #define lmp_sniffsubr_capable(dev) ((dev)->features[5] & LMP_SNIFF_SUBR) +#define lmp_esco_capable(dev) ((dev)->features[3] & LMP_ESCO) /* ----- HCI protocols ----- */ struct hci_proto { -- cgit v1.2.3 From c6c6e3e05c0b4349824efcdd36650e7be9d5c7c3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 10 Jul 2007 22:41:55 -0700 Subject: [NET]: Update comments for skb checksums Rusty (whose comments we should all study and emulate :) pointed out that our comments for skb checksums are no longer up-to-date. So here is a patch to 1) add the case of partial checksums on input; 2) update partial checksum case to mention csum_start/csum_offset; 3) mention the new IPv6 feature bit. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 625d73b07ab7..9391e4a4c344 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -65,13 +65,20 @@ * is able to produce some skb->csum, it MUST use COMPLETE, * not UNNECESSARY. * + * PARTIAL: identical to the case for output below. This may occur + * on a packet received directly from another Linux OS, e.g., + * a virtualised Linux kernel on the same host. The packet can + * be treated in the same way as UNNECESSARY except that on + * output (i.e., forwarding) the checksum must be filled in + * by the OS or the hardware. + * * B. Checksumming on output. * * NONE: skb is checksummed by protocol or csum is not required. * * PARTIAL: device is required to csum packet as seen by hard_start_xmit - * from skb->transport_header to the end and to record the checksum - * at skb->transport_header + skb->csum. + * from skb->csum_start to the end and to record the checksum + * at skb->csum_start + skb->csum_offset. * * Device must show its capabilities in dev->features, set * at device setup time. @@ -82,6 +89,7 @@ * TCP/UDP over IPv4. Sigh. Vendors like this * way by an unknown reason. Though, see comment above * about CHECKSUM_UNNECESSARY. 8) + * NETIF_F_IPV6_CSUM about as dumb as the last one but does IPv6 instead. * * Any questions? No questions, good. --ANK */ -- cgit v1.2.3 From bb4dbf9e61d0801927e7df2569bb3dd8287ea301 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 10 Jul 2007 22:55:49 -0700 Subject: [IPV6]: Do not send RH0 anymore. Based on . Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/ipv6.h | 4 ++-- include/net/ipv6.h | 4 ---- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 213b63be3c8f..cb3118cf277c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -27,8 +27,8 @@ struct in6_ifreq { int ifr6_ifindex; }; -#define IPV6_SRCRT_STRICT 0x01 /* this hop must be a neighbor */ -#define IPV6_SRCRT_TYPE_0 0 /* IPv6 type 0 Routing Header */ +#define IPV6_SRCRT_STRICT 0x01 /* Deprecated; will be removed */ +#define IPV6_SRCRT_TYPE_0 0 /* Deprecated; will be removed */ #define IPV6_SRCRT_TYPE_2 2 /* IPv6 type 2 Routing Header */ /* diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 78a0d06d98d5..46b9dce82f6e 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -512,10 +512,6 @@ extern int ipv6_ext_hdr(u8 nexthdr); extern int ipv6_find_tlv(struct sk_buff *skb, int offset, int type); -extern struct ipv6_txoptions * ipv6_invert_rthdr(struct sock *sk, - struct ipv6_rt_hdr *hdr); - - /* * socket options (ipv6_sockglue.c) */ -- cgit v1.2.3 From 4c752098f529f41abfc985426a3eca0f2cb96676 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 May 2007 13:28:48 +0900 Subject: [IPV6]: Make IPV6_{RECV,2292}RTHDR boolean options. Because reversing RH0 is no longer supported by deprecation of RH0, let's make IPV6_{RECV,2292}RTHDR boolean options. Boolean are more appropriate from standard POV. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/ipv6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index cb3118cf277c..97983dc9df13 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -299,8 +299,8 @@ struct ipv6_pinfo { /* pktoption flags */ union { struct { - __u16 srcrt:2, - osrcrt:2, + __u16 srcrt:1, + osrcrt:1, rxinfo:1, rxoinfo:1, rxhlim:1, -- cgit v1.2.3 From ed8b548ce3cb988f59a0fd9af6ccdc4f8198cd19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 10 Jul 2007 23:02:12 -0700 Subject: [DECNET]: Another unnecessary net/tcp.h inclusion in net/dn.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No longer needed. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/dn.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/dn.h b/include/net/dn.h index ac4ce9091747..627778384c84 100644 --- a/include/net/dn.h +++ b/include/net/dn.h @@ -3,7 +3,6 @@ #include #include -#include #include #define dn_ntohs(x) le16_to_cpu(x) -- cgit v1.2.3 From 1fd05ba5a2f2aa8e7b9b52ef55df850e2e7d54c9 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 11 Jul 2007 14:22:39 -0700 Subject: [AF_UNIX]: Rewrite garbage collector, fixes race. Throw out the old mark & sweep garbage collector and put in a refcounting cycle detecting one. The old one had a race with recvmsg, that resulted in false positives and hence data loss. The old algorithm operated on all unix sockets in the system, so any additional locking would have meant performance problems for all users of these. The new algorithm instead only operates on "in flight" sockets, which are very rare, and the additional locking for these doesn't negatively impact the vast majority of users. In fact it's probable, that there weren't *any* heavy senders of sockets over sockets, otherwise the above race would have been discovered long ago. The patch works OK with the app that exposed the race with the old code. The garbage collection has also been verified to work in a few simple cases. Signed-off-by: Miklos Szeredi Signed-off-by: David S. Miller --- include/net/af_unix.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 65f49fd7deff..6de1e9e35c73 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -79,9 +79,10 @@ struct unix_sock { struct mutex readlock; struct sock *peer; struct sock *other; - struct sock *gc_tree; + struct list_head link; atomic_t inflight; spinlock_t lock; + unsigned int gc_candidate : 1; wait_queue_head_t peer_wait; }; #define unix_sk(__sk) ((struct unix_sock *)__sk) -- cgit v1.2.3 From 29578624e354f56143d92510fff33a8b2aaa2c03 Mon Sep 17 00:00:00 2001 From: Olaf Kirch Date: Wed, 11 Jul 2007 19:32:02 -0700 Subject: [NET]: Fix races in net_rx_action vs netpoll. Keep netpoll/poll_napi from messing with the poll_list. Only net_rx_action is allowed to manipulate the list. Signed-off-by: Olaf Kirch Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8590d685d935..79cc3dab4be7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -261,6 +261,8 @@ enum netdev_state_t __LINK_STATE_LINKWATCH_PENDING, __LINK_STATE_DORMANT, __LINK_STATE_QDISC_RUNNING, + /* Set by the netpoll NAPI code */ + __LINK_STATE_POLL_LIST_FROZEN, }; @@ -1014,6 +1016,14 @@ static inline void netif_rx_complete(struct net_device *dev) { unsigned long flags; +#ifdef CONFIG_NETPOLL + /* Prevent race with netpoll - yes, this is a kludge. + * But at least it doesn't penalize the non-netpoll + * code path. */ + if (test_bit(__LINK_STATE_POLL_LIST_FROZEN, &dev->state)) + return; +#endif + local_irq_save(flags); __netif_rx_complete(dev); local_irq_restore(flags); -- cgit v1.2.3 From 8c979c26a0f093c13290320edda799d8335e50ae Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 11 Jul 2007 19:45:24 -0700 Subject: [VLAN]: Fix MAC address handling The VLAN MAC address handling is broken in multiple ways. When the address differs when setting it, the real device is put in promiscous mode twice, but never taken out again. Additionally it doesn't resync when the real device's address is changed and needlessly puts it in promiscous mode when the vlan device is still down. Fix by moving address handling to vlan_dev_open/vlan_dev_stop and properly deal with address changes in the device notifier. Also switch to dev_unicast_add (which needs the exact same handling). Since the set_mac_address handler is identical to the generic ethernet one with these changes, kill it and use ether_setup(). Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index c7912876a210..61a57dc2ac99 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -135,6 +135,7 @@ struct vlan_dev_info { int old_allmulti; /* similar to above. */ int old_promiscuity; /* similar to above. */ struct net_device *real_dev; /* the underlying device/interface */ + unsigned char real_dev_addr[ETH_ALEN]; struct proc_dir_entry *dent; /* Holds the proc data */ unsigned long cnt_inc_headroom_on_tx; /* How many times did we have to grow the skb on TX. */ unsigned long cnt_encap_on_xmit; /* How many times did we have to encapsulate the skb on TX. */ -- cgit v1.2.3 From db3d99c090e0cdb34b1274767e062bfddbb384bc Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 11 Jul 2007 19:46:26 -0700 Subject: [NET_SCHED]: ematch: module autoloading Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/pkt_cls.h | 17 +++++++---------- include/net/pkt_cls.h | 2 ++ 2 files changed, 9 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index c3f01b3085a4..30b8571e6b34 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -403,16 +403,13 @@ enum * 1..32767 Reserved for ematches inside kernel tree * 32768..65535 Free to use, not reliable */ -enum -{ - TCF_EM_CONTAINER, - TCF_EM_CMP, - TCF_EM_NBYTE, - TCF_EM_U32, - TCF_EM_META, - TCF_EM_TEXT, - __TCF_EM_MAX -}; +#define TCF_EM_CONTAINER 0 +#define TCF_EM_CMP 1 +#define TCF_EM_NBYTE 2 +#define TCF_EM_U32 3 +#define TCF_EM_META 4 +#define TCF_EM_TEXT 5 +#define TCF_EM_MAX 5 enum { diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 4129df708079..6c29920cbe29 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -306,6 +306,8 @@ static inline int tcf_em_tree_match(struct sk_buff *skb, return 1; } +#define MODULE_ALIAS_TCF_EMATCH(kind) MODULE_ALIAS("ematch-kind-" __stringify(kind)) + #else /* CONFIG_NET_EMATCH */ struct tcf_ematch_tree -- cgit v1.2.3