diff options
Diffstat (limited to 'net')
101 files changed, 1426 insertions, 683 deletions
diff --git a/net/802/Makefile b/net/802/Makefile index 01861929591a..977704a54f68 100644 --- a/net/802/Makefile +++ b/net/802/Makefile @@ -2,8 +2,6 @@ # Makefile for the Linux 802.x protocol layers. # -obj-y := p8023.o - # Check the p8022 selections against net/core/Makefile. obj-$(CONFIG_SYSCTL) += sysctl_net_802.o obj-$(CONFIG_LLC) += p8022.o psnap.o @@ -11,5 +9,5 @@ obj-$(CONFIG_TR) += p8022.o psnap.o tr.o sysctl_net_802.o obj-$(CONFIG_NET_FC) += fc.o obj-$(CONFIG_FDDI) += fddi.o obj-$(CONFIG_HIPPI) += hippi.o -obj-$(CONFIG_IPX) += p8022.o psnap.o +obj-$(CONFIG_IPX) += p8022.o psnap.o p8023.o obj-$(CONFIG_ATALK) += p8022.o psnap.o diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 72f3f7b8de80..bdb4d89730d2 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -295,7 +295,7 @@ static inline __be16 br_type_trans(struct sk_buff *skb, struct net_device *dev) unsigned char *rawp; eth = eth_hdr(skb); - if (*eth->h_dest & 1) { + if (is_multicast_ether_addr(eth->h_dest)) { if (memcmp(eth->h_dest, dev->broadcast, ETH_ALEN) == 0) skb->pkt_type = PACKET_BROADCAST; else diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 158a9c46d863..f57cde78c3de 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -480,13 +480,8 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb) BT_DBG("dlc %p tty %p len %d", dlc, tty, skb->len); if (test_bit(TTY_DONT_FLIP, &tty->flags)) { - register int i; - for (i = 0; i < skb->len; i++) { - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - tty_flip_buffer_push(tty); - - tty_insert_flip_char(tty, skb->data[i], 0); - } + tty_buffer_request_room(tty, skb->len); + tty_insert_flip_string(tty, skb->data, skb->len); tty_flip_buffer_push(tty); } else tty->ldisc.receive_buf(tty, skb->data, NULL, skb->len); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index c387852f753a..e3a73cead6b6 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -68,7 +68,7 @@ int br_handle_frame_finish(struct sk_buff *skb) } } - if (dest[0] & 1) { + if (is_multicast_ether_addr(dest)) { br_flood_forward(br, skb, !passedup); if (!passedup) br_pass_frame_up(br, skb); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 223f8270daee..7cac3fb9f809 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -394,8 +394,9 @@ inhdr_error: * target in particular. Save the original destination IP * address to be able to detect DNAT afterwards. */ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { struct iphdr *iph; __u32 len; @@ -412,8 +413,10 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, goto out; if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + u8 *vhdr = skb->data; skb_pull(skb, VLAN_HLEN); - (skb)->nh.raw += VLAN_HLEN; + skb_postpull_rcsum(skb, vhdr, VLAN_HLEN); + skb->nh.raw += VLAN_HLEN; } return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); } @@ -429,8 +432,10 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, goto out; if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + u8 *vhdr = skb->data; skb_pull(skb, VLAN_HLEN); - (skb)->nh.raw += VLAN_HLEN; + skb_postpull_rcsum(skb, vhdr, VLAN_HLEN); + skb->nh.raw += VLAN_HLEN; } if (!pskb_may_pull(skb, sizeof(struct iphdr))) diff --git a/net/core/dev.c b/net/core/dev.c index 5081287923d5..bf66b114d3c2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1092,15 +1092,12 @@ int skb_checksum_help(struct sk_buff *skb, int inward) goto out; } - if (offset > (int)skb->len) - BUG(); + BUG_ON(offset > (int)skb->len); csum = skb_checksum(skb, offset, skb->len-offset, 0); offset = skb->tail - skb->h.raw; - if (offset <= 0) - BUG(); - if (skb->csum + 2 > offset) - BUG(); + BUG_ON(offset <= 0); + BUG_ON(skb->csum + 2 > offset); *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum); skb->ip_summed = CHECKSUM_NONE; diff --git a/net/core/filter.c b/net/core/filter.c index 8964d3445588..9eb9d0017a01 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -75,7 +75,7 @@ static inline void *load_pointer(struct sk_buff *skb, int k, * len is the number of filter blocks in the array. */ -int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) +unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) { struct sock_filter *fentry; /* We walk down these */ void *ptr; @@ -241,9 +241,9 @@ load_b: A = X; continue; case BPF_RET|BPF_K: - return ((unsigned int)fentry->k); + return fentry->k; case BPF_RET|BPF_A: - return ((unsigned int)A); + return A; case BPF_ST: mem[fentry->k] = A; continue; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index e1da81d261d1..58adaf208dd6 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -16,6 +16,7 @@ #include <net/sock.h> #include <linux/rtnetlink.h> #include <linux/wireless.h> +#include <net/iw_handler.h> #define to_class_dev(obj) container_of(obj,struct class_device,kobj) #define to_net_dev(class) container_of(class, struct net_device, class_dev) @@ -294,13 +295,19 @@ static ssize_t wireless_show(struct class_device *cd, char *buf, char *)) { struct net_device *dev = to_net_dev(cd); - const struct iw_statistics *iw; + const struct iw_statistics *iw = NULL; ssize_t ret = -EINVAL; read_lock(&dev_base_lock); - if (dev_isalive(dev) && dev->get_wireless_stats - && (iw = dev->get_wireless_stats(dev)) != NULL) - ret = (*format)(iw, buf); + if (dev_isalive(dev)) { + if(dev->wireless_handlers && + dev->wireless_handlers->get_wireless_stats) + iw = dev->wireless_handlers->get_wireless_stats(dev); + else if (dev->get_wireless_stats) + iw = dev->get_wireless_stats(dev); + if (iw != NULL) + ret = (*format)(iw, buf); + } read_unlock(&dev_base_lock); return ret; @@ -402,7 +409,8 @@ void netdev_unregister_sysfs(struct net_device * net) sysfs_remove_group(&class_dev->kobj, &netstat_group); #ifdef WIRELESS_EXT - if (net->get_wireless_stats) + if (net->get_wireless_stats || (net->wireless_handlers && + net->wireless_handlers->get_wireless_stats)) sysfs_remove_group(&class_dev->kobj, &wireless_group); #endif class_device_del(class_dev); @@ -427,10 +435,12 @@ int netdev_register_sysfs(struct net_device *net) goto out_unreg; #ifdef WIRELESS_EXT - if (net->get_wireless_stats && - (ret = sysfs_create_group(&class_dev->kobj, &wireless_group))) - goto out_cleanup; - + if (net->get_wireless_stats || (net->wireless_handlers && + net->wireless_handlers->get_wireless_stats)) { + ret = sysfs_create_group(&class_dev->kobj, &wireless_group); + if (ret) + goto out_cleanup; + } return 0; out_cleanup: if (net->get_stats) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 06cad2d63e8a..631056d44b7b 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -473,7 +473,6 @@ static char version[] __initdata = VERSION; static int pktgen_remove_device(struct pktgen_thread* t, struct pktgen_dev *i); static int pktgen_add_device(struct pktgen_thread* t, const char* ifname); -static struct pktgen_thread* pktgen_find_thread(const char* name); static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread* t, const char* ifname); static int pktgen_device_event(struct notifier_block *, unsigned long, void *); static void pktgen_run_all_threads(void); @@ -2883,7 +2882,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char* ifname) return add_dev_to_thread(t, pkt_dev); } -static struct pktgen_thread *pktgen_find_thread(const char* name) +static struct pktgen_thread * __init pktgen_find_thread(const char* name) { struct pktgen_thread *t = NULL; @@ -2900,7 +2899,7 @@ static struct pktgen_thread *pktgen_find_thread(const char* name) return t; } -static int pktgen_create_thread(const char* name, int cpu) +static int __init pktgen_create_thread(const char* name, int cpu) { struct pktgen_thread *t = NULL; struct proc_dir_entry *pe; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 070f91cfde59..d0732e9c8560 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -791,8 +791,7 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc) int end = offset + skb_shinfo(skb)->frags[i].size; if (end > len) { if (skb_cloned(skb)) { - if (!realloc) - BUG(); + BUG_ON(!realloc); if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) return -ENOMEM; } @@ -894,8 +893,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) struct sk_buff *insp = NULL; do { - if (!list) - BUG(); + BUG_ON(!list); if (list->len <= eat) { /* Eaten as whole. */ @@ -1199,8 +1197,7 @@ unsigned int skb_checksum(const struct sk_buff *skb, int offset, start = end; } } - if (len) - BUG(); + BUG_ON(len); return csum; } @@ -1282,8 +1279,7 @@ unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, start = end; } } - if (len) - BUG(); + BUG_ON(len); return csum; } @@ -1297,8 +1293,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) else csstart = skb_headlen(skb); - if (csstart > skb_headlen(skb)) - BUG(); + BUG_ON(csstart > skb_headlen(skb)); memcpy(to, skb->data, csstart); diff --git a/net/core/utils.c b/net/core/utils.c index 587eb7787deb..ac1d1fcf8673 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -162,7 +162,7 @@ EXPORT_SYMBOL(net_srandom); * is otherwise not dependent on the TCP/IP stack. */ -__u32 in_aton(const char *str) +__be32 in_aton(const char *str) { unsigned long l; unsigned int val; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 3f244670764a..00f983226672 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -986,6 +986,7 @@ int dccp_v4_rcv(struct sk_buff *skb) if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; + nf_reset(skb); return sk_receive_skb(sk, skb); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index c609dc78f487..df074259f9c3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -27,6 +27,7 @@ #include <net/ipv6.h> #include <net/protocol.h> #include <net/transp_v6.h> +#include <net/ip6_checksum.h> #include <net/xfrm.h> #include "dccp.h" @@ -1028,7 +1029,7 @@ discard: return 0; } -static int dccp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int dccp_v6_rcv(struct sk_buff **pskb) { const struct dccp_hdr *dh; struct sk_buff *skb = *pskb; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 9f4dbeb59315..9890fd97e538 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -163,7 +163,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) skb_pull(skb,ETH_HLEN); eth = eth_hdr(skb); - if (*eth->h_dest&1) { + if (is_multicast_ether_addr(eth->h_dest)) { if (!compare_ether_addr(eth->h_dest, dev->broadcast)) skb->pkt_type = PACKET_BROADCAST; else diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c index 073aebdf0f67..f8dca31be5dd 100644 --- a/net/ieee80211/ieee80211_crypt_wep.c +++ b/net/ieee80211/ieee80211_crypt_wep.c @@ -75,22 +75,14 @@ static void prism2_wep_deinit(void *priv) kfree(priv); } -/* Perform WEP encryption on given skb that has at least 4 bytes of headroom - * for IV and 4 bytes of tailroom for ICV. Both IV and ICV will be transmitted, - * so the payload length increases with 8 bytes. - * - * WEP frame payload: IV + TX key idx, RC4(data), ICV = RC4(CRC32(data)) - */ -static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) +/* Add WEP IV/key info to a frame that has at least 4 bytes of headroom */ +static int prism2_wep_build_iv(struct sk_buff *skb, int hdr_len, void *priv) { struct prism2_wep_data *wep = priv; - u32 crc, klen, len; - u8 key[WEP_KEY_LEN + 3]; - u8 *pos, *icv; - struct scatterlist sg; - - if (skb_headroom(skb) < 4 || skb_tailroom(skb) < 4 || - skb->len < hdr_len) + u32 klen, len; + u8 *pos; + + if (skb_headroom(skb) < 4 || skb->len < hdr_len) return -1; len = skb->len - hdr_len; @@ -112,15 +104,47 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) } /* Prepend 24-bit IV to RC4 key and TX frame */ - *pos++ = key[0] = (wep->iv >> 16) & 0xff; - *pos++ = key[1] = (wep->iv >> 8) & 0xff; - *pos++ = key[2] = wep->iv & 0xff; + *pos++ = (wep->iv >> 16) & 0xff; + *pos++ = (wep->iv >> 8) & 0xff; + *pos++ = wep->iv & 0xff; *pos++ = wep->key_idx << 6; + return 0; +} + +/* Perform WEP encryption on given skb that has at least 4 bytes of headroom + * for IV and 4 bytes of tailroom for ICV. Both IV and ICV will be transmitted, + * so the payload length increases with 8 bytes. + * + * WEP frame payload: IV + TX key idx, RC4(data), ICV = RC4(CRC32(data)) + */ +static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) +{ + struct prism2_wep_data *wep = priv; + u32 crc, klen, len; + u8 *pos, *icv; + struct scatterlist sg; + u8 key[WEP_KEY_LEN + 3]; + + /* other checks are in prism2_wep_build_iv */ + if (skb_tailroom(skb) < 4) + return -1; + + /* add the IV to the frame */ + if (prism2_wep_build_iv(skb, hdr_len, priv)) + return -1; + + /* Copy the IV into the first 3 bytes of the key */ + memcpy(key, skb->data + hdr_len, 3); + /* Copy rest of the WEP key (the secret part) */ memcpy(key + 3, wep->key, wep->key_len); + + len = skb->len - hdr_len - 4; + pos = skb->data + hdr_len + 4; + klen = 3 + wep->key_len; - /* Append little-endian CRC32 and encrypt it to produce ICV */ + /* Append little-endian CRC32 over only the data and encrypt it to produce ICV */ crc = ~crc32_le(~0, pos, len); icv = skb_put(skb, 4); icv[0] = crc; @@ -231,6 +255,7 @@ static struct ieee80211_crypto_ops ieee80211_crypt_wep = { .name = "WEP", .init = prism2_wep_init, .deinit = prism2_wep_deinit, + .build_iv = prism2_wep_build_iv, .encrypt_mpdu = prism2_wep_encrypt, .decrypt_mpdu = prism2_wep_decrypt, .encrypt_msdu = NULL, diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c index 445f206e65e0..e5b33c8d5dbc 100644 --- a/net/ieee80211/ieee80211_tx.c +++ b/net/ieee80211/ieee80211_tx.c @@ -288,7 +288,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) /* Determine total amount of storage required for TXB packets */ bytes = skb->len + SNAP_SIZE + sizeof(u16); - if (host_encrypt) + if (host_encrypt || host_build_iv) fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA | IEEE80211_FCTL_PROTECTED; else diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c index 181755f2aa8b..406d5b964905 100644 --- a/net/ieee80211/ieee80211_wx.c +++ b/net/ieee80211/ieee80211_wx.c @@ -284,7 +284,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, }; int i, key, key_provided, len; struct ieee80211_crypt_data **crypt; - int host_crypto = ieee->host_encrypt || ieee->host_decrypt; + int host_crypto = ieee->host_encrypt || ieee->host_decrypt || ieee->host_build_iv; IEEE80211_DEBUG_WX("SET_ENCODE\n"); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index be5a519cd2f8..105039eb7629 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -899,8 +899,7 @@ static void icmp_address_reply(struct sk_buff *skb) u32 _mask, *mp; mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask); - if (mp == NULL) - BUG(); + BUG_ON(mp == NULL); for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { if (*mp == ifa->ifa_mask && inet_ifa_match(rt->rt_src, ifa)) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index c49908192047..457db99c76df 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -50,9 +50,10 @@ static struct sock *idiagnl; #define INET_DIAG_PUT(skb, attrtype, attrlen) \ RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) -static int inet_diag_fill(struct sk_buff *skb, struct sock *sk, - int ext, u32 pid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) +static int inet_csk_diag_fill(struct sock *sk, + struct sk_buff *skb, + int ext, u32 pid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh) { const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -70,20 +71,22 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk, nlh->nlmsg_flags = nlmsg_flags; r = NLMSG_DATA(nlh); - if (sk->sk_state != TCP_TIME_WAIT) { - if (ext & (1 << (INET_DIAG_MEMINFO - 1))) - minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, - sizeof(*minfo)); - if (ext & (1 << (INET_DIAG_INFO - 1))) - info = INET_DIAG_PUT(skb, INET_DIAG_INFO, - handler->idiag_info_size); - - if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) { - size_t len = strlen(icsk->icsk_ca_ops->name); - strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1), - icsk->icsk_ca_ops->name); - } + BUG_ON(sk->sk_state == TCP_TIME_WAIT); + + if (ext & (1 << (INET_DIAG_MEMINFO - 1))) + minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo)); + + if (ext & (1 << (INET_DIAG_INFO - 1))) + info = INET_DIAG_PUT(skb, INET_DIAG_INFO, + handler->idiag_info_size); + + if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) { + const size_t len = strlen(icsk->icsk_ca_ops->name); + + strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1), + icsk->icsk_ca_ops->name); } + r->idiag_family = sk->sk_family; r->idiag_state = sk->sk_state; r->idiag_timer = 0; @@ -93,37 +96,6 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk, r->id.idiag_cookie[0] = (u32)(unsigned long)sk; r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); - if (r->idiag_state == TCP_TIME_WAIT) { - const struct inet_timewait_sock *tw = inet_twsk(sk); - long tmo = tw->tw_ttd - jiffies; - if (tmo < 0) - tmo = 0; - - r->id.idiag_sport = tw->tw_sport; - r->id.idiag_dport = tw->tw_dport; - r->id.idiag_src[0] = tw->tw_rcv_saddr; - r->id.idiag_dst[0] = tw->tw_daddr; - r->idiag_state = tw->tw_substate; - r->idiag_timer = 3; - r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ; - r->idiag_rqueue = 0; - r->idiag_wqueue = 0; - r->idiag_uid = 0; - r->idiag_inode = 0; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - if (r->idiag_family == AF_INET6) { - const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); - - ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, - &tw6->tw_v6_rcv_saddr); - ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, - &tw6->tw_v6_daddr); - } -#endif - nlh->nlmsg_len = skb->tail - b; - return skb->len; - } - r->id.idiag_sport = inet->sport; r->id.idiag_dport = inet->dport; r->id.idiag_src[0] = inet->rcv_saddr; @@ -185,7 +157,75 @@ nlmsg_failure: return -1; } -static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) +static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, + struct sk_buff *skb, int ext, u32 pid, + u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh) +{ + long tmo; + struct inet_diag_msg *r; + const unsigned char *previous_tail = skb->tail; + struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq, + unlh->nlmsg_type, sizeof(*r)); + + r = NLMSG_DATA(nlh); + BUG_ON(tw->tw_state != TCP_TIME_WAIT); + + nlh->nlmsg_flags = nlmsg_flags; + + tmo = tw->tw_ttd - jiffies; + if (tmo < 0) + tmo = 0; + + r->idiag_family = tw->tw_family; + r->idiag_state = tw->tw_state; + r->idiag_timer = 0; + r->idiag_retrans = 0; + r->id.idiag_if = tw->tw_bound_dev_if; + r->id.idiag_cookie[0] = (u32)(unsigned long)tw; + r->id.idiag_cookie[1] = (u32)(((unsigned long)tw >> 31) >> 1); + r->id.idiag_sport = tw->tw_sport; + r->id.idiag_dport = tw->tw_dport; + r->id.idiag_src[0] = tw->tw_rcv_saddr; + r->id.idiag_dst[0] = tw->tw_daddr; + r->idiag_state = tw->tw_substate; + r->idiag_timer = 3; + r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ; + r->idiag_rqueue = 0; + r->idiag_wqueue = 0; + r->idiag_uid = 0; + r->idiag_inode = 0; +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + if (tw->tw_family == AF_INET6) { + const struct inet6_timewait_sock *tw6 = + inet6_twsk((struct sock *)tw); + + ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, + &tw6->tw_v6_rcv_saddr); + ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, + &tw6->tw_v6_daddr); + } +#endif + nlh->nlmsg_len = skb->tail - previous_tail; + return skb->len; +nlmsg_failure: + skb_trim(skb, previous_tail - skb->data); + return -1; +} + +static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, + int ext, u32 pid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh) +{ + if (sk->sk_state == TCP_TIME_WAIT) + return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, + skb, ext, pid, seq, nlmsg_flags, + unlh); + return inet_csk_diag_fill(sk, skb, ext, pid, seq, nlmsg_flags, unlh); +} + +static int inet_diag_get_exact(struct sk_buff *in_skb, + const struct nlmsghdr *nlh) { int err; struct sock *sk; @@ -235,7 +275,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nl if (!rep) goto out; - if (inet_diag_fill(rep, sk, req->idiag_ext, + if (sk_diag_fill(sk, rep, req->idiag_ext, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, nlh) <= 0) BUG(); @@ -283,7 +323,7 @@ static int bitstring_match(const u32 *a1, const u32 *a2, int bits) static int inet_diag_bc_run(const void *bc, int len, - const struct inet_diag_entry *entry) + const struct inet_diag_entry *entry) { while (len > 0) { int yes = 1; @@ -322,7 +362,7 @@ static int inet_diag_bc_run(const void *bc, int len, yes = 0; break; } - + if (cond->prefix_len == 0) break; @@ -331,7 +371,8 @@ static int inet_diag_bc_run(const void *bc, int len, else addr = entry->daddr; - if (bitstring_match(addr, cond->addr, cond->prefix_len)) + if (bitstring_match(addr, cond->addr, + cond->prefix_len)) break; if (entry->family == AF_INET6 && cond->family == AF_INET) { @@ -346,7 +387,7 @@ static int inet_diag_bc_run(const void *bc, int len, } } - if (yes) { + if (yes) { len -= op->yes; bc += op->yes; } else { @@ -407,14 +448,15 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) default: return -EINVAL; } - bc += op->yes; + bc += op->yes; len -= op->yes; } return len == 0 ? 0 : -EINVAL; } -static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk, - struct netlink_callback *cb) +static int inet_csk_diag_dump(struct sock *sk, + struct sk_buff *skb, + struct netlink_callback *cb) { struct inet_diag_req *r = NLMSG_DATA(cb->nlh); @@ -444,14 +486,50 @@ static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk, return 0; } - return inet_diag_fill(skb, sk, r->idiag_ext, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); + return inet_csk_diag_fill(sk, skb, r->idiag_ext, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); +} + +static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, + struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct inet_diag_req *r = NLMSG_DATA(cb->nlh); + + if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { + struct inet_diag_entry entry; + struct rtattr *bc = (struct rtattr *)(r + 1); + + entry.family = tw->tw_family; +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + if (tw->tw_family == AF_INET6) { + struct inet6_timewait_sock *tw6 = + inet6_twsk((struct sock *)tw); + entry.saddr = tw6->tw_v6_rcv_saddr.s6_addr32; + entry.daddr = tw6->tw_v6_daddr.s6_addr32; + } else +#endif + { + entry.saddr = &tw->tw_rcv_saddr; + entry.daddr = &tw->tw_daddr; + } + entry.sport = tw->tw_num; + entry.dport = ntohs(tw->tw_dport); + entry.userlocks = 0; + + if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) + return 0; + } + + return inet_twsk_diag_fill(tw, skb, r->idiag_ext, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, - struct request_sock *req, - u32 pid, u32 seq, - const struct nlmsghdr *unlh) + struct request_sock *req, u32 pid, u32 seq, + const struct nlmsghdr *unlh) { const struct inet_request_sock *ireq = inet_rsk(req); struct inet_sock *inet = inet_sk(sk); @@ -504,7 +582,7 @@ nlmsg_failure: } static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, - struct netlink_callback *cb) + struct netlink_callback *cb) { struct inet_diag_entry entry; struct inet_diag_req *r = NLMSG_DATA(cb->nlh); @@ -556,7 +634,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, inet6_rsk(req)->loc_addr.s6_addr32 : #endif &ireq->loc_addr; - entry.daddr = + entry.daddr = #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) (entry.family == AF_INET6) ? inet6_rsk(req)->rmt_addr.s6_addr32 : @@ -599,7 +677,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) handler = inet_diag_table[cb->nlh->nlmsg_type]; BUG_ON(handler == NULL); hashinfo = handler->idiag_hashinfo; - + s_i = cb->args[1]; s_num = num = cb->args[2]; @@ -630,7 +708,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->args[3] > 0) goto syn_recv; - if (inet_diag_dump_sock(skb, sk, cb) < 0) { + if (inet_csk_diag_dump(sk, skb, cb) < 0) { inet_listen_unlock(hashinfo); goto done; } @@ -672,7 +750,6 @@ skip_listen_ht: s_num = 0; read_lock_bh(&head->lock); - num = 0; sk_for_each(sk, node, &head->chain) { struct inet_sock *inet = inet_sk(sk); @@ -684,9 +761,10 @@ skip_listen_ht: if (r->id.idiag_sport != inet->sport && r->id.idiag_sport) goto next_normal; - if (r->id.idiag_dport != inet->dport && r->id.idiag_dport) + if (r->id.idiag_dport != inet->dport && + r->id.idiag_dport) goto next_normal; - if (inet_diag_dump_sock(skb, sk, cb) < 0) { + if (inet_csk_diag_dump(sk, skb, cb) < 0) { read_unlock_bh(&head->lock); goto done; } @@ -695,19 +773,20 @@ next_normal: } if (r->idiag_states & TCPF_TIME_WAIT) { - sk_for_each(sk, node, + struct inet_timewait_sock *tw; + + inet_twsk_for_each(tw, node, &hashinfo->ehash[i + hashinfo->ehash_size].chain) { - struct inet_sock *inet = inet_sk(sk); if (num < s_num) goto next_dying; - if (r->id.idiag_sport != inet->sport && + if (r->id.idiag_sport != tw->tw_sport && r->id.idiag_sport) goto next_dying; - if (r->id.idiag_dport != inet->dport && + if (r->id.idiag_dport != tw->tw_dport && r->id.idiag_dport) goto next_dying; - if (inet_diag_dump_sock(skb, sk, cb) < 0) { + if (inet_twsk_diag_dump(tw, skb, cb) < 0) { read_unlock_bh(&head->lock); goto done; } @@ -724,8 +803,7 @@ done: return skb->len; } -static __inline__ int -inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static inline int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { if (!(nlh->nlmsg_flags&NLM_F_REQUEST)) return 0; @@ -755,9 +833,8 @@ inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } return netlink_dump_start(idiagnl, skb, nlh, inet_diag_dump, NULL); - } else { + } else return inet_diag_get_exact(skb, nlh); - } err_inval: return -EINVAL; @@ -766,15 +843,15 @@ err_inval: static inline void inet_diag_rcv_skb(struct sk_buff *skb) { - int err; - struct nlmsghdr * nlh; - if (skb->len >= NLMSG_SPACE(0)) { - nlh = (struct nlmsghdr *)skb->data; - if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) + int err; + struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data; + + if (nlh->nlmsg_len < sizeof(*nlh) || + skb->len < nlh->nlmsg_len) return; err = inet_diag_rcv_msg(skb, nlh); - if (err || nlh->nlmsg_flags & NLM_F_ACK) + if (err || nlh->nlmsg_flags & NLM_F_ACK) netlink_ack(skb, nlh, err); } } diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index ce5fe3f74a3d..2160874ce7aa 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -304,8 +304,7 @@ static void unlink_from_pool(struct inet_peer *p) /* look for a node to insert instead of p */ struct inet_peer *t; t = lookup_rightempty(p); - if (*stackptr[-1] != t) - BUG(); + BUG_ON(*stackptr[-1] != t); **--stackptr = t->avl_left; /* t is removed, t->v4daddr > x->v4daddr for any * x in p->avl_left subtree. @@ -314,8 +313,7 @@ static void unlink_from_pool(struct inet_peer *p) t->avl_left = p->avl_left; t->avl_right = p->avl_right; t->avl_height = p->avl_height; - if (delp[1] != &p->avl_left) - BUG(); + BUG_ON(delp[1] != &p->avl_left); delp[1] = &t->avl_left; /* was &p->avl_left */ } peer_avl_rebalance(stack, stackptr); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index ce2b70ce4018..2a8adda15e11 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -383,7 +383,7 @@ out_nomem: */ static inline struct ipq *ip_find(struct iphdr *iph, u32 user) { - __u16 id = iph->id; + __be16 id = iph->id; __u32 saddr = iph->saddr; __u32 daddr = iph->daddr; __u8 protocol = iph->protocol; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 912c42f57c79..1e93eafa7af1 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -188,7 +188,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key) } if (ipgre_fb_tunnel_dev->flags&IFF_UP) - return ipgre_fb_tunnel_dev->priv; + return netdev_priv(ipgre_fb_tunnel_dev); return NULL; } @@ -278,7 +278,7 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int return NULL; dev->init = ipgre_tunnel_init; - nt = dev->priv; + nt = netdev_priv(dev); nt->parms = *parms; if (register_netdevice(dev) < 0) { @@ -286,9 +286,6 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int goto failed; } - nt = dev->priv; - nt->parms = *parms; - dev_hold(dev); ipgre_tunnel_link(nt); return nt; @@ -299,7 +296,7 @@ failed: static void ipgre_tunnel_uninit(struct net_device *dev) { - ipgre_tunnel_unlink((struct ip_tunnel*)dev->priv); + ipgre_tunnel_unlink(netdev_priv(dev)); dev_put(dev); } @@ -518,7 +515,7 @@ out: skb2->dst->ops->update_pmtu(skb2->dst, rel_info); rel_info = htonl(rel_info); } else if (type == ICMP_TIME_EXCEEDED) { - struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv; + struct ip_tunnel *t = netdev_priv(skb2->dev); if (t->parms.iph.ttl) { rel_type = ICMP_DEST_UNREACH; rel_code = ICMP_HOST_UNREACH; @@ -669,7 +666,7 @@ drop_nolock: static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { - struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + struct ip_tunnel *tunnel = netdev_priv(dev); struct net_device_stats *stats = &tunnel->stat; struct iphdr *old_iph = skb->nh.iph; struct iphdr *tiph; @@ -832,6 +829,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; skb->nh.raw = skb_push(skb, gre_hlen); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED); dst_release(skb->dst); skb->dst = &rt->u.dst; @@ -914,7 +912,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) t = ipgre_tunnel_locate(&p, 0); } if (t == NULL) - t = (struct ip_tunnel*)dev->priv; + t = netdev_priv(dev); memcpy(&p, &t->parms, sizeof(p)); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) err = -EFAULT; @@ -954,7 +952,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) } else { unsigned nflags=0; - t = (struct ip_tunnel*)dev->priv; + t = netdev_priv(dev); if (MULTICAST(p.iph.daddr)) nflags = IFF_BROADCAST; @@ -1003,7 +1001,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) if ((t = ipgre_tunnel_locate(&p, 0)) == NULL) goto done; err = -EPERM; - if (t == ipgre_fb_tunnel_dev->priv) + if (t == netdev_priv(ipgre_fb_tunnel_dev)) goto done; dev = t->dev; } @@ -1020,12 +1018,12 @@ done: static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev) { - return &(((struct ip_tunnel*)dev->priv)->stat); + return &(((struct ip_tunnel*)netdev_priv(dev))->stat); } static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) { - struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + struct ip_tunnel *tunnel = netdev_priv(dev); if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen) return -EINVAL; dev->mtu = new_mtu; @@ -1065,7 +1063,7 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len) { - struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + struct ip_tunnel *t = netdev_priv(dev); struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); u16 *p = (u16*)(iph+1); @@ -1092,7 +1090,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned sh static int ipgre_open(struct net_device *dev) { - struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + struct ip_tunnel *t = netdev_priv(dev); if (MULTICAST(t->parms.iph.daddr)) { struct flowi fl = { .oif = t->parms.link, @@ -1116,7 +1114,7 @@ static int ipgre_open(struct net_device *dev) static int ipgre_close(struct net_device *dev) { - struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + struct ip_tunnel *t = netdev_priv(dev); if (MULTICAST(t->parms.iph.daddr) && t->mlink) { struct in_device *in_dev = inetdev_by_index(t->mlink); if (in_dev) { @@ -1156,7 +1154,7 @@ static int ipgre_tunnel_init(struct net_device *dev) int mtu = ETH_DATA_LEN; int addend = sizeof(struct iphdr) + 4; - tunnel = (struct ip_tunnel*)dev->priv; + tunnel = netdev_priv(dev); iph = &tunnel->parms.iph; tunnel->dev = dev; @@ -1220,7 +1218,7 @@ static int ipgre_tunnel_init(struct net_device *dev) static int __init ipgre_fb_tunnel_init(struct net_device *dev) { - struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; tunnel->dev = dev; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index e45846ae570b..18d7fad474d7 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -185,7 +185,6 @@ int ip_call_ra_chain(struct sk_buff *skb) raw_rcv(last, skb2); } last = sk; - nf_reset(skb); } } @@ -204,10 +203,6 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb) __skb_pull(skb, ihl); - /* Free reference early: we don't need it any more, and it may - hold ip_conntrack module loaded indefinitely. */ - nf_reset(skb); - /* Point into the IP datagram, just past the header. */ skb->h.raw = skb->data; @@ -232,10 +227,12 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb) if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { int ret; - if (!ipprot->no_policy && - !xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - kfree_skb(skb); - goto out; + if (!ipprot->no_policy) { + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { + kfree_skb(skb); + goto out; + } + nf_reset(skb); } ret = ipprot->handler(skb); if (ret < 0) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 71da31818cfc..3324fbfe528a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -69,6 +69,7 @@ #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> +#include <net/xfrm.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/arp.h> @@ -85,6 +86,8 @@ int sysctl_ip_default_ttl = IPDEFTTL; +static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)); + /* Generate a checksum for an outgoing IP datagram. */ __inline__ void ip_send_check(struct iphdr *iph) { @@ -202,6 +205,11 @@ static inline int ip_finish_output2(struct sk_buff *skb) static inline int ip_finish_output(struct sk_buff *skb) { +#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) + /* Policy lookup after SNAT yielded a new policy */ + if (skb->dst->xfrm != NULL) + return xfrm4_output_finish(skb); +#endif if (skb->len > dst_mtu(skb->dst) && !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) return ip_fragment(skb, ip_finish_output2); @@ -409,7 +417,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) * single device frame, and queue such a frame for sending. */ -int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) +static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) { struct iphdr *iph; int raw = 0; @@ -418,7 +426,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) struct sk_buff *skb2; unsigned int mtu, hlen, left, len, ll_rs; int offset; - int not_last_frag; + __be16 not_last_frag; struct rtable *rt = (struct rtable*)skb->dst; int err = 0; @@ -1180,7 +1188,7 @@ int ip_push_pending_frames(struct sock *sk) struct ip_options *opt = NULL; struct rtable *rt = inet->cork.rt; struct iphdr *iph; - int df = 0; + __be16 df = 0; __u8 ttl; int err = 0; @@ -1391,7 +1399,6 @@ void __init ip_init(void) #endif } -EXPORT_SYMBOL(ip_fragment); EXPORT_SYMBOL(ip_generic_getfrag); EXPORT_SYMBOL(ip_queue_xmit); EXPORT_SYMBOL(ip_send_check); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 35571cff81c6..bc5ca23b2646 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -244,7 +244,7 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c if (dev == NULL) return NULL; - nt = dev->priv; + nt = netdev_priv(dev); SET_MODULE_OWNER(dev); dev->init = ipip_tunnel_init; nt->parms = *parms; @@ -269,7 +269,7 @@ static void ipip_tunnel_uninit(struct net_device *dev) tunnels_wc[0] = NULL; write_unlock_bh(&ipip_lock); } else - ipip_tunnel_unlink((struct ip_tunnel*)dev->priv); + ipip_tunnel_unlink(netdev_priv(dev)); dev_put(dev); } @@ -443,7 +443,7 @@ out: skb2->dst->ops->update_pmtu(skb2->dst, rel_info); rel_info = htonl(rel_info); } else if (type == ICMP_TIME_EXCEEDED) { - struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv; + struct ip_tunnel *t = netdev_priv(skb2->dev); if (t->parms.iph.ttl) { rel_type = ICMP_DEST_UNREACH; rel_code = ICMP_HOST_UNREACH; @@ -514,7 +514,7 @@ out: static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { - struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + struct ip_tunnel *tunnel = netdev_priv(dev); struct net_device_stats *stats = &tunnel->stat; struct iphdr *tiph = &tunnel->parms.iph; u8 tos = tunnel->parms.iph.tos; @@ -621,6 +621,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED); dst_release(skb->dst); skb->dst = &rt->u.dst; @@ -673,7 +674,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) t = ipip_tunnel_locate(&p, 0); } if (t == NULL) - t = (struct ip_tunnel*)dev->priv; + t = netdev_priv(dev); memcpy(&p, &t->parms, sizeof(p)); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) err = -EFAULT; @@ -710,7 +711,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = -EINVAL; break; } - t = (struct ip_tunnel*)dev->priv; + t = netdev_priv(dev); ipip_tunnel_unlink(t); t->parms.iph.saddr = p.iph.saddr; t->parms.iph.daddr = p.iph.daddr; @@ -764,7 +765,7 @@ done: static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev) { - return &(((struct ip_tunnel*)dev->priv)->stat); + return &(((struct ip_tunnel*)netdev_priv(dev))->stat); } static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) @@ -799,7 +800,7 @@ static int ipip_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel; struct iphdr *iph; - tunnel = (struct ip_tunnel*)dev->priv; + tunnel = netdev_priv(dev); iph = &tunnel->parms.iph; tunnel->dev = dev; @@ -837,7 +838,7 @@ static int ipip_tunnel_init(struct net_device *dev) static int __init ipip_fb_tunnel_init(struct net_device *dev) { - struct ip_tunnel *tunnel = dev->priv; + struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; tunnel->dev = dev; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9a5c0ce7ff35..f58ac9854c3f 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -178,8 +178,8 @@ static int reg_vif_num = -1; static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { read_lock(&mrt_lock); - ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len; - ((struct net_device_stats*)dev->priv)->tx_packets++; + ((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len; + ((struct net_device_stats*)netdev_priv(dev))->tx_packets++; ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT); read_unlock(&mrt_lock); kfree_skb(skb); @@ -188,7 +188,7 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) static struct net_device_stats *reg_vif_get_stats(struct net_device *dev) { - return (struct net_device_stats*)dev->priv; + return (struct net_device_stats*)netdev_priv(dev); } static void reg_vif_setup(struct net_device *dev) @@ -1149,8 +1149,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) if (vif->flags & VIFF_REGISTER) { vif->pkt_out++; vif->bytes_out+=skb->len; - ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len; - ((struct net_device_stats*)vif->dev->priv)->tx_packets++; + ((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len; + ((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++; ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); kfree_skb(skb); return; @@ -1210,8 +1210,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) if (vif->flags & VIFF_TUNNEL) { ip_encap(skb, vif->local, vif->remote); /* FIXME: extra output firewall step used to be here. --RR */ - ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++; - ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb->len; + ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++; + ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len; } IPCB(skb)->flags |= IPSKB_FORWARDED; @@ -1467,8 +1467,8 @@ int pim_rcv_v1(struct sk_buff * skb) skb->pkt_type = PACKET_HOST; dst_release(skb->dst); skb->dst = NULL; - ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; - ((struct net_device_stats*)reg_dev->priv)->rx_packets++; + ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len; + ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++; nf_reset(skb); netif_rx(skb); dev_put(reg_dev); @@ -1522,8 +1522,8 @@ static int pim_rcv(struct sk_buff * skb) skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; dst_release(skb->dst); - ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; - ((struct net_device_stats*)reg_dev->priv)->rx_packets++; + ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len; + ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++; skb->dst = NULL; nf_reset(skb); netif_rx(skb); diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index 3b87482049cf..52c12e9edbbc 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -322,7 +322,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct net_device *tdev; /* Device to other host */ struct iphdr *old_iph = skb->nh.iph; u8 tos = old_iph->tos; - u16 df = old_iph->frag_off; + __be16 df = old_iph->frag_off; struct iphdr *iph; /* Our new IP header */ int max_headroom; /* The extra header space needed */ int mtu; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index ae0779d82c5d..3321092b0914 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -7,11 +7,13 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> +#include <linux/ip.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/icmp.h> #include <net/route.h> -#include <linux/ip.h> +#include <net/xfrm.h> +#include <net/ip.h> /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ int ip_route_me_harder(struct sk_buff **pskb) @@ -33,7 +35,6 @@ int ip_route_me_harder(struct sk_buff **pskb) #ifdef CONFIG_IP_ROUTE_FWMARK fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark; #endif - fl.proto = iph->protocol; if (ip_route_output_key(&rt, &fl) != 0) return -1; @@ -60,6 +61,13 @@ int ip_route_me_harder(struct sk_buff **pskb) if ((*pskb)->dst->error) return -1; +#ifdef CONFIG_XFRM + if (!(IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) && + xfrm_decode_session(*pskb, &fl, AF_INET) == 0) + if (xfrm_lookup(&(*pskb)->dst, &fl, (*pskb)->sk, 0)) + return -1; +#endif + /* Change in oif may mean change in hh_len. */ hh_len = (*pskb)->dst->dev->hard_header_len; if (skb_headroom(*pskb) < hh_len) { @@ -78,6 +86,9 @@ int ip_route_me_harder(struct sk_buff **pskb) } EXPORT_SYMBOL(ip_route_me_harder); +void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *); +EXPORT_SYMBOL(ip_nat_decode_session); + /* * Extra routing may needed on local out, as the QUEUE target never * returns control to the table. diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 88a60650e6b8..a9893ec03e02 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -487,6 +487,16 @@ config IP_NF_MATCH_STRING To compile it as a module, choose M here. If unsure, say N. +config IP_NF_MATCH_POLICY + tristate "IPsec policy match support" + depends on IP_NF_IPTABLES && XFRM + help + Policy matching allows you to match packets based on the + IPsec policy that was used during decapsulation/will + be used during encapsulation. + + To compile it as a module, choose M here. If unsure, say N. + # `filter', generic and specific targets config IP_NF_FILTER tristate "Packet filtering" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index d0a447e520a2..549b01a648b3 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -72,6 +72,7 @@ obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o +obj-$(CONFIG_IP_NF_MATCH_POLICY) += ipt_policy.o obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index 977fb59d4563..0b25050981a1 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -16,6 +16,7 @@ #include <linux/types.h> #include <linux/sched.h> #include <linux/timer.h> +#include <linux/interrupt.h> #include <linux/netfilter.h> #include <linux/module.h> #include <linux/in.h> diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index e7fa29e576dc..77f304680d86 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -995,7 +995,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, || (!test_bit(IPS_ASSURED_BIT, &conntrack->status) && conntrack->proto.tcp.last_index == TCP_ACK_SET)) && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) { - /* RST sent to invalid SYN or ACK we had let trough + /* RST sent to invalid SYN or ACK we had let through * at a) and c) above: * * a) SYN was in window then @@ -1006,7 +1006,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, * segments we ignored. */ goto in_window; } - /* Just fall trough */ + /* Just fall through */ default: /* Keep compilers happy. */ break; diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index f04111f74e09..8b8a1f00bbf4 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -55,6 +55,44 @@ : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \ : "*ERROR*"))) +#ifdef CONFIG_XFRM +static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) +{ + struct ip_conntrack *ct; + struct ip_conntrack_tuple *t; + enum ip_conntrack_info ctinfo; + enum ip_conntrack_dir dir; + unsigned long statusbit; + + ct = ip_conntrack_get(skb, &ctinfo); + if (ct == NULL) + return; + dir = CTINFO2DIR(ctinfo); + t = &ct->tuplehash[dir].tuple; + + if (dir == IP_CT_DIR_ORIGINAL) + statusbit = IPS_DST_NAT; + else + statusbit = IPS_SRC_NAT; + + if (ct->status & statusbit) { + fl->fl4_dst = t->dst.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP) + fl->fl_ip_dport = t->dst.u.tcp.port; + } + + statusbit ^= IPS_NAT_MASK; + + if (ct->status & statusbit) { + fl->fl4_src = t->src.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP) + fl->fl_ip_sport = t->src.u.tcp.port; + } +} +#endif + static unsigned int ip_nat_fn(unsigned int hooknum, struct sk_buff **pskb, @@ -162,18 +200,20 @@ ip_nat_in(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - u_int32_t saddr, daddr; + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; unsigned int ret; - saddr = (*pskb)->nh.iph->saddr; - daddr = (*pskb)->nh.iph->daddr; - ret = ip_nat_fn(hooknum, pskb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN - && ((*pskb)->nh.iph->saddr != saddr - || (*pskb)->nh.iph->daddr != daddr)) { - dst_release((*pskb)->dst); - (*pskb)->dst = NULL; + && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.src.ip != + ct->tuplehash[!dir].tuple.dst.ip) { + dst_release((*pskb)->dst); + (*pskb)->dst = NULL; + } } return ret; } @@ -185,12 +225,30 @@ ip_nat_out(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + unsigned int ret; + /* root is playing with raw sockets. */ if ((*pskb)->len < sizeof(struct iphdr) || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) return NF_ACCEPT; - return ip_nat_fn(hooknum, pskb, in, out, okfn); + ret = ip_nat_fn(hooknum, pskb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN + && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.src.ip != + ct->tuplehash[!dir].tuple.dst.ip +#ifdef CONFIG_XFRM + || ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all +#endif + ) + return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; + } + return ret; } static unsigned int @@ -200,7 +258,8 @@ ip_nat_local_fn(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - u_int32_t saddr, daddr; + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; unsigned int ret; /* root is playing with raw sockets. */ @@ -208,14 +267,20 @@ ip_nat_local_fn(unsigned int hooknum, || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) return NF_ACCEPT; - saddr = (*pskb)->nh.iph->saddr; - daddr = (*pskb)->nh.iph->daddr; - ret = ip_nat_fn(hooknum, pskb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN - && ((*pskb)->nh.iph->saddr != saddr - || (*pskb)->nh.iph->daddr != daddr)) - return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; + && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.dst.ip != + ct->tuplehash[!dir].tuple.src.ip +#ifdef CONFIG_XFRM + || ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[dir].tuple.src.u.all +#endif + ) + return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; + } return ret; } @@ -303,10 +368,14 @@ static int init_or_cleanup(int init) if (!init) goto cleanup; +#ifdef CONFIG_XFRM + BUG_ON(ip_nat_decode_session != NULL); + ip_nat_decode_session = nat_decode_session; +#endif ret = ip_nat_rule_init(); if (ret < 0) { printk("ip_nat_init: can't setup rules.\n"); - goto cleanup_nothing; + goto cleanup_decode_session; } ret = nf_register_hook(&ip_nat_in_ops); if (ret < 0) { @@ -354,7 +423,11 @@ static int init_or_cleanup(int init) nf_unregister_hook(&ip_nat_in_ops); cleanup_rule_init: ip_nat_rule_cleanup(); - cleanup_nothing: + cleanup_decode_session: +#ifdef CONFIG_XFRM + ip_nat_decode_session = NULL; + synchronize_net(); +#endif return ret; } diff --git a/net/ipv4/netfilter/ipt_helper.c b/net/ipv4/netfilter/ipt_helper.c index bf14e1c7798a..aef649e393af 100644 --- a/net/ipv4/netfilter/ipt_helper.c +++ b/net/ipv4/netfilter/ipt_helper.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter.h> +#include <linux/interrupt.h> #if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) #include <linux/netfilter_ipv4/ip_conntrack.h> #include <linux/netfilter_ipv4/ip_conntrack_core.h> diff --git a/net/ipv4/netfilter/ipt_policy.c b/net/ipv4/netfilter/ipt_policy.c new file mode 100644 index 000000000000..709debcc69c9 --- /dev/null +++ b/net/ipv4/netfilter/ipt_policy.c @@ -0,0 +1,170 @@ +/* IP tables module for matching IPsec policy + * + * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/config.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/init.h> +#include <net/xfrm.h> + +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ipt_policy.h> + +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("IPtables IPsec policy matching module"); +MODULE_LICENSE("GPL"); + + +static inline int +match_xfrm_state(struct xfrm_state *x, const struct ipt_policy_elem *e) +{ +#define MATCH(x,y) (!e->match.x || ((e->x == (y)) ^ e->invert.x)) + + return MATCH(saddr, x->props.saddr.a4 & e->smask) && + MATCH(daddr, x->id.daddr.a4 & e->dmask) && + MATCH(proto, x->id.proto) && + MATCH(mode, x->props.mode) && + MATCH(spi, x->id.spi) && + MATCH(reqid, x->props.reqid); +} + +static int +match_policy_in(const struct sk_buff *skb, const struct ipt_policy_info *info) +{ + const struct ipt_policy_elem *e; + struct sec_path *sp = skb->sp; + int strict = info->flags & IPT_POLICY_MATCH_STRICT; + int i, pos; + + if (sp == NULL) + return -1; + if (strict && info->len != sp->len) + return 0; + + for (i = sp->len - 1; i >= 0; i--) { + pos = strict ? i - sp->len + 1 : 0; + if (pos >= info->len) + return 0; + e = &info->pol[pos]; + + if (match_xfrm_state(sp->x[i].xvec, e)) { + if (!strict) + return 1; + } else if (strict) + return 0; + } + + return strict ? 1 : 0; +} + +static int +match_policy_out(const struct sk_buff *skb, const struct ipt_policy_info *info) +{ + const struct ipt_policy_elem *e; + struct dst_entry *dst = skb->dst; + int strict = info->flags & IPT_POLICY_MATCH_STRICT; + int i, pos; + + if (dst->xfrm == NULL) + return -1; + + for (i = 0; dst && dst->xfrm; dst = dst->child, i++) { + pos = strict ? i : 0; + if (pos >= info->len) + return 0; + e = &info->pol[pos]; + + if (match_xfrm_state(dst->xfrm, e)) { + if (!strict) + return 1; + } else if (strict) + return 0; + } + + return strict ? 1 : 0; +} + +static int match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, int offset, int *hotdrop) +{ + const struct ipt_policy_info *info = matchinfo; + int ret; + + if (info->flags & IPT_POLICY_MATCH_IN) + ret = match_policy_in(skb, info); + else + ret = match_policy_out(skb, info); + + if (ret < 0) + ret = info->flags & IPT_POLICY_MATCH_NONE ? 1 : 0; + else if (info->flags & IPT_POLICY_MATCH_NONE) + ret = 0; + + return ret; +} + +static int checkentry(const char *tablename, const struct ipt_ip *ip, + void *matchinfo, unsigned int matchsize, + unsigned int hook_mask) +{ + struct ipt_policy_info *info = matchinfo; + + if (matchsize != IPT_ALIGN(sizeof(*info))) { + printk(KERN_ERR "ipt_policy: matchsize %u != %zu\n", + matchsize, IPT_ALIGN(sizeof(*info))); + return 0; + } + if (!(info->flags & (IPT_POLICY_MATCH_IN|IPT_POLICY_MATCH_OUT))) { + printk(KERN_ERR "ipt_policy: neither incoming nor " + "outgoing policy selected\n"); + return 0; + } + if (hook_mask & (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_LOCAL_IN) + && info->flags & IPT_POLICY_MATCH_OUT) { + printk(KERN_ERR "ipt_policy: output policy not valid in " + "PRE_ROUTING and INPUT\n"); + return 0; + } + if (hook_mask & (1 << NF_IP_POST_ROUTING | 1 << NF_IP_LOCAL_OUT) + && info->flags & IPT_POLICY_MATCH_IN) { + printk(KERN_ERR "ipt_policy: input policy not valid in " + "POST_ROUTING and OUTPUT\n"); + return 0; + } + if (info->len > IPT_POLICY_MAX_ELEM) { + printk(KERN_ERR "ipt_policy: too many policy elements\n"); + return 0; + } + + return 1; +} + +static struct ipt_match policy_match = { + .name = "policy", + .match = match, + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ipt_register_match(&policy_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&policy_match); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4b0d7e4d6269..165a4d81efa4 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -255,6 +255,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); return NET_RX_DROP; } + nf_reset(skb); skb_push(skb, skb->data - skb->nh.raw); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0a461232329f..a97ed5416c28 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3347,7 +3347,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, int offset = start - TCP_SKB_CB(skb)->seq; int size = TCP_SKB_CB(skb)->end_seq - start; - if (offset < 0) BUG(); + BUG_ON(offset < 0); if (size > 0) { size = min(copy, size); if (skb_copy_bits(skb, offset, skb_put(nskb, size), size)) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e9f83e5b28ce..6ea353907af5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1080,6 +1080,7 @@ process: if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; + nf_reset(skb); if (sk_filter(sk, skb, 0)) goto discard_and_relse; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 223abaa72bc5..00840474a449 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -989,6 +989,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) kfree_skb(skb); return -1; } + nf_reset(skb); if (up->encap_type) { /* @@ -1149,6 +1150,7 @@ int udp_rcv(struct sk_buff *skb) if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; + nf_reset(skb); /* No socket. Drop packet silently, if checksum is wrong */ if (udp_checksum_complete(skb)) diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 2d3849c38a0f..850d919591d1 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -11,6 +11,8 @@ #include <linux/module.h> #include <linux/string.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> #include <net/inet_ecn.h> #include <net/ip.h> #include <net/xfrm.h> @@ -45,6 +47,23 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq) return xfrm_parse_spi(skb, nexthdr, spi, seq); } +#ifdef CONFIG_NETFILTER +static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) +{ + struct iphdr *iph = skb->nh.iph; + + if (skb->dst == NULL) { + if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, + skb->dev)) + goto drop; + } + return dst_input(skb); +drop: + kfree_skb(skb); + return NET_RX_DROP; +} +#endif + int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) { int err; @@ -137,6 +156,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state)); skb->sp->len += xfrm_nr; + nf_reset(skb); + if (decaps) { if (!(skb->dev->flags&IFF_LOOPBACK)) { dst_release(skb->dst); @@ -145,7 +166,17 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) netif_rx(skb); return 0; } else { +#ifdef CONFIG_NETFILTER + __skb_push(skb, skb->data - skb->nh.raw); + skb->nh.iph->tot_len = htons(skb->len); + ip_send_check(skb->nh.iph); + + NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL, + xfrm4_rcv_encap_finish); + return 0; +#else return -skb->nh.iph->protocol; +#endif } drop_unlock: diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 66620a95942a..d4df0ddd424b 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -8,8 +8,10 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/compiler.h> #include <linux/skbuff.h> #include <linux/spinlock.h> +#include <linux/netfilter_ipv4.h> #include <net/inet_ecn.h> #include <net/ip.h> #include <net/xfrm.h> @@ -95,7 +97,7 @@ out: return ret; } -int xfrm4_output(struct sk_buff *skb) +static int xfrm4_output_one(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; @@ -113,27 +115,33 @@ int xfrm4_output(struct sk_buff *skb) goto error_nolock; } - spin_lock_bh(&x->lock); - err = xfrm_state_check(x, skb); - if (err) - goto error; + do { + spin_lock_bh(&x->lock); + err = xfrm_state_check(x, skb); + if (err) + goto error; - xfrm4_encap(skb); + xfrm4_encap(skb); - err = x->type->output(x, skb); - if (err) - goto error; + err = x->type->output(x, skb); + if (err) + goto error; - x->curlft.bytes += skb->len; - x->curlft.packets++; + x->curlft.bytes += skb->len; + x->curlft.packets++; - spin_unlock_bh(&x->lock); + spin_unlock_bh(&x->lock); - if (!(skb->dst = dst_pop(dst))) { - err = -EHOSTUNREACH; - goto error_nolock; - } - err = NET_XMIT_BYPASS; + if (!(skb->dst = dst_pop(dst))) { + err = -EHOSTUNREACH; + goto error_nolock; + } + dst = skb->dst; + x = dst->xfrm; + } while (x && !x->props.mode); + + IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; + err = 0; out_exit: return err; @@ -143,3 +151,33 @@ error_nolock: kfree_skb(skb); goto out_exit; } + +int xfrm4_output_finish(struct sk_buff *skb) +{ + int err; + + while (likely((err = xfrm4_output_one(skb)) == 0)) { + nf_reset(skb); + + err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL, + skb->dst->dev, dst_output); + if (unlikely(err != 1)) + break; + + if (!skb->dst->xfrm) + return dst_output(skb); + + err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL, + skb->dst->dev, xfrm4_output_finish); + if (unlikely(err != 1)) + break; + } + + return err; +} + +int xfrm4_output(struct sk_buff *skb) +{ + return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, + xfrm4_output_finish); +} diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 704fb73e6c5f..e53e421eeee9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1228,7 +1228,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) /* Gets referenced address, destroys ifaddr */ -void addrconf_dad_stop(struct inet6_ifaddr *ifp) +static void addrconf_dad_stop(struct inet6_ifaddr *ifp) { if (ifp->flags&IFA_F_PERMANENT) { spin_lock_bh(&ifp->lock); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 68afc53be662..25c3fe5005d9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -689,11 +689,11 @@ snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign) if (ptr == NULL) return -EINVAL; - ptr[0] = __alloc_percpu(mibsize, mibalign); + ptr[0] = __alloc_percpu(mibsize); if (!ptr[0]) goto err0; - ptr[1] = __alloc_percpu(mibsize, mibalign); + ptr[1] = __alloc_percpu(mibsize); if (!ptr[1]) goto err1; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 113374dc342c..2a1e7e45b890 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -152,7 +152,7 @@ static struct tlvtype_proc tlvprocdestopt_lst[] = { {-1, NULL} }; -static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp) +static int ipv6_destopt_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); @@ -169,7 +169,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp) if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { skb->h.raw += ((skb->h.raw[1]+1)<<3); - *nhoffp = opt->dst1; + opt->nhoff = opt->dst1; return 1; } @@ -192,7 +192,7 @@ void __init ipv6_destopt_init(void) NONE header. No data in packet. ********************************/ -static int ipv6_nodata_rcv(struct sk_buff **skbp, unsigned int *nhoffp) +static int ipv6_nodata_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; @@ -215,7 +215,7 @@ void __init ipv6_nodata_init(void) Routing header. ********************************/ -static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp) +static int ipv6_rthdr_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); @@ -249,7 +249,7 @@ looped_back: skb->h.raw += (hdr->hdrlen + 1) << 3; opt->dst0 = opt->dst1; opt->dst1 = 0; - *nhoffp = (&hdr->nexthdr) - skb->nh.raw; + opt->nhoff = (&hdr->nexthdr) - skb->nh.raw; return 1; } @@ -487,9 +487,14 @@ static struct tlvtype_proc tlvprochopopt_lst[] = { int ipv6_parse_hopopts(struct sk_buff *skb, int nhoff) { - IP6CB(skb)->hop = sizeof(struct ipv6hdr); - if (ip6_parse_tlv(tlvprochopopt_lst, skb)) + struct inet6_skb_parm *opt = IP6CB(skb); + + opt->hop = sizeof(struct ipv6hdr); + if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { + skb->h.raw += (skb->h.raw[1]+1)<<3; + opt->nhoff = sizeof(struct ipv6hdr); return sizeof(struct ipv6hdr); + } return -1; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 6ec6a2b549bb..53c81fcd20ba 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -79,7 +79,7 @@ DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly; static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL; #define icmpv6_socket __get_cpu_var(__icmpv6_socket) -static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp); +static int icmpv6_rcv(struct sk_buff **pskb); static struct inet6_protocol icmpv6_protocol = { .handler = icmpv6_rcv, @@ -581,7 +581,7 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info) * Handle icmp messages */ -static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int icmpv6_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct net_device *dev = skb->dev; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 792f90f0f9ec..f8f3a37a1494 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -25,6 +25,7 @@ #include <net/inet_hashtables.h> #include <net/ip6_route.h> #include <net/sock.h> +#include <net/inet6_connection_sock.h> int inet6_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index a6026d2787d2..29f73592e68e 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -48,7 +48,7 @@ -static inline int ip6_rcv_finish( struct sk_buff *skb) +inline int ip6_rcv_finish( struct sk_buff *skb) { if (skb->dst == NULL) ip6_route_input(skb); @@ -97,6 +97,9 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (hdr->version != 6) goto err; + skb->h.raw = (u8 *)(hdr + 1); + IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); + pkt_len = ntohs(hdr->payload_len); /* pkt_len may be zero if Jumbo payload option is present */ @@ -111,8 +114,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt } if (hdr->nexthdr == NEXTHDR_HOP) { - skb->h.raw = (u8*)(hdr+1); - if (ipv6_parse_hopopts(skb, offsetof(struct ipv6hdr, nexthdr)) < 0) { + if (ipv6_parse_hopopts(skb, IP6CB(skb)->nhoff) < 0) { IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); return 0; } @@ -143,26 +145,15 @@ static inline int ip6_input_finish(struct sk_buff *skb) int nexthdr; u8 hash; - skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr); - /* * Parse extension headers */ - nexthdr = skb->nh.ipv6h->nexthdr; - nhoff = offsetof(struct ipv6hdr, nexthdr); - - /* Skip hop-by-hop options, they are already parsed. */ - if (nexthdr == NEXTHDR_HOP) { - nhoff = sizeof(struct ipv6hdr); - nexthdr = skb->h.raw[0]; - skb->h.raw += (skb->h.raw[1]+1)<<3; - } - rcu_read_lock(); resubmit: if (!pskb_pull(skb, skb->h.raw - skb->data)) goto discard; + nhoff = IP6CB(skb)->nhoff; nexthdr = skb->nh.raw[nhoff]; raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]); @@ -194,7 +185,7 @@ resubmit: !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; - ret = ipprot->handler(&skb, &nhoff); + ret = ipprot->handler(&skb); if (ret > 0) goto resubmit; else if (ret == 0) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b4c4beba0ede..efa3e72cfcfa 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -226,6 +226,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); ipv6_addr_copy(&hdr->daddr, first_hop); + skb->priority = sk->sk_priority; + mtu = dst_mtu(dst); if ((skb->len <= mtu) || ipfragok) { IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); @@ -1182,6 +1184,8 @@ int ip6_push_pending_frames(struct sock *sk) ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); ipv6_addr_copy(&hdr->daddr, final_dst); + skb->priority = sk->sk_priority; + skb->dst = dst_clone(&rt->u.dst); IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index e315d0f80af1..c3c2bf699a67 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -243,7 +243,7 @@ ip6_tnl_create(struct ip6_tnl_parm *p, struct ip6_tnl **pt) if (dev == NULL) return -ENOMEM; - t = dev->priv; + t = netdev_priv(dev); dev->init = ip6ip6_tnl_dev_init; t->parms = *p; @@ -308,7 +308,7 @@ ip6ip6_tnl_locate(struct ip6_tnl_parm *p, struct ip6_tnl **pt, int create) static void ip6ip6_tnl_dev_uninit(struct net_device *dev) { - struct ip6_tnl *t = dev->priv; + struct ip6_tnl *t = netdev_priv(dev); if (dev == ip6ip6_fb_tnl_dev) { write_lock_bh(&ip6ip6_lock); @@ -510,7 +510,7 @@ static inline void ip6ip6_ecn_decapsulate(struct ipv6hdr *outer_iph, **/ static int -ip6ip6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +ip6ip6_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct ipv6hdr *ipv6h; @@ -623,7 +623,7 @@ ip6ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr) static int ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { - struct ip6_tnl *t = (struct ip6_tnl *) dev->priv; + struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->stat; struct ipv6hdr *ipv6h = skb->nh.ipv6h; struct ipv6_txoptions *opt = NULL; @@ -933,11 +933,11 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; } if ((err = ip6ip6_tnl_locate(&p, &t, 0)) == -ENODEV) - t = (struct ip6_tnl *) dev->priv; + t = netdev_priv(dev); else if (err) break; } else - t = (struct ip6_tnl *) dev->priv; + t = netdev_priv(dev); memcpy(&p, &t->parms, sizeof (p)); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) { @@ -955,7 +955,7 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; } if (!create && dev != ip6ip6_fb_tnl_dev) { - t = (struct ip6_tnl *) dev->priv; + t = netdev_priv(dev); } if (!t && (err = ip6ip6_tnl_locate(&p, &t, create))) { break; @@ -991,12 +991,12 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) err = ip6ip6_tnl_locate(&p, &t, 0); if (err) break; - if (t == ip6ip6_fb_tnl_dev->priv) { + if (t == netdev_priv(ip6ip6_fb_tnl_dev)) { err = -EPERM; break; } } else { - t = (struct ip6_tnl *) dev->priv; + t = netdev_priv(dev); } err = unregister_netdevice(t->dev); break; @@ -1016,7 +1016,7 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static struct net_device_stats * ip6ip6_tnl_get_stats(struct net_device *dev) { - return &(((struct ip6_tnl *) dev->priv)->stat); + return &(((struct ip6_tnl *)netdev_priv(dev))->stat); } /** @@ -1073,7 +1073,7 @@ static void ip6ip6_tnl_dev_setup(struct net_device *dev) static inline void ip6ip6_tnl_dev_init_gen(struct net_device *dev) { - struct ip6_tnl *t = (struct ip6_tnl *) dev->priv; + struct ip6_tnl *t = netdev_priv(dev); t->fl.proto = IPPROTO_IPV6; t->dev = dev; strcpy(t->parms.name, dev->name); @@ -1087,7 +1087,7 @@ ip6ip6_tnl_dev_init_gen(struct net_device *dev) static int ip6ip6_tnl_dev_init(struct net_device *dev) { - struct ip6_tnl *t = (struct ip6_tnl *) dev->priv; + struct ip6_tnl *t = netdev_priv(dev); ip6ip6_tnl_dev_init_gen(dev); ip6ip6_tnl_link_config(t); return 0; @@ -1103,7 +1103,7 @@ ip6ip6_tnl_dev_init(struct net_device *dev) static int ip6ip6_fb_tnl_dev_init(struct net_device *dev) { - struct ip6_tnl *t = dev->priv; + struct ip6_tnl *t = netdev_priv(dev); ip6ip6_tnl_dev_init_gen(dev); dev_hold(dev); tnls_wc[0] = t; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index f8626ebf90fd..b63678328a3b 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -10,6 +10,7 @@ #include <net/dst.h> #include <net/ipv6.h> #include <net/ip6_route.h> +#include <net/xfrm.h> int ip6_route_me_harder(struct sk_buff *skb) { @@ -21,11 +22,17 @@ int ip6_route_me_harder(struct sk_buff *skb) { .ip6_u = { .daddr = iph->daddr, .saddr = iph->saddr, } }, - .proto = iph->nexthdr, }; dst = ip6_route_output(skb->sk, &fl); +#ifdef CONFIG_XFRM + if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + xfrm_decode_session(skb, &fl, AF_INET6) == 0) + if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0)) + return -1; +#endif + if (dst->error) { IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 04912f9b35c3..105dd69ee9fb 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -179,6 +179,16 @@ config IP6_NF_MATCH_PHYSDEV To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_MATCH_POLICY + tristate "IPsec policy match support" + depends on IP6_NF_IPTABLES && XFRM + help + Policy matching allows you to match packets based on the + IPsec policy that was used during decapsulation/will + be used during encapsulation. + + To compile it as a module, choose M here. If unsure, say N. + # The targets config IP6_NF_FILTER tristate "Packet filtering" diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 9ab5b2ca1f59..c0c809b426e8 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o obj-$(CONFIG_IP6_NF_MATCH_AHESP) += ip6t_esp.o ip6t_ah.o +obj-$(CONFIG_IP6_NF_MATCH_POLICY) += ip6t_policy.o obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o obj-$(CONFIG_IP6_NF_MATCH_MULTIPORT) += ip6t_multiport.o obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o diff --git a/net/ipv6/netfilter/ip6t_policy.c b/net/ipv6/netfilter/ip6t_policy.c new file mode 100644 index 000000000000..13fedad48c1d --- /dev/null +++ b/net/ipv6/netfilter/ip6t_policy.c @@ -0,0 +1,175 @@ +/* IP tables module for matching IPsec policy + * + * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/config.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/init.h> +#include <net/xfrm.h> + +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter_ipv6/ip6t_policy.h> + +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("IPtables IPsec policy matching module"); +MODULE_LICENSE("GPL"); + + +static inline int +match_xfrm_state(struct xfrm_state *x, const struct ip6t_policy_elem *e) +{ +#define MATCH_ADDR(x,y,z) (!e->match.x || \ + ((ip6_masked_addrcmp((z), &e->x, &e->y)) == 0) ^ e->invert.x) +#define MATCH(x,y) (!e->match.x || ((e->x == (y)) ^ e->invert.x)) + + return MATCH_ADDR(saddr, smask, (struct in6_addr *)&x->props.saddr.a6) && + MATCH_ADDR(daddr, dmask, (struct in6_addr *)&x->id.daddr.a6) && + MATCH(proto, x->id.proto) && + MATCH(mode, x->props.mode) && + MATCH(spi, x->id.spi) && + MATCH(reqid, x->props.reqid); +} + +static int +match_policy_in(const struct sk_buff *skb, const struct ip6t_policy_info *info) +{ + const struct ip6t_policy_elem *e; + struct sec_path *sp = skb->sp; + int strict = info->flags & IP6T_POLICY_MATCH_STRICT; + int i, pos; + + if (sp == NULL) + return -1; + if (strict && info->len != sp->len) + return 0; + + for (i = sp->len - 1; i >= 0; i--) { + pos = strict ? i - sp->len + 1 : 0; + if (pos >= info->len) + return 0; + e = &info->pol[pos]; + + if (match_xfrm_state(sp->x[i].xvec, e)) { + if (!strict) + return 1; + } else if (strict) + return 0; + } + + return strict ? 1 : 0; +} + +static int +match_policy_out(const struct sk_buff *skb, const struct ip6t_policy_info *info) +{ + const struct ip6t_policy_elem *e; + struct dst_entry *dst = skb->dst; + int strict = info->flags & IP6T_POLICY_MATCH_STRICT; + int i, pos; + + if (dst->xfrm == NULL) + return -1; + + for (i = 0; dst && dst->xfrm; dst = dst->child, i++) { + pos = strict ? i : 0; + if (pos >= info->len) + return 0; + e = &info->pol[pos]; + + if (match_xfrm_state(dst->xfrm, e)) { + if (!strict) + return 1; + } else if (strict) + return 0; + } + + return strict ? 1 : 0; +} + +static int match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + unsigned int protoff, + int *hotdrop) +{ + const struct ip6t_policy_info *info = matchinfo; + int ret; + + if (info->flags & IP6T_POLICY_MATCH_IN) + ret = match_policy_in(skb, info); + else + ret = match_policy_out(skb, info); + + if (ret < 0) + ret = info->flags & IP6T_POLICY_MATCH_NONE ? 1 : 0; + else if (info->flags & IP6T_POLICY_MATCH_NONE) + ret = 0; + + return ret; +} + +static int checkentry(const char *tablename, const struct ip6t_ip6 *ip, + void *matchinfo, unsigned int matchsize, + unsigned int hook_mask) +{ + struct ip6t_policy_info *info = matchinfo; + + if (matchsize != IP6T_ALIGN(sizeof(*info))) { + printk(KERN_ERR "ip6t_policy: matchsize %u != %zu\n", + matchsize, IP6T_ALIGN(sizeof(*info))); + return 0; + } + if (!(info->flags & (IP6T_POLICY_MATCH_IN|IP6T_POLICY_MATCH_OUT))) { + printk(KERN_ERR "ip6t_policy: neither incoming nor " + "outgoing policy selected\n"); + return 0; + } + if (hook_mask & (1 << NF_IP6_PRE_ROUTING | 1 << NF_IP6_LOCAL_IN) + && info->flags & IP6T_POLICY_MATCH_OUT) { + printk(KERN_ERR "ip6t_policy: output policy not valid in " + "PRE_ROUTING and INPUT\n"); + return 0; + } + if (hook_mask & (1 << NF_IP6_POST_ROUTING | 1 << NF_IP6_LOCAL_OUT) + && info->flags & IP6T_POLICY_MATCH_IN) { + printk(KERN_ERR "ip6t_policy: input policy not valid in " + "POST_ROUTING and OUTPUT\n"); + return 0; + } + if (info->len > IP6T_POLICY_MAX_ELEM) { + printk(KERN_ERR "ip6t_policy: too many policy elements\n"); + return 0; + } + + return 1; +} + +static struct ip6t_match policy_match = { + .name = "policy", + .match = match, + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ip6t_register_match(&policy_match); +} + +static void __exit fini(void) +{ + ip6t_unregister_match(&policy_match); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 5d316cb72ec9..15e1456b3f18 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -581,7 +581,6 @@ err: * the last and the first frames arrived and all the bits are here. */ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, - unsigned int *nhoffp, struct net_device *dev) { struct sk_buff *fp, *head = fq->fragments; @@ -654,6 +653,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, head->dev = dev; skb_set_timestamp(head, &fq->stamp); head->nh.ipv6h->payload_len = htons(payload_len); + IP6CB(head)->nhoff = nhoff; *skb_in = head; @@ -663,7 +663,6 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); fq->fragments = NULL; - *nhoffp = nhoff; return 1; out_oversize: @@ -678,7 +677,7 @@ out_fail: return -1; } -static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp) +static int ipv6_frag_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct net_device *dev = skb->dev; @@ -710,7 +709,7 @@ static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp) skb->h.raw += sizeof(struct frag_hdr); IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); - *nhoffp = (u8*)fhdr - skb->nh.raw; + IP6CB(skb)->nhoff = (u8*)fhdr - skb->nh.raw; return 1; } @@ -722,11 +721,11 @@ static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp) spin_lock(&fq->lock); - ip6_frag_queue(fq, skb, fhdr, *nhoffp); + ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) - ret = ip6_frag_reasm(fq, skbp, nhoffp, dev); + ret = ip6_frag_reasm(fq, skbp, dev); spin_unlock(&fq->lock); fq_put(fq, NULL); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 577d49732b0f..0dae48aa1cec 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -184,7 +184,7 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int if (dev == NULL) return NULL; - nt = dev->priv; + nt = netdev_priv(dev); dev->init = ipip6_tunnel_init; nt->parms = *parms; @@ -210,7 +210,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev) write_unlock_bh(&ipip6_lock); dev_put(dev); } else { - ipip6_tunnel_unlink((struct ip_tunnel*)dev->priv); + ipip6_tunnel_unlink(netdev_priv(dev)); dev_put(dev); } } @@ -346,7 +346,7 @@ out: rt6i = rt6_lookup(&iph6->daddr, &iph6->saddr, NULL, 0); if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) { - struct ip_tunnel * t = (struct ip_tunnel*)rt6i->rt6i_dev->priv; + struct ip_tunnel *t = netdev_priv(rt6i->rt6i_dev); if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) { rel_type = ICMPV6_DEST_UNREACH; rel_code = ICMPV6_ADDR_UNREACH; @@ -381,6 +381,7 @@ static int ipip6_rcv(struct sk_buff *skb) skb->mac.raw = skb->nh.raw; skb->nh.raw = skb->data; memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + IPCB(skb)->flags = 0; skb->protocol = htons(ETH_P_IPV6); skb->pkt_type = PACKET_HOST; tunnel->stat.rx_packets++; @@ -423,7 +424,7 @@ static inline u32 try_6to4(struct in6_addr *v6dst) static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { - struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + struct ip_tunnel *tunnel = netdev_priv(dev); struct net_device_stats *stats = &tunnel->stat; struct iphdr *tiph = &tunnel->parms.iph; struct ipv6hdr *iph6 = skb->nh.ipv6h; @@ -552,6 +553,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags = 0; dst_release(skb->dst); skb->dst = &rt->u.dst; @@ -608,7 +610,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) t = ipip6_tunnel_locate(&p, 0); } if (t == NULL) - t = (struct ip_tunnel*)dev->priv; + t = netdev_priv(dev); memcpy(&p, &t->parms, sizeof(p)); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) err = -EFAULT; @@ -645,7 +647,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = -EINVAL; break; } - t = (struct ip_tunnel*)dev->priv; + t = netdev_priv(dev); ipip6_tunnel_unlink(t); t->parms.iph.saddr = p.iph.saddr; t->parms.iph.daddr = p.iph.daddr; @@ -681,7 +683,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) if ((t = ipip6_tunnel_locate(&p, 0)) == NULL) goto done; err = -EPERM; - if (t == ipip6_fb_tunnel_dev->priv) + if (t == netdev_priv(ipip6_fb_tunnel_dev)) goto done; dev = t->dev; } @@ -698,7 +700,7 @@ done: static struct net_device_stats *ipip6_tunnel_get_stats(struct net_device *dev) { - return &(((struct ip_tunnel*)dev->priv)->stat); + return &(((struct ip_tunnel*)netdev_priv(dev))->stat); } static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) @@ -733,7 +735,7 @@ static int ipip6_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel; struct iphdr *iph; - tunnel = (struct ip_tunnel*)dev->priv; + tunnel = netdev_priv(dev); iph = &tunnel->parms.iph; tunnel->dev = dev; @@ -773,7 +775,7 @@ static int ipip6_tunnel_init(struct net_device *dev) static int __init ipip6_fb_tunnel_init(struct net_device *dev) { - struct ip_tunnel *tunnel = dev->priv; + struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; tunnel->dev = dev; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2947bc56d8a0..a25f4e8a8ada 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1153,7 +1153,7 @@ ipv6_pktoptions: return 0; } -static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int tcp_v6_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct tcphdr *th; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d8538dcea813..c47648892c04 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -435,7 +435,7 @@ out: read_unlock(&udp_hash_lock); } -static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int udpv6_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct sock *sk; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 28c29d78338e..1ca2da68ef69 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -11,6 +11,8 @@ #include <linux/module.h> #include <linux/string.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6.h> #include <net/dsfield.h> #include <net/inet_ecn.h> #include <net/ip.h> @@ -26,7 +28,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) IP6_ECN_set_ce(inner_iph); } -int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi) +int xfrm6_rcv_spi(struct sk_buff **pskb, u32 spi) { struct sk_buff *skb = *pskb; int err; @@ -38,7 +40,7 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi) int nexthdr; unsigned int nhoff; - nhoff = *nhoffp; + nhoff = IP6CB(skb)->nhoff; nexthdr = skb->nh.raw[nhoff]; seq = 0; @@ -121,6 +123,8 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi) skb->sp->len += xfrm_nr; skb->ip_summed = CHECKSUM_NONE; + nf_reset(skb); + if (decaps) { if (!(skb->dev->flags&IFF_LOOPBACK)) { dst_release(skb->dst); @@ -129,7 +133,16 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi) netif_rx(skb); return -1; } else { +#ifdef CONFIG_NETFILTER + skb->nh.ipv6h->payload_len = htons(skb->len); + __skb_push(skb, skb->data - skb->nh.raw); + + NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL, + ip6_rcv_finish); + return -1; +#else return 1; +#endif } drop_unlock: @@ -144,7 +157,7 @@ drop: EXPORT_SYMBOL(xfrm6_rcv_spi); -int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +int xfrm6_rcv(struct sk_buff **pskb) { - return xfrm6_rcv_spi(pskb, nhoffp, 0); + return xfrm6_rcv_spi(pskb, 0); } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6b9867717d11..80242172a5df 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -9,9 +9,11 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/compiler.h> #include <linux/skbuff.h> #include <linux/spinlock.h> #include <linux/icmpv6.h> +#include <linux/netfilter_ipv6.h> #include <net/dsfield.h> #include <net/inet_ecn.h> #include <net/ipv6.h> @@ -92,7 +94,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) return ret; } -int xfrm6_output(struct sk_buff *skb) +static int xfrm6_output_one(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; @@ -110,29 +112,35 @@ int xfrm6_output(struct sk_buff *skb) goto error_nolock; } - spin_lock_bh(&x->lock); - err = xfrm_state_check(x, skb); - if (err) - goto error; + do { + spin_lock_bh(&x->lock); + err = xfrm_state_check(x, skb); + if (err) + goto error; - xfrm6_encap(skb); + xfrm6_encap(skb); - err = x->type->output(x, skb); - if (err) - goto error; + err = x->type->output(x, skb); + if (err) + goto error; - x->curlft.bytes += skb->len; - x->curlft.packets++; + x->curlft.bytes += skb->len; + x->curlft.packets++; - spin_unlock_bh(&x->lock); + spin_unlock_bh(&x->lock); - skb->nh.raw = skb->data; - - if (!(skb->dst = dst_pop(dst))) { - err = -EHOSTUNREACH; - goto error_nolock; - } - err = NET_XMIT_BYPASS; + skb->nh.raw = skb->data; + + if (!(skb->dst = dst_pop(dst))) { + err = -EHOSTUNREACH; + goto error_nolock; + } + dst = skb->dst; + x = dst->xfrm; + } while (x && !x->props.mode); + + IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; + err = 0; out_exit: return err; @@ -142,3 +150,33 @@ error_nolock: kfree_skb(skb); goto out_exit; } + +static int xfrm6_output_finish(struct sk_buff *skb) +{ + int err; + + while (likely((err = xfrm6_output_one(skb)) == 0)) { + nf_reset(skb); + + err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, &skb, NULL, + skb->dst->dev, dst_output); + if (unlikely(err != 1)) + break; + + if (!skb->dst->xfrm) + return dst_output(skb); + + err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL, + skb->dst->dev, xfrm6_output_finish); + if (unlikely(err != 1)) + break; + } + + return err; +} + +int xfrm6_output(struct sk_buff *skb) +{ + return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev, + xfrm6_output_finish); +} diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index fbef7826a74f..da09ff258648 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -397,7 +397,7 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler) EXPORT_SYMBOL(xfrm6_tunnel_deregister); -static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int xfrm6_tunnel_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct xfrm6_tunnel *handler = xfrm6_tunnel_handler; @@ -405,11 +405,11 @@ static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp) u32 spi; /* device-like_ip6ip6_handler() */ - if (handler && handler->handler(pskb, nhoffp) == 0) + if (handler && handler->handler(pskb) == 0) return 0; spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr); - return xfrm6_rcv_spi(pskb, nhoffp, spi); + return xfrm6_rcv_spi(pskb, spi); } static void xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, diff --git a/net/key/af_key.c b/net/key/af_key.c index 52efd04cbedb..4c2f6d694f88 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -297,8 +297,7 @@ static int pfkey_error(struct sadb_msg *orig, int err, struct sock *sk) err = EINTR; if (err >= 512) err = EINVAL; - if (err <= 0 || err >= 256) - BUG(); + BUG_ON(err <= 0 || err >= 256); hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg)); pfkey_hdr_dup(hdr, orig); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 6167137a5cb5..9a1348a51a0d 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -988,7 +988,7 @@ static int tcp_packet(struct nf_conn *conntrack, || (!test_bit(IPS_ASSURED_BIT, &conntrack->status) && conntrack->proto.tcp.last_index == TCP_ACK_SET)) && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) { - /* RST sent to invalid SYN or ACK we had let trough + /* RST sent to invalid SYN or ACK we had let through * at a) and c) above: * * a) SYN was in window then @@ -999,7 +999,7 @@ static int tcp_packet(struct nf_conn *conntrack, * segments we ignored. */ goto in_window; } - /* Just fall trough */ + /* Just fall through */ default: /* Keep compilers happy. */ break; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7849cac14d3a..a67f1b44c9a3 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -402,7 +402,7 @@ static int netlink_create(struct socket *sock, int protocol) groups = nl_table[protocol].groups; netlink_unlock_table(); - if ((err = __netlink_create(sock, protocol) < 0)) + if ((err = __netlink_create(sock, protocol)) < 0) goto out_module; nlk = nlk_sk(sock->sk); diff --git a/net/sched/Makefile b/net/sched/Makefile index e48d0d456b3e..0f06aec66094 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -7,13 +7,13 @@ obj-y := sch_generic.o obj-$(CONFIG_NET_SCHED) += sch_api.o sch_fifo.o sch_blackhole.o obj-$(CONFIG_NET_CLS) += cls_api.o obj-$(CONFIG_NET_CLS_ACT) += act_api.o -obj-$(CONFIG_NET_ACT_POLICE) += police.o -obj-$(CONFIG_NET_CLS_POLICE) += police.o -obj-$(CONFIG_NET_ACT_GACT) += gact.o -obj-$(CONFIG_NET_ACT_MIRRED) += mirred.o -obj-$(CONFIG_NET_ACT_IPT) += ipt.o -obj-$(CONFIG_NET_ACT_PEDIT) += pedit.o -obj-$(CONFIG_NET_ACT_SIMP) += simple.o +obj-$(CONFIG_NET_ACT_POLICE) += act_police.o +obj-$(CONFIG_NET_CLS_POLICE) += act_police.o +obj-$(CONFIG_NET_ACT_GACT) += act_gact.o +obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o +obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o +obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o +obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o obj-$(CONFIG_NET_SCH_HPFQ) += sch_hpfq.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 2ce1cb2aa2ed..792ce59940ec 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -165,7 +165,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action *act, while ((a = act) != NULL) { repeat: if (a->ops && a->ops->act) { - ret = a->ops->act(&skb, a, res); + ret = a->ops->act(skb, a, res); if (TC_MUNGED & skb->tc_verd) { /* copied already, allow trampling */ skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); @@ -290,7 +290,7 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est, if (a_o == NULL) { #ifdef CONFIG_KMOD rtnl_unlock(); - request_module(act_name); + request_module("act_%s", act_name); rtnl_lock(); a_o = tc_lookup_action_n(act_name); diff --git a/net/sched/gact.c b/net/sched/act_gact.c index d1c6d542912a..a1e68f78dcc2 100644 --- a/net/sched/gact.c +++ b/net/sched/act_gact.c @@ -135,10 +135,9 @@ tcf_gact_cleanup(struct tc_action *a, int bind) } static int -tcf_gact(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) +tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { struct tcf_gact *p = PRIV(a, gact); - struct sk_buff *skb = *pskb; int action = TC_ACT_SHOT; spin_lock(&p->lock); diff --git a/net/sched/ipt.c b/net/sched/act_ipt.c index f50136eed211..b5001939b74b 100644 --- a/net/sched/ipt.c +++ b/net/sched/act_ipt.c @@ -201,11 +201,10 @@ tcf_ipt_cleanup(struct tc_action *a, int bind) } static int -tcf_ipt(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) +tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { int ret = 0, result = 0; struct tcf_ipt *p = PRIV(a, ipt); - struct sk_buff *skb = *pskb; if (skb_cloned(skb)) { if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) @@ -222,6 +221,9 @@ tcf_ipt(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) worry later - danger - this API seems to have changed from earlier kernels */ + /* iptables targets take a double skb pointer in case the skb + * needs to be replaced. We don't own the skb, so this must not + * happen. The pskb_expand_head above should make sure of this */ ret = p->t->u.kernel.target->target(&skb, skb->dev, NULL, p->hook, p->t->data, NULL); switch (ret) { diff --git a/net/sched/mirred.c b/net/sched/act_mirred.c index 20d06916dc0b..4fcccbd50885 100644 --- a/net/sched/mirred.c +++ b/net/sched/act_mirred.c @@ -158,12 +158,11 @@ tcf_mirred_cleanup(struct tc_action *a, int bind) } static int -tcf_mirred(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) +tcf_mirred(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { struct tcf_mirred *p = PRIV(a, mirred); struct net_device *dev; struct sk_buff *skb2 = NULL; - struct sk_buff *skb = *pskb; u32 at = G_TC_AT(skb->tc_verd); spin_lock(&p->lock); diff --git a/net/sched/pedit.c b/net/sched/act_pedit.c index 767d24f4610e..1742a68e0122 100644 --- a/net/sched/pedit.c +++ b/net/sched/act_pedit.c @@ -130,10 +130,9 @@ tcf_pedit_cleanup(struct tc_action *a, int bind) } static int -tcf_pedit(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) +tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { struct tcf_pedit *p = PRIV(a, pedit); - struct sk_buff *skb = *pskb; int i, munged = 0; u8 *pptr; @@ -246,10 +245,12 @@ tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,int bind, int ref) t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); t.expires = jiffies_to_clock_t(p->tm.expires); RTA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t); + kfree(opt); return skb->len; rtattr_failure: skb_trim(skb, b - skb->data); + kfree(opt); return -1; } diff --git a/net/sched/police.c b/net/sched/act_police.c index eb39fb2f39b6..fa877f8f652c 100644 --- a/net/sched/police.c +++ b/net/sched/act_police.c @@ -284,11 +284,10 @@ static int tcf_act_police_cleanup(struct tc_action *a, int bind) return 0; } -static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a, +static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { psched_time_t now; - struct sk_buff *skb = *pskb; struct tcf_police *p = PRIV(a); long toks; long ptoks = 0; @@ -408,7 +407,7 @@ police_cleanup_module(void) module_init(police_init_module); module_exit(police_cleanup_module); -#endif +#else /* CONFIG_NET_CLS_ACT */ struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est) { @@ -545,6 +544,7 @@ int tcf_police(struct sk_buff *skb, struct tcf_police *p) spin_unlock(&p->lock); return p->action; } +EXPORT_SYMBOL(tcf_police); int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p) { @@ -601,13 +601,4 @@ errout: return -1; } - -EXPORT_SYMBOL(tcf_police); -EXPORT_SYMBOL(tcf_police_destroy); -EXPORT_SYMBOL(tcf_police_dump); -EXPORT_SYMBOL(tcf_police_dump_stats); -EXPORT_SYMBOL(tcf_police_hash); -EXPORT_SYMBOL(tcf_police_ht); -EXPORT_SYMBOL(tcf_police_locate); -EXPORT_SYMBOL(tcf_police_lookup); -EXPORT_SYMBOL(tcf_police_new_index); +#endif /* CONFIG_NET_CLS_ACT */ diff --git a/net/sched/simple.c b/net/sched/act_simple.c index 8a6ae4f491e8..e5f2e1f431e2 100644 --- a/net/sched/simple.c +++ b/net/sched/act_simple.c @@ -44,9 +44,8 @@ static DEFINE_RWLOCK(simp_lock); #include <net/pkt_act.h> #include <net/act_generic.h> -static int tcf_simp(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) +static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { - struct sk_buff *skb = *pskb; struct tcf_defact *p = PRIV(a, defact); spin_lock(&p->lock); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 09453f997d8c..6cd81708bf71 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -257,7 +257,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) (cl = cbq_class_lookup(q, prio)) != NULL) return cl; - *qerr = NET_XMIT_DROP; + *qerr = NET_XMIT_BYPASS; for (;;) { int result = 0; defmap = head->defaults; @@ -413,7 +413,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) q->rx_class = cl; #endif if (cl == NULL) { - if (ret == NET_XMIT_DROP) + if (ret == NET_XMIT_BYPASS) sch->qstats.drops++; kfree_skb(skb); return ret; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index c26764bc4103..91132f6871d7 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -208,7 +208,7 @@ struct hfsc_sched do { \ struct timeval tv; \ do_gettimeofday(&tv); \ - (stamp) = 1000000ULL * tv.tv_sec + tv.tv_usec; \ + (stamp) = 1ULL * USEC_PER_SEC * tv.tv_sec + tv.tv_usec; \ } while (0) #endif @@ -502,8 +502,8 @@ d2dx(u32 d) u64 dx; dx = ((u64)d * PSCHED_JIFFIE2US(HZ)); - dx += 1000000 - 1; - do_div(dx, 1000000); + dx += USEC_PER_SEC - 1; + do_div(dx, USEC_PER_SEC); return dx; } @@ -523,7 +523,7 @@ dx2d(u64 dx) { u64 d; - d = dx * 1000000; + d = dx * USEC_PER_SEC; do_div(d, PSCHED_JIFFIE2US(HZ)); return (u32)d; } @@ -1227,7 +1227,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) if (cl->level == 0) return cl; - *qerr = NET_XMIT_DROP; + *qerr = NET_XMIT_BYPASS; tcf = q->root.filter_list; while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) { #ifdef CONFIG_NET_CLS_ACT @@ -1643,7 +1643,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) cl = hfsc_classify(skb, sch, &err); if (cl == NULL) { - if (err == NET_XMIT_DROP) + if (err == NET_XMIT_BYPASS) sch->qstats.drops++; kfree_skb(skb); return err; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 558cc087e602..3ec95df4a85e 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -321,7 +321,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0) return cl; - *qerr = NET_XMIT_DROP; + *qerr = NET_XMIT_BYPASS; tcf = q->filter_list; while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) { #ifdef CONFIG_NET_CLS_ACT @@ -724,7 +724,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) } #ifdef CONFIG_NET_CLS_ACT } else if (!cl) { - if (ret == NET_XMIT_DROP) + if (ret == NET_XMIT_BYPASS) sch->qstats.drops++; kfree_skb (skb); return ret; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 3ac0f495bad0..5b3a3e48ed92 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -54,7 +54,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) u32 band = skb->priority; struct tcf_result res; - *qerr = NET_XMIT_DROP; + *qerr = NET_XMIT_BYPASS; if (TC_H_MAJ(skb->priority) != sch->handle) { #ifdef CONFIG_NET_CLS_ACT switch (tc_classify(skb, q->filter_list, &res)) { @@ -91,7 +91,8 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) qdisc = prio_classify(skb, sch, &ret); #ifdef CONFIG_NET_CLS_ACT if (qdisc == NULL) { - if (ret == NET_XMIT_DROP) + + if (ret == NET_XMIT_BYPASS) sch->qstats.drops++; kfree_skb(skb); return ret; @@ -118,7 +119,7 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch) qdisc = prio_classify(skb, sch, &ret); #ifdef CONFIG_NET_CLS_ACT if (qdisc == NULL) { - if (ret == NET_XMIT_DROP) + if (ret == NET_XMIT_BYPASS) sch->qstats.drops++; kfree_skb(skb); return ret; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index c4a2a8c4c339..79b8ef34c6e4 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -274,7 +274,7 @@ teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *de static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev) { - struct teql_master *master = (void*)dev->priv; + struct teql_master *master = netdev_priv(dev); struct Qdisc *start, *q; int busy; int nores; @@ -350,7 +350,7 @@ drop: static int teql_master_open(struct net_device *dev) { struct Qdisc * q; - struct teql_master *m = (void*)dev->priv; + struct teql_master *m = netdev_priv(dev); int mtu = 0xFFFE; unsigned flags = IFF_NOARP|IFF_MULTICAST; @@ -397,13 +397,13 @@ static int teql_master_close(struct net_device *dev) static struct net_device_stats *teql_master_stats(struct net_device *dev) { - struct teql_master *m = (void*)dev->priv; + struct teql_master *m = netdev_priv(dev); return &m->stats; } static int teql_master_mtu(struct net_device *dev, int new_mtu) { - struct teql_master *m = (void*)dev->priv; + struct teql_master *m = netdev_priv(dev); struct Qdisc *q; if (new_mtu < 68) @@ -423,7 +423,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu) static __init void teql_master_setup(struct net_device *dev) { - struct teql_master *master = dev->priv; + struct teql_master *master = netdev_priv(dev); struct Qdisc_ops *ops = &master->qops; master->dev = dev; @@ -476,7 +476,7 @@ static int __init teql_init(void) break; } - master = dev->priv; + master = netdev_priv(dev); strlcpy(master->qops.id, dev->name, IFNAMSIZ); err = register_qdisc(&master->qops); diff --git a/net/sctp/input.c b/net/sctp/input.c index 238f1bffa684..4aa6fc60357c 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -225,6 +225,7 @@ int sctp_rcv(struct sk_buff *skb) if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family)) goto discard_release; + nf_reset(skb); ret = sk_filter(sk, skb, 1); if (ret) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 15c05165c905..04c7fab4edc4 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -905,7 +905,7 @@ static struct inet_protosw sctpv6_stream_protosw = { .flags = SCTP_PROTOSW_FLAG, }; -static int sctp6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int sctp6_rcv(struct sk_buff **pskb) { return sctp_rcv(*pskb) ? -1 : 0; } diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 2d7d8a5db2ac..b8b38aba92b3 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1250,8 +1250,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_TIMER_START: timer = &asoc->timers[cmd->obj.to]; timeout = asoc->timeouts[cmd->obj.to]; - if (!timeout) - BUG(); + BUG_ON(!timeout); timer->expires = jiffies + timeout; sctp_association_hold(asoc); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 5f1f806a0b11..129e2bd36aff 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -97,13 +97,17 @@ get_key(const void *p, const void *end, struct crypto_tfm **res) alg_mode = CRYPTO_TFM_MODE_CBC; break; default: - dprintk("RPC: get_key: unsupported algorithm %d\n", alg); + printk("gss_kerberos_mech: unsupported algorithm %d\n", alg); goto out_err_free_key; } - if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) + if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) { + printk("gss_kerberos_mech: unable to initialize crypto algorithm %s\n", alg_name); goto out_err_free_key; - if (crypto_cipher_setkey(*res, key.data, key.len)) + } + if (crypto_cipher_setkey(*res, key.data, key.len)) { + printk("gss_kerberos_mech: error setting key for crypto algorithm %s\n", alg_name); goto out_err_free_tfm; + } kfree(key.data); return p; diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 39b3edc14694..58400807d4df 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -111,14 +111,18 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg) setkey = 0; break; default: - dprintk("RPC: SPKM3 get_key: unsupported algorithm %d", *resalg); + dprintk("gss_spkm3_mech: unsupported algorithm %d\n", *resalg); goto out_err_free_key; } - if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) + if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) { + printk("gss_spkm3_mech: unable to initialize crypto algorthm %s\n", alg_name); goto out_err_free_key; + } if (setkey) { - if (crypto_cipher_setkey(*res, key.data, key.len)) + if (crypto_cipher_setkey(*res, key.data, key.len)) { + printk("gss_spkm3_mech: error setting key for crypto algorthm %s\n", alg_name); goto out_err_free_tfm; + } } if(key.len > 0) diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c index d1e12b25d6e2..86fbf7c3e39c 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_seal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c @@ -59,7 +59,7 @@ spkm3_make_token(struct spkm3_ctx *ctx, char tokhdrbuf[25]; struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; struct xdr_netobj mic_hdr = {.len = 0, .data = tokhdrbuf}; - int tmsglen, tokenlen = 0; + int tokenlen = 0; unsigned char *ptr; s32 now; int ctxelen = 0, ctxzbit = 0; @@ -92,24 +92,23 @@ spkm3_make_token(struct spkm3_ctx *ctx, } if (toktype == SPKM_MIC_TOK) { - tmsglen = 0; /* Calculate checksum over the mic-header */ asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit); spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data, ctxelen, ctxzbit); if (make_checksum(checksum_type, mic_hdr.data, mic_hdr.len, - text, &md5cksum)) + text, 0, &md5cksum)) goto out_err; asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit); - tokenlen = 10 + ctxelen + 1 + 2 + md5elen + 1; + tokenlen = 10 + ctxelen + 1 + md5elen + 1; /* Create token header using generic routines */ - token->len = g_token_size(&ctx->mech_used, tokenlen + tmsglen); + token->len = g_token_size(&ctx->mech_used, tokenlen); ptr = token->data; - g_make_token_header(&ctx->mech_used, tokenlen + tmsglen, &ptr); + g_make_token_header(&ctx->mech_used, tokenlen, &ptr); spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit); } else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */ diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c index 1f824578d773..af0d7ce74686 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_token.c +++ b/net/sunrpc/auth_gss/gss_spkm3_token.c @@ -182,6 +182,7 @@ spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ct * *tokp points to the beginning of the SPKM_MIC token described * in rfc 2025, section 3.2.1: * + * toklen is the inner token length */ void spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hdr, struct xdr_netobj *md5cksum, int md5elen, int md5zbit) @@ -189,7 +190,7 @@ spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hd unsigned char *ict = *tokp; *(u8 *)ict++ = 0xa4; - *(u8 *)ict++ = toklen - 2; + *(u8 *)ict++ = toklen; memcpy(ict, mic_hdr->data, mic_hdr->len); ict += mic_hdr->len; diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c index 241d5b30dfcb..96851b0ba1ba 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c @@ -95,7 +95,7 @@ spkm3_read_token(struct spkm3_ctx *ctx, ret = GSS_S_DEFECTIVE_TOKEN; code = make_checksum(CKSUMTYPE_RSA_MD5, ptr + 2, mic_hdrlen + 2, - message_buffer, &md5cksum); + message_buffer, 0, &md5cksum); if (code) goto out; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index f509e9992767..dcaa0c4453ff 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -575,12 +575,11 @@ cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) if (rp->q.list.next == &cd->queue) { spin_unlock(&queue_lock); up(&queue_io_sem); - if (rp->offset) - BUG(); + BUG_ON(rp->offset); return 0; } rq = container_of(rp->q.list.next, struct cache_request, q.list); - if (rq->q.reader) BUG(); + BUG_ON(rq->q.reader); if (rp->offset == 0) rq->readers++; spin_unlock(&queue_lock); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 61c3abeaccae..a44da8b3d240 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -268,7 +268,8 @@ rpc_shutdown_client(struct rpc_clnt *clnt) clnt->cl_oneshot = 0; clnt->cl_dead = 0; rpc_killall_tasks(clnt); - sleep_on_timeout(&destroy_wait, 1*HZ); + wait_event_timeout(destroy_wait, + atomic_read(&clnt->cl_users) > 0, 1*HZ); } if (atomic_read(&clnt->cl_users) < 0) { @@ -374,19 +375,23 @@ out: * Default callback for async RPC calls */ static void -rpc_default_callback(struct rpc_task *task) +rpc_default_callback(struct rpc_task *task, void *data) { } +static const struct rpc_call_ops rpc_default_ops = { + .rpc_call_done = rpc_default_callback, +}; + /* * Export the signal mask handling for synchronous code that * sleeps on RPC calls */ -#define RPC_INTR_SIGNALS (sigmask(SIGINT) | sigmask(SIGQUIT) | sigmask(SIGKILL)) +#define RPC_INTR_SIGNALS (sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT) | sigmask(SIGTERM)) static void rpc_save_sigmask(sigset_t *oldset, int intr) { - unsigned long sigallow = 0; + unsigned long sigallow = sigmask(SIGKILL); sigset_t sigmask; /* Block all signals except those listed in sigallow */ @@ -432,7 +437,7 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) BUG_ON(flags & RPC_TASK_ASYNC); status = -ENOMEM; - task = rpc_new_task(clnt, NULL, flags); + task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL); if (task == NULL) goto out; @@ -442,14 +447,15 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) rpc_call_setup(task, msg, 0); /* Set up the call info struct and execute the task */ - if (task->tk_status == 0) { + status = task->tk_status; + if (status == 0) { + atomic_inc(&task->tk_count); status = rpc_execute(task); - } else { - status = task->tk_status; - rpc_release_task(task); + if (status == 0) + status = task->tk_status; } - rpc_restore_sigmask(&oldset); + rpc_release_task(task); out: return status; } @@ -459,7 +465,7 @@ out: */ int rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, - rpc_action callback, void *data) + const struct rpc_call_ops *tk_ops, void *data) { struct rpc_task *task; sigset_t oldset; @@ -472,12 +478,9 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, flags |= RPC_TASK_ASYNC; /* Create/initialize a new RPC task */ - if (!callback) - callback = rpc_default_callback; status = -ENOMEM; - if (!(task = rpc_new_task(clnt, callback, flags))) + if (!(task = rpc_new_task(clnt, flags, tk_ops, data))) goto out; - task->tk_calldata = data; /* Mask signals on GSS_AUTH upcalls */ rpc_task_sigmask(task, &oldset); @@ -511,7 +514,7 @@ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags) if (task->tk_status == 0) task->tk_action = call_start; else - task->tk_action = NULL; + task->tk_action = rpc_exit_task; } void @@ -536,6 +539,18 @@ size_t rpc_max_payload(struct rpc_clnt *clnt) } EXPORT_SYMBOL(rpc_max_payload); +/** + * rpc_force_rebind - force transport to check that remote port is unchanged + * @clnt: client to rebind + * + */ +void rpc_force_rebind(struct rpc_clnt *clnt) +{ + if (clnt->cl_autobind) + clnt->cl_port = 0; +} +EXPORT_SYMBOL(rpc_force_rebind); + /* * Restart an (async) RPC call. Usually called from within the * exit handler. @@ -642,24 +657,26 @@ call_reserveresult(struct rpc_task *task) /* * 2. Allocate the buffer. For details, see sched.c:rpc_malloc. - * (Note: buffer memory is freed in rpc_task_release). + * (Note: buffer memory is freed in xprt_release). */ static void call_allocate(struct rpc_task *task) { + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = task->tk_xprt; unsigned int bufsiz; dprintk("RPC: %4d call_allocate (status %d)\n", task->tk_pid, task->tk_status); task->tk_action = call_bind; - if (task->tk_buffer) + if (req->rq_buffer) return; /* FIXME: compute buffer requirements more exactly using * auth->au_wslack */ bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE; - if (rpc_malloc(task, bufsiz << 1) != NULL) + if (xprt->ops->buf_alloc(task, bufsiz << 1) != NULL) return; printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); @@ -702,14 +719,14 @@ call_encode(struct rpc_task *task) task->tk_pid, task->tk_status); /* Default buffer setup */ - bufsiz = task->tk_bufsize >> 1; - sndbuf->head[0].iov_base = (void *)task->tk_buffer; + bufsiz = req->rq_bufsize >> 1; + sndbuf->head[0].iov_base = (void *)req->rq_buffer; sndbuf->head[0].iov_len = bufsiz; sndbuf->tail[0].iov_len = 0; sndbuf->page_len = 0; sndbuf->len = 0; sndbuf->buflen = bufsiz; - rcvbuf->head[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz); + rcvbuf->head[0].iov_base = (void *)((char *)req->rq_buffer + bufsiz); rcvbuf->head[0].iov_len = bufsiz; rcvbuf->tail[0].iov_len = 0; rcvbuf->page_len = 0; @@ -849,8 +866,7 @@ call_connect_status(struct rpc_task *task) } /* Something failed: remote service port may have changed */ - if (clnt->cl_autobind) - clnt->cl_port = 0; + rpc_force_rebind(clnt); switch (status) { case -ENOTCONN: @@ -892,7 +908,7 @@ call_transmit(struct rpc_task *task) if (task->tk_status < 0) return; if (!task->tk_msg.rpc_proc->p_decode) { - task->tk_action = NULL; + task->tk_action = rpc_exit_task; rpc_wake_up_task(task); } return; @@ -931,8 +947,7 @@ call_status(struct rpc_task *task) break; case -ECONNREFUSED: case -ENOTCONN: - if (clnt->cl_autobind) - clnt->cl_port = 0; + rpc_force_rebind(clnt); task->tk_action = call_bind; break; case -EAGAIN: @@ -943,8 +958,7 @@ call_status(struct rpc_task *task) rpc_exit(task, status); break; default: - if (clnt->cl_chatty) - printk("%s: RPC call returned error %d\n", + printk("%s: RPC call returned error %d\n", clnt->cl_protname, -status); rpc_exit(task, status); break; @@ -979,20 +993,18 @@ call_timeout(struct rpc_task *task) dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid); if (RPC_IS_SOFT(task)) { - if (clnt->cl_chatty) - printk(KERN_NOTICE "%s: server %s not responding, timed out\n", + printk(KERN_NOTICE "%s: server %s not responding, timed out\n", clnt->cl_protname, clnt->cl_server); rpc_exit(task, -EIO); return; } - if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN)) { + if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) { task->tk_flags |= RPC_CALL_MAJORSEEN; printk(KERN_NOTICE "%s: server %s not responding, still trying\n", clnt->cl_protname, clnt->cl_server); } - if (clnt->cl_autobind) - clnt->cl_port = 0; + rpc_force_rebind(clnt); retry: clnt->cl_stats->rpcretrans++; @@ -1014,7 +1026,7 @@ call_decode(struct rpc_task *task) dprintk("RPC: %4d call_decode (status %d)\n", task->tk_pid, task->tk_status); - if (clnt->cl_chatty && (task->tk_flags & RPC_CALL_MAJORSEEN)) { + if (task->tk_flags & RPC_CALL_MAJORSEEN) { printk(KERN_NOTICE "%s: server %s OK\n", clnt->cl_protname, clnt->cl_server); task->tk_flags &= ~RPC_CALL_MAJORSEEN; @@ -1039,13 +1051,14 @@ call_decode(struct rpc_task *task) sizeof(req->rq_rcv_buf)) != 0); /* Verify the RPC header */ - if (!(p = call_verify(task))) { - if (task->tk_action == NULL) - return; - goto out_retry; + p = call_verify(task); + if (IS_ERR(p)) { + if (p == ERR_PTR(-EAGAIN)) + goto out_retry; + return; } - task->tk_action = NULL; + task->tk_action = rpc_exit_task; if (decode) task->tk_status = rpcauth_unwrap_resp(task, decode, req, p, @@ -1138,7 +1151,7 @@ call_verify(struct rpc_task *task) if ((n = ntohl(*p++)) != RPC_REPLY) { printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n); - goto out_retry; + goto out_garbage; } if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) { if (--len < 0) @@ -1168,7 +1181,7 @@ call_verify(struct rpc_task *task) task->tk_pid); rpcauth_invalcred(task); task->tk_action = call_refresh; - return NULL; + goto out_retry; case RPC_AUTH_BADCRED: case RPC_AUTH_BADVERF: /* possibly garbled cred/verf? */ @@ -1178,7 +1191,7 @@ call_verify(struct rpc_task *task) dprintk("RPC: %4d call_verify: retry garbled creds\n", task->tk_pid); task->tk_action = call_bind; - return NULL; + goto out_retry; case RPC_AUTH_TOOWEAK: printk(KERN_NOTICE "call_verify: server requires stronger " "authentication.\n"); @@ -1193,7 +1206,7 @@ call_verify(struct rpc_task *task) } if (!(p = rpcauth_checkverf(task, p))) { printk(KERN_WARNING "call_verify: auth check failed\n"); - goto out_retry; /* bad verifier, retry */ + goto out_garbage; /* bad verifier, retry */ } len = p - (u32 *)iov->iov_base - 1; if (len < 0) @@ -1230,23 +1243,24 @@ call_verify(struct rpc_task *task) /* Also retry */ } -out_retry: +out_garbage: task->tk_client->cl_stats->rpcgarbage++; if (task->tk_garb_retry) { task->tk_garb_retry--; dprintk("RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid); task->tk_action = call_bind; - return NULL; +out_retry: + return ERR_PTR(-EAGAIN); } printk(KERN_WARNING "RPC %s: retry failed, exit EIO\n", __FUNCTION__); out_eio: error = -EIO; out_err: rpc_exit(task, error); - return NULL; + return ERR_PTR(error); out_overflow: printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__); - goto out_retry; + goto out_garbage; } static int rpcproc_encode_null(void *rqstp, u32 *data, void *obj) diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index a398575f94b8..8139ce68e915 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -90,8 +90,7 @@ bailout: map->pm_binding = 0; rpc_wake_up(&map->pm_bindwait); spin_unlock(&pmap_lock); - task->tk_status = -EIO; - task->tk_action = NULL; + rpc_exit(task, -EIO); } #ifdef CONFIG_ROOT_NFS @@ -132,21 +131,22 @@ static void pmap_getport_done(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = task->tk_xprt; struct rpc_portmap *map = clnt->cl_pmap; dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n", task->tk_pid, task->tk_status, clnt->cl_port); + + xprt->ops->set_port(xprt, 0); if (task->tk_status < 0) { /* Make the calling task exit with an error */ - task->tk_action = NULL; + task->tk_action = rpc_exit_task; } else if (clnt->cl_port == 0) { /* Program not registered */ - task->tk_status = -EACCES; - task->tk_action = NULL; + rpc_exit(task, -EACCES); } else { - /* byte-swap port number first */ + xprt->ops->set_port(xprt, clnt->cl_port); clnt->cl_port = htons(clnt->cl_port); - clnt->cl_xprt->addr.sin_port = clnt->cl_port; } spin_lock(&pmap_lock); map->pm_binding = 0; @@ -207,7 +207,7 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg xprt = xprt_create_proto(proto, srvaddr, NULL); if (IS_ERR(xprt)) return (struct rpc_clnt *)xprt; - xprt->addr.sin_port = htons(RPC_PMAP_PORT); + xprt->ops->set_port(xprt, RPC_PMAP_PORT); if (!privileged) xprt->resvport = 0; @@ -217,7 +217,6 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg RPC_AUTH_UNIX); if (!IS_ERR(clnt)) { clnt->cl_softrtry = 1; - clnt->cl_chatty = 1; clnt->cl_oneshot = 1; } return clnt; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 16a2458f38f7..9764c80ab0b2 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -69,10 +69,13 @@ rpc_timeout_upcall_queue(void *data) struct rpc_inode *rpci = (struct rpc_inode *)data; struct inode *inode = &rpci->vfs_inode; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); + if (rpci->ops == NULL) + goto out; if (rpci->nreaders == 0 && !list_empty(&rpci->pipe)) __rpc_purge_upcall(inode, -ETIMEDOUT); - up(&inode->i_sem); +out: + mutex_unlock(&inode->i_mutex); } int @@ -81,7 +84,7 @@ rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) struct rpc_inode *rpci = RPC_I(inode); int res = -EPIPE; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (rpci->ops == NULL) goto out; if (rpci->nreaders) { @@ -97,7 +100,7 @@ rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) res = 0; } out: - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); wake_up(&rpci->waitq); return res; } @@ -113,9 +116,7 @@ rpc_close_pipes(struct inode *inode) { struct rpc_inode *rpci = RPC_I(inode); - cancel_delayed_work(&rpci->queue_timeout); - flush_scheduled_work(); - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (rpci->ops != NULL) { rpci->nreaders = 0; __rpc_purge_list(rpci, &rpci->in_upcall, -EPIPE); @@ -126,7 +127,9 @@ rpc_close_pipes(struct inode *inode) rpci->ops = NULL; } rpc_inode_setowner(inode, NULL); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); + cancel_delayed_work(&rpci->queue_timeout); + flush_scheduled_work(); } static struct inode * @@ -151,7 +154,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp) struct rpc_inode *rpci = RPC_I(inode); int res = -ENXIO; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (rpci->ops != NULL) { if (filp->f_mode & FMODE_READ) rpci->nreaders ++; @@ -159,17 +162,17 @@ rpc_pipe_open(struct inode *inode, struct file *filp) rpci->nwriters ++; res = 0; } - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return res; } static int rpc_pipe_release(struct inode *inode, struct file *filp) { - struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode); + struct rpc_inode *rpci = RPC_I(inode); struct rpc_pipe_msg *msg; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (rpci->ops == NULL) goto out; msg = (struct rpc_pipe_msg *)filp->private_data; @@ -187,7 +190,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp) if (rpci->ops->release_pipe) rpci->ops->release_pipe(inode); out: - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return 0; } @@ -199,7 +202,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) struct rpc_pipe_msg *msg; int res = 0; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (rpci->ops == NULL) { res = -EPIPE; goto out_unlock; @@ -226,7 +229,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) rpci->ops->destroy_msg(msg); } out_unlock: - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return res; } @@ -237,11 +240,11 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of struct rpc_inode *rpci = RPC_I(inode); int res; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); res = -EPIPE; if (rpci->ops != NULL) res = rpci->ops->downcall(filp, buf, len); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return res; } @@ -319,7 +322,7 @@ rpc_info_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *m = file->private_data; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); clnt = RPC_I(inode)->private; if (clnt) { atomic_inc(&clnt->cl_users); @@ -328,7 +331,7 @@ rpc_info_open(struct inode *inode, struct file *file) single_release(inode, file); ret = -EINVAL; } - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); } return ret; } @@ -488,11 +491,11 @@ rpc_depopulate(struct dentry *parent) struct dentry *dentry, *dvec[10]; int n = 0; - down(&dir->i_sem); + mutex_lock(&dir->i_mutex); repeat: spin_lock(&dcache_lock); list_for_each_safe(pos, next, &parent->d_subdirs) { - dentry = list_entry(pos, struct dentry, d_child); + dentry = list_entry(pos, struct dentry, d_u.d_child); spin_lock(&dentry->d_lock); if (!d_unhashed(dentry)) { dget_locked(dentry); @@ -516,7 +519,7 @@ repeat: } while (n); goto repeat; } - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); } static int @@ -529,7 +532,7 @@ rpc_populate(struct dentry *parent, struct dentry *dentry; int mode, i; - down(&dir->i_sem); + mutex_lock(&dir->i_mutex); for (i = start; i < eof; i++) { dentry = d_alloc_name(parent, files[i].name); if (!dentry) @@ -549,10 +552,10 @@ rpc_populate(struct dentry *parent, dir->i_nlink++; d_add(dentry, inode); } - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); return 0; out_bad: - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); printk(KERN_WARNING "%s: %s failed to populate directory %s\n", __FILE__, __FUNCTION__, parent->d_name.name); return -ENOMEM; @@ -606,7 +609,7 @@ rpc_lookup_negative(char *path, struct nameidata *nd) if ((error = rpc_lookup_parent(path, nd)) != 0) return ERR_PTR(error); dir = nd->dentry->d_inode; - down(&dir->i_sem); + mutex_lock(&dir->i_mutex); dentry = lookup_hash(nd); if (IS_ERR(dentry)) goto out_err; @@ -617,7 +620,7 @@ rpc_lookup_negative(char *path, struct nameidata *nd) } return dentry; out_err: - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); rpc_release_path(nd); return dentry; } @@ -643,7 +646,7 @@ rpc_mkdir(char *path, struct rpc_clnt *rpc_client) if (error) goto err_depopulate; out: - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); rpc_release_path(&nd); return dentry; err_depopulate: @@ -668,7 +671,7 @@ rpc_rmdir(char *path) if ((error = rpc_lookup_parent(path, &nd)) != 0) return error; dir = nd.dentry->d_inode; - down(&dir->i_sem); + mutex_lock(&dir->i_mutex); dentry = lookup_hash(&nd); if (IS_ERR(dentry)) { error = PTR_ERR(dentry); @@ -678,7 +681,7 @@ rpc_rmdir(char *path) error = __rpc_rmdir(dir, dentry); dput(dentry); out_release: - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); rpc_release_path(&nd); return error; } @@ -707,7 +710,7 @@ rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags) rpci->ops = ops; inode_dir_notify(dir, DN_CREATE); out: - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); rpc_release_path(&nd); return dentry; err_dput: @@ -729,7 +732,7 @@ rpc_unlink(char *path) if ((error = rpc_lookup_parent(path, &nd)) != 0) return error; dir = nd.dentry->d_inode; - down(&dir->i_sem); + mutex_lock(&dir->i_mutex); dentry = lookup_hash(&nd); if (IS_ERR(dentry)) { error = PTR_ERR(dentry); @@ -743,7 +746,7 @@ rpc_unlink(char *path) dput(dentry); inode_dir_notify(dir, DN_DELETE); out_release: - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); rpc_release_path(&nd); return error; } diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 54e60a657500..7415406aa1ae 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -41,8 +41,6 @@ static mempool_t *rpc_buffer_mempool __read_mostly; static void __rpc_default_timer(struct rpc_task *task); static void rpciod_killall(void); -static void rpc_free(struct rpc_task *task); - static void rpc_async_schedule(void *); /* @@ -264,6 +262,35 @@ void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname) } EXPORT_SYMBOL(rpc_init_wait_queue); +static int rpc_wait_bit_interruptible(void *word) +{ + if (signal_pending(current)) + return -ERESTARTSYS; + schedule(); + return 0; +} + +/* + * Mark an RPC call as having completed by clearing the 'active' bit + */ +static inline void rpc_mark_complete_task(struct rpc_task *task) +{ + rpc_clear_active(task); + wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE); +} + +/* + * Allow callers to wait for completion of an RPC call + */ +int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *)) +{ + if (action == NULL) + action = rpc_wait_bit_interruptible; + return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, + action, TASK_INTERRUPTIBLE); +} +EXPORT_SYMBOL(__rpc_wait_for_completion_task); + /* * Make an RPC task runnable. * @@ -299,10 +326,7 @@ static void rpc_make_runnable(struct rpc_task *task) static inline void rpc_schedule_run(struct rpc_task *task) { - /* Don't run a child twice! */ - if (RPC_IS_ACTIVATED(task)) - return; - task->tk_active = 1; + rpc_set_active(task); rpc_make_runnable(task); } @@ -324,8 +348,7 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, } /* Mark the task as being activated if so needed */ - if (!RPC_IS_ACTIVATED(task)) - task->tk_active = 1; + rpc_set_active(task); __rpc_add_wait_queue(q, task); @@ -555,36 +578,29 @@ __rpc_atrun(struct rpc_task *task) } /* - * Helper that calls task->tk_exit if it exists and then returns - * true if we should exit __rpc_execute. + * Helper to call task->tk_ops->rpc_call_prepare */ -static inline int __rpc_do_exit(struct rpc_task *task) +static void rpc_prepare_task(struct rpc_task *task) { - if (task->tk_exit != NULL) { - lock_kernel(); - task->tk_exit(task); - unlock_kernel(); - /* If tk_action is non-null, we should restart the call */ - if (task->tk_action != NULL) { - if (!RPC_ASSASSINATED(task)) { - /* Release RPC slot and buffer memory */ - xprt_release(task); - rpc_free(task); - return 0; - } - printk(KERN_ERR "RPC: dead task tried to walk away.\n"); - } - } - return 1; + task->tk_ops->rpc_call_prepare(task, task->tk_calldata); } -static int rpc_wait_bit_interruptible(void *word) +/* + * Helper that calls task->tk_ops->rpc_call_done if it exists + */ +void rpc_exit_task(struct rpc_task *task) { - if (signal_pending(current)) - return -ERESTARTSYS; - schedule(); - return 0; + task->tk_action = NULL; + if (task->tk_ops->rpc_call_done != NULL) { + task->tk_ops->rpc_call_done(task, task->tk_calldata); + if (task->tk_action != NULL) { + WARN_ON(RPC_ASSASSINATED(task)); + /* Always release the RPC slot and buffer memory */ + xprt_release(task); + } + } } +EXPORT_SYMBOL(rpc_exit_task); /* * This is the RPC `scheduler' (or rather, the finite state machine). @@ -631,12 +647,11 @@ static int __rpc_execute(struct rpc_task *task) * by someone else. */ if (!RPC_IS_QUEUED(task)) { - if (task->tk_action != NULL) { - lock_kernel(); - task->tk_action(task); - unlock_kernel(); - } else if (__rpc_do_exit(task)) + if (task->tk_action == NULL) break; + lock_kernel(); + task->tk_action(task); + unlock_kernel(); } /* @@ -676,9 +691,9 @@ static int __rpc_execute(struct rpc_task *task) dprintk("RPC: %4d sync task resuming\n", task->tk_pid); } - dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status); - status = task->tk_status; - + dprintk("RPC: %4d, return %d, status %d\n", task->tk_pid, status, task->tk_status); + /* Wake up anyone who is waiting for task completion */ + rpc_mark_complete_task(task); /* Release all resources associated with the task */ rpc_release_task(task); return status; @@ -696,9 +711,7 @@ static int __rpc_execute(struct rpc_task *task) int rpc_execute(struct rpc_task *task) { - BUG_ON(task->tk_active); - - task->tk_active = 1; + rpc_set_active(task); rpc_set_running(task); return __rpc_execute(task); } @@ -708,17 +721,19 @@ static void rpc_async_schedule(void *arg) __rpc_execute((struct rpc_task *)arg); } -/* - * Allocate memory for RPC purposes. +/** + * rpc_malloc - allocate an RPC buffer + * @task: RPC task that will use this buffer + * @size: requested byte size * * We try to ensure that some NFS reads and writes can always proceed * by using a mempool when allocating 'small' buffers. * In order to avoid memory starvation triggering more writebacks of * NFS requests, we use GFP_NOFS rather than GFP_KERNEL. */ -void * -rpc_malloc(struct rpc_task *task, size_t size) +void * rpc_malloc(struct rpc_task *task, size_t size) { + struct rpc_rqst *req = task->tk_rqstp; gfp_t gfp; if (task->tk_flags & RPC_TASK_SWAPPER) @@ -727,42 +742,52 @@ rpc_malloc(struct rpc_task *task, size_t size) gfp = GFP_NOFS; if (size > RPC_BUFFER_MAXSIZE) { - task->tk_buffer = kmalloc(size, gfp); - if (task->tk_buffer) - task->tk_bufsize = size; + req->rq_buffer = kmalloc(size, gfp); + if (req->rq_buffer) + req->rq_bufsize = size; } else { - task->tk_buffer = mempool_alloc(rpc_buffer_mempool, gfp); - if (task->tk_buffer) - task->tk_bufsize = RPC_BUFFER_MAXSIZE; + req->rq_buffer = mempool_alloc(rpc_buffer_mempool, gfp); + if (req->rq_buffer) + req->rq_bufsize = RPC_BUFFER_MAXSIZE; } - return task->tk_buffer; + return req->rq_buffer; } -static void -rpc_free(struct rpc_task *task) +/** + * rpc_free - free buffer allocated via rpc_malloc + * @task: RPC task with a buffer to be freed + * + */ +void rpc_free(struct rpc_task *task) { - if (task->tk_buffer) { - if (task->tk_bufsize == RPC_BUFFER_MAXSIZE) - mempool_free(task->tk_buffer, rpc_buffer_mempool); + struct rpc_rqst *req = task->tk_rqstp; + + if (req->rq_buffer) { + if (req->rq_bufsize == RPC_BUFFER_MAXSIZE) + mempool_free(req->rq_buffer, rpc_buffer_mempool); else - kfree(task->tk_buffer); - task->tk_buffer = NULL; - task->tk_bufsize = 0; + kfree(req->rq_buffer); + req->rq_buffer = NULL; + req->rq_bufsize = 0; } } /* * Creation and deletion of RPC task structures */ -void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action callback, int flags) +void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) { memset(task, 0, sizeof(*task)); init_timer(&task->tk_timer); task->tk_timer.data = (unsigned long) task; task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer; + atomic_set(&task->tk_count, 1); task->tk_client = clnt; task->tk_flags = flags; - task->tk_exit = callback; + task->tk_ops = tk_ops; + if (tk_ops->rpc_call_prepare != NULL) + task->tk_action = rpc_prepare_task; + task->tk_calldata = calldata; /* Initialize retry counters */ task->tk_garb_retry = 2; @@ -791,6 +816,8 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call list_add_tail(&task->tk_task, &all_tasks); spin_unlock(&rpc_sched_lock); + BUG_ON(task->tk_ops == NULL); + dprintk("RPC: %4d new task procpid %d\n", task->tk_pid, current->pid); } @@ -801,8 +828,7 @@ rpc_alloc_task(void) return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); } -static void -rpc_default_free_task(struct rpc_task *task) +static void rpc_free_task(struct rpc_task *task) { dprintk("RPC: %4d freeing task\n", task->tk_pid); mempool_free(task, rpc_task_mempool); @@ -813,8 +839,7 @@ rpc_default_free_task(struct rpc_task *task) * clean up after an allocation failure, as the client may * have specified "oneshot". */ -struct rpc_task * -rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags) +struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) { struct rpc_task *task; @@ -822,10 +847,7 @@ rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags) if (!task) goto cleanup; - rpc_init_task(task, clnt, callback, flags); - - /* Replace tk_release */ - task->tk_release = rpc_default_free_task; + rpc_init_task(task, clnt, flags, tk_ops, calldata); dprintk("RPC: %4d allocated task\n", task->tk_pid); task->tk_flags |= RPC_TASK_DYNAMIC; @@ -845,11 +867,15 @@ cleanup: void rpc_release_task(struct rpc_task *task) { - dprintk("RPC: %4d release task\n", task->tk_pid); + const struct rpc_call_ops *tk_ops = task->tk_ops; + void *calldata = task->tk_calldata; #ifdef RPC_DEBUG BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID); #endif + if (!atomic_dec_and_test(&task->tk_count)) + return; + dprintk("RPC: %4d release task\n", task->tk_pid); /* Remove from global task list */ spin_lock(&rpc_sched_lock); @@ -857,7 +883,6 @@ void rpc_release_task(struct rpc_task *task) spin_unlock(&rpc_sched_lock); BUG_ON (RPC_IS_QUEUED(task)); - task->tk_active = 0; /* Synchronously delete any running timer */ rpc_delete_timer(task); @@ -867,7 +892,6 @@ void rpc_release_task(struct rpc_task *task) xprt_release(task); if (task->tk_msg.rpc_cred) rpcauth_unbindcred(task); - rpc_free(task); if (task->tk_client) { rpc_release_client(task->tk_client); task->tk_client = NULL; @@ -876,11 +900,34 @@ void rpc_release_task(struct rpc_task *task) #ifdef RPC_DEBUG task->tk_magic = 0; #endif - if (task->tk_release) - task->tk_release(task); + if (task->tk_flags & RPC_TASK_DYNAMIC) + rpc_free_task(task); + if (tk_ops->rpc_release) + tk_ops->rpc_release(calldata); } /** + * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it + * @clnt - pointer to RPC client + * @flags - RPC flags + * @ops - RPC call ops + * @data - user call data + */ +struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, + const struct rpc_call_ops *ops, + void *data) +{ + struct rpc_task *task; + task = rpc_new_task(clnt, flags, ops, data); + if (task == NULL) + return ERR_PTR(-ENOMEM); + atomic_inc(&task->tk_count); + rpc_execute(task); + return task; +} +EXPORT_SYMBOL(rpc_run_task); + +/** * rpc_find_parent - find the parent of a child task. * @child: child task * @@ -890,12 +937,11 @@ void rpc_release_task(struct rpc_task *task) * * Caller must hold childq.lock */ -static inline struct rpc_task *rpc_find_parent(struct rpc_task *child) +static inline struct rpc_task *rpc_find_parent(struct rpc_task *child, struct rpc_task *parent) { - struct rpc_task *task, *parent; + struct rpc_task *task; struct list_head *le; - parent = (struct rpc_task *) child->tk_calldata; task_for_each(task, le, &childq.tasks[0]) if (task == parent) return parent; @@ -903,18 +949,22 @@ static inline struct rpc_task *rpc_find_parent(struct rpc_task *child) return NULL; } -static void rpc_child_exit(struct rpc_task *child) +static void rpc_child_exit(struct rpc_task *child, void *calldata) { struct rpc_task *parent; spin_lock_bh(&childq.lock); - if ((parent = rpc_find_parent(child)) != NULL) { + if ((parent = rpc_find_parent(child, calldata)) != NULL) { parent->tk_status = child->tk_status; __rpc_wake_up_task(parent); } spin_unlock_bh(&childq.lock); } +static const struct rpc_call_ops rpc_child_ops = { + .rpc_call_done = rpc_child_exit, +}; + /* * Note: rpc_new_task releases the client after a failure. */ @@ -923,11 +973,9 @@ rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent) { struct rpc_task *task; - task = rpc_new_task(clnt, NULL, RPC_TASK_ASYNC | RPC_TASK_CHILD); + task = rpc_new_task(clnt, RPC_TASK_ASYNC | RPC_TASK_CHILD, &rpc_child_ops, parent); if (!task) goto fail; - task->tk_exit = rpc_child_exit; - task->tk_calldata = parent; return task; fail: @@ -1063,7 +1111,7 @@ void rpc_show_tasks(void) return; } printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " - "-rpcwait -action- --exit--\n"); + "-rpcwait -action- ---ops--\n"); alltask_for_each(t, le, &all_tasks) { const char *rpc_waitq = "none"; @@ -1078,7 +1126,7 @@ void rpc_show_tasks(void) (t->tk_client ? t->tk_client->cl_prog : 0), t->tk_rqstp, t->tk_timeout, rpc_waitq, - t->tk_action, t->tk_exit); + t->tk_action, t->tk_ops); } spin_unlock(&rpc_sched_lock); } diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index a03d4b600c92..9f7373203592 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -30,8 +30,6 @@ EXPORT_SYMBOL(rpc_init_task); EXPORT_SYMBOL(rpc_sleep_on); EXPORT_SYMBOL(rpc_wake_up_next); EXPORT_SYMBOL(rpc_wake_up_task); -EXPORT_SYMBOL(rpc_new_child); -EXPORT_SYMBOL(rpc_run_child); EXPORT_SYMBOL(rpciod_down); EXPORT_SYMBOL(rpciod_up); EXPORT_SYMBOL(rpc_new_task); @@ -45,7 +43,6 @@ EXPORT_SYMBOL(rpc_clone_client); EXPORT_SYMBOL(rpc_bind_new_program); EXPORT_SYMBOL(rpc_destroy_client); EXPORT_SYMBOL(rpc_shutdown_client); -EXPORT_SYMBOL(rpc_release_client); EXPORT_SYMBOL(rpc_killall_tasks); EXPORT_SYMBOL(rpc_call_sync); EXPORT_SYMBOL(rpc_call_async); @@ -120,7 +117,6 @@ EXPORT_SYMBOL(unix_domain_find); /* Generic XDR */ EXPORT_SYMBOL(xdr_encode_string); -EXPORT_SYMBOL(xdr_decode_string); EXPORT_SYMBOL(xdr_decode_string_inplace); EXPORT_SYMBOL(xdr_decode_netobj); EXPORT_SYMBOL(xdr_encode_netobj); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e4296c8b861e..b19cc26fa9c2 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -122,8 +122,7 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) rqstp->rq_argused = 0; rqstp->rq_resused = 0; arghi = 0; - if (pages > RPCSVC_MAXPAGES) - BUG(); + BUG_ON(pages > RPCSVC_MAXPAGES); while (pages) { struct page *p = alloc_page(GFP_KERNEL); if (!p) diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index cac2e774dd81..3e6c694bbad1 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -101,10 +101,22 @@ static void ip_map_put(struct cache_head *item, struct cache_detail *cd) } } +#if IP_HASHBITS == 8 +/* hash_long on a 64 bit machine is currently REALLY BAD for + * IP addresses in reverse-endian (i.e. on a little-endian machine). + * So use a trivial but reliable hash instead + */ +static inline int hash_ip(unsigned long ip) +{ + int hash = ip ^ (ip>>16); + return (hash ^ (hash>>8)) & 0xff; +} +#endif + static inline int ip_map_hash(struct ip_map *item) { return hash_str(item->m_class, IP_HASHBITS) ^ - hash_long((unsigned long)item->m_addr.s_addr, IP_HASHBITS); + hash_ip((unsigned long)item->m_addr.s_addr); } static inline int ip_map_match(struct ip_map *item, struct ip_map *tmp) { diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index d68eba481291..e67613e4eb18 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1026,7 +1026,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) } else { printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", svsk->sk_server->sv_name, -len); - svc_sock_received(svsk); + goto err_delete; } return len; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index aaf08cdd19f0..ca4bfa57e116 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -93,27 +93,6 @@ xdr_encode_string(u32 *p, const char *string) } u32 * -xdr_decode_string(u32 *p, char **sp, int *lenp, int maxlen) -{ - unsigned int len; - char *string; - - if ((len = ntohl(*p++)) > maxlen) - return NULL; - if (lenp) - *lenp = len; - if ((len % 4) != 0) { - string = (char *) p; - } else { - string = (char *) (p - 1); - memmove(string, p, len); - } - string[len] = '\0'; - *sp = string; - return p + XDR_QUADLEN(len); -} - -u32 * xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen) { unsigned int len; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 6dda3860351f..8ff2c8acb223 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -119,6 +119,17 @@ out_sleep: return 0; } +static void xprt_clear_locked(struct rpc_xprt *xprt) +{ + xprt->snd_task = NULL; + if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state) || xprt->shutdown) { + smp_mb__before_clear_bit(); + clear_bit(XPRT_LOCKED, &xprt->state); + smp_mb__after_clear_bit(); + } else + schedule_work(&xprt->task_cleanup); +} + /* * xprt_reserve_xprt_cong - serialize write access to transports * @task: task that is requesting access to the transport @@ -145,9 +156,7 @@ int xprt_reserve_xprt_cong(struct rpc_task *task) } return 1; } - smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->state); - smp_mb__after_clear_bit(); + xprt_clear_locked(xprt); out_sleep: dprintk("RPC: %4d failed to lock transport %p\n", task->tk_pid, xprt); task->tk_timeout = 0; @@ -193,9 +202,7 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt) return; out_unlock: - smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->state); - smp_mb__after_clear_bit(); + xprt_clear_locked(xprt); } static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) @@ -222,9 +229,7 @@ static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) return; } out_unlock: - smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->state); - smp_mb__after_clear_bit(); + xprt_clear_locked(xprt); } /** @@ -237,10 +242,7 @@ out_unlock: void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) { if (xprt->snd_task == task) { - xprt->snd_task = NULL; - smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->state); - smp_mb__after_clear_bit(); + xprt_clear_locked(xprt); __xprt_lock_write_next(xprt); } } @@ -256,10 +258,7 @@ void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) { if (xprt->snd_task == task) { - xprt->snd_task = NULL; - smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->state); - smp_mb__after_clear_bit(); + xprt_clear_locked(xprt); __xprt_lock_write_next_cong(xprt); } } @@ -535,10 +534,6 @@ void xprt_connect(struct rpc_task *task) dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid, xprt, (xprt_connected(xprt) ? "is" : "is not")); - if (xprt->shutdown) { - task->tk_status = -EIO; - return; - } if (!xprt->addr.sin_port) { task->tk_status = -EIO; return; @@ -687,9 +682,6 @@ int xprt_prepare_transmit(struct rpc_task *task) dprintk("RPC: %4d xprt_prepare_transmit\n", task->tk_pid); - if (xprt->shutdown) - return -EIO; - spin_lock_bh(&xprt->transport_lock); if (req->rq_received && !req->rq_bytes_sent) { err = req->rq_received; @@ -814,11 +806,9 @@ void xprt_reserve(struct rpc_task *task) struct rpc_xprt *xprt = task->tk_xprt; task->tk_status = -EIO; - if (!xprt->shutdown) { - spin_lock(&xprt->reserve_lock); - do_xprt_reserve(task); - spin_unlock(&xprt->reserve_lock); - } + spin_lock(&xprt->reserve_lock); + do_xprt_reserve(task); + spin_unlock(&xprt->reserve_lock); } static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) @@ -838,6 +828,8 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) req->rq_timeout = xprt->timeout.to_initval; req->rq_task = task; req->rq_xprt = xprt; + req->rq_buffer = NULL; + req->rq_bufsize = 0; req->rq_xid = xprt_alloc_xid(xprt); req->rq_release_snd_buf = NULL; dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, @@ -863,10 +855,11 @@ void xprt_release(struct rpc_task *task) if (!list_empty(&req->rq_list)) list_del(&req->rq_list); xprt->last_used = jiffies; - if (list_empty(&xprt->recv) && !xprt->shutdown) + if (list_empty(&xprt->recv)) mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout); spin_unlock_bh(&xprt->transport_lock); + xprt->ops->buf_free(task); task->tk_rqstp = NULL; if (req->rq_release_snd_buf) req->rq_release_snd_buf(req); @@ -974,16 +967,6 @@ struct rpc_xprt *xprt_create_proto(int proto, struct sockaddr_in *sap, struct rp return xprt; } -static void xprt_shutdown(struct rpc_xprt *xprt) -{ - xprt->shutdown = 1; - rpc_wake_up(&xprt->sending); - rpc_wake_up(&xprt->resend); - xprt_wake_pending_tasks(xprt, -EIO); - rpc_wake_up(&xprt->backlog); - del_timer_sync(&xprt->timer); -} - /** * xprt_destroy - destroy an RPC transport, killing off all requests. * @xprt: transport to destroy @@ -992,7 +975,8 @@ static void xprt_shutdown(struct rpc_xprt *xprt) int xprt_destroy(struct rpc_xprt *xprt) { dprintk("RPC: destroying transport %p\n", xprt); - xprt_shutdown(xprt); + xprt->shutdown = 1; + del_timer_sync(&xprt->timer); xprt->ops->destroy(xprt); kfree(xprt); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 77e8800d4127..c458f8d1d6d1 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -28,6 +28,7 @@ #include <linux/udp.h> #include <linux/tcp.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/sched.h> #include <linux/file.h> #include <net/sock.h> @@ -424,7 +425,7 @@ static void xs_close(struct rpc_xprt *xprt) struct sock *sk = xprt->inet; if (!sk) - return; + goto clear_close_wait; dprintk("RPC: xs_close xprt %p\n", xprt); @@ -441,6 +442,10 @@ static void xs_close(struct rpc_xprt *xprt) sk->sk_no_check = 0; sock_release(sock); +clear_close_wait: + smp_mb__before_clear_bit(); + clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + smp_mb__after_clear_bit(); } /** @@ -800,9 +805,13 @@ static void xs_tcp_state_change(struct sock *sk) case TCP_SYN_SENT: case TCP_SYN_RECV: break; + case TCP_CLOSE_WAIT: + /* Try to schedule an autoclose RPC calls */ + set_bit(XPRT_CLOSE_WAIT, &xprt->state); + if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) + schedule_work(&xprt->task_cleanup); default: xprt_disconnect(xprt); - break; } out: read_unlock(&sk->sk_callback_lock); @@ -920,6 +929,18 @@ static void xs_udp_timer(struct rpc_task *task) xprt_adjust_cwnd(task, -ETIMEDOUT); } +/** + * xs_set_port - reset the port number in the remote endpoint address + * @xprt: generic transport + * @port: new port number + * + */ +static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) +{ + dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); + xprt->addr.sin_port = htons(port); +} + static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) { struct sockaddr_in myaddr = { @@ -1160,7 +1181,10 @@ static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, + .set_port = xs_set_port, .connect = xs_connect, + .buf_alloc = rpc_malloc, + .buf_free = rpc_free, .send_request = xs_udp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_rtt, .timer = xs_udp_timer, @@ -1172,7 +1196,10 @@ static struct rpc_xprt_ops xs_udp_ops = { static struct rpc_xprt_ops xs_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xprt_release_xprt, + .set_port = xs_set_port, .connect = xs_connect, + .buf_alloc = rpc_malloc, + .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, .close = xs_close, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5f6ae79b8b16..1b5989b1b670 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -784,7 +784,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0); if (err) goto out_mknod_dput; - up(&nd.dentry->d_inode->i_sem); + mutex_unlock(&nd.dentry->d_inode->i_mutex); dput(nd.dentry); nd.dentry = dentry; @@ -823,7 +823,7 @@ out: out_mknod_dput: dput(dentry); out_mknod_unlock: - up(&nd.dentry->d_inode->i_sem); + mutex_unlock(&nd.dentry->d_inode->i_mutex); path_release(&nd); out_mknod_parent: if (err==-EEXIST) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 16459c7f54b2..bfabaf9cba87 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -540,12 +540,7 @@ static struct sock *x25_make_new(struct sock *osk) sk->sk_state = TCP_ESTABLISHED; sk->sk_sleep = osk->sk_sleep; sk->sk_backlog_rcv = osk->sk_backlog_rcv; - - if (sock_flag(osk, SOCK_ZAPPED)) - sock_set_flag(sk, SOCK_ZAPPED); - - if (sock_flag(osk, SOCK_DBG)) - sock_set_flag(sk, SOCK_DBG); + sock_copy_flags(sk, osk); ox25 = x25_sk(osk); x25->t21 = ox25->t21; diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 2f4531fcaca2..6ed3302312fb 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -540,8 +540,7 @@ void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, start = end; } } - if (len) - BUG(); + BUG_ON(len); } EXPORT_SYMBOL_GPL(skb_icv_walk); @@ -610,8 +609,7 @@ skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) start = end; } } - if (len) - BUG(); + BUG_ON(len); return elt; } EXPORT_SYMBOL_GPL(skb_to_sgvec); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 64a447375fdb..077bbf9fb9b7 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -22,6 +22,7 @@ #include <linux/workqueue.h> #include <linux/notifier.h> #include <linux/netdevice.h> +#include <linux/netfilter.h> #include <linux/module.h> #include <net/xfrm.h> #include <net/ip.h> @@ -247,11 +248,9 @@ EXPORT_SYMBOL(xfrm_policy_alloc); void __xfrm_policy_destroy(struct xfrm_policy *policy) { - if (!policy->dead) - BUG(); + BUG_ON(!policy->dead); - if (policy->bundles) - BUG(); + BUG_ON(policy->bundles); if (del_timer(&policy->timer)) BUG(); @@ -951,8 +950,8 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, return start; } -static int -_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) +int +xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); @@ -963,6 +962,7 @@ _decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) xfrm_policy_put_afinfo(afinfo); return 0; } +EXPORT_SYMBOL(xfrm_decode_session); static inline int secpath_has_tunnel(struct sec_path *sp, int k) { @@ -982,8 +982,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, u8 fl_dir = policy_to_flow_dir(dir); u32 sk_sid; - if (_decode_session(skb, &fl, family) < 0) + if (xfrm_decode_session(skb, &fl, family) < 0) return 0; + nf_nat_decode_session(skb, &fl, family); sk_sid = security_sk_sid(sk, &fl, fl_dir); @@ -1055,7 +1056,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) { struct flowi fl; - if (_decode_session(skb, &fl, family) < 0) + if (xfrm_decode_session(skb, &fl, family) < 0) return 0; return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 92e2b804c606..ac87a09ba83e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -802,6 +802,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; err = xfrm_policy_insert(p->dir, xp, excl); if (err) { + security_xfrm_policy_free(xp); kfree(xp); return err; } |