summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.h4
-rw-r--r--net/8021q/vlan_core.c4
-rw-r--r--net/8021q/vlan_dev.c18
-rw-r--r--net/8021q/vlan_netlink.c4
-rw-r--r--net/9p/client.c2
-rw-r--r--net/9p/trans_virtio.c5
-rw-r--r--net/appletalk/aarp.c4
-rw-r--r--net/appletalk/ddp.c100
-rw-r--r--net/batman-adv/bat_iv_ogm.c30
-rw-r--r--net/batman-adv/gateway_client.c2
-rw-r--r--net/batman-adv/hard-interface.c22
-rw-r--r--net/batman-adv/originator.c36
-rw-r--r--net/batman-adv/originator.h4
-rw-r--r--net/batman-adv/routing.c4
-rw-r--r--net/batman-adv/send.c9
-rw-r--r--net/batman-adv/translation-table.c23
-rw-r--r--net/bluetooth/6lowpan.c2
-rw-r--r--net/bluetooth/hidp/core.c16
-rw-r--r--net/bluetooth/hidp/hidp.h4
-rw-r--r--net/bridge/br_device.c69
-rw-r--r--net/bridge/br_fdb.c137
-rw-r--r--net/bridge/br_if.c6
-rw-r--r--net/bridge/br_input.c4
-rw-r--r--net/bridge/br_multicast.c37
-rw-r--r--net/bridge/br_netfilter.c4
-rw-r--r--net/bridge/br_private.h17
-rw-r--r--net/bridge/br_stp_if.c2
-rw-r--r--net/bridge/br_vlan.c27
-rw-r--r--net/bridge/netfilter/ebt_among.c2
-rw-r--r--net/bridge/netfilter/ebt_dnat.c2
-rw-r--r--net/bridge/netfilter/ebt_redirect.c6
-rw-r--r--net/bridge/netfilter/ebt_snat.c2
-rw-r--r--net/caif/caif_dev.c1
-rw-r--r--net/caif/cfsrvl.c1
-rw-r--r--net/can/af_can.c3
-rw-r--r--net/can/bcm.c4
-rw-r--r--net/can/raw.c27
-rw-r--r--net/ceph/osd_client.c2
-rw-r--r--net/core/dev.c49
-rw-r--r--net/core/fib_rules.c7
-rw-r--r--net/core/flow.c132
-rw-r--r--net/core/flow_dissector.c34
-rw-r--r--net/core/neighbour.c17
-rw-r--r--net/core/net-sysfs.c11
-rw-r--r--net/core/netpoll.c496
-rw-r--r--net/core/pktgen.c32
-rw-r--r--net/core/request_sock.c1
-rw-r--r--net/core/rtnetlink.c128
-rw-r--r--net/core/skbuff.c269
-rw-r--r--net/core/sock.c11
-rw-r--r--net/dccp/ccids/lib/tfrc.c2
-rw-r--r--net/dccp/ccids/lib/tfrc.h1
-rw-r--r--net/decnet/af_decnet.c5
-rw-r--r--net/hsr/hsr_device.c10
-rw-r--r--net/hsr/hsr_framereg.c22
-rw-r--r--net/hsr/hsr_main.c4
-rw-r--r--net/ieee802154/6lowpan.h319
-rw-r--r--net/ieee802154/6lowpan_iphc.c3
-rw-r--r--net/ieee802154/6lowpan_rtnl.c (renamed from net/ieee802154/6lowpan.c)411
-rw-r--r--net/ieee802154/Kconfig2
-rw-r--r--net/ieee802154/Makefile6
-rw-r--r--net/ieee802154/af802154.h5
-rw-r--r--net/ieee802154/af_ieee802154.c22
-rw-r--r--net/ieee802154/dgram.c60
-rw-r--r--net/ieee802154/header_ops.c287
-rw-r--r--net/ieee802154/ieee802154.h1
-rw-r--r--net/ieee802154/netlink.c1
-rw-r--r--net/ieee802154/nl-mac.c102
-rw-r--r--net/ieee802154/nl-phy.c200
-rw-r--r--net/ieee802154/nl_policy.c10
-rw-r--r--net/ieee802154/raw.c18
-rw-r--r--net/ieee802154/reassembly.c571
-rw-r--r--net/ieee802154/reassembly.h41
-rw-r--r--net/ieee802154/wpan-class.c10
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/af_inet.c11
-rw-r--r--net/ipv4/ah4.c78
-rw-r--r--net/ipv4/devinet.c3
-rw-r--r--net/ipv4/esp4.c26
-rw-r--r--net/ipv4/inet_fragment.c5
-rw-r--r--net/ipv4/ip_forward.c78
-rw-r--r--net/ipv4/ip_output.c15
-rw-r--r--net/ipv4/ip_sockglue.c21
-rw-r--r--net/ipv4/ip_tunnel.c113
-rw-r--r--net/ipv4/ip_tunnel_core.c47
-rw-r--r--net/ipv4/ip_vti.c310
-rw-r--r--net/ipv4/ipcomp.c26
-rw-r--r--net/ipv4/ipconfig.c2
-rw-r--r--net/ipv4/netfilter.c2
-rw-r--r--net/ipv4/netfilter/Kconfig5
-rw-r--r--net/ipv4/netfilter/Makefile1
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c5
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c4
-rw-r--r--net/ipv4/netfilter/nft_reject_ipv4.c75
-rw-r--r--net/ipv4/ping.c2
-rw-r--r--net/ipv4/proc.c6
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c14
-rw-r--r--net/ipv4/tcp.c23
-rw-r--r--net/ipv4/tcp_cong.c13
-rw-r--r--net/ipv4/tcp_cubic.c4
-rw-r--r--net/ipv4/tcp_highspeed.c1
-rw-r--r--net/ipv4/tcp_hybla.c13
-rw-r--r--net/ipv4/tcp_illinois.c2
-rw-r--r--net/ipv4/tcp_input.c196
-rw-r--r--net/ipv4/tcp_ipv4.c8
-rw-r--r--net/ipv4/tcp_lp.c2
-rw-r--r--net/ipv4/tcp_metrics.c83
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/tcp_output.c107
-rw-r--r--net/ipv4/tcp_probe.c2
-rw-r--r--net/ipv4/tcp_scalable.c1
-rw-r--r--net/ipv4/tcp_timer.c3
-rw-r--r--net/ipv4/tcp_vegas.c2
-rw-r--r--net/ipv4/tcp_veno.c1
-rw-r--r--net/ipv4/tcp_westwood.c1
-rw-r--r--net/ipv4/tcp_yeah.c2
-rw-r--r--net/ipv4/udp.c3
-rw-r--r--net/ipv4/udp_offload.c17
-rw-r--r--net/ipv4/xfrm4_input.c9
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c68
-rw-r--r--net/ipv4/xfrm4_policy.c1
-rw-r--r--net/ipv4/xfrm4_protocol.c286
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/Makefile2
-rw-r--r--net/ipv6/addrconf.c7
-rw-r--r--net/ipv6/addrlabel.c59
-rw-r--r--net/ipv6/ah6.c80
-rw-r--r--net/ipv6/esp6.c26
-rw-r--r--net/ipv6/exthdrs_core.c2
-rw-r--r--net/ipv6/exthdrs_offload.c4
-rw-r--r--net/ipv6/icmp.c2
-rw-r--r--net/ipv6/ip6_checksum.c4
-rw-r--r--net/ipv6/ip6_flowlabel.c6
-rw-r--r--net/ipv6/ip6_gre.c9
-rw-r--r--net/ipv6/ip6_offload.c20
-rw-r--r--net/ipv6/ip6_output.c35
-rw-r--r--net/ipv6/ip6_tunnel.c13
-rw-r--r--net/ipv6/ip6_vti.c316
-rw-r--r--net/ipv6/ipcomp6.c22
-rw-r--r--net/ipv6/ipv6_sockglue.c2
-rw-r--r--net/ipv6/netfilter/Kconfig5
-rw-r--r--net/ipv6/netfilter/Makefile1
-rw-r--r--net/ipv6/netfilter/nft_reject_ipv6.c76
-rw-r--r--net/ipv6/output_core.c2
-rw-r--r--net/ipv6/ping.c1
-rw-r--r--net/ipv6/route.c2
-rw-r--r--net/ipv6/sit.c29
-rw-r--r--net/ipv6/tcp_ipv6.c4
-rw-r--r--net/ipv6/udp_offload.c2
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c63
-rw-r--r--net/ipv6/xfrm6_policy.c7
-rw-r--r--net/ipv6/xfrm6_protocol.c270
-rw-r--r--net/ipx/af_ipx.c50
-rw-r--r--net/ipx/ipx_route.c4
-rw-r--r--net/iucv/af_iucv.c1
-rw-r--r--net/key/af_key.c39
-rw-r--r--net/l2tp/l2tp_core.c28
-rw-r--r--net/l2tp/l2tp_core.h1
-rw-r--r--net/l2tp/l2tp_netlink.c4
-rw-r--r--net/l2tp/l2tp_ppp.c16
-rw-r--r--net/mac80211/chan.c6
-rw-r--r--net/mac80211/iface.c6
-rw-r--r--net/mac80211/mesh_ps.c1
-rw-r--r--net/mac80211/sta_info.c1
-rw-r--r--net/mac802154/Makefile2
-rw-r--r--net/mac802154/ieee802154_dev.c72
-rw-r--r--net/mac802154/mac802154.h9
-rw-r--r--net/mac802154/mac_cmd.c2
-rw-r--r--net/mac802154/mib.c26
-rw-r--r--net/mac802154/rx.c1
-rw-r--r--net/mac802154/wpan.c376
-rw-r--r--net/netfilter/Kconfig6
-rw-r--r--net/netfilter/Makefile1
-rw-r--r--net/netfilter/ipset/Kconfig9
-rw-r--r--net/netfilter/ipset/Makefile1
-rw-r--r--net/netfilter/ipset/ip_set_core.c54
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h43
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c321
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c10
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c3
-rw-r--r--net/netfilter/ipset/pfxlen.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c13
-rw-r--r--net/netfilter/nf_conntrack_core.c479
-rw-r--r--net/netfilter/nf_conntrack_expect.c36
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c4
-rw-r--r--net/netfilter/nf_conntrack_helper.c41
-rw-r--r--net/netfilter/nf_conntrack_netlink.c168
-rw-r--r--net/netfilter/nf_conntrack_sip.c8
-rw-r--r--net/netfilter/nf_nat_core.c56
-rw-r--r--net/netfilter/nf_synproxy_core.c5
-rw-r--r--net/netfilter/nf_tables_api.c160
-rw-r--r--net/netfilter/nf_tables_core.c6
-rw-r--r--net/netfilter/nfnetlink.c8
-rw-r--r--net/netfilter/nfnetlink_log.c8
-rw-r--r--net/netfilter/nft_compat.c4
-rw-r--r--net/netfilter/nft_ct.c52
-rw-r--r--net/netfilter/nft_hash.c261
-rw-r--r--net/netfilter/nft_immediate.c3
-rw-r--r--net/netfilter/nft_log.c8
-rw-r--r--net/netfilter/nft_lookup.c6
-rw-r--r--net/netfilter/nft_meta.c4
-rw-r--r--net/netfilter/nft_nat.c22
-rw-r--r--net/netfilter/nft_payload.c3
-rw-r--r--net/netfilter/nft_queue.c4
-rw-r--r--net/netfilter/nft_rbtree.c16
-rw-r--r--net/netfilter/nft_reject.c89
-rw-r--r--net/netfilter/nft_reject_inet.c63
-rw-r--r--net/netfilter/xt_AUDIT.c4
-rw-r--r--net/netfilter/xt_CT.c7
-rw-r--r--net/netfilter/xt_connlimit.c311
-rw-r--r--net/netfilter/xt_ipcomp.c2
-rw-r--r--net/netlink/af_netlink.c35
-rw-r--r--net/netlink/af_netlink.h1
-rw-r--r--net/openvswitch/datapath.c43
-rw-r--r--net/openvswitch/datapath.h2
-rw-r--r--net/openvswitch/flow_table.c88
-rw-r--r--net/openvswitch/flow_table.h2
-rw-r--r--net/openvswitch/vport.c14
-rw-r--r--net/packet/af_packet.c42
-rw-r--r--net/rxrpc/Makefile5
-rw-r--r--net/rxrpc/af_rxrpc.c9
-rw-r--r--net/rxrpc/ar-ack.c61
-rw-r--r--net/rxrpc/ar-call.c213
-rw-r--r--net/rxrpc/ar-connection.c10
-rw-r--r--net/rxrpc/ar-error.c1
-rw-r--r--net/rxrpc/ar-input.c190
-rw-r--r--net/rxrpc/ar-internal.h40
-rw-r--r--net/rxrpc/ar-output.c15
-rw-r--r--net/rxrpc/ar-recvmsg.c25
-rw-r--r--net/rxrpc/ar-skbuff.c7
-rw-r--r--net/rxrpc/ar-transport.c10
-rw-r--r--net/rxrpc/sysctl.c146
-rw-r--r--net/sched/act_api.c142
-rw-r--r--net/sched/act_csum.c31
-rw-r--r--net/sched/act_gact.c34
-rw-r--r--net/sched/act_ipt.c68
-rw-r--r--net/sched/act_mirred.c56
-rw-r--r--net/sched/act_nat.c33
-rw-r--r--net/sched/act_pedit.c45
-rw-r--r--net/sched/act_police.c22
-rw-r--r--net/sched/act_simple.c64
-rw-r--r--net/sched/act_skbedit.c36
-rw-r--r--net/sched/cls_fw.c33
-rw-r--r--net/sched/sch_api.c15
-rw-r--r--net/sched/sch_atm.c3
-rw-r--r--net/sched/sch_cbq.c6
-rw-r--r--net/sched/sch_fq.c24
-rw-r--r--net/sched/sch_fq_codel.c3
-rw-r--r--net/sched/sch_hfsc.c3
-rw-r--r--net/sched/sch_hhf.c3
-rw-r--r--net/sched/sch_htb.c20
-rw-r--r--net/sched/sch_ingress.c3
-rw-r--r--net/sched/sch_netem.c79
-rw-r--r--net/sched/sch_pie.c21
-rw-r--r--net/sched/sch_tbf.c50
-rw-r--r--net/sctp/associola.c210
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/sm_make_chunk.c4
-rw-r--r--net/sctp/sm_sideeffect.c7
-rw-r--r--net/sctp/sm_statefuns.c12
-rw-r--r--net/sctp/socket.c47
-rw-r--r--net/sctp/sysctl.c18
-rw-r--r--net/sctp/transport.c1
-rw-r--r--net/sctp/ulpevent.c8
-rw-r--r--net/socket.c30
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c19
-rw-r--r--net/sunrpc/backchannel_rqst.c6
-rw-r--r--net/sunrpc/svc_xprt.c6
-rw-r--r--net/sunrpc/xprtsock.c6
-rw-r--r--net/tipc/addr.h2
-rw-r--r--net/tipc/bcast.c28
-rw-r--r--net/tipc/bcast.h4
-rw-r--r--net/tipc/bearer.c51
-rw-r--r--net/tipc/bearer.h7
-rw-r--r--net/tipc/config.c11
-rw-r--r--net/tipc/core.c111
-rw-r--r--net/tipc/core.h2
-rw-r--r--net/tipc/discover.c4
-rw-r--r--net/tipc/discover.h2
-rw-r--r--net/tipc/handler.c1
-rw-r--r--net/tipc/link.c619
-rw-r--r--net/tipc/link.h57
-rw-r--r--net/tipc/name_distr.c8
-rw-r--r--net/tipc/name_distr.h2
-rw-r--r--net/tipc/name_table.c40
-rw-r--r--net/tipc/net.c10
-rw-r--r--net/tipc/netlink.c8
-rw-r--r--net/tipc/node.c14
-rw-r--r--net/tipc/port.c301
-rw-r--r--net/tipc/port.h130
-rw-r--r--net/tipc/ref.c30
-rw-r--r--net/tipc/ref.h1
-rw-r--r--net/tipc/server.c19
-rw-r--r--net/tipc/server.h2
-rw-r--r--net/tipc/socket.c426
-rw-r--r--net/tipc/socket.h72
-rw-r--r--net/tipc/subscr.c19
-rw-r--r--net/unix/af_unix.c3
-rw-r--r--net/wireless/chan.c2
-rw-r--r--net/wireless/core.c2
-rw-r--r--net/wireless/util.c16
-rw-r--r--net/xfrm/xfrm_input.c97
-rw-r--r--net/xfrm/xfrm_policy.c42
-rw-r--r--net/xfrm/xfrm_state.c95
-rw-r--r--net/xfrm/xfrm_user.c42
315 files changed, 8591 insertions, 5524 deletions
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 5704ed9c3a23..9d010a09ab98 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -38,9 +38,9 @@ struct vlan_info {
static inline unsigned int vlan_proto_idx(__be16 proto)
{
switch (proto) {
- case __constant_htons(ETH_P_8021Q):
+ case htons(ETH_P_8021Q):
return VLAN_PROTO_8021Q;
- case __constant_htons(ETH_P_8021AD):
+ case htons(ETH_P_8021AD):
return VLAN_PROTO_8021AD;
default:
BUG();
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 6ee48aac776f..35b3c192d7b9 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -22,11 +22,11 @@ bool vlan_do_receive(struct sk_buff **skbp)
return false;
skb->dev = vlan_dev;
- if (skb->pkt_type == PACKET_OTHERHOST) {
+ if (unlikely(skb->pkt_type == PACKET_OTHERHOST)) {
/* Our lower layer thinks this is not local, let's make sure.
* This allows the VLAN to have a different MAC than the
* underlying device, and still route correctly. */
- if (ether_addr_equal(eth_hdr(skb)->h_dest, vlan_dev->dev_addr))
+ if (ether_addr_equal_64bits(eth_hdr(skb)->h_dest, vlan_dev->dev_addr))
skb->pkt_type = PACKET_HOST;
}
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index de51c48c4393..4f3e9073cb49 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -538,6 +538,9 @@ static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct net_device *real_dev = vlan->real_dev;
+ if (saddr == NULL)
+ saddr = dev->dev_addr;
+
return dev_hard_header(skb, real_dev, type, daddr, saddr, len);
}
@@ -556,7 +559,7 @@ static const struct net_device_ops vlan_netdev_ops;
static int vlan_dev_init(struct net_device *dev)
{
struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
- int subclass = 0, i;
+ int subclass = 0;
netif_carrier_off(dev);
@@ -606,17 +609,10 @@ static int vlan_dev_init(struct net_device *dev)
vlan_dev_set_lockdep_class(dev, subclass);
- vlan_dev_priv(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats);
+ vlan_dev_priv(dev)->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
if (!vlan_dev_priv(dev)->vlan_pcpu_stats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct vlan_pcpu_stats *vlan_stat;
- vlan_stat = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i);
- u64_stats_init(&vlan_stat->syncp);
- }
-
-
return 0;
}
@@ -682,13 +678,13 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st
p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i);
do {
- start = u64_stats_fetch_begin_bh(&p->syncp);
+ start = u64_stats_fetch_begin_irq(&p->syncp);
rxpackets = p->rx_packets;
rxbytes = p->rx_bytes;
rxmulticast = p->rx_multicast;
txpackets = p->tx_packets;
txbytes = p->tx_bytes;
- } while (u64_stats_fetch_retry_bh(&p->syncp, start));
+ } while (u64_stats_fetch_retry_irq(&p->syncp, start));
stats->rx_packets += rxpackets;
stats->rx_bytes += rxbytes;
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index c7e634af8516..8ac8a5cc2143 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -56,8 +56,8 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
if (data[IFLA_VLAN_PROTOCOL]) {
switch (nla_get_be16(data[IFLA_VLAN_PROTOCOL])) {
- case __constant_htons(ETH_P_8021Q):
- case __constant_htons(ETH_P_8021AD):
+ case htons(ETH_P_8021Q):
+ case htons(ETH_P_8021AD):
break;
default:
return -EPROTONOSUPPORT;
diff --git a/net/9p/client.c b/net/9p/client.c
index a5e4d2dcb03e..9186550d77a6 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -204,7 +204,7 @@ free_and_return:
return ret;
}
-struct p9_fcall *p9_fcall_alloc(int alloc_msize)
+static struct p9_fcall *p9_fcall_alloc(int alloc_msize)
{
struct p9_fcall *fc;
fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index cd1e1ede73a4..ac2666c1d011 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -340,7 +340,10 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
int count = nr_pages;
while (nr_pages) {
s = rest_of_page(data);
- pages[index++] = kmap_to_page(data);
+ if (is_vmalloc_addr(data))
+ pages[index++] = vmalloc_to_page(data);
+ else
+ pages[index++] = kmap_to_page(data);
data += s;
nr_pages--;
}
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index d27b86dfb0e9..d1c55d8dd0a2 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -926,7 +926,7 @@ static struct aarp_entry *iter_next(struct aarp_iter_state *iter, loff_t *pos)
struct aarp_entry *entry;
rescan:
- while(ct < AARP_HASH_SIZE) {
+ while (ct < AARP_HASH_SIZE) {
for (entry = table[ct]; entry; entry = entry->next) {
if (!pos || ++off == *pos) {
iter->table = table;
@@ -995,7 +995,7 @@ static const char *dt2str(unsigned long ticks)
{
static char buf[32];
- sprintf(buf, "%ld.%02ld", ticks / HZ, ((ticks % HZ) * 100 ) / HZ);
+ sprintf(buf, "%ld.%02ld", ticks / HZ, ((ticks % HZ) * 100) / HZ);
return buf;
}
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 02806c6b2ff3..786ee2f83d5f 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -293,7 +293,7 @@ static int atif_probe_device(struct atalk_iface *atif)
/* Perform AARP probing for a proxy address */
static int atif_proxy_probe_device(struct atalk_iface *atif,
- struct atalk_addr* proxy_addr)
+ struct atalk_addr *proxy_addr)
{
int netrange = ntohs(atif->nets.nr_lastnet) -
ntohs(atif->nets.nr_firstnet) + 1;
@@ -581,7 +581,7 @@ out:
}
/* Delete a route. Find it and discard it */
-static int atrtr_delete(struct atalk_addr * addr)
+static int atrtr_delete(struct atalk_addr *addr)
{
struct atalk_route **r = &atalk_routes;
int retval = 0;
@@ -936,11 +936,11 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset,
int i, copy;
/* checksum stuff in header space */
- if ( (copy = start - offset) > 0) {
+ if ((copy = start - offset) > 0) {
if (copy > len)
copy = len;
sum = atalk_sum_partial(skb->data + offset, copy, sum);
- if ( (len -= copy) == 0)
+ if ((len -= copy) == 0)
return sum;
offset += copy;
@@ -1151,7 +1151,7 @@ static int atalk_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
at->src_net = addr->sat_addr.s_net = ap->s_net;
- at->src_node = addr->sat_addr.s_node= ap->s_node;
+ at->src_node = addr->sat_addr.s_node = ap->s_node;
} else {
err = -EADDRNOTAVAIL;
if (!atalk_find_interface(addr->sat_addr.s_net,
@@ -1790,53 +1790,53 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
void __user *argp = (void __user *)arg;
switch (cmd) {
- /* Protocol layer */
- case TIOCOUTQ: {
- long amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
+ /* Protocol layer */
+ case TIOCOUTQ: {
+ long amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
- if (amount < 0)
- amount = 0;
- rc = put_user(amount, (int __user *)argp);
- break;
- }
- case TIOCINQ: {
- /*
- * These two are safe on a single CPU system as only
- * user tasks fiddle here
- */
- struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
- long amount = 0;
+ if (amount < 0)
+ amount = 0;
+ rc = put_user(amount, (int __user *)argp);
+ break;
+ }
+ case TIOCINQ: {
+ /*
+ * These two are safe on a single CPU system as only
+ * user tasks fiddle here
+ */
+ struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+ long amount = 0;
- if (skb)
- amount = skb->len - sizeof(struct ddpehdr);
- rc = put_user(amount, (int __user *)argp);
- break;
- }
- case SIOCGSTAMP:
- rc = sock_get_timestamp(sk, argp);
- break;
- case SIOCGSTAMPNS:
- rc = sock_get_timestampns(sk, argp);
- break;
- /* Routing */
- case SIOCADDRT:
- case SIOCDELRT:
- rc = -EPERM;
- if (capable(CAP_NET_ADMIN))
- rc = atrtr_ioctl(cmd, argp);
- break;
- /* Interface */
- case SIOCGIFADDR:
- case SIOCSIFADDR:
- case SIOCGIFBRDADDR:
- case SIOCATALKDIFADDR:
- case SIOCDIFADDR:
- case SIOCSARP: /* proxy AARP */
- case SIOCDARP: /* proxy AARP */
- rtnl_lock();
- rc = atif_ioctl(cmd, argp);
- rtnl_unlock();
- break;
+ if (skb)
+ amount = skb->len - sizeof(struct ddpehdr);
+ rc = put_user(amount, (int __user *)argp);
+ break;
+ }
+ case SIOCGSTAMP:
+ rc = sock_get_timestamp(sk, argp);
+ break;
+ case SIOCGSTAMPNS:
+ rc = sock_get_timestampns(sk, argp);
+ break;
+ /* Routing */
+ case SIOCADDRT:
+ case SIOCDELRT:
+ rc = -EPERM;
+ if (capable(CAP_NET_ADMIN))
+ rc = atrtr_ioctl(cmd, argp);
+ break;
+ /* Interface */
+ case SIOCGIFADDR:
+ case SIOCSIFADDR:
+ case SIOCGIFBRDADDR:
+ case SIOCATALKDIFADDR:
+ case SIOCDIFADDR:
+ case SIOCSARP: /* proxy AARP */
+ case SIOCDARP: /* proxy AARP */
+ rtnl_lock();
+ rc = atif_ioctl(cmd, argp);
+ rtnl_unlock();
+ break;
}
return rc;
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 512159bf607f..8323bced8e5b 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -241,19 +241,19 @@ batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const uint8_t *addr)
size = bat_priv->num_ifaces * sizeof(uint8_t);
orig_node->bat_iv.bcast_own_sum = kzalloc(size, GFP_ATOMIC);
if (!orig_node->bat_iv.bcast_own_sum)
- goto free_bcast_own;
+ goto free_orig_node;
hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig,
batadv_choose_orig, orig_node,
&orig_node->hash_entry);
if (hash_added != 0)
- goto free_bcast_own;
+ goto free_orig_node;
return orig_node;
-free_bcast_own:
- kfree(orig_node->bat_iv.bcast_own);
free_orig_node:
+ /* free twice, as batadv_orig_node_new sets refcount to 2 */
+ batadv_orig_node_free_ref(orig_node);
batadv_orig_node_free_ref(orig_node);
return NULL;
@@ -266,7 +266,7 @@ batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
struct batadv_orig_node *orig_neigh)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
- struct batadv_neigh_node *neigh_node;
+ struct batadv_neigh_node *neigh_node, *tmp_neigh_node;
neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr, orig_node);
if (!neigh_node)
@@ -281,14 +281,24 @@ batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
neigh_node->orig_node = orig_neigh;
neigh_node->if_incoming = hard_iface;
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Creating new neighbor %pM for orig_node %pM on interface %s\n",
- neigh_addr, orig_node->orig, hard_iface->net_dev->name);
-
spin_lock_bh(&orig_node->neigh_list_lock);
- hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
+ tmp_neigh_node = batadv_neigh_node_get(orig_node, hard_iface,
+ neigh_addr);
+ if (!tmp_neigh_node) {
+ hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
+ } else {
+ kfree(neigh_node);
+ batadv_hardif_free_ref(hard_iface);
+ neigh_node = tmp_neigh_node;
+ }
spin_unlock_bh(&orig_node->neigh_list_lock);
+ if (!tmp_neigh_node)
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+ "Creating new neighbor %pM for orig_node %pM on interface %s\n",
+ neigh_addr, orig_node->orig,
+ hard_iface->net_dev->name);
+
out:
return neigh_node;
}
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 55cf2260d295..d7fafc1009a0 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -389,8 +389,6 @@ out:
batadv_neigh_ifinfo_free_ref(router_gw_tq);
if (router_orig_tq)
batadv_neigh_ifinfo_free_ref(router_orig_tq);
-
- return;
}
/**
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 3d417d3641c6..b851cc580853 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -241,7 +241,7 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
{
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
const struct batadv_hard_iface *hard_iface;
- int min_mtu = ETH_DATA_LEN;
+ int min_mtu = INT_MAX;
rcu_read_lock();
list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
@@ -256,8 +256,6 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
}
rcu_read_unlock();
- atomic_set(&bat_priv->packet_size_max, min_mtu);
-
if (atomic_read(&bat_priv->fragmentation) == 0)
goto out;
@@ -268,13 +266,21 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
min_mtu = min_t(int, min_mtu, BATADV_FRAG_MAX_FRAG_SIZE);
min_mtu -= sizeof(struct batadv_frag_packet);
min_mtu *= BATADV_FRAG_MAX_FRAGMENTS;
- atomic_set(&bat_priv->packet_size_max, min_mtu);
-
- /* with fragmentation enabled we can fragment external packets easily */
- min_mtu = min_t(int, min_mtu, ETH_DATA_LEN);
out:
- return min_mtu - batadv_max_header_len();
+ /* report to the other components the maximum amount of bytes that
+ * batman-adv can send over the wire (without considering the payload
+ * overhead). For example, this value is used by TT to compute the
+ * maximum local table table size
+ */
+ atomic_set(&bat_priv->packet_size_max, min_mtu);
+
+ /* the real soft-interface MTU is computed by removing the payload
+ * overhead from the maximum amount of bytes that was just computed.
+ *
+ * However batman-adv does not support MTUs bigger than ETH_DATA_LEN
+ */
+ return min_t(int, min_mtu - batadv_max_header_len(), ETH_DATA_LEN);
}
/* adjusts the MTU if a new interface with a smaller MTU appeared. */
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 6df12a2e3605..853941629dc1 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -458,6 +458,42 @@ out:
}
/**
+ * batadv_neigh_node_get - retrieve a neighbour from the list
+ * @orig_node: originator which the neighbour belongs to
+ * @hard_iface: the interface where this neighbour is connected to
+ * @addr: the address of the neighbour
+ *
+ * Looks for and possibly returns a neighbour belonging to this originator list
+ * which is connected through the provided hard interface.
+ * Returns NULL if the neighbour is not found.
+ */
+struct batadv_neigh_node *
+batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
+ const struct batadv_hard_iface *hard_iface,
+ const uint8_t *addr)
+{
+ struct batadv_neigh_node *tmp_neigh_node, *res = NULL;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) {
+ if (!batadv_compare_eth(tmp_neigh_node->addr, addr))
+ continue;
+
+ if (tmp_neigh_node->if_incoming != hard_iface)
+ continue;
+
+ if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
+ continue;
+
+ res = tmp_neigh_node;
+ break;
+ }
+ rcu_read_unlock();
+
+ return res;
+}
+
+/**
* batadv_orig_ifinfo_free_rcu - free the orig_ifinfo object
* @rcu: rcu pointer of the orig_ifinfo object
*/
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 37be290f63f6..db3a9ed734cb 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -29,6 +29,10 @@ void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node);
struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
const uint8_t *addr);
struct batadv_neigh_node *
+batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
+ const struct batadv_hard_iface *hard_iface,
+ const uint8_t *addr);
+struct batadv_neigh_node *
batadv_neigh_node_new(struct batadv_hard_iface *hard_iface,
const uint8_t *neigh_addr,
struct batadv_orig_node *orig_node);
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 1ed9f7c9ecea..a953d5b196a3 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -688,7 +688,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
int is_old_ttvn;
/* check if there is enough data before accessing it */
- if (pskb_may_pull(skb, hdr_len + ETH_HLEN) < 0)
+ if (!pskb_may_pull(skb, hdr_len + ETH_HLEN))
return 0;
/* create a copy of the skb (in case of for re-routing) to modify it. */
@@ -918,6 +918,8 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb,
if (ret != NET_RX_SUCCESS)
ret = batadv_route_unicast_packet(skb, recv_if);
+ else
+ consume_skb(skb);
return ret;
}
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 579f5f00a385..843febd1e519 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -254,9 +254,9 @@ static int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
unsigned short vid)
{
- struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
+ struct ethhdr *ethhdr;
struct batadv_unicast_packet *unicast_packet;
- int ret = NET_XMIT_DROP;
+ int ret = NET_XMIT_DROP, hdr_size;
if (!orig_node)
goto out;
@@ -265,12 +265,16 @@ static int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
case BATADV_UNICAST:
if (!batadv_send_skb_prepare_unicast(skb, orig_node))
goto out;
+
+ hdr_size = sizeof(*unicast_packet);
break;
case BATADV_UNICAST_4ADDR:
if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, skb,
orig_node,
packet_subtype))
goto out;
+
+ hdr_size = sizeof(struct batadv_unicast_4addr_packet);
break;
default:
/* this function supports UNICAST and UNICAST_4ADDR only. It
@@ -279,6 +283,7 @@ static int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
goto out;
}
+ ethhdr = (struct ethhdr *)(skb->data + hdr_size);
unicast_packet = (struct batadv_unicast_packet *)skb->data;
/* inform the destination node that we are still missing a correct route
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index b6071f675a3e..959dde721c46 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1975,6 +1975,7 @@ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
struct hlist_head *head;
uint32_t i, crc_tmp, crc = 0;
uint8_t flags;
+ __be16 tmp_vid;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -2011,8 +2012,11 @@ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
orig_node))
continue;
- crc_tmp = crc32c(0, &tt_common->vid,
- sizeof(tt_common->vid));
+ /* use network order to read the VID: this ensures that
+ * every node reads the bytes in the same order.
+ */
+ tmp_vid = htons(tt_common->vid);
+ crc_tmp = crc32c(0, &tmp_vid, sizeof(tmp_vid));
/* compute the CRC on flags that have to be kept in sync
* among nodes
@@ -2046,6 +2050,7 @@ static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv,
struct hlist_head *head;
uint32_t i, crc_tmp, crc = 0;
uint8_t flags;
+ __be16 tmp_vid;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -2064,8 +2069,11 @@ static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv,
if (tt_common->flags & BATADV_TT_CLIENT_NEW)
continue;
- crc_tmp = crc32c(0, &tt_common->vid,
- sizeof(tt_common->vid));
+ /* use network order to read the VID: this ensures that
+ * every node reads the bytes in the same order.
+ */
+ tmp_vid = htons(tt_common->vid);
+ crc_tmp = crc32c(0, &tmp_vid, sizeof(tmp_vid));
/* compute the CRC on flags that have to be kept in sync
* among nodes
@@ -2262,6 +2270,7 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node,
{
struct batadv_tvlv_tt_vlan_data *tt_vlan_tmp;
struct batadv_orig_node_vlan *vlan;
+ uint32_t crc;
int i;
/* check if each received CRC matches the locally stored one */
@@ -2281,7 +2290,10 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node,
if (!vlan)
return false;
- if (vlan->tt.crc != ntohl(tt_vlan_tmp->crc))
+ crc = vlan->tt.crc;
+ batadv_orig_node_vlan_free_ref(vlan);
+
+ if (crc != ntohl(tt_vlan_tmp->crc))
return false;
}
@@ -3218,7 +3230,6 @@ static void batadv_tt_update_orig(struct batadv_priv *bat_priv,
spin_lock_bh(&orig_node->tt_lock);
- tt_change = (struct batadv_tvlv_tt_change *)tt_buff;
batadv_tt_update_changes(bat_priv, orig_node, tt_num_changes,
ttvn, tt_change);
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index adb3ea04adaa..73492b91105a 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -27,7 +27,7 @@
#include "6lowpan.h"
-#include "../ieee802154/6lowpan.h" /* for the compression support */
+#include <net/6lowpan.h> /* for the compression support */
#define IFACE_NAME_TEMPLATE "bt%d"
#define EUI64_ADDR_LEN 8
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 292e619db896..d9fb93451442 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -430,6 +430,16 @@ static void hidp_del_timer(struct hidp_session *session)
del_timer(&session->timer);
}
+static void hidp_process_report(struct hidp_session *session,
+ int type, const u8 *data, int len, int intr)
+{
+ if (len > HID_MAX_BUFFER_SIZE)
+ len = HID_MAX_BUFFER_SIZE;
+
+ memcpy(session->input_buf, data, len);
+ hid_input_report(session->hid, type, session->input_buf, len, intr);
+}
+
static void hidp_process_handshake(struct hidp_session *session,
unsigned char param)
{
@@ -502,7 +512,8 @@ static int hidp_process_data(struct hidp_session *session, struct sk_buff *skb,
hidp_input_report(session, skb);
if (session->hid)
- hid_input_report(session->hid, HID_INPUT_REPORT, skb->data, skb->len, 0);
+ hidp_process_report(session, HID_INPUT_REPORT,
+ skb->data, skb->len, 0);
break;
case HIDP_DATA_RTYPE_OTHER:
@@ -584,7 +595,8 @@ static void hidp_recv_intr_frame(struct hidp_session *session,
hidp_input_report(session, skb);
if (session->hid) {
- hid_input_report(session->hid, HID_INPUT_REPORT, skb->data, skb->len, 1);
+ hidp_process_report(session, HID_INPUT_REPORT,
+ skb->data, skb->len, 1);
BT_DBG("report len %d", skb->len);
}
} else {
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
index ab5241400cf7..8798492a6e99 100644
--- a/net/bluetooth/hidp/hidp.h
+++ b/net/bluetooth/hidp/hidp.h
@@ -24,6 +24,7 @@
#define __HIDP_H
#include <linux/types.h>
+#include <linux/hid.h>
#include <linux/kref.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/l2cap.h>
@@ -179,6 +180,9 @@ struct hidp_session {
/* Used in hidp_output_raw_report() */
int output_report_success; /* boolean */
+
+ /* temporary input buffer */
+ u8 input_buf[HID_MAX_BUFFER_SIZE];
};
/* HIDP init defines */
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index e4401a531afb..f2a08477e0f5 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -88,18 +88,11 @@ out:
static int br_dev_init(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
- int i;
- br->stats = alloc_percpu(struct pcpu_sw_netstats);
+ br->stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!br->stats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *br_dev_stats;
- br_dev_stats = per_cpu_ptr(br->stats, i);
- u64_stats_init(&br_dev_stats->syncp);
- }
-
return 0;
}
@@ -143,9 +136,9 @@ static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev,
const struct pcpu_sw_netstats *bstats
= per_cpu_ptr(br->stats, cpu);
do {
- start = u64_stats_fetch_begin_bh(&bstats->syncp);
+ start = u64_stats_fetch_begin_irq(&bstats->syncp);
memcpy(&tmp, bstats, sizeof(tmp));
- } while (u64_stats_fetch_retry_bh(&bstats->syncp, start));
+ } while (u64_stats_fetch_retry_irq(&bstats->syncp, start));
sum.tx_bytes += tmp.tx_bytes;
sum.tx_packets += tmp.tx_packets;
sum.rx_bytes += tmp.rx_bytes;
@@ -187,8 +180,7 @@ static int br_set_mac_address(struct net_device *dev, void *p)
spin_lock_bh(&br->lock);
if (!ether_addr_equal(dev->dev_addr, addr->sa_data)) {
- memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
- br_fdb_change_mac_address(br, addr->sa_data);
+ /* Mac address will be changed in br_stp_change_bridge_id(). */
br_stp_change_bridge_id(br, addr->sa_data);
}
spin_unlock_bh(&br->lock);
@@ -226,6 +218,33 @@ static void br_netpoll_cleanup(struct net_device *dev)
br_netpoll_disable(p);
}
+static int __br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
+{
+ struct netpoll *np;
+ int err;
+
+ np = kzalloc(sizeof(*p->np), gfp);
+ if (!np)
+ return -ENOMEM;
+
+ err = __netpoll_setup(np, p->dev, gfp);
+ if (err) {
+ kfree(np);
+ return err;
+ }
+
+ p->np = np;
+ return err;
+}
+
+int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
+{
+ if (!p->br->dev->npinfo)
+ return 0;
+
+ return __br_netpoll_enable(p, gfp);
+}
+
static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni,
gfp_t gfp)
{
@@ -236,7 +255,7 @@ static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni,
list_for_each_entry(p, &br->port_list, list) {
if (!p->dev)
continue;
- err = br_netpoll_enable(p, gfp);
+ err = __br_netpoll_enable(p, gfp);
if (err)
goto fail;
}
@@ -249,28 +268,6 @@ fail:
goto out;
}
-int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
-{
- struct netpoll *np;
- int err;
-
- if (!p->br->dev->npinfo)
- return 0;
-
- np = kzalloc(sizeof(*p->np), gfp);
- if (!np)
- return -ENOMEM;
-
- err = __netpoll_setup(np, p->dev, gfp);
- if (err) {
- kfree(np);
- return err;
- }
-
- p->np = np;
- return err;
-}
-
void br_netpoll_disable(struct net_bridge_port *p)
{
struct netpoll *np = p->np;
@@ -370,7 +367,7 @@ void br_dev_setup(struct net_device *dev)
br->bridge_id.prio[0] = 0x80;
br->bridge_id.prio[1] = 0x00;
- memcpy(br->group_addr, eth_reserved_addr_base, ETH_ALEN);
+ ether_addr_copy(br->group_addr, eth_reserved_addr_base);
br->stp_enabled = BR_NO_STP;
br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index c5f5a4a933f4..9203d5a1943f 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -27,6 +27,9 @@
#include "br_private.h"
static struct kmem_cache *br_fdb_cache __read_mostly;
+static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head,
+ const unsigned char *addr,
+ __u16 vid);
static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid);
static void fdb_notify(struct net_bridge *br,
@@ -89,11 +92,57 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
call_rcu(&f->rcu, fdb_rcu_free);
}
+/* Delete a local entry if no other port had the same address. */
+static void fdb_delete_local(struct net_bridge *br,
+ const struct net_bridge_port *p,
+ struct net_bridge_fdb_entry *f)
+{
+ const unsigned char *addr = f->addr.addr;
+ u16 vid = f->vlan_id;
+ struct net_bridge_port *op;
+
+ /* Maybe another port has same hw addr? */
+ list_for_each_entry(op, &br->port_list, list) {
+ if (op != p && ether_addr_equal(op->dev->dev_addr, addr) &&
+ (!vid || nbp_vlan_find(op, vid))) {
+ f->dst = op;
+ f->added_by_user = 0;
+ return;
+ }
+ }
+
+ /* Maybe bridge device has same hw addr? */
+ if (p && ether_addr_equal(br->dev->dev_addr, addr) &&
+ (!vid || br_vlan_find(br, vid))) {
+ f->dst = NULL;
+ f->added_by_user = 0;
+ return;
+ }
+
+ fdb_delete(br, f);
+}
+
+void br_fdb_find_delete_local(struct net_bridge *br,
+ const struct net_bridge_port *p,
+ const unsigned char *addr, u16 vid)
+{
+ struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
+ struct net_bridge_fdb_entry *f;
+
+ spin_lock_bh(&br->hash_lock);
+ f = fdb_find(head, addr, vid);
+ if (f && f->is_local && !f->added_by_user && f->dst == p)
+ fdb_delete_local(br, p, f);
+ spin_unlock_bh(&br->hash_lock);
+}
+
void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
{
struct net_bridge *br = p->br;
- bool no_vlan = (nbp_get_vlan_info(p) == NULL) ? true : false;
+ struct net_port_vlans *pv = nbp_get_vlan_info(p);
+ bool no_vlan = !pv;
int i;
+ u16 vid;
spin_lock_bh(&br->hash_lock);
@@ -104,38 +153,34 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
struct net_bridge_fdb_entry *f;
f = hlist_entry(h, struct net_bridge_fdb_entry, hlist);
- if (f->dst == p && f->is_local) {
- /* maybe another port has same hw addr? */
- struct net_bridge_port *op;
- u16 vid = f->vlan_id;
- list_for_each_entry(op, &br->port_list, list) {
- if (op != p &&
- ether_addr_equal(op->dev->dev_addr,
- f->addr.addr) &&
- nbp_vlan_find(op, vid)) {
- f->dst = op;
- goto insert;
- }
- }
-
+ if (f->dst == p && f->is_local && !f->added_by_user) {
/* delete old one */
- fdb_delete(br, f);
-insert:
- /* insert new address, may fail if invalid
- * address or dup.
- */
- fdb_insert(br, p, newaddr, vid);
+ fdb_delete_local(br, p, f);
/* if this port has no vlan information
* configured, we can safely be done at
* this point.
*/
if (no_vlan)
- goto done;
+ goto insert;
}
}
}
+insert:
+ /* insert new address, may fail if invalid address or dup. */
+ fdb_insert(br, p, newaddr, 0);
+
+ if (no_vlan)
+ goto done;
+
+ /* Now add entries for every VLAN configured on the port.
+ * This function runs under RTNL so the bitmap will not change
+ * from under us.
+ */
+ for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID)
+ fdb_insert(br, p, newaddr, vid);
+
done:
spin_unlock_bh(&br->hash_lock);
}
@@ -146,10 +191,12 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
struct net_port_vlans *pv;
u16 vid = 0;
+ spin_lock_bh(&br->hash_lock);
+
/* If old entry was unassociated with any port, then delete it. */
f = __br_fdb_get(br, br->dev->dev_addr, 0);
if (f && f->is_local && !f->dst)
- fdb_delete(br, f);
+ fdb_delete_local(br, NULL, f);
fdb_insert(br, NULL, newaddr, 0);
@@ -159,14 +206,16 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
*/
pv = br_get_vlan_info(br);
if (!pv)
- return;
+ goto out;
for_each_set_bit_from(vid, pv->vlan_bitmap, VLAN_N_VID) {
f = __br_fdb_get(br, br->dev->dev_addr, vid);
if (f && f->is_local && !f->dst)
- fdb_delete(br, f);
+ fdb_delete_local(br, NULL, f);
fdb_insert(br, NULL, newaddr, vid);
}
+out:
+ spin_unlock_bh(&br->hash_lock);
}
void br_fdb_cleanup(unsigned long _data)
@@ -235,25 +284,11 @@ void br_fdb_delete_by_port(struct net_bridge *br,
if (f->is_static && !do_all)
continue;
- /*
- * if multiple ports all have the same device address
- * then when one port is deleted, assign
- * the local entry to other port
- */
- if (f->is_local) {
- struct net_bridge_port *op;
- list_for_each_entry(op, &br->port_list, list) {
- if (op != p &&
- ether_addr_equal(op->dev->dev_addr,
- f->addr.addr)) {
- f->dst = op;
- goto skip_delete;
- }
- }
- }
- fdb_delete(br, f);
- skip_delete: ;
+ if (f->is_local)
+ fdb_delete_local(br, p, f);
+ else
+ fdb_delete(br, f);
}
}
spin_unlock_bh(&br->hash_lock);
@@ -397,6 +432,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
fdb->vlan_id = vid;
fdb->is_local = 0;
fdb->is_static = 0;
+ fdb->added_by_user = 0;
fdb->updated = fdb->used = jiffies;
hlist_add_head_rcu(&fdb->hlist, head);
}
@@ -447,7 +483,7 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
}
void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
- const unsigned char *addr, u16 vid)
+ const unsigned char *addr, u16 vid, bool added_by_user)
{
struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
struct net_bridge_fdb_entry *fdb;
@@ -473,13 +509,18 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
/* fastpath: update of existing entry */
fdb->dst = source;
fdb->updated = jiffies;
+ if (unlikely(added_by_user))
+ fdb->added_by_user = 1;
}
} else {
spin_lock(&br->hash_lock);
if (likely(!fdb_find(head, addr, vid))) {
fdb = fdb_create(head, source, addr, vid);
- if (fdb)
+ if (fdb) {
+ if (unlikely(added_by_user))
+ fdb->added_by_user = 1;
fdb_notify(br, fdb, RTM_NEWNEIGH);
+ }
}
/* else we lose race and someone else inserts
* it first, don't bother updating
@@ -647,6 +688,7 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
modified = true;
}
+ fdb->added_by_user = 1;
fdb->used = jiffies;
if (modified) {
@@ -664,7 +706,7 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p,
if (ndm->ndm_flags & NTF_USE) {
rcu_read_lock();
- br_fdb_update(p->br, p, addr, vid);
+ br_fdb_update(p->br, p, addr, vid, true);
rcu_read_unlock();
} else {
spin_lock_bh(&p->br->hash_lock);
@@ -749,8 +791,7 @@ out:
return err;
}
-int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr,
- u16 vlan)
+static int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vlan)
{
struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)];
struct net_bridge_fdb_entry *fdb;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index cffe1d666ba1..54d207d3a31c 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -389,6 +389,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
if (br->dev->needed_headroom < dev->needed_headroom)
br->dev->needed_headroom = dev->needed_headroom;
+ if (br_fdb_insert(br, p, dev->dev_addr, 0))
+ netdev_err(dev, "failed insert local address bridge forwarding table\n");
+
spin_lock_bh(&br->lock);
changed_addr = br_stp_recalculate_bridge_id(br);
@@ -404,9 +407,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
dev_set_mtu(br->dev, br_min_mtu(br));
- if (br_fdb_insert(br, p, dev->dev_addr, 0))
- netdev_err(dev, "failed insert local address bridge forwarding table\n");
-
kobject_uevent(&p->kobj, KOBJ_ADD);
return 0;
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index bf8dc7d308d6..28d544627422 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -77,7 +77,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
/* insert into forwarding database after filtering to avoid spoofing */
br = p->br;
if (p->flags & BR_LEARNING)
- br_fdb_update(br, p, eth_hdr(skb)->h_source, vid);
+ br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false);
if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) &&
br_multicast_rcv(br, p, skb, vid))
@@ -148,7 +148,7 @@ static int br_handle_local_finish(struct sk_buff *skb)
br_vlan_get_tag(skb, &vid);
if (p->flags & BR_LEARNING)
- br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid);
+ br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
return 0; /* process further */
}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index ef66365b7354..7b757b5dc773 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -363,7 +363,7 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
skb_reset_mac_header(skb);
eth = eth_hdr(skb);
- memcpy(eth->h_source, br->dev->dev_addr, ETH_ALEN);
+ ether_addr_copy(eth->h_source, br->dev->dev_addr);
eth->h_dest[0] = 1;
eth->h_dest[1] = 0;
eth->h_dest[2] = 0x5e;
@@ -433,7 +433,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
skb_reset_mac_header(skb);
eth = eth_hdr(skb);
- memcpy(eth->h_source, br->dev->dev_addr, ETH_ALEN);
+ ether_addr_copy(eth->h_source, br->dev->dev_addr);
eth->h_proto = htons(ETH_P_IPV6);
skb_put(skb, sizeof(*eth));
@@ -1127,9 +1127,10 @@ static void br_multicast_query_received(struct net_bridge *br,
struct net_bridge_port *port,
struct bridge_mcast_querier *querier,
int saddr,
+ bool is_general_query,
unsigned long max_delay)
{
- if (saddr)
+ if (saddr && is_general_query)
br_multicast_update_querier_timer(br, querier, max_delay);
else if (timer_pending(&querier->timer))
return;
@@ -1181,8 +1182,16 @@ static int br_ip4_multicast_query(struct net_bridge *br,
IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1;
}
+ /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the multicast link layer
+ * all-systems destination addresses (224.0.0.1) for general queries
+ */
+ if (!group && iph->daddr != htonl(INADDR_ALLHOSTS_GROUP)) {
+ err = -EINVAL;
+ goto out;
+ }
+
br_multicast_query_received(br, port, &br->ip4_querier, !!iph->saddr,
- max_delay);
+ !group, max_delay);
if (!group)
goto out;
@@ -1228,6 +1237,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
unsigned long max_delay;
unsigned long now = jiffies;
const struct in6_addr *group = NULL;
+ bool is_general_query;
int err = 0;
spin_lock(&br->multicast_lock);
@@ -1235,6 +1245,12 @@ static int br_ip6_multicast_query(struct net_bridge *br,
(port && port->state == BR_STATE_DISABLED))
goto out;
+ /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */
+ if (!(ipv6_addr_type(&ip6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
+ err = -EINVAL;
+ goto out;
+ }
+
if (skb->len == sizeof(*mld)) {
if (!pskb_may_pull(skb, sizeof(*mld))) {
err = -EINVAL;
@@ -1256,8 +1272,19 @@ static int br_ip6_multicast_query(struct net_bridge *br,
max_delay = max(msecs_to_jiffies(mldv2_mrc(mld2q)), 1UL);
}
+ is_general_query = group && ipv6_addr_any(group);
+
+ /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer
+ * all-nodes destination address (ff02::1) for general queries
+ */
+ if (is_general_query && !ipv6_addr_is_ll_all_nodes(&ip6h->daddr)) {
+ err = -EINVAL;
+ goto out;
+ }
+
br_multicast_query_received(br, port, &br->ip6_querier,
- !ipv6_addr_any(&ip6h->saddr), max_delay);
+ !ipv6_addr_any(&ip6h->saddr),
+ is_general_query, max_delay);
if (!group)
goto out;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index b008c59a92c4..80e1b0f60a30 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -167,7 +167,7 @@ void br_netfilter_rtable_init(struct net_bridge *br)
rt->dst.dev = br->dev;
rt->dst.path = &rt->dst;
dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
- rt->dst.flags = DST_NOXFRM | DST_NOPEER | DST_FAKE_RTABLE;
+ rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE;
rt->dst.ops = &fake_dst_ops;
}
@@ -506,7 +506,7 @@ bridged_dnat:
1);
return 0;
}
- memcpy(eth_hdr(skb)->h_dest, dev->dev_addr, ETH_ALEN);
+ ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
skb->pkt_type = PACKET_HOST;
}
} else {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index fcd12333c59b..e1ca1dc916a4 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -46,12 +46,12 @@ typedef __u16 port_id;
struct bridge_id
{
unsigned char prio[2];
- unsigned char addr[6];
+ unsigned char addr[ETH_ALEN];
};
struct mac_addr
{
- unsigned char addr[6];
+ unsigned char addr[ETH_ALEN];
};
struct br_ip
@@ -104,6 +104,7 @@ struct net_bridge_fdb_entry
mac_addr addr;
unsigned char is_local;
unsigned char is_static;
+ unsigned char added_by_user;
__u16 vlan_id;
};
@@ -370,6 +371,9 @@ static inline void br_netpoll_disable(struct net_bridge_port *p)
int br_fdb_init(void);
void br_fdb_fini(void);
void br_fdb_flush(struct net_bridge *br);
+void br_fdb_find_delete_local(struct net_bridge *br,
+ const struct net_bridge_port *p,
+ const unsigned char *addr, u16 vid);
void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr);
void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr);
void br_fdb_cleanup(unsigned long arg);
@@ -383,8 +387,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count,
int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid);
void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
- const unsigned char *addr, u16 vid);
-int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vid);
+ const unsigned char *addr, u16 vid, bool added_by_user);
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev, const unsigned char *addr);
@@ -584,6 +587,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags);
int br_vlan_delete(struct net_bridge *br, u16 vid);
void br_vlan_flush(struct net_bridge *br);
+bool br_vlan_find(struct net_bridge *br, u16 vid);
int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags);
int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
@@ -665,6 +669,11 @@ static inline void br_vlan_flush(struct net_bridge *br)
{
}
+static inline bool br_vlan_find(struct net_bridge *br, u16 vid)
+{
+ return false;
+}
+
static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
{
return -EOPNOTSUPP;
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 656a6f3e40de..189ba1e7d851 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -194,6 +194,8 @@ void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
wasroot = br_is_root_bridge(br);
+ br_fdb_change_mac_address(br, addr);
+
memcpy(oldaddr, br->bridge_id.addr, ETH_ALEN);
memcpy(br->bridge_id.addr, addr, ETH_ALEN);
memcpy(br->dev->dev_addr, addr, ETH_ALEN);
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 4ca4d0a0151c..8249ca764c79 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -275,9 +275,7 @@ int br_vlan_delete(struct net_bridge *br, u16 vid)
if (!pv)
return -EINVAL;
- spin_lock_bh(&br->hash_lock);
- fdb_delete_by_addr(br, br->dev->dev_addr, vid);
- spin_unlock_bh(&br->hash_lock);
+ br_fdb_find_delete_local(br, NULL, br->dev->dev_addr, vid);
__vlan_del(pv, vid);
return 0;
@@ -295,6 +293,25 @@ void br_vlan_flush(struct net_bridge *br)
__vlan_flush(pv);
}
+bool br_vlan_find(struct net_bridge *br, u16 vid)
+{
+ struct net_port_vlans *pv;
+ bool found = false;
+
+ rcu_read_lock();
+ pv = rcu_dereference(br->vlan_info);
+
+ if (!pv)
+ goto out;
+
+ if (test_bit(vid, pv->vlan_bitmap))
+ found = true;
+
+out:
+ rcu_read_unlock();
+ return found;
+}
+
int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
{
if (!rtnl_trylock())
@@ -359,9 +376,7 @@ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid)
if (!pv)
return -EINVAL;
- spin_lock_bh(&port->br->hash_lock);
- fdb_delete_by_addr(port->br, port->dev->dev_addr, vid);
- spin_unlock_bh(&port->br->hash_lock);
+ br_fdb_find_delete_local(port->br, port, port->dev->dev_addr, vid);
return __vlan_del(pv, vid);
}
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index 3fb3c848affe..9024283d2bca 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -28,7 +28,7 @@ static bool ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh,
uint32_t cmp[2] = { 0, 0 };
int key = ((const unsigned char *)mac)[5];
- memcpy(((char *) cmp) + 2, mac, ETH_ALEN);
+ ether_addr_copy(((char *) cmp) + 2, mac);
start = wh->table[key];
limit = wh->table[key + 1];
if (ip) {
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index c59f7bfae6e2..4e0b0c359325 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -22,7 +22,7 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par)
if (!skb_make_writable(skb, 0))
return EBT_DROP;
- memcpy(eth_hdr(skb)->h_dest, info->mac, ETH_ALEN);
+ ether_addr_copy(eth_hdr(skb)->h_dest, info->mac);
return info->target;
}
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 46624bb6d9be..203964997a51 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -25,10 +25,10 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
if (par->hooknum != NF_BR_BROUTING)
/* rcu_read_lock()ed by nf_hook_slow */
- memcpy(eth_hdr(skb)->h_dest,
- br_port_get_rcu(par->in)->br->dev->dev_addr, ETH_ALEN);
+ ether_addr_copy(eth_hdr(skb)->h_dest,
+ br_port_get_rcu(par->in)->br->dev->dev_addr);
else
- memcpy(eth_hdr(skb)->h_dest, par->in->dev_addr, ETH_ALEN);
+ ether_addr_copy(eth_hdr(skb)->h_dest, par->in->dev_addr);
skb->pkt_type = PACKET_HOST;
return info->target;
}
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index 0f6b118d6cb2..e56ccd060d26 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -24,7 +24,7 @@ ebt_snat_tg(struct sk_buff *skb, const struct xt_action_param *par)
if (!skb_make_writable(skb, 0))
return EBT_DROP;
- memcpy(eth_hdr(skb)->h_source, info->mac, ETH_ALEN);
+ ether_addr_copy(eth_hdr(skb)->h_source, info->mac);
if (!(info->target & NAT_ARP_BIT) &&
eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) {
const struct arphdr *ap;
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 4dca159435cf..edbca468fa73 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -22,6 +22,7 @@
#include <net/pkt_sched.h>
#include <net/caif/caif_device.h>
#include <net/caif/caif_layer.h>
+#include <net/caif/caif_dev.h>
#include <net/caif/cfpkt.h>
#include <net/caif/cfcnfg.h>
#include <net/caif/cfserl.h>
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index 353f793d1b3b..a6e115463052 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -15,6 +15,7 @@
#include <net/caif/caif_layer.h>
#include <net/caif/cfsrvl.h>
#include <net/caif/cfpkt.h>
+#include <net/caif/caif_dev.h>
#define SRVL_CTRL_PKT_SIZE 1
#define SRVL_FLOW_OFF 0x81
diff --git a/net/can/af_can.c b/net/can/af_can.c
index d249874a366d..a27f8aad9e99 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -57,6 +57,7 @@
#include <linux/skbuff.h>
#include <linux/can.h>
#include <linux/can/core.h>
+#include <linux/can/skb.h>
#include <linux/ratelimit.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -290,7 +291,7 @@ int can_send(struct sk_buff *skb, int loop)
return -ENOMEM;
}
- newskb->sk = skb->sk;
+ can_skb_set_owner(newskb, skb->sk);
newskb->ip_summed = CHECKSUM_UNNECESSARY;
newskb->pkt_type = PACKET_BROADCAST;
}
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 3fc737b214c7..dcb75c0e66c1 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -268,7 +268,7 @@ static void bcm_can_tx(struct bcm_op *op)
/* send with loopback */
skb->dev = dev;
- skb->sk = op->sk;
+ can_skb_set_owner(skb, op->sk);
can_send(skb, 1);
/* update statistics */
@@ -1223,7 +1223,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
can_skb_prv(skb)->ifindex = dev->ifindex;
skb->dev = dev;
- skb->sk = sk;
+ can_skb_set_owner(skb, sk);
err = can_send(skb, 1); /* send with loopback */
dev_put(dev);
diff --git a/net/can/raw.c b/net/can/raw.c
index 07d72d852324..081e81fd017f 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -121,13 +121,9 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
if (!ro->recv_own_msgs && oskb->sk == sk)
return;
- /* do not pass frames with DLC > 8 to a legacy socket */
- if (!ro->fd_frames) {
- struct canfd_frame *cfd = (struct canfd_frame *)oskb->data;
-
- if (unlikely(cfd->len > CAN_MAX_DLEN))
- return;
- }
+ /* do not pass non-CAN2.0 frames to a legacy socket */
+ if (!ro->fd_frames && oskb->len != CAN_MTU)
+ return;
/* clone the given skb to be able to enqueue it into the rcv queue */
skb = skb_clone(oskb, GFP_ATOMIC);
@@ -715,6 +711,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
skb->dev = dev;
skb->sk = sk;
+ skb->priority = sk->sk_priority;
err = can_send(skb, ro->loopback);
@@ -737,9 +734,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size, int flags)
{
struct sock *sk = sock->sk;
- struct raw_sock *ro = raw_sk(sk);
struct sk_buff *skb;
- int rxmtu;
int err = 0;
int noblock;
@@ -750,20 +745,10 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock,
if (!skb)
return err;
- /*
- * when serving a legacy socket the DLC <= 8 is already checked inside
- * raw_rcv(). Now check if we need to pass a canfd_frame to a legacy
- * socket and cut the possible CANFD_MTU/CAN_MTU length to CAN_MTU
- */
- if (!ro->fd_frames)
- rxmtu = CAN_MTU;
- else
- rxmtu = skb->len;
-
- if (size < rxmtu)
+ if (size < skb->len)
msg->msg_flags |= MSG_TRUNC;
else
- size = rxmtu;
+ size = skb->len;
err = memcpy_toiovec(msg->msg_iov, skb->data, size);
if (err < 0) {
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 0676f2b199d6..82750f915865 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2082,7 +2082,6 @@ bad:
pr_err("osdc handle_map corrupt msg\n");
ceph_msg_dump(msg);
up_write(&osdc->map_sem);
- return;
}
/*
@@ -2281,7 +2280,6 @@ done_err:
bad:
pr_err("osdc handle_watch_notify corrupt msg\n");
- return;
}
/*
diff --git a/net/core/dev.c b/net/core/dev.c
index 3721db716350..55f8e64c03a2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2420,7 +2420,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
* 2. No high memory really exists on this machine.
*/
-static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
int i;
@@ -2495,34 +2495,36 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
}
static netdev_features_t harmonize_features(struct sk_buff *skb,
- netdev_features_t features)
+ const struct net_device *dev,
+ netdev_features_t features)
{
if (skb->ip_summed != CHECKSUM_NONE &&
!can_checksum_protocol(features, skb_network_protocol(skb))) {
features &= ~NETIF_F_ALL_CSUM;
- } else if (illegal_highdma(skb->dev, skb)) {
+ } else if (illegal_highdma(dev, skb)) {
features &= ~NETIF_F_SG;
}
return features;
}
-netdev_features_t netif_skb_features(struct sk_buff *skb)
+netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
+ const struct net_device *dev)
{
__be16 protocol = skb->protocol;
- netdev_features_t features = skb->dev->features;
+ netdev_features_t features = dev->features;
- if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
+ if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs)
features &= ~NETIF_F_GSO_MASK;
if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
protocol = veh->h_vlan_encapsulated_proto;
} else if (!vlan_tx_tag_present(skb)) {
- return harmonize_features(skb, features);
+ return harmonize_features(skb, dev, features);
}
- features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
+ features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX);
if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
@@ -2530,9 +2532,9 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX;
- return harmonize_features(skb, features);
+ return harmonize_features(skb, dev, features);
}
-EXPORT_SYMBOL(netif_skb_features);
+EXPORT_SYMBOL(netif_skb_dev_features);
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq)
@@ -2803,7 +2805,7 @@ EXPORT_SYMBOL(dev_loopback_xmit);
* the BH enable code must have IRQs enabled so that it will not deadlock.
* --BLG
*/
-int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
+static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
{
struct net_device *dev = skb->dev;
struct netdev_queue *txq;
@@ -3229,10 +3231,6 @@ static int netif_rx_internal(struct sk_buff *skb)
{
int ret;
- /* if netpoll wants it, pretend we never saw it */
- if (netpoll_rx(skb))
- return NET_RX_DROP;
-
net_timestamp_check(netdev_tstamp_prequeue, skb);
trace_netif_rx(skb);
@@ -3493,11 +3491,11 @@ EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
{
switch (skb->protocol) {
- case __constant_htons(ETH_P_ARP):
- case __constant_htons(ETH_P_IP):
- case __constant_htons(ETH_P_IPV6):
- case __constant_htons(ETH_P_8021Q):
- case __constant_htons(ETH_P_8021AD):
+ case htons(ETH_P_ARP):
+ case htons(ETH_P_IP):
+ case htons(ETH_P_IPV6):
+ case htons(ETH_P_8021Q):
+ case htons(ETH_P_8021AD):
return true;
default:
return false;
@@ -3518,10 +3516,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
trace_netif_receive_skb(skb);
- /* if we've gotten here through NAPI, check netpoll */
- if (netpoll_receive_skb(skb))
- goto out;
-
orig_dev = skb->dev;
skb_reset_network_header(skb);
@@ -3648,7 +3642,6 @@ drop:
unlock:
rcu_read_unlock();
-out:
return ret;
}
@@ -3873,7 +3866,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
int same_flow;
enum gro_result ret;
- if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
+ if (!(skb->dev->features & NETIF_F_GRO))
goto normal;
if (skb_is_gso(skb) || skb_has_frag_list(skb))
@@ -4637,7 +4630,7 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
-int netdev_adjacent_sysfs_add(struct net_device *dev,
+static int netdev_adjacent_sysfs_add(struct net_device *dev,
struct net_device *adj_dev,
struct list_head *dev_list)
{
@@ -4647,7 +4640,7 @@ int netdev_adjacent_sysfs_add(struct net_device *dev,
return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj),
linkname);
}
-void netdev_adjacent_sysfs_del(struct net_device *dev,
+static void netdev_adjacent_sysfs_del(struct net_device *dev,
char *name,
struct list_head *dev_list)
{
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index f409e0bd35c0..185c341fafbd 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -745,6 +745,13 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
attach_rules(&ops->rules_list, dev);
break;
+ case NETDEV_CHANGENAME:
+ list_for_each_entry(ops, &net->rules_ops, list) {
+ detach_rules(&ops->rules_list, dev);
+ attach_rules(&ops->rules_list, dev);
+ }
+ break;
+
case NETDEV_UNREGISTER:
list_for_each_entry(ops, &net->rules_ops, list)
detach_rules(&ops->rules_list, dev);
diff --git a/net/core/flow.c b/net/core/flow.c
index dfa602ceb8cd..31cfb365e0c6 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -24,6 +24,7 @@
#include <net/flow.h>
#include <linux/atomic.h>
#include <linux/security.h>
+#include <net/net_namespace.h>
struct flow_cache_entry {
union {
@@ -38,37 +39,14 @@ struct flow_cache_entry {
struct flow_cache_object *object;
};
-struct flow_cache_percpu {
- struct hlist_head *hash_table;
- int hash_count;
- u32 hash_rnd;
- int hash_rnd_recalc;
- struct tasklet_struct flush_tasklet;
-};
-
struct flow_flush_info {
struct flow_cache *cache;
atomic_t cpuleft;
struct completion completion;
};
-struct flow_cache {
- u32 hash_shift;
- struct flow_cache_percpu __percpu *percpu;
- struct notifier_block hotcpu_notifier;
- int low_watermark;
- int high_watermark;
- struct timer_list rnd_timer;
-};
-
-atomic_t flow_cache_genid = ATOMIC_INIT(0);
-EXPORT_SYMBOL(flow_cache_genid);
-static struct flow_cache flow_cache_global;
static struct kmem_cache *flow_cachep __read_mostly;
-static DEFINE_SPINLOCK(flow_cache_gc_lock);
-static LIST_HEAD(flow_cache_gc_list);
-
#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift)
#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
@@ -84,16 +62,18 @@ static void flow_cache_new_hashrnd(unsigned long arg)
add_timer(&fc->rnd_timer);
}
-static int flow_entry_valid(struct flow_cache_entry *fle)
+static int flow_entry_valid(struct flow_cache_entry *fle,
+ struct netns_xfrm *xfrm)
{
- if (atomic_read(&flow_cache_genid) != fle->genid)
+ if (atomic_read(&xfrm->flow_cache_genid) != fle->genid)
return 0;
if (fle->object && !fle->object->ops->check(fle->object))
return 0;
return 1;
}
-static void flow_entry_kill(struct flow_cache_entry *fle)
+static void flow_entry_kill(struct flow_cache_entry *fle,
+ struct netns_xfrm *xfrm)
{
if (fle->object)
fle->object->ops->delete(fle->object);
@@ -104,26 +84,28 @@ static void flow_cache_gc_task(struct work_struct *work)
{
struct list_head gc_list;
struct flow_cache_entry *fce, *n;
+ struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
+ flow_cache_gc_work);
INIT_LIST_HEAD(&gc_list);
- spin_lock_bh(&flow_cache_gc_lock);
- list_splice_tail_init(&flow_cache_gc_list, &gc_list);
- spin_unlock_bh(&flow_cache_gc_lock);
+ spin_lock_bh(&xfrm->flow_cache_gc_lock);
+ list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list);
+ spin_unlock_bh(&xfrm->flow_cache_gc_lock);
list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)
- flow_entry_kill(fce);
+ flow_entry_kill(fce, xfrm);
}
-static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task);
static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
- int deleted, struct list_head *gc_list)
+ int deleted, struct list_head *gc_list,
+ struct netns_xfrm *xfrm)
{
if (deleted) {
fcp->hash_count -= deleted;
- spin_lock_bh(&flow_cache_gc_lock);
- list_splice_tail(gc_list, &flow_cache_gc_list);
- spin_unlock_bh(&flow_cache_gc_lock);
- schedule_work(&flow_cache_gc_work);
+ spin_lock_bh(&xfrm->flow_cache_gc_lock);
+ list_splice_tail(gc_list, &xfrm->flow_cache_gc_list);
+ spin_unlock_bh(&xfrm->flow_cache_gc_lock);
+ schedule_work(&xfrm->flow_cache_gc_work);
}
}
@@ -135,6 +117,8 @@ static void __flow_cache_shrink(struct flow_cache *fc,
struct hlist_node *tmp;
LIST_HEAD(gc_list);
int i, deleted = 0;
+ struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
+ flow_cache_global);
for (i = 0; i < flow_cache_hash_size(fc); i++) {
int saved = 0;
@@ -142,7 +126,7 @@ static void __flow_cache_shrink(struct flow_cache *fc,
hlist_for_each_entry_safe(fle, tmp,
&fcp->hash_table[i], u.hlist) {
if (saved < shrink_to &&
- flow_entry_valid(fle)) {
+ flow_entry_valid(fle, xfrm)) {
saved++;
} else {
deleted++;
@@ -152,7 +136,7 @@ static void __flow_cache_shrink(struct flow_cache *fc,
}
}
- flow_cache_queue_garbage(fcp, deleted, &gc_list);
+ flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
}
static void flow_cache_shrink(struct flow_cache *fc,
@@ -208,7 +192,7 @@ struct flow_cache_object *
flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
flow_resolve_t resolver, void *ctx)
{
- struct flow_cache *fc = &flow_cache_global;
+ struct flow_cache *fc = &net->xfrm.flow_cache_global;
struct flow_cache_percpu *fcp;
struct flow_cache_entry *fle, *tfle;
struct flow_cache_object *flo;
@@ -258,7 +242,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
fcp->hash_count++;
}
- } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
+ } else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) {
flo = fle->object;
if (!flo)
goto ret_object;
@@ -279,7 +263,7 @@ nocache:
}
flo = resolver(net, key, family, dir, flo, ctx);
if (fle) {
- fle->genid = atomic_read(&flow_cache_genid);
+ fle->genid = atomic_read(&net->xfrm.flow_cache_genid);
if (!IS_ERR(flo))
fle->object = flo;
else
@@ -303,12 +287,14 @@ static void flow_cache_flush_tasklet(unsigned long data)
struct hlist_node *tmp;
LIST_HEAD(gc_list);
int i, deleted = 0;
+ struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
+ flow_cache_global);
fcp = this_cpu_ptr(fc->percpu);
for (i = 0; i < flow_cache_hash_size(fc); i++) {
hlist_for_each_entry_safe(fle, tmp,
&fcp->hash_table[i], u.hlist) {
- if (flow_entry_valid(fle))
+ if (flow_entry_valid(fle, xfrm))
continue;
deleted++;
@@ -317,7 +303,7 @@ static void flow_cache_flush_tasklet(unsigned long data)
}
}
- flow_cache_queue_garbage(fcp, deleted, &gc_list);
+ flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
if (atomic_dec_and_test(&info->cpuleft))
complete(&info->completion);
@@ -351,10 +337,9 @@ static void flow_cache_flush_per_cpu(void *data)
tasklet_schedule(tasklet);
}
-void flow_cache_flush(void)
+void flow_cache_flush(struct net *net)
{
struct flow_flush_info info;
- static DEFINE_MUTEX(flow_flush_sem);
cpumask_var_t mask;
int i, self;
@@ -365,8 +350,8 @@ void flow_cache_flush(void)
/* Don't want cpus going down or up during this. */
get_online_cpus();
- mutex_lock(&flow_flush_sem);
- info.cache = &flow_cache_global;
+ mutex_lock(&net->xfrm.flow_flush_sem);
+ info.cache = &net->xfrm.flow_cache_global;
for_each_online_cpu(i)
if (!flow_cache_percpu_empty(info.cache, i))
cpumask_set_cpu(i, mask);
@@ -386,21 +371,23 @@ void flow_cache_flush(void)
wait_for_completion(&info.completion);
done:
- mutex_unlock(&flow_flush_sem);
+ mutex_unlock(&net->xfrm.flow_flush_sem);
put_online_cpus();
free_cpumask_var(mask);
}
static void flow_cache_flush_task(struct work_struct *work)
{
- flow_cache_flush();
-}
+ struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
+ flow_cache_gc_work);
+ struct net *net = container_of(xfrm, struct net, xfrm);
-static DECLARE_WORK(flow_cache_flush_work, flow_cache_flush_task);
+ flow_cache_flush(net);
+}
-void flow_cache_flush_deferred(void)
+void flow_cache_flush_deferred(struct net *net)
{
- schedule_work(&flow_cache_flush_work);
+ schedule_work(&net->xfrm.flow_cache_flush_work);
}
static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
@@ -425,7 +412,8 @@ static int flow_cache_cpu(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
- struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
+ struct flow_cache *fc = container_of(nfb, struct flow_cache,
+ hotcpu_notifier);
int res, cpu = (unsigned long) hcpu;
struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
@@ -444,9 +432,20 @@ static int flow_cache_cpu(struct notifier_block *nfb,
return NOTIFY_OK;
}
-static int __init flow_cache_init(struct flow_cache *fc)
+int flow_cache_init(struct net *net)
{
int i;
+ struct flow_cache *fc = &net->xfrm.flow_cache_global;
+
+ if (!flow_cachep)
+ flow_cachep = kmem_cache_create("flow_cache",
+ sizeof(struct flow_cache_entry),
+ 0, SLAB_PANIC, NULL);
+ spin_lock_init(&net->xfrm.flow_cache_gc_lock);
+ INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list);
+ INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task);
+ INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task);
+ mutex_init(&net->xfrm.flow_flush_sem);
fc->hash_shift = 10;
fc->low_watermark = 2 * flow_cache_hash_size(fc);
@@ -484,14 +483,23 @@ err:
return -ENOMEM;
}
+EXPORT_SYMBOL(flow_cache_init);
-static int __init flow_cache_init_global(void)
+void flow_cache_fini(struct net *net)
{
- flow_cachep = kmem_cache_create("flow_cache",
- sizeof(struct flow_cache_entry),
- 0, SLAB_PANIC, NULL);
+ int i;
+ struct flow_cache *fc = &net->xfrm.flow_cache_global;
- return flow_cache_init(&flow_cache_global);
-}
+ del_timer_sync(&fc->rnd_timer);
+ unregister_hotcpu_notifier(&fc->hotcpu_notifier);
-module_init(flow_cache_init_global);
+ for_each_possible_cpu(i) {
+ struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
+ kfree(fcp->hash_table);
+ fcp->hash_table = NULL;
+ }
+
+ free_percpu(fc->percpu);
+ fc->percpu = NULL;
+}
+EXPORT_SYMBOL(flow_cache_fini);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 87577d447554..80201bf69d59 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -61,7 +61,7 @@ bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
again:
switch (proto) {
- case __constant_htons(ETH_P_IP): {
+ case htons(ETH_P_IP): {
const struct iphdr *iph;
struct iphdr _iph;
ip:
@@ -77,7 +77,7 @@ ip:
iph_to_flow_copy_addrs(flow, iph);
break;
}
- case __constant_htons(ETH_P_IPV6): {
+ case htons(ETH_P_IPV6): {
const struct ipv6hdr *iph;
struct ipv6hdr _iph;
ipv6:
@@ -91,8 +91,8 @@ ipv6:
nhoff += sizeof(struct ipv6hdr);
break;
}
- case __constant_htons(ETH_P_8021AD):
- case __constant_htons(ETH_P_8021Q): {
+ case htons(ETH_P_8021AD):
+ case htons(ETH_P_8021Q): {
const struct vlan_hdr *vlan;
struct vlan_hdr _vlan;
@@ -104,7 +104,7 @@ ipv6:
nhoff += sizeof(*vlan);
goto again;
}
- case __constant_htons(ETH_P_PPP_SES): {
+ case htons(ETH_P_PPP_SES): {
struct {
struct pppoe_hdr hdr;
__be16 proto;
@@ -115,9 +115,9 @@ ipv6:
proto = hdr->proto;
nhoff += PPPOE_SES_HLEN;
switch (proto) {
- case __constant_htons(PPP_IP):
+ case htons(PPP_IP):
goto ip;
- case __constant_htons(PPP_IPV6):
+ case htons(PPP_IPV6):
goto ipv6;
default:
return false;
@@ -323,17 +323,6 @@ u32 __skb_get_poff(const struct sk_buff *skb)
return poff;
}
-static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
-{
- if (unlikely(queue_index >= dev->real_num_tx_queues)) {
- net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
- dev->name, queue_index,
- dev->real_num_tx_queues);
- return 0;
- }
- return queue_index;
-}
-
static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_XPS
@@ -372,7 +361,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
#endif
}
-u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
{
struct sock *sk = skb->sk;
int queue_index = sk_tx_queue_get(sk);
@@ -392,7 +381,6 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
return queue_index;
}
-EXPORT_SYMBOL(__netdev_pick_tx);
struct netdev_queue *netdev_pick_tx(struct net_device *dev,
struct sk_buff *skb,
@@ -403,13 +391,13 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
if (dev->real_num_tx_queues != 1) {
const struct net_device_ops *ops = dev->netdev_ops;
if (ops->ndo_select_queue)
- queue_index = ops->ndo_select_queue(dev, skb,
- accel_priv);
+ queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
+ __netdev_pick_tx);
else
queue_index = __netdev_pick_tx(dev, skb);
if (!accel_priv)
- queue_index = dev_cap_txqueue(dev, queue_index);
+ queue_index = netdev_cap_txqueue(dev, queue_index);
}
skb_set_queue_mapping(skb, queue_index);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index b9e9e0d38672..8f8a96ef9f3f 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -766,9 +766,6 @@ static void neigh_periodic_work(struct work_struct *work)
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
- if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
- goto out;
-
/*
* periodically recompute ReachableTime from random function
*/
@@ -781,6 +778,9 @@ static void neigh_periodic_work(struct work_struct *work)
neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
}
+ if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
+ goto out;
+
for (i = 0 ; i < (1 << nht->hash_shift); i++) {
np = &nht->hash_buckets[i];
@@ -836,10 +836,10 @@ out:
static __inline__ int neigh_max_probes(struct neighbour *n)
{
struct neigh_parms *p = n->parms;
- return (n->nud_state & NUD_PROBE) ?
- NEIGH_VAR(p, UCAST_PROBES) :
- NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
- NEIGH_VAR(p, MCAST_PROBES);
+ int max_probes = NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES);
+ if (!(n->nud_state & NUD_PROBE))
+ max_probes += NEIGH_VAR(p, MCAST_PROBES);
+ return max_probes;
}
static void neigh_invalidate(struct neighbour *neigh)
@@ -945,6 +945,7 @@ static void neigh_timer_handler(unsigned long arg)
neigh->nud_state = NUD_FAILED;
notify = 1;
neigh_invalidate(neigh);
+ goto out;
}
if (neigh->nud_state & NUD_IN_TIMER) {
@@ -3046,7 +3047,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
if (!t)
goto err;
- for (i = 0; i < ARRAY_SIZE(t->neigh_vars); i++) {
+ for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
t->neigh_vars[i].data += (long) p;
t->neigh_vars[i].extra1 = dev;
t->neigh_vars[i].extra2 = p;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 93886246a0b4..daed9a64c6f6 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -104,6 +104,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
}
NETDEVICE_SHOW_RO(dev_id, fmt_hex);
+NETDEVICE_SHOW_RO(dev_port, fmt_dec);
NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec);
NETDEVICE_SHOW_RO(addr_len, fmt_dec);
NETDEVICE_SHOW_RO(iflink, fmt_dec);
@@ -373,6 +374,7 @@ static struct attribute *net_class_attrs[] = {
&dev_attr_netdev_group.attr,
&dev_attr_type.attr,
&dev_attr_dev_id.attr,
+ &dev_attr_dev_port.attr,
&dev_attr_iflink.attr,
&dev_attr_ifindex.attr,
&dev_attr_addr_assign_type.attr,
@@ -996,15 +998,12 @@ static struct attribute_group dql_group = {
#endif /* CONFIG_BQL */
#ifdef CONFIG_XPS
-static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
+static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
{
struct net_device *dev = queue->dev;
- int i;
-
- for (i = 0; i < dev->num_tx_queues; i++)
- if (queue == &dev->_tx[i])
- break;
+ unsigned int i;
+ i = queue - dev->_tx;
BUG_ON(i >= dev->num_tx_queues);
return i;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c03f3dec4763..7291dde93469 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -46,13 +46,9 @@
static struct sk_buff_head skb_pool;
-static atomic_t trapped;
-
DEFINE_STATIC_SRCU(netpoll_srcu);
#define USEC_PER_POLL 50
-#define NETPOLL_RX_ENABLED 1
-#define NETPOLL_RX_DROP 2
#define MAX_SKB_SIZE \
(sizeof(struct ethhdr) + \
@@ -61,7 +57,6 @@ DEFINE_STATIC_SRCU(netpoll_srcu);
MAX_UDP_CHUNK)
static void zap_completion_queue(void);
-static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
static void netpoll_async_cleanup(struct work_struct *work);
static unsigned int carrier_timeout = 4;
@@ -109,25 +104,6 @@ static void queue_process(struct work_struct *work)
}
}
-static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
- unsigned short ulen, __be32 saddr, __be32 daddr)
-{
- __wsum psum;
-
- if (uh->check == 0 || skb_csum_unnecessary(skb))
- return 0;
-
- psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
-
- if (skb->ip_summed == CHECKSUM_COMPLETE &&
- !csum_fold(csum_add(psum, skb->csum)))
- return 0;
-
- skb->csum = psum;
-
- return __skb_checksum_complete(skb);
-}
-
/*
* Check whether delayed processing was scheduled for our NIC. If so,
* we attempt to grab the poll lock and use ->poll() to pump the card.
@@ -138,14 +114,8 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
* trylock here and interrupts are already disabled in the softirq
* case. Further, we test the poll_owner to avoid recursion on UP
* systems where the lock doesn't exist.
- *
- * In cases where there is bi-directional communications, reading only
- * one message at a time can lead to packets being dropped by the
- * network adapter, forcing superfluous retries and possibly timeouts.
- * Thus, we set our budget to greater than 1.
*/
-static int poll_one_napi(struct netpoll_info *npinfo,
- struct napi_struct *napi, int budget)
+static int poll_one_napi(struct napi_struct *napi, int budget)
{
int work;
@@ -156,52 +126,35 @@ static int poll_one_napi(struct netpoll_info *npinfo,
if (!test_bit(NAPI_STATE_SCHED, &napi->state))
return budget;
- npinfo->rx_flags |= NETPOLL_RX_DROP;
- atomic_inc(&trapped);
set_bit(NAPI_STATE_NPSVC, &napi->state);
work = napi->poll(napi, budget);
+ WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll);
trace_napi_poll(napi);
clear_bit(NAPI_STATE_NPSVC, &napi->state);
- atomic_dec(&trapped);
- npinfo->rx_flags &= ~NETPOLL_RX_DROP;
return budget - work;
}
-static void poll_napi(struct net_device *dev)
+static void poll_napi(struct net_device *dev, int budget)
{
struct napi_struct *napi;
- int budget = 16;
list_for_each_entry(napi, &dev->napi_list, dev_list) {
if (napi->poll_owner != smp_processor_id() &&
spin_trylock(&napi->poll_lock)) {
- budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
- napi, budget);
+ budget = poll_one_napi(napi, budget);
spin_unlock(&napi->poll_lock);
-
- if (!budget)
- break;
}
}
}
-static void service_neigh_queue(struct netpoll_info *npi)
-{
- if (npi) {
- struct sk_buff *skb;
-
- while ((skb = skb_dequeue(&npi->neigh_tx)))
- netpoll_neigh_reply(skb, npi);
- }
-}
-
static void netpoll_poll_dev(struct net_device *dev)
{
const struct net_device_ops *ops;
struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
+ int budget = 0;
/* Don't do any rx activity if the dev_lock mutex is held
* the dev_open/close paths use this to block netpoll activity
@@ -224,27 +177,10 @@ static void netpoll_poll_dev(struct net_device *dev)
/* Process pending work on NIC */
ops->ndo_poll_controller(dev);
- poll_napi(dev);
+ poll_napi(dev, budget);
up(&ni->dev_lock);
- if (dev->flags & IFF_SLAVE) {
- if (ni) {
- struct net_device *bond_dev;
- struct sk_buff *skb;
- struct netpoll_info *bond_ni;
-
- bond_dev = netdev_master_upper_dev_get_rcu(dev);
- bond_ni = rcu_dereference_bh(bond_dev->npinfo);
- while ((skb = skb_dequeue(&ni->neigh_tx))) {
- skb->dev = bond_dev;
- skb_queue_tail(&bond_ni->neigh_tx, skb);
- }
- }
- }
-
- service_neigh_queue(ni);
-
zap_completion_queue();
}
@@ -529,384 +465,6 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
}
EXPORT_SYMBOL(netpoll_send_udp);
-static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
-{
- int size, type = ARPOP_REPLY;
- __be32 sip, tip;
- unsigned char *sha;
- struct sk_buff *send_skb;
- struct netpoll *np, *tmp;
- unsigned long flags;
- int hlen, tlen;
- int hits = 0, proto;
-
- if (list_empty(&npinfo->rx_np))
- return;
-
- /* Before checking the packet, we do some early
- inspection whether this is interesting at all */
- spin_lock_irqsave(&npinfo->rx_lock, flags);
- list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
- if (np->dev == skb->dev)
- hits++;
- }
- spin_unlock_irqrestore(&npinfo->rx_lock, flags);
-
- /* No netpoll struct is using this dev */
- if (!hits)
- return;
-
- proto = ntohs(eth_hdr(skb)->h_proto);
- if (proto == ETH_P_ARP) {
- struct arphdr *arp;
- unsigned char *arp_ptr;
- /* No arp on this interface */
- if (skb->dev->flags & IFF_NOARP)
- return;
-
- if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
- return;
-
- skb_reset_network_header(skb);
- skb_reset_transport_header(skb);
- arp = arp_hdr(skb);
-
- if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
- arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
- arp->ar_pro != htons(ETH_P_IP) ||
- arp->ar_op != htons(ARPOP_REQUEST))
- return;
-
- arp_ptr = (unsigned char *)(arp+1);
- /* save the location of the src hw addr */
- sha = arp_ptr;
- arp_ptr += skb->dev->addr_len;
- memcpy(&sip, arp_ptr, 4);
- arp_ptr += 4;
- /* If we actually cared about dst hw addr,
- it would get copied here */
- arp_ptr += skb->dev->addr_len;
- memcpy(&tip, arp_ptr, 4);
-
- /* Should we ignore arp? */
- if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
- return;
-
- size = arp_hdr_len(skb->dev);
-
- spin_lock_irqsave(&npinfo->rx_lock, flags);
- list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
- if (tip != np->local_ip.ip)
- continue;
-
- hlen = LL_RESERVED_SPACE(np->dev);
- tlen = np->dev->needed_tailroom;
- send_skb = find_skb(np, size + hlen + tlen, hlen);
- if (!send_skb)
- continue;
-
- skb_reset_network_header(send_skb);
- arp = (struct arphdr *) skb_put(send_skb, size);
- send_skb->dev = skb->dev;
- send_skb->protocol = htons(ETH_P_ARP);
-
- /* Fill the device header for the ARP frame */
- if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP,
- sha, np->dev->dev_addr,
- send_skb->len) < 0) {
- kfree_skb(send_skb);
- continue;
- }
-
- /*
- * Fill out the arp protocol part.
- *
- * we only support ethernet device type,
- * which (according to RFC 1390) should
- * always equal 1 (Ethernet).
- */
-
- arp->ar_hrd = htons(np->dev->type);
- arp->ar_pro = htons(ETH_P_IP);
- arp->ar_hln = np->dev->addr_len;
- arp->ar_pln = 4;
- arp->ar_op = htons(type);
-
- arp_ptr = (unsigned char *)(arp + 1);
- memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
- arp_ptr += np->dev->addr_len;
- memcpy(arp_ptr, &tip, 4);
- arp_ptr += 4;
- memcpy(arp_ptr, sha, np->dev->addr_len);
- arp_ptr += np->dev->addr_len;
- memcpy(arp_ptr, &sip, 4);
-
- netpoll_send_skb(np, send_skb);
-
- /* If there are several rx_skb_hooks for the same
- * address we're fine by sending a single reply
- */
- break;
- }
- spin_unlock_irqrestore(&npinfo->rx_lock, flags);
- } else if( proto == ETH_P_IPV6) {
-#if IS_ENABLED(CONFIG_IPV6)
- struct nd_msg *msg;
- u8 *lladdr = NULL;
- struct ipv6hdr *hdr;
- struct icmp6hdr *icmp6h;
- const struct in6_addr *saddr;
- const struct in6_addr *daddr;
- struct inet6_dev *in6_dev = NULL;
- struct in6_addr *target;
-
- in6_dev = in6_dev_get(skb->dev);
- if (!in6_dev || !in6_dev->cnf.accept_ra)
- return;
-
- if (!pskb_may_pull(skb, skb->len))
- return;
-
- msg = (struct nd_msg *)skb_transport_header(skb);
-
- __skb_push(skb, skb->data - skb_transport_header(skb));
-
- if (ipv6_hdr(skb)->hop_limit != 255)
- return;
- if (msg->icmph.icmp6_code != 0)
- return;
- if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
- return;
-
- saddr = &ipv6_hdr(skb)->saddr;
- daddr = &ipv6_hdr(skb)->daddr;
-
- size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
-
- spin_lock_irqsave(&npinfo->rx_lock, flags);
- list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
- if (!ipv6_addr_equal(daddr, &np->local_ip.in6))
- continue;
-
- hlen = LL_RESERVED_SPACE(np->dev);
- tlen = np->dev->needed_tailroom;
- send_skb = find_skb(np, size + hlen + tlen, hlen);
- if (!send_skb)
- continue;
-
- send_skb->protocol = htons(ETH_P_IPV6);
- send_skb->dev = skb->dev;
-
- skb_reset_network_header(send_skb);
- hdr = (struct ipv6hdr *) skb_put(send_skb, sizeof(struct ipv6hdr));
- *(__be32*)hdr = htonl(0x60000000);
- hdr->payload_len = htons(size);
- hdr->nexthdr = IPPROTO_ICMPV6;
- hdr->hop_limit = 255;
- hdr->saddr = *saddr;
- hdr->daddr = *daddr;
-
- icmp6h = (struct icmp6hdr *) skb_put(send_skb, sizeof(struct icmp6hdr));
- icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
- icmp6h->icmp6_router = 0;
- icmp6h->icmp6_solicited = 1;
-
- target = (struct in6_addr *) skb_put(send_skb, sizeof(struct in6_addr));
- *target = msg->target;
- icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size,
- IPPROTO_ICMPV6,
- csum_partial(icmp6h,
- size, 0));
-
- if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6,
- lladdr, np->dev->dev_addr,
- send_skb->len) < 0) {
- kfree_skb(send_skb);
- continue;
- }
-
- netpoll_send_skb(np, send_skb);
-
- /* If there are several rx_skb_hooks for the same
- * address, we're fine by sending a single reply
- */
- break;
- }
- spin_unlock_irqrestore(&npinfo->rx_lock, flags);
-#endif
- }
-}
-
-static bool pkt_is_ns(struct sk_buff *skb)
-{
- struct nd_msg *msg;
- struct ipv6hdr *hdr;
-
- if (skb->protocol != htons(ETH_P_ARP))
- return false;
- if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
- return false;
-
- msg = (struct nd_msg *)skb_transport_header(skb);
- __skb_push(skb, skb->data - skb_transport_header(skb));
- hdr = ipv6_hdr(skb);
-
- if (hdr->nexthdr != IPPROTO_ICMPV6)
- return false;
- if (hdr->hop_limit != 255)
- return false;
- if (msg->icmph.icmp6_code != 0)
- return false;
- if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
- return false;
-
- return true;
-}
-
-int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
-{
- int proto, len, ulen, data_len;
- int hits = 0, offset;
- const struct iphdr *iph;
- struct udphdr *uh;
- struct netpoll *np, *tmp;
- uint16_t source;
-
- if (list_empty(&npinfo->rx_np))
- goto out;
-
- if (skb->dev->type != ARPHRD_ETHER)
- goto out;
-
- /* check if netpoll clients need ARP */
- if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) {
- skb_queue_tail(&npinfo->neigh_tx, skb);
- return 1;
- } else if (pkt_is_ns(skb) && atomic_read(&trapped)) {
- skb_queue_tail(&npinfo->neigh_tx, skb);
- return 1;
- }
-
- if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
- skb = vlan_untag(skb);
- if (unlikely(!skb))
- goto out;
- }
-
- proto = ntohs(eth_hdr(skb)->h_proto);
- if (proto != ETH_P_IP && proto != ETH_P_IPV6)
- goto out;
- if (skb->pkt_type == PACKET_OTHERHOST)
- goto out;
- if (skb_shared(skb))
- goto out;
-
- if (proto == ETH_P_IP) {
- if (!pskb_may_pull(skb, sizeof(struct iphdr)))
- goto out;
- iph = (struct iphdr *)skb->data;
- if (iph->ihl < 5 || iph->version != 4)
- goto out;
- if (!pskb_may_pull(skb, iph->ihl*4))
- goto out;
- iph = (struct iphdr *)skb->data;
- if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
- goto out;
-
- len = ntohs(iph->tot_len);
- if (skb->len < len || len < iph->ihl*4)
- goto out;
-
- /*
- * Our transport medium may have padded the buffer out.
- * Now We trim to the true length of the frame.
- */
- if (pskb_trim_rcsum(skb, len))
- goto out;
-
- iph = (struct iphdr *)skb->data;
- if (iph->protocol != IPPROTO_UDP)
- goto out;
-
- len -= iph->ihl*4;
- uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
- offset = (unsigned char *)(uh + 1) - skb->data;
- ulen = ntohs(uh->len);
- data_len = skb->len - offset;
- source = ntohs(uh->source);
-
- if (ulen != len)
- goto out;
- if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
- goto out;
- list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
- if (np->local_ip.ip && np->local_ip.ip != iph->daddr)
- continue;
- if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr)
- continue;
- if (np->local_port && np->local_port != ntohs(uh->dest))
- continue;
-
- np->rx_skb_hook(np, source, skb, offset, data_len);
- hits++;
- }
- } else {
-#if IS_ENABLED(CONFIG_IPV6)
- const struct ipv6hdr *ip6h;
-
- if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
- goto out;
- ip6h = (struct ipv6hdr *)skb->data;
- if (ip6h->version != 6)
- goto out;
- len = ntohs(ip6h->payload_len);
- if (!len)
- goto out;
- if (len + sizeof(struct ipv6hdr) > skb->len)
- goto out;
- if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr)))
- goto out;
- ip6h = ipv6_hdr(skb);
- if (!pskb_may_pull(skb, sizeof(struct udphdr)))
- goto out;
- uh = udp_hdr(skb);
- offset = (unsigned char *)(uh + 1) - skb->data;
- ulen = ntohs(uh->len);
- data_len = skb->len - offset;
- source = ntohs(uh->source);
- if (ulen != skb->len)
- goto out;
- if (udp6_csum_init(skb, uh, IPPROTO_UDP))
- goto out;
- list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
- if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr))
- continue;
- if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr))
- continue;
- if (np->local_port && np->local_port != ntohs(uh->dest))
- continue;
-
- np->rx_skb_hook(np, source, skb, offset, data_len);
- hits++;
- }
-#endif
- }
-
- if (!hits)
- goto out;
-
- kfree_skb(skb);
- return 1;
-
-out:
- if (atomic_read(&trapped)) {
- kfree_skb(skb);
- return 1;
- }
-
- return 0;
-}
-
void netpoll_print_options(struct netpoll *np)
{
np_info(np, "local port %d\n", np->local_port);
@@ -948,6 +506,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
{
char *cur=opt, *delim;
int ipv6;
+ bool ipversion_set = false;
if (*cur != '@') {
if ((delim = strchr(cur, '@')) == NULL)
@@ -960,6 +519,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
cur++;
if (*cur != '/') {
+ ipversion_set = true;
if ((delim = strchr(cur, '/')) == NULL)
goto parse_failed;
*delim = 0;
@@ -1002,7 +562,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
if (ipv6 < 0)
goto parse_failed;
- else if (np->ipv6 != (bool)ipv6)
+ else if (ipversion_set && np->ipv6 != (bool)ipv6)
goto parse_failed;
else
np->ipv6 = (bool)ipv6;
@@ -1028,7 +588,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
{
struct netpoll_info *npinfo;
const struct net_device_ops *ops;
- unsigned long flags;
int err;
np->dev = ndev;
@@ -1050,12 +609,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
goto out;
}
- npinfo->rx_flags = 0;
- INIT_LIST_HEAD(&npinfo->rx_np);
-
- spin_lock_init(&npinfo->rx_lock);
sema_init(&npinfo->dev_lock, 1);
- skb_queue_head_init(&npinfo->neigh_tx);
skb_queue_head_init(&npinfo->txq);
INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
@@ -1074,13 +628,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
npinfo->netpoll = np;
- if (np->rx_skb_hook) {
- spin_lock_irqsave(&npinfo->rx_lock, flags);
- npinfo->rx_flags |= NETPOLL_RX_ENABLED;
- list_add_tail(&np->rx, &npinfo->rx_np);
- spin_unlock_irqrestore(&npinfo->rx_lock, flags);
- }
-
/* last thing to do is link it to the net device structure */
rcu_assign_pointer(ndev->npinfo, npinfo);
@@ -1229,7 +776,6 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
struct netpoll_info *npinfo =
container_of(rcu_head, struct netpoll_info, rcu);
- skb_queue_purge(&npinfo->neigh_tx);
skb_queue_purge(&npinfo->txq);
/* we can't call cancel_delayed_work_sync here, as we are in softirq */
@@ -1245,7 +791,6 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
void __netpoll_cleanup(struct netpoll *np)
{
struct netpoll_info *npinfo;
- unsigned long flags;
/* rtnl_dereference would be preferable here but
* rcu_cleanup_netpoll path can put us in here safely without
@@ -1255,14 +800,6 @@ void __netpoll_cleanup(struct netpoll *np)
if (!npinfo)
return;
- if (!list_empty(&npinfo->rx_np)) {
- spin_lock_irqsave(&npinfo->rx_lock, flags);
- list_del(&np->rx);
- if (list_empty(&npinfo->rx_np))
- npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
- spin_unlock_irqrestore(&npinfo->rx_lock, flags);
- }
-
synchronize_srcu(&netpoll_srcu);
if (atomic_dec_and_test(&npinfo->refcnt)) {
@@ -1306,18 +843,3 @@ out:
rtnl_unlock();
}
EXPORT_SYMBOL(netpoll_cleanup);
-
-int netpoll_trap(void)
-{
- return atomic_read(&trapped);
-}
-EXPORT_SYMBOL(netpoll_trap);
-
-void netpoll_set_trap(int trap)
-{
- if (trap)
- atomic_inc(&trapped);
- else
- atomic_dec(&trapped);
-}
-EXPORT_SYMBOL(netpoll_set_trap);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index fdac61cac1bd..d0dac57291af 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -476,23 +476,22 @@ static int pgctrl_show(struct seq_file *seq, void *v)
static ssize_t pgctrl_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- int err = 0;
char data[128];
struct pktgen_net *pn = net_generic(current->nsproxy->net_ns, pg_net_id);
- if (!capable(CAP_NET_ADMIN)) {
- err = -EPERM;
- goto out;
- }
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (count == 0)
+ return -EINVAL;
if (count > sizeof(data))
count = sizeof(data);
- if (copy_from_user(data, buf, count)) {
- err = -EFAULT;
- goto out;
- }
- data[count - 1] = 0; /* Make string */
+ if (copy_from_user(data, buf, count))
+ return -EFAULT;
+
+ data[count - 1] = 0; /* Strip trailing '\n' and terminate string */
if (!strcmp(data, "stop"))
pktgen_stop_all_threads_ifs(pn);
@@ -506,10 +505,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
else
pr_warning("Unknown command: %s\n", data);
- err = count;
-
-out:
- return err;
+ return count;
}
static int pgctrl_open(struct inode *inode, struct file *file)
@@ -1251,7 +1247,13 @@ static ssize_t pktgen_if_write(struct file *file,
"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
f,
"IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, "
- "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC, NODE_ALLOC\n");
+ "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, "
+ "MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, "
+ "QUEUE_MAP_RND, QUEUE_MAP_CPU, UDPCSUM, "
+#ifdef CONFIG_XFRM
+ "IPSEC, "
+#endif
+ "NODE_ALLOC\n");
return count;
}
sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags);
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 4425148d2b51..467f326126e0 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -221,5 +221,4 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
out:
spin_unlock_bh(&fastopenq->lock);
sock_put(lsk);
- return;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 393b1bc9a618..fc122fdb266a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -374,7 +374,7 @@ static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev)
if (!master_dev)
return 0;
ops = master_dev->rtnl_link_ops;
- if (!ops->get_slave_size)
+ if (!ops || !ops->get_slave_size)
return 0;
/* IFLA_INFO_SLAVE_DATA + nested data */
return nla_total_size(sizeof(struct nlattr)) +
@@ -1121,56 +1121,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct net *net = sock_net(skb->sk);
- int h, s_h;
- int idx = 0, s_idx;
- struct net_device *dev;
- struct hlist_head *head;
- struct nlattr *tb[IFLA_MAX+1];
- u32 ext_filter_mask = 0;
-
- s_h = cb->args[0];
- s_idx = cb->args[1];
-
- rcu_read_lock();
- cb->seq = net->dev_base_seq;
-
- if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
- ifla_policy) >= 0) {
-
- if (tb[IFLA_EXT_MASK])
- ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
- }
-
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &net->dev_index_head[h];
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, 0,
- NLM_F_MULTI,
- ext_filter_mask) <= 0)
- goto out;
-
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-cont:
- idx++;
- }
- }
-out:
- rcu_read_unlock();
- cb->args[1] = idx;
- cb->args[0] = h;
-
- return skb->len;
-}
-
-const struct nla_policy ifla_policy[IFLA_MAX+1] = {
+static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 },
[IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
[IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
@@ -1197,7 +1148,6 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 },
[IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_PORT_ID_LEN },
};
-EXPORT_SYMBOL(ifla_policy);
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
[IFLA_INFO_KIND] = { .type = NLA_STRING },
@@ -1235,6 +1185,61 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
[IFLA_PORT_RESPONSE] = { .type = NLA_U16, },
};
+static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ int h, s_h;
+ int idx = 0, s_idx;
+ struct net_device *dev;
+ struct hlist_head *head;
+ struct nlattr *tb[IFLA_MAX+1];
+ u32 ext_filter_mask = 0;
+
+ s_h = cb->args[0];
+ s_idx = cb->args[1];
+
+ rcu_read_lock();
+ cb->seq = net->dev_base_seq;
+
+ if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
+ ifla_policy) >= 0) {
+
+ if (tb[IFLA_EXT_MASK])
+ ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+ }
+
+ for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+ idx = 0;
+ head = &net->dev_index_head[h];
+ hlist_for_each_entry_rcu(dev, head, index_hlist) {
+ if (idx < s_idx)
+ goto cont;
+ if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, 0,
+ NLM_F_MULTI,
+ ext_filter_mask) <= 0)
+ goto out;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+ idx++;
+ }
+ }
+out:
+ rcu_read_unlock();
+ cb->args[1] = idx;
+ cb->args[0] = h;
+
+ return skb->len;
+}
+
+int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len)
+{
+ return nla_parse(tb, IFLA_MAX, head, len, ifla_policy);
+}
+EXPORT_SYMBOL(rtnl_nla_parse_ifla);
+
struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
{
struct net *net;
@@ -1963,16 +1968,21 @@ replay:
dev->ifindex = ifm->ifi_index;
- if (ops->newlink)
+ if (ops->newlink) {
err = ops->newlink(net, dev, tb, data);
- else
+ /* Drivers should call free_netdev() in ->destructor
+ * and unregister it on failure so that device could be
+ * finally freed in rtnl_unlock.
+ */
+ if (err < 0)
+ goto out;
+ } else {
err = register_netdevice(dev);
-
- if (err < 0) {
- free_netdev(dev);
- goto out;
+ if (err < 0) {
+ free_netdev(dev);
+ goto out;
+ }
}
-
err = rtnl_configure_link(dev, ifm);
if (err < 0)
unregister_netdevice(dev);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5976ef0846bd..3f14c638c2b1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -707,9 +707,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->mark = old->mark;
new->skb_iif = old->skb_iif;
__nf_copy(new, old);
-#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
- new->nf_trace = old->nf_trace;
-#endif
#ifdef CONFIG_NET_SCHED
new->tc_index = old->tc_index;
#ifdef CONFIG_NET_CLS_ACT
@@ -2841,81 +2838,84 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
/**
* skb_segment - Perform protocol segmentation on skb.
- * @skb: buffer to segment
+ * @head_skb: buffer to segment
* @features: features for the output path (see dev->features)
*
* This function performs segmentation on the given skb. It returns
* a pointer to the first in a list of new skbs for the segments.
* In case of error it returns ERR_PTR(err).
*/
-struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
+struct sk_buff *skb_segment(struct sk_buff *head_skb,
+ netdev_features_t features)
{
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
- struct sk_buff *fskb = skb_shinfo(skb)->frag_list;
- skb_frag_t *skb_frag = skb_shinfo(skb)->frags;
- unsigned int mss = skb_shinfo(skb)->gso_size;
- unsigned int doffset = skb->data - skb_mac_header(skb);
+ struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
+ skb_frag_t *frag = skb_shinfo(head_skb)->frags;
+ unsigned int mss = skb_shinfo(head_skb)->gso_size;
+ unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
+ struct sk_buff *frag_skb = head_skb;
unsigned int offset = doffset;
- unsigned int tnl_hlen = skb_tnl_header_len(skb);
+ unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
unsigned int headroom;
unsigned int len;
__be16 proto;
bool csum;
int sg = !!(features & NETIF_F_SG);
- int nfrags = skb_shinfo(skb)->nr_frags;
+ int nfrags = skb_shinfo(head_skb)->nr_frags;
int err = -ENOMEM;
int i = 0;
int pos;
- proto = skb_network_protocol(skb);
+ proto = skb_network_protocol(head_skb);
if (unlikely(!proto))
return ERR_PTR(-EINVAL);
csum = !!can_checksum_protocol(features, proto);
- __skb_push(skb, doffset);
- headroom = skb_headroom(skb);
- pos = skb_headlen(skb);
+ __skb_push(head_skb, doffset);
+ headroom = skb_headroom(head_skb);
+ pos = skb_headlen(head_skb);
do {
struct sk_buff *nskb;
- skb_frag_t *frag;
+ skb_frag_t *nskb_frag;
int hsize;
int size;
- len = skb->len - offset;
+ len = head_skb->len - offset;
if (len > mss)
len = mss;
- hsize = skb_headlen(skb) - offset;
+ hsize = skb_headlen(head_skb) - offset;
if (hsize < 0)
hsize = 0;
if (hsize > len || !sg)
hsize = len;
- if (!hsize && i >= nfrags && skb_headlen(fskb) &&
- (skb_headlen(fskb) == len || sg)) {
- BUG_ON(skb_headlen(fskb) > len);
+ if (!hsize && i >= nfrags && skb_headlen(list_skb) &&
+ (skb_headlen(list_skb) == len || sg)) {
+ BUG_ON(skb_headlen(list_skb) > len);
i = 0;
- nfrags = skb_shinfo(fskb)->nr_frags;
- skb_frag = skb_shinfo(fskb)->frags;
- pos += skb_headlen(fskb);
+ nfrags = skb_shinfo(list_skb)->nr_frags;
+ frag = skb_shinfo(list_skb)->frags;
+ frag_skb = list_skb;
+ pos += skb_headlen(list_skb);
while (pos < offset + len) {
BUG_ON(i >= nfrags);
- size = skb_frag_size(skb_frag);
+ size = skb_frag_size(frag);
if (pos + size > offset + len)
break;
i++;
pos += size;
- skb_frag++;
+ frag++;
}
- nskb = skb_clone(fskb, GFP_ATOMIC);
- fskb = fskb->next;
+ nskb = skb_clone(list_skb, GFP_ATOMIC);
+ list_skb = list_skb->next;
if (unlikely(!nskb))
goto err;
@@ -2936,7 +2936,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
__skb_push(nskb, doffset);
} else {
nskb = __alloc_skb(hsize + doffset + headroom,
- GFP_ATOMIC, skb_alloc_rx_flag(skb),
+ GFP_ATOMIC, skb_alloc_rx_flag(head_skb),
NUMA_NO_NODE);
if (unlikely(!nskb))
@@ -2952,12 +2952,12 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
segs = nskb;
tail = nskb;
- __copy_skb_header(nskb, skb);
- nskb->mac_len = skb->mac_len;
+ __copy_skb_header(nskb, head_skb);
+ nskb->mac_len = head_skb->mac_len;
skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom);
- skb_copy_from_linear_data_offset(skb, -tnl_hlen,
+ skb_copy_from_linear_data_offset(head_skb, -tnl_hlen,
nskb->data - tnl_hlen,
doffset + tnl_hlen);
@@ -2966,30 +2966,32 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
if (!sg) {
nskb->ip_summed = CHECKSUM_NONE;
- nskb->csum = skb_copy_and_csum_bits(skb, offset,
+ nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
skb_put(nskb, len),
len, 0);
continue;
}
- frag = skb_shinfo(nskb)->frags;
+ nskb_frag = skb_shinfo(nskb)->frags;
- skb_copy_from_linear_data_offset(skb, offset,
+ skb_copy_from_linear_data_offset(head_skb, offset,
skb_put(nskb, hsize), hsize);
- skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
+ skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags &
+ SKBTX_SHARED_FRAG;
while (pos < offset + len) {
if (i >= nfrags) {
- BUG_ON(skb_headlen(fskb));
+ BUG_ON(skb_headlen(list_skb));
i = 0;
- nfrags = skb_shinfo(fskb)->nr_frags;
- skb_frag = skb_shinfo(fskb)->frags;
+ nfrags = skb_shinfo(list_skb)->nr_frags;
+ frag = skb_shinfo(list_skb)->frags;
+ frag_skb = list_skb;
BUG_ON(!nfrags);
- fskb = fskb->next;
+ list_skb = list_skb->next;
}
if (unlikely(skb_shinfo(nskb)->nr_frags >=
@@ -3000,27 +3002,30 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
goto err;
}
- *frag = *skb_frag;
- __skb_frag_ref(frag);
- size = skb_frag_size(frag);
+ if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
+ goto err;
+
+ *nskb_frag = *frag;
+ __skb_frag_ref(nskb_frag);
+ size = skb_frag_size(nskb_frag);
if (pos < offset) {
- frag->page_offset += offset - pos;
- skb_frag_size_sub(frag, offset - pos);
+ nskb_frag->page_offset += offset - pos;
+ skb_frag_size_sub(nskb_frag, offset - pos);
}
skb_shinfo(nskb)->nr_frags++;
if (pos + size <= offset + len) {
i++;
- skb_frag++;
+ frag++;
pos += size;
} else {
- skb_frag_size_sub(frag, pos + size - (offset + len));
+ skb_frag_size_sub(nskb_frag, pos + size - (offset + len));
goto skip_fraglist;
}
- frag++;
+ nskb_frag++;
}
skip_fraglist:
@@ -3034,7 +3039,7 @@ perform_csum_check:
nskb->len - doffset, 0);
nskb->ip_summed = CHECKSUM_NONE;
}
- } while ((offset += len) < skb->len);
+ } while ((offset += len) < head_skb->len);
return segs;
@@ -3281,6 +3286,32 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
return elt;
}
+/* As compared with skb_to_sgvec, skb_to_sgvec_nomark only map skb to given
+ * sglist without mark the sg which contain last skb data as the end.
+ * So the caller can mannipulate sg list as will when padding new data after
+ * the first call without calling sg_unmark_end to expend sg list.
+ *
+ * Scenario to use skb_to_sgvec_nomark:
+ * 1. sg_init_table
+ * 2. skb_to_sgvec_nomark(payload1)
+ * 3. skb_to_sgvec_nomark(payload2)
+ *
+ * This is equivalent to:
+ * 1. sg_init_table
+ * 2. skb_to_sgvec(payload1)
+ * 3. sg_unmark_end
+ * 4. skb_to_sgvec(payload2)
+ *
+ * When mapping mutilple payload conditionally, skb_to_sgvec_nomark
+ * is more preferable.
+ */
+int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
+ int offset, int len)
+{
+ return __skb_to_sgvec(skb, sg, offset, len);
+}
+EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark);
+
int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
{
int nsg = __skb_to_sgvec(skb, sg, offset, len);
@@ -3543,15 +3574,47 @@ static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len,
return 0;
}
+#define MAX_TCP_HDR_LEN (15 * 4)
+
+static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb,
+ typeof(IPPROTO_IP) proto,
+ unsigned int off)
+{
+ switch (proto) {
+ int err;
+
+ case IPPROTO_TCP:
+ err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr),
+ off + MAX_TCP_HDR_LEN);
+ if (!err && !skb_partial_csum_set(skb, off,
+ offsetof(struct tcphdr,
+ check)))
+ err = -EPROTO;
+ return err ? ERR_PTR(err) : &tcp_hdr(skb)->check;
+
+ case IPPROTO_UDP:
+ err = skb_maybe_pull_tail(skb, off + sizeof(struct udphdr),
+ off + sizeof(struct udphdr));
+ if (!err && !skb_partial_csum_set(skb, off,
+ offsetof(struct udphdr,
+ check)))
+ err = -EPROTO;
+ return err ? ERR_PTR(err) : &udp_hdr(skb)->check;
+ }
+
+ return ERR_PTR(-EPROTO);
+}
+
/* This value should be large enough to cover a tagged ethernet header plus
* maximally sized IP and TCP or UDP headers.
*/
#define MAX_IP_HDR_LEN 128
-static int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate)
+static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate)
{
unsigned int off;
bool fragment;
+ __sum16 *csum;
int err;
fragment = false;
@@ -3572,51 +3635,15 @@ static int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate)
if (fragment)
goto out;
- switch (ip_hdr(skb)->protocol) {
- case IPPROTO_TCP:
- err = skb_maybe_pull_tail(skb,
- off + sizeof(struct tcphdr),
- MAX_IP_HDR_LEN);
- if (err < 0)
- goto out;
-
- if (!skb_partial_csum_set(skb, off,
- offsetof(struct tcphdr, check))) {
- err = -EPROTO;
- goto out;
- }
-
- if (recalculate)
- tcp_hdr(skb)->check =
- ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr,
- skb->len - off,
- IPPROTO_TCP, 0);
- break;
- case IPPROTO_UDP:
- err = skb_maybe_pull_tail(skb,
- off + sizeof(struct udphdr),
- MAX_IP_HDR_LEN);
- if (err < 0)
- goto out;
-
- if (!skb_partial_csum_set(skb, off,
- offsetof(struct udphdr, check))) {
- err = -EPROTO;
- goto out;
- }
-
- if (recalculate)
- udp_hdr(skb)->check =
- ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr,
- skb->len - off,
- IPPROTO_UDP, 0);
- break;
- default:
- goto out;
- }
+ csum = skb_checksum_setup_ip(skb, ip_hdr(skb)->protocol, off);
+ if (IS_ERR(csum))
+ return PTR_ERR(csum);
+ if (recalculate)
+ *csum = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr,
+ skb->len - off,
+ ip_hdr(skb)->protocol, 0);
err = 0;
out:
@@ -3639,6 +3666,7 @@ static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
unsigned int len;
bool fragment;
bool done;
+ __sum16 *csum;
fragment = false;
done = false;
@@ -3716,51 +3744,14 @@ static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
if (!done || fragment)
goto out;
- switch (nexthdr) {
- case IPPROTO_TCP:
- err = skb_maybe_pull_tail(skb,
- off + sizeof(struct tcphdr),
- MAX_IPV6_HDR_LEN);
- if (err < 0)
- goto out;
-
- if (!skb_partial_csum_set(skb, off,
- offsetof(struct tcphdr, check))) {
- err = -EPROTO;
- goto out;
- }
-
- if (recalculate)
- tcp_hdr(skb)->check =
- ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr,
- skb->len - off,
- IPPROTO_TCP, 0);
- break;
- case IPPROTO_UDP:
- err = skb_maybe_pull_tail(skb,
- off + sizeof(struct udphdr),
- MAX_IPV6_HDR_LEN);
- if (err < 0)
- goto out;
-
- if (!skb_partial_csum_set(skb, off,
- offsetof(struct udphdr, check))) {
- err = -EPROTO;
- goto out;
- }
-
- if (recalculate)
- udp_hdr(skb)->check =
- ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr,
- skb->len - off,
- IPPROTO_UDP, 0);
- break;
- default:
- goto out;
- }
+ csum = skb_checksum_setup_ip(skb, nexthdr, off);
+ if (IS_ERR(csum))
+ return PTR_ERR(csum);
+ if (recalculate)
+ *csum = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ skb->len - off, nexthdr, 0);
err = 0;
out:
@@ -3778,7 +3769,7 @@ int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
switch (skb->protocol) {
case htons(ETH_P_IP):
- err = skb_checksum_setup_ip(skb, recalculate);
+ err = skb_checksum_setup_ipv4(skb, recalculate);
break;
case htons(ETH_P_IPV6):
diff --git a/net/core/sock.c b/net/core/sock.c
index 0c127dcdf6a8..c0fc6bdad1e3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1775,7 +1775,9 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
while (order) {
if (npages >= 1 << order) {
page = alloc_pages(sk->sk_allocation |
- __GFP_COMP | __GFP_NOWARN,
+ __GFP_COMP |
+ __GFP_NOWARN |
+ __GFP_NORETRY,
order);
if (page)
goto fill_page;
@@ -1845,7 +1847,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio)
gfp_t gfp = prio;
if (order)
- gfp |= __GFP_COMP | __GFP_NOWARN;
+ gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
pfrag->page = alloc_pages(gfp, order);
if (likely(pfrag->page)) {
pfrag->offset = 0;
@@ -2355,10 +2357,13 @@ void release_sock(struct sock *sk)
if (sk->sk_backlog.tail)
__release_sock(sk);
+ /* Warning : release_cb() might need to release sk ownership,
+ * ie call sock_release_ownership(sk) before us.
+ */
if (sk->sk_prot->release_cb)
sk->sk_prot->release_cb(sk);
- sk->sk_lock.owned = 0;
+ sock_release_ownership(sk);
if (waitqueue_active(&sk->sk_lock.wq))
wake_up(&sk->sk_lock.wq);
spin_unlock_bh(&sk->sk_lock.slock);
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
index c073b81a1f3e..62b5828acde0 100644
--- a/net/dccp/ccids/lib/tfrc.c
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -8,7 +8,7 @@
#include "tfrc.h"
#ifdef CONFIG_IP_DCCP_TFRC_DEBUG
-static bool tfrc_debug;
+bool tfrc_debug;
module_param(tfrc_debug, bool, 0644);
MODULE_PARM_DESC(tfrc_debug, "Enable TFRC debug messages");
#endif
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index a3d8f7c76ae0..40ee7d62b652 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -21,6 +21,7 @@
#include "packet_history.h"
#ifdef CONFIG_IP_DCCP_TFRC_DEBUG
+extern bool tfrc_debug;
#define tfrc_pr_debug(format, a...) DCCP_PR_DEBUG(tfrc_debug, format, ##a)
#else
#define tfrc_pr_debug(format, a...)
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 2954dcbca832..4c04848953bd 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -2104,8 +2104,6 @@ static struct notifier_block dn_dev_notifier = {
.notifier_call = dn_device_event,
};
-extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
-
static struct packet_type dn_dix_packet_type __read_mostly = {
.type = cpu_to_be16(ETH_P_DNA_RT),
.func = dn_route_rcv,
@@ -2353,9 +2351,6 @@ static const struct proto_ops dn_proto_ops = {
.sendpage = sock_no_sendpage,
};
-void dn_register_sysctl(void);
-void dn_unregister_sysctl(void);
-
MODULE_DESCRIPTION("The Linux DECnet Network Protocol");
MODULE_AUTHOR("Linux DECnet Project Team");
MODULE_LICENSE("GPL");
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index cac505f166d5..e5302b7f7ca9 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -209,7 +209,7 @@ static int slave_xmit(struct sk_buff *skb, struct hsr_priv *hsr_priv,
/* Address substitution (IEC62439-3 pp 26, 50): replace mac
* address of outgoing frame with that of the outgoing slave's.
*/
- memcpy(hsr_ethhdr->ethhdr.h_source, skb->dev->dev_addr, ETH_ALEN);
+ ether_addr_copy(hsr_ethhdr->ethhdr.h_source, skb->dev->dev_addr);
return dev_queue_xmit(skb);
}
@@ -346,7 +346,7 @@ static void send_hsr_supervision_frame(struct net_device *hsr_dev, u8 type)
/* Payload: MacAddressA */
hsr_sp = (typeof(hsr_sp)) skb_put(skb, sizeof(*hsr_sp));
- memcpy(hsr_sp->MacAddressA, hsr_dev->dev_addr, ETH_ALEN);
+ ether_addr_copy(hsr_sp->MacAddressA, hsr_dev->dev_addr);
dev_queue_xmit(skb);
return;
@@ -493,7 +493,7 @@ static int check_slave_ok(struct net_device *dev)
/* Default multicast address for HSR Supervision frames */
-static const unsigned char def_multicast_addr[ETH_ALEN] = {
+static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = {
0x01, 0x15, 0x4e, 0x00, 0x01, 0x00
};
@@ -519,7 +519,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
hsr_priv->announce_timer.function = hsr_announce;
hsr_priv->announce_timer.data = (unsigned long) hsr_priv;
- memcpy(hsr_priv->sup_multicast_addr, def_multicast_addr, ETH_ALEN);
+ ether_addr_copy(hsr_priv->sup_multicast_addr, def_multicast_addr);
hsr_priv->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec;
/* FIXME: should I modify the value of these?
@@ -547,7 +547,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
hsr_dev->features |= NETIF_F_VLAN_CHALLENGED;
/* Set hsr_dev's MAC address to that of mac_slave1 */
- memcpy(hsr_dev->dev_addr, hsr_priv->slave[0]->dev_addr, ETH_ALEN);
+ ether_addr_copy(hsr_dev->dev_addr, hsr_priv->slave[0]->dev_addr);
/* Set required header length */
for (i = 0; i < HSR_MAX_SLAVE; i++) {
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 327060c6c874..83e58449366a 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -108,8 +108,8 @@ int hsr_create_self_node(struct list_head *self_node_db,
if (!node)
return -ENOMEM;
- memcpy(node->MacAddressA, addr_a, ETH_ALEN);
- memcpy(node->MacAddressB, addr_b, ETH_ALEN);
+ ether_addr_copy(node->MacAddressA, addr_a);
+ ether_addr_copy(node->MacAddressB, addr_b);
rcu_read_lock();
oldnode = list_first_or_null_rcu(self_node_db,
@@ -199,7 +199,7 @@ struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv,
/* Node is known, but frame was received from an unknown
* address. Node is PICS_SUBS capable; merge its AddrB.
*/
- memcpy(node->MacAddressB, hsr_ethsup->ethhdr.h_source, ETH_ALEN);
+ ether_addr_copy(node->MacAddressB, hsr_ethsup->ethhdr.h_source);
node->AddrB_if = dev_idx;
return node;
}
@@ -208,8 +208,8 @@ struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv,
if (!node)
return NULL;
- memcpy(node->MacAddressA, hsr_sp->MacAddressA, ETH_ALEN);
- memcpy(node->MacAddressB, hsr_ethsup->ethhdr.h_source, ETH_ALEN);
+ ether_addr_copy(node->MacAddressA, hsr_sp->MacAddressA);
+ ether_addr_copy(node->MacAddressB, hsr_ethsup->ethhdr.h_source);
if (!ether_addr_equal(hsr_sp->MacAddressA, hsr_ethsup->ethhdr.h_source))
node->AddrB_if = dev_idx;
else
@@ -250,7 +250,7 @@ void hsr_addr_subst_source(struct hsr_priv *hsr_priv, struct sk_buff *skb)
rcu_read_lock();
node = find_node_by_AddrB(&hsr_priv->node_db, ethhdr->h_source);
if (node)
- memcpy(ethhdr->h_source, node->MacAddressA, ETH_ALEN);
+ ether_addr_copy(ethhdr->h_source, node->MacAddressA);
rcu_read_unlock();
}
@@ -272,7 +272,7 @@ void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr,
rcu_read_lock();
node = find_node_by_AddrA(&hsr_priv->node_db, ethhdr->h_dest);
if (node && (node->AddrB_if == dev_idx))
- memcpy(ethhdr->h_dest, node->MacAddressB, ETH_ALEN);
+ ether_addr_copy(ethhdr->h_dest, node->MacAddressB);
rcu_read_unlock();
}
@@ -297,7 +297,7 @@ static bool seq_nr_after(u16 a, u16 b)
void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx)
{
- if ((dev_idx < 0) || (dev_idx >= HSR_MAX_DEV)) {
+ if ((dev_idx < 0) || (dev_idx >= HSR_MAX_SLAVE)) {
WARN_ONCE(1, "%s: Invalid dev_idx (%d)\n", __func__, dev_idx);
return;
}
@@ -428,13 +428,13 @@ void *hsr_get_next_node(struct hsr_priv *hsr_priv, void *_pos,
node = list_first_or_null_rcu(&hsr_priv->node_db,
struct node_entry, mac_list);
if (node)
- memcpy(addr, node->MacAddressA, ETH_ALEN);
+ ether_addr_copy(addr, node->MacAddressA);
return node;
}
node = _pos;
list_for_each_entry_continue_rcu(node, &hsr_priv->node_db, mac_list) {
- memcpy(addr, node->MacAddressA, ETH_ALEN);
+ ether_addr_copy(addr, node->MacAddressA);
return node;
}
@@ -462,7 +462,7 @@ int hsr_get_node_data(struct hsr_priv *hsr_priv,
return -ENOENT; /* No such entry */
}
- memcpy(addr_b, node->MacAddressB, ETH_ALEN);
+ ether_addr_copy(addr_b, node->MacAddressB);
tdiff = jiffies - node->time_in[HSR_DEV_SLAVE_A];
if (node->time_in_stale[HSR_DEV_SLAVE_A])
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index af68dd83a4e3..10010c543edf 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -138,8 +138,8 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
break;
if (dev == hsr_priv->slave[0])
- memcpy(hsr_priv->dev->dev_addr,
- hsr_priv->slave[0]->dev_addr, ETH_ALEN);
+ ether_addr_copy(hsr_priv->dev->dev_addr,
+ hsr_priv->slave[0]->dev_addr);
/* Make sure we recognize frames from ourselves in hsr_rcv() */
res = hsr_create_self_node(&hsr_priv->self_node_db,
diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h
deleted file mode 100644
index 2b835db3bda8..000000000000
--- a/net/ieee802154/6lowpan.h
+++ /dev/null
@@ -1,319 +0,0 @@
-/*
- * Copyright 2011, Siemens AG
- * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
- */
-
-/*
- * Based on patches from Jon Smirl <jonsmirl@gmail.com>
- * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-/* Jon's code is based on 6lowpan implementation for Contiki which is:
- * Copyright (c) 2008, Swedish Institute of Computer Science.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the Institute nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef __6LOWPAN_H__
-#define __6LOWPAN_H__
-
-#define UIP_802154_SHORTADDR_LEN 2 /* compressed ipv6 address length */
-#define UIP_IPH_LEN 40 /* ipv6 fixed header size */
-#define UIP_PROTO_UDP 17 /* ipv6 next header value for UDP */
-#define UIP_FRAGH_LEN 8 /* ipv6 fragment header size */
-
-/*
- * ipv6 address based on mac
- * second bit-flip (Universe/Local) is done according RFC2464
- */
-#define is_addr_mac_addr_based(a, m) \
- ((((a)->s6_addr[8]) == (((m)[0]) ^ 0x02)) && \
- (((a)->s6_addr[9]) == (m)[1]) && \
- (((a)->s6_addr[10]) == (m)[2]) && \
- (((a)->s6_addr[11]) == (m)[3]) && \
- (((a)->s6_addr[12]) == (m)[4]) && \
- (((a)->s6_addr[13]) == (m)[5]) && \
- (((a)->s6_addr[14]) == (m)[6]) && \
- (((a)->s6_addr[15]) == (m)[7]))
-
-/* ipv6 address is unspecified */
-#define is_addr_unspecified(a) \
- ((((a)->s6_addr32[0]) == 0) && \
- (((a)->s6_addr32[1]) == 0) && \
- (((a)->s6_addr32[2]) == 0) && \
- (((a)->s6_addr32[3]) == 0))
-
-/* compare ipv6 addresses prefixes */
-#define ipaddr_prefixcmp(addr1, addr2, length) \
- (memcmp(addr1, addr2, length >> 3) == 0)
-
-/* local link, i.e. FE80::/10 */
-#define is_addr_link_local(a) (((a)->s6_addr16[0]) == htons(0xFE80))
-
-/*
- * check whether we can compress the IID to 16 bits,
- * it's possible for unicast adresses with first 49 bits are zero only.
- */
-#define lowpan_is_iid_16_bit_compressable(a) \
- ((((a)->s6_addr16[4]) == 0) && \
- (((a)->s6_addr[10]) == 0) && \
- (((a)->s6_addr[11]) == 0xff) && \
- (((a)->s6_addr[12]) == 0xfe) && \
- (((a)->s6_addr[13]) == 0))
-
-/* multicast address */
-#define is_addr_mcast(a) (((a)->s6_addr[0]) == 0xFF)
-
-/* check whether the 112-bit gid of the multicast address is mappable to: */
-
-/* 9 bits, for FF02::1 (all nodes) and FF02::2 (all routers) addresses only. */
-#define lowpan_is_mcast_addr_compressable(a) \
- ((((a)->s6_addr16[1]) == 0) && \
- (((a)->s6_addr16[2]) == 0) && \
- (((a)->s6_addr16[3]) == 0) && \
- (((a)->s6_addr16[4]) == 0) && \
- (((a)->s6_addr16[5]) == 0) && \
- (((a)->s6_addr16[6]) == 0) && \
- (((a)->s6_addr[14]) == 0) && \
- ((((a)->s6_addr[15]) == 1) || (((a)->s6_addr[15]) == 2)))
-
-/* 48 bits, FFXX::00XX:XXXX:XXXX */
-#define lowpan_is_mcast_addr_compressable48(a) \
- ((((a)->s6_addr16[1]) == 0) && \
- (((a)->s6_addr16[2]) == 0) && \
- (((a)->s6_addr16[3]) == 0) && \
- (((a)->s6_addr16[4]) == 0) && \
- (((a)->s6_addr[10]) == 0))
-
-/* 32 bits, FFXX::00XX:XXXX */
-#define lowpan_is_mcast_addr_compressable32(a) \
- ((((a)->s6_addr16[1]) == 0) && \
- (((a)->s6_addr16[2]) == 0) && \
- (((a)->s6_addr16[3]) == 0) && \
- (((a)->s6_addr16[4]) == 0) && \
- (((a)->s6_addr16[5]) == 0) && \
- (((a)->s6_addr[12]) == 0))
-
-/* 8 bits, FF02::00XX */
-#define lowpan_is_mcast_addr_compressable8(a) \
- ((((a)->s6_addr[1]) == 2) && \
- (((a)->s6_addr16[1]) == 0) && \
- (((a)->s6_addr16[2]) == 0) && \
- (((a)->s6_addr16[3]) == 0) && \
- (((a)->s6_addr16[4]) == 0) && \
- (((a)->s6_addr16[5]) == 0) && \
- (((a)->s6_addr16[6]) == 0) && \
- (((a)->s6_addr[14]) == 0))
-
-#define lowpan_is_addr_broadcast(a) \
- ((((a)[0]) == 0xFF) && \
- (((a)[1]) == 0xFF) && \
- (((a)[2]) == 0xFF) && \
- (((a)[3]) == 0xFF) && \
- (((a)[4]) == 0xFF) && \
- (((a)[5]) == 0xFF) && \
- (((a)[6]) == 0xFF) && \
- (((a)[7]) == 0xFF))
-
-#define LOWPAN_DISPATCH_IPV6 0x41 /* 01000001 = 65 */
-#define LOWPAN_DISPATCH_HC1 0x42 /* 01000010 = 66 */
-#define LOWPAN_DISPATCH_IPHC 0x60 /* 011xxxxx = ... */
-#define LOWPAN_DISPATCH_FRAG1 0xc0 /* 11000xxx */
-#define LOWPAN_DISPATCH_FRAGN 0xe0 /* 11100xxx */
-
-#define LOWPAN_DISPATCH_MASK 0xf8 /* 11111000 */
-
-#define LOWPAN_FRAG_TIMEOUT (HZ * 60) /* time-out 60 sec */
-
-#define LOWPAN_FRAG1_HEAD_SIZE 0x4
-#define LOWPAN_FRAGN_HEAD_SIZE 0x5
-
-/*
- * According IEEE802.15.4 standard:
- * - MTU is 127 octets
- * - maximum MHR size is 37 octets
- * - MFR size is 2 octets
- *
- * so minimal payload size that we may guarantee is:
- * MTU - MHR - MFR = 88 octets
- */
-#define LOWPAN_FRAG_SIZE 88
-
-/*
- * Values of fields within the IPHC encoding first byte
- * (C stands for compressed and I for inline)
- */
-#define LOWPAN_IPHC_TF 0x18
-
-#define LOWPAN_IPHC_FL_C 0x10
-#define LOWPAN_IPHC_TC_C 0x08
-#define LOWPAN_IPHC_NH_C 0x04
-#define LOWPAN_IPHC_TTL_1 0x01
-#define LOWPAN_IPHC_TTL_64 0x02
-#define LOWPAN_IPHC_TTL_255 0x03
-#define LOWPAN_IPHC_TTL_I 0x00
-
-
-/* Values of fields within the IPHC encoding second byte */
-#define LOWPAN_IPHC_CID 0x80
-
-#define LOWPAN_IPHC_ADDR_00 0x00
-#define LOWPAN_IPHC_ADDR_01 0x01
-#define LOWPAN_IPHC_ADDR_02 0x02
-#define LOWPAN_IPHC_ADDR_03 0x03
-
-#define LOWPAN_IPHC_SAC 0x40
-#define LOWPAN_IPHC_SAM 0x30
-
-#define LOWPAN_IPHC_SAM_BIT 4
-
-#define LOWPAN_IPHC_M 0x08
-#define LOWPAN_IPHC_DAC 0x04
-#define LOWPAN_IPHC_DAM_00 0x00
-#define LOWPAN_IPHC_DAM_01 0x01
-#define LOWPAN_IPHC_DAM_10 0x02
-#define LOWPAN_IPHC_DAM_11 0x03
-
-#define LOWPAN_IPHC_DAM_BIT 0
-/*
- * LOWPAN_UDP encoding (works together with IPHC)
- */
-#define LOWPAN_NHC_UDP_MASK 0xF8
-#define LOWPAN_NHC_UDP_ID 0xF0
-#define LOWPAN_NHC_UDP_CHECKSUMC 0x04
-#define LOWPAN_NHC_UDP_CHECKSUMI 0x00
-
-#define LOWPAN_NHC_UDP_4BIT_PORT 0xF0B0
-#define LOWPAN_NHC_UDP_4BIT_MASK 0xFFF0
-#define LOWPAN_NHC_UDP_8BIT_PORT 0xF000
-#define LOWPAN_NHC_UDP_8BIT_MASK 0xFF00
-
-/* values for port compression, _with checksum_ ie bit 5 set to 0 */
-#define LOWPAN_NHC_UDP_CS_P_00 0xF0 /* all inline */
-#define LOWPAN_NHC_UDP_CS_P_01 0xF1 /* source 16bit inline,
- dest = 0xF0 + 8 bit inline */
-#define LOWPAN_NHC_UDP_CS_P_10 0xF2 /* source = 0xF0 + 8bit inline,
- dest = 16 bit inline */
-#define LOWPAN_NHC_UDP_CS_P_11 0xF3 /* source & dest = 0xF0B + 4bit inline */
-#define LOWPAN_NHC_UDP_CS_C 0x04 /* checksum elided */
-
-#ifdef DEBUG
-/* print data in line */
-static inline void raw_dump_inline(const char *caller, char *msg,
- unsigned char *buf, int len)
-{
- if (msg)
- pr_debug("%s():%s: ", caller, msg);
-
- print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, buf, len, false);
-}
-
-/* print data in a table format:
- *
- * addr: xx xx xx xx xx xx
- * addr: xx xx xx xx xx xx
- * ...
- */
-static inline void raw_dump_table(const char *caller, char *msg,
- unsigned char *buf, int len)
-{
- if (msg)
- pr_debug("%s():%s:\n", caller, msg);
-
- print_hex_dump_debug("\t", DUMP_PREFIX_OFFSET, 16, 1, buf, len, false);
-}
-#else
-static inline void raw_dump_table(const char *caller, char *msg,
- unsigned char *buf, int len) { }
-static inline void raw_dump_inline(const char *caller, char *msg,
- unsigned char *buf, int len) { }
-#endif
-
-static inline int lowpan_fetch_skb_u8(struct sk_buff *skb, u8 *val)
-{
- if (unlikely(!pskb_may_pull(skb, 1)))
- return -EINVAL;
-
- *val = skb->data[0];
- skb_pull(skb, 1);
-
- return 0;
-}
-
-static inline int lowpan_fetch_skb_u16(struct sk_buff *skb, u16 *val)
-{
- if (unlikely(!pskb_may_pull(skb, 2)))
- return -EINVAL;
-
- *val = (skb->data[0] << 8) | skb->data[1];
- skb_pull(skb, 2);
-
- return 0;
-}
-
-static inline bool lowpan_fetch_skb(struct sk_buff *skb,
- void *data, const unsigned int len)
-{
- if (unlikely(!pskb_may_pull(skb, len)))
- return true;
-
- skb_copy_from_linear_data(skb, data, len);
- skb_pull(skb, len);
-
- return false;
-}
-
-static inline void lowpan_push_hc_data(u8 **hc_ptr, const void *data,
- const size_t len)
-{
- memcpy(*hc_ptr, data, len);
- *hc_ptr += len;
-}
-
-typedef int (*skb_delivery_cb)(struct sk_buff *skb, struct net_device *dev);
-
-int lowpan_process_data(struct sk_buff *skb, struct net_device *dev,
- const u8 *saddr, const u8 saddr_type, const u8 saddr_len,
- const u8 *daddr, const u8 daddr_type, const u8 daddr_len,
- u8 iphc0, u8 iphc1, skb_delivery_cb skb_deliver);
-int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
- unsigned short type, const void *_daddr,
- const void *_saddr, unsigned int len);
-
-#endif /* __6LOWPAN_H__ */
diff --git a/net/ieee802154/6lowpan_iphc.c b/net/ieee802154/6lowpan_iphc.c
index 860aa2d445ba..211b5686d719 100644
--- a/net/ieee802154/6lowpan_iphc.c
+++ b/net/ieee802154/6lowpan_iphc.c
@@ -54,11 +54,10 @@
#include <linux/if_arp.h>
#include <linux/module.h>
#include <linux/netdevice.h>
+#include <net/6lowpan.h>
#include <net/ipv6.h>
#include <net/af_ieee802154.h>
-#include "6lowpan.h"
-
/*
* Uncompress address function for source and
* destination address(non-multicast).
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan_rtnl.c
index 48b25c0af4d0..0f5a69ed746d 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan_rtnl.c
@@ -1,10 +1,8 @@
-/*
- * Copyright 2011, Siemens AG
+/* Copyright 2011, Siemens AG
* written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
*/
-/*
- * Based on patches from Jon Smirl <jonsmirl@gmail.com>
+/* Based on patches from Jon Smirl <jonsmirl@gmail.com>
* Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
@@ -15,10 +13,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
/* Jon's code is based on 6lowpan implementation for Contiki which is:
@@ -58,9 +52,10 @@
#include <net/af_ieee802154.h>
#include <net/ieee802154.h>
#include <net/ieee802154_netdev.h>
+#include <net/6lowpan.h>
#include <net/ipv6.h>
-#include "6lowpan.h"
+#include "reassembly.h"
static LIST_HEAD(lowpan_devices);
@@ -68,7 +63,7 @@ static LIST_HEAD(lowpan_devices);
struct lowpan_dev_info {
struct net_device *real_dev; /* real WPAN device ptr */
struct mutex dev_list_mtx; /* mutex for list ops */
- unsigned short fragment_tag;
+ __be16 fragment_tag;
};
struct lowpan_dev_record {
@@ -76,18 +71,6 @@ struct lowpan_dev_record {
struct list_head list;
};
-struct lowpan_fragment {
- struct sk_buff *skb; /* skb to be assembled */
- u16 length; /* length to be assemled */
- u32 bytes_rcv; /* bytes received */
- u16 tag; /* current fragment tag */
- struct timer_list timer; /* assembling timer */
- struct list_head list; /* fragments list */
-};
-
-static LIST_HEAD(lowpan_fragments);
-static DEFINE_SPINLOCK(flist_lock);
-
static inline struct
lowpan_dev_info *lowpan_dev_info(const struct net_device *dev)
{
@@ -106,7 +89,6 @@ static int lowpan_header_create(struct sk_buff *skb,
unsigned short type, const void *_daddr,
const void *_saddr, unsigned int len)
{
- struct ipv6hdr *hdr;
const u8 *saddr = _saddr;
const u8 *daddr = _daddr;
struct ieee802154_addr sa, da;
@@ -117,8 +99,6 @@ static int lowpan_header_create(struct sk_buff *skb,
if (type != ETH_P_IPV6)
return 0;
- hdr = ipv6_hdr(skb);
-
if (!saddr)
saddr = dev->dev_addr;
@@ -127,13 +107,11 @@ static int lowpan_header_create(struct sk_buff *skb,
lowpan_header_compress(skb, dev, type, daddr, saddr, len);
- /*
- * NOTE1: I'm still unsure about the fact that compression and WPAN
+ /* NOTE1: I'm still unsure about the fact that compression and WPAN
* header are created here and not later in the xmit. So wait for
* an opinion of net maintainers.
*/
- /*
- * NOTE2: to be absolutely correct, we must derive PANid information
+ /* NOTE2: to be absolutely correct, we must derive PANid information
* from MAC subif of the 'dev' and 'real_dev' network devices, but
* this isn't implemented in mainline yet, so currently we assign 0xff
*/
@@ -141,30 +119,29 @@ static int lowpan_header_create(struct sk_buff *skb,
mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev);
/* prepare wpan address data */
- sa.addr_type = IEEE802154_ADDR_LONG;
+ sa.mode = IEEE802154_ADDR_LONG;
sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
+ sa.extended_addr = ieee802154_devaddr_from_raw(saddr);
- memcpy(&(sa.hwaddr), saddr, 8);
/* intra-PAN communications */
- da.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
+ da.pan_id = sa.pan_id;
- /*
- * if the destination address is the broadcast address, use the
+ /* if the destination address is the broadcast address, use the
* corresponding short address
*/
if (lowpan_is_addr_broadcast(daddr)) {
- da.addr_type = IEEE802154_ADDR_SHORT;
- da.short_addr = IEEE802154_ADDR_BROADCAST;
+ da.mode = IEEE802154_ADDR_SHORT;
+ da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST);
} else {
- da.addr_type = IEEE802154_ADDR_LONG;
- memcpy(&(da.hwaddr), daddr, IEEE802154_ADDR_LEN);
+ da.mode = IEEE802154_ADDR_LONG;
+ da.extended_addr = ieee802154_devaddr_from_raw(daddr);
/* request acknowledgment */
mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ;
}
return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev,
- type, (void *)&da, (void *)&sa, skb->len);
+ type, (void *)&da, (void *)&sa, 0);
}
static int lowpan_give_skb_to_devices(struct sk_buff *skb,
@@ -191,73 +168,11 @@ static int lowpan_give_skb_to_devices(struct sk_buff *skb,
return stat;
}
-static void lowpan_fragment_timer_expired(unsigned long entry_addr)
-{
- struct lowpan_fragment *entry = (struct lowpan_fragment *)entry_addr;
-
- pr_debug("timer expired for frame with tag %d\n", entry->tag);
-
- list_del(&entry->list);
- dev_kfree_skb(entry->skb);
- kfree(entry);
-}
-
-static struct lowpan_fragment *
-lowpan_alloc_new_frame(struct sk_buff *skb, u16 len, u16 tag)
-{
- struct lowpan_fragment *frame;
-
- frame = kzalloc(sizeof(struct lowpan_fragment),
- GFP_ATOMIC);
- if (!frame)
- goto frame_err;
-
- INIT_LIST_HEAD(&frame->list);
-
- frame->length = len;
- frame->tag = tag;
-
- /* allocate buffer for frame assembling */
- frame->skb = netdev_alloc_skb_ip_align(skb->dev, frame->length +
- sizeof(struct ipv6hdr));
-
- if (!frame->skb)
- goto skb_err;
-
- frame->skb->priority = skb->priority;
-
- /* reserve headroom for uncompressed ipv6 header */
- skb_reserve(frame->skb, sizeof(struct ipv6hdr));
- skb_put(frame->skb, frame->length);
-
- /* copy the first control block to keep a
- * trace of the link-layer addresses in case
- * of a link-local compressed address
- */
- memcpy(frame->skb->cb, skb->cb, sizeof(skb->cb));
-
- init_timer(&frame->timer);
- /* time out is the same as for ipv6 - 60 sec */
- frame->timer.expires = jiffies + LOWPAN_FRAG_TIMEOUT;
- frame->timer.data = (unsigned long)frame;
- frame->timer.function = lowpan_fragment_timer_expired;
-
- add_timer(&frame->timer);
-
- list_add_tail(&frame->list, &lowpan_fragments);
-
- return frame;
-
-skb_err:
- kfree(frame);
-frame_err:
- return NULL;
-}
-
-static int process_data(struct sk_buff *skb)
+static int process_data(struct sk_buff *skb, const struct ieee802154_hdr *hdr)
{
u8 iphc0, iphc1;
- const struct ieee802154_addr *_saddr, *_daddr;
+ struct ieee802154_addr_sa sa, da;
+ void *sap, *dap;
raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len);
/* at least two bytes will be used for the encoding */
@@ -267,108 +182,27 @@ static int process_data(struct sk_buff *skb)
if (lowpan_fetch_skb_u8(skb, &iphc0))
goto drop;
- /* fragments assembling */
- switch (iphc0 & LOWPAN_DISPATCH_MASK) {
- case LOWPAN_DISPATCH_FRAG1:
- case LOWPAN_DISPATCH_FRAGN:
- {
- struct lowpan_fragment *frame;
- /* slen stores the rightmost 8 bits of the 11 bits length */
- u8 slen, offset = 0;
- u16 len, tag;
- bool found = false;
-
- if (lowpan_fetch_skb_u8(skb, &slen) || /* frame length */
- lowpan_fetch_skb_u16(skb, &tag)) /* fragment tag */
- goto drop;
-
- /* adds the 3 MSB to the 8 LSB to retrieve the 11 bits length */
- len = ((iphc0 & 7) << 8) | slen;
-
- if ((iphc0 & LOWPAN_DISPATCH_MASK) == LOWPAN_DISPATCH_FRAG1) {
- pr_debug("%s received a FRAG1 packet (tag: %d, "
- "size of the entire IP packet: %d)",
- __func__, tag, len);
- } else { /* FRAGN */
- if (lowpan_fetch_skb_u8(skb, &offset))
- goto unlock_and_drop;
- pr_debug("%s received a FRAGN packet (tag: %d, "
- "size of the entire IP packet: %d, "
- "offset: %d)", __func__, tag, len, offset * 8);
- }
-
- /*
- * check if frame assembling with the same tag is
- * already in progress
- */
- spin_lock_bh(&flist_lock);
-
- list_for_each_entry(frame, &lowpan_fragments, list)
- if (frame->tag == tag) {
- found = true;
- break;
- }
-
- /* alloc new frame structure */
- if (!found) {
- pr_debug("%s first fragment received for tag %d, "
- "begin packet reassembly", __func__, tag);
- frame = lowpan_alloc_new_frame(skb, len, tag);
- if (!frame)
- goto unlock_and_drop;
- }
-
- /* if payload fits buffer, copy it */
- if (likely((offset * 8 + skb->len) <= frame->length))
- skb_copy_to_linear_data_offset(frame->skb, offset * 8,
- skb->data, skb->len);
- else
- goto unlock_and_drop;
-
- frame->bytes_rcv += skb->len;
-
- /* frame assembling complete */
- if ((frame->bytes_rcv == frame->length) &&
- frame->timer.expires > jiffies) {
- /* if timer haven't expired - first of all delete it */
- del_timer_sync(&frame->timer);
- list_del(&frame->list);
- spin_unlock_bh(&flist_lock);
-
- pr_debug("%s successfully reassembled fragment "
- "(tag %d)", __func__, tag);
-
- dev_kfree_skb(skb);
- skb = frame->skb;
- kfree(frame);
-
- if (lowpan_fetch_skb_u8(skb, &iphc0))
- goto drop;
-
- break;
- }
- spin_unlock_bh(&flist_lock);
-
- return kfree_skb(skb), 0;
- }
- default:
- break;
- }
-
if (lowpan_fetch_skb_u8(skb, &iphc1))
goto drop;
- _saddr = &mac_cb(skb)->sa;
- _daddr = &mac_cb(skb)->da;
+ ieee802154_addr_to_sa(&sa, &hdr->source);
+ ieee802154_addr_to_sa(&da, &hdr->dest);
+
+ if (sa.addr_type == IEEE802154_ADDR_SHORT)
+ sap = &sa.short_addr;
+ else
+ sap = &sa.hwaddr;
- return lowpan_process_data(skb, skb->dev, (u8 *)_saddr->hwaddr,
- _saddr->addr_type, IEEE802154_ADDR_LEN,
- (u8 *)_daddr->hwaddr, _daddr->addr_type,
- IEEE802154_ADDR_LEN, iphc0, iphc1,
- lowpan_give_skb_to_devices);
+ if (da.addr_type == IEEE802154_ADDR_SHORT)
+ dap = &da.short_addr;
+ else
+ dap = &da.hwaddr;
+
+ return lowpan_process_data(skb, skb->dev, sap, sa.addr_type,
+ IEEE802154_ADDR_LEN, dap, da.addr_type,
+ IEEE802154_ADDR_LEN, iphc0, iphc1,
+ lowpan_give_skb_to_devices);
-unlock_and_drop:
- spin_unlock_bh(&flist_lock);
drop:
kfree_skb(skb);
return -EINVAL;
@@ -389,7 +223,7 @@ static int lowpan_set_address(struct net_device *dev, void *p)
static int
lowpan_fragment_xmit(struct sk_buff *skb, u8 *head,
- int mlen, int plen, int offset, int type)
+ int mlen, int plen, int offset, int type)
{
struct sk_buff *frag;
int hlen;
@@ -425,51 +259,68 @@ lowpan_fragment_xmit(struct sk_buff *skb, u8 *head,
static int
lowpan_skb_fragmentation(struct sk_buff *skb, struct net_device *dev)
{
- int err, header_length, payload_length, tag, offset = 0;
+ int err;
+ u16 dgram_offset, dgram_size, payload_length, header_length,
+ lowpan_size, frag_plen, offset;
+ __be16 tag;
u8 head[5];
header_length = skb->mac_len;
payload_length = skb->len - header_length;
tag = lowpan_dev_info(dev)->fragment_tag++;
+ lowpan_size = skb_network_header_len(skb);
+ dgram_size = lowpan_uncompress_size(skb, &dgram_offset) -
+ header_length;
/* first fragment header */
- head[0] = LOWPAN_DISPATCH_FRAG1 | ((payload_length >> 8) & 0x7);
- head[1] = payload_length & 0xff;
- head[2] = tag >> 8;
- head[3] = tag & 0xff;
+ head[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x7);
+ head[1] = dgram_size & 0xff;
+ memcpy(head + 2, &tag, sizeof(tag));
- err = lowpan_fragment_xmit(skb, head, header_length, LOWPAN_FRAG_SIZE,
- 0, LOWPAN_DISPATCH_FRAG1);
+ /* calc the nearest payload length(divided to 8) for first fragment
+ * which fits into a IEEE802154_MTU
+ */
+ frag_plen = round_down(IEEE802154_MTU - header_length -
+ LOWPAN_FRAG1_HEAD_SIZE - lowpan_size -
+ IEEE802154_MFR_SIZE, 8);
+ err = lowpan_fragment_xmit(skb, head, header_length,
+ frag_plen + lowpan_size, 0,
+ LOWPAN_DISPATCH_FRAG1);
if (err) {
pr_debug("%s unable to send FRAG1 packet (tag: %d)",
__func__, tag);
goto exit;
}
- offset = LOWPAN_FRAG_SIZE;
+ offset = lowpan_size + frag_plen;
+ dgram_offset += frag_plen;
/* next fragment header */
head[0] &= ~LOWPAN_DISPATCH_FRAG1;
head[0] |= LOWPAN_DISPATCH_FRAGN;
+ frag_plen = round_down(IEEE802154_MTU - header_length -
+ LOWPAN_FRAGN_HEAD_SIZE - IEEE802154_MFR_SIZE, 8);
+
while (payload_length - offset > 0) {
- int len = LOWPAN_FRAG_SIZE;
+ int len = frag_plen;
- head[4] = offset / 8;
+ head[4] = dgram_offset >> 3;
if (payload_length - offset < len)
len = payload_length - offset;
- err = lowpan_fragment_xmit(skb, head, header_length,
- len, offset, LOWPAN_DISPATCH_FRAGN);
+ err = lowpan_fragment_xmit(skb, head, header_length, len,
+ offset, LOWPAN_DISPATCH_FRAGN);
if (err) {
- pr_debug("%s unable to send a subsequent FRAGN packet "
- "(tag: %d, offset: %d", __func__, tag, offset);
+ pr_debug("%s unable to send a FRAGN packet. (tag: %d, offset: %d)\n",
+ __func__, tag, offset);
goto exit;
}
offset += len;
+ dgram_offset += len;
}
exit:
@@ -511,13 +362,13 @@ static struct wpan_phy *lowpan_get_phy(const struct net_device *dev)
return ieee802154_mlme_ops(real_dev)->get_phy(real_dev);
}
-static u16 lowpan_get_pan_id(const struct net_device *dev)
+static __le16 lowpan_get_pan_id(const struct net_device *dev)
{
struct net_device *real_dev = lowpan_dev_info(dev)->real_dev;
return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev);
}
-static u16 lowpan_get_short_addr(const struct net_device *dev)
+static __le16 lowpan_get_short_addr(const struct net_device *dev)
{
struct net_device *real_dev = lowpan_dev_info(dev)->real_dev;
return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev);
@@ -533,7 +384,27 @@ static struct header_ops lowpan_header_ops = {
.create = lowpan_header_create,
};
+static struct lock_class_key lowpan_tx_busylock;
+static struct lock_class_key lowpan_netdev_xmit_lock_key;
+
+static void lowpan_set_lockdep_class_one(struct net_device *dev,
+ struct netdev_queue *txq,
+ void *_unused)
+{
+ lockdep_set_class(&txq->_xmit_lock,
+ &lowpan_netdev_xmit_lock_key);
+}
+
+
+static int lowpan_dev_init(struct net_device *dev)
+{
+ netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL);
+ dev->qdisc_tx_busylock = &lowpan_tx_busylock;
+ return 0;
+}
+
static const struct net_device_ops lowpan_netdev_ops = {
+ .ndo_init = lowpan_dev_init,
.ndo_start_xmit = lowpan_xmit,
.ndo_set_mac_address = lowpan_set_address,
};
@@ -576,45 +447,55 @@ static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[])
static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
- struct sk_buff *local_skb;
+ struct ieee802154_hdr hdr;
+ int ret;
- if (!netif_running(dev))
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (!skb)
goto drop;
+ if (!netif_running(dev))
+ goto drop_skb;
+
if (dev->type != ARPHRD_IEEE802154)
- goto drop;
+ goto drop_skb;
+
+ if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0)
+ goto drop_skb;
/* check that it's our buffer */
if (skb->data[0] == LOWPAN_DISPATCH_IPV6) {
- /* Copy the packet so that the IPv6 header is
- * properly aligned.
- */
- local_skb = skb_copy_expand(skb, NET_SKB_PAD - 1,
- skb_tailroom(skb), GFP_ATOMIC);
- if (!local_skb)
- goto drop;
-
- local_skb->protocol = htons(ETH_P_IPV6);
- local_skb->pkt_type = PACKET_HOST;
+ skb->protocol = htons(ETH_P_IPV6);
+ skb->pkt_type = PACKET_HOST;
/* Pull off the 1-byte of 6lowpan header. */
- skb_pull(local_skb, 1);
-
- lowpan_give_skb_to_devices(local_skb, NULL);
+ skb_pull(skb, 1);
- kfree_skb(local_skb);
- kfree_skb(skb);
+ ret = lowpan_give_skb_to_devices(skb, NULL);
+ if (ret == NET_RX_DROP)
+ goto drop;
} else {
switch (skb->data[0] & 0xe0) {
case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */
+ ret = process_data(skb, &hdr);
+ if (ret == NET_RX_DROP)
+ goto drop;
+ break;
case LOWPAN_DISPATCH_FRAG1: /* first fragment header */
+ ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAG1);
+ if (ret == 1) {
+ ret = process_data(skb, &hdr);
+ if (ret == NET_RX_DROP)
+ goto drop;
+ }
+ break;
case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */
- local_skb = skb_clone(skb, GFP_ATOMIC);
- if (!local_skb)
- goto drop;
- process_data(local_skb);
-
- kfree_skb(skb);
+ ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAGN);
+ if (ret == 1) {
+ ret = process_data(skb, &hdr);
+ if (ret == NET_RX_DROP)
+ goto drop;
+ }
break;
default:
break;
@@ -622,9 +503,9 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev,
}
return NET_RX_SUCCESS;
-
-drop:
+drop_skb:
kfree_skb(skb);
+drop:
return NET_RX_DROP;
}
@@ -648,10 +529,9 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
}
lowpan_dev_info(dev)->real_dev = real_dev;
- lowpan_dev_info(dev)->fragment_tag = 0;
mutex_init(&lowpan_dev_info(dev)->dev_list_mtx);
- entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL);
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry) {
dev_put(real_dev);
lowpan_dev_info(dev)->real_dev = NULL;
@@ -744,7 +624,7 @@ static struct notifier_block lowpan_dev_notifier = {
};
static struct packet_type lowpan_packet_type = {
- .type = __constant_htons(ETH_P_IEEE802154),
+ .type = htons(ETH_P_IEEE802154),
.func = lowpan_rcv,
};
@@ -752,43 +632,40 @@ static int __init lowpan_init_module(void)
{
int err = 0;
- err = lowpan_netlink_init();
+ err = lowpan_net_frag_init();
if (err < 0)
goto out;
+ err = lowpan_netlink_init();
+ if (err < 0)
+ goto out_frag;
+
dev_add_pack(&lowpan_packet_type);
err = register_netdevice_notifier(&lowpan_dev_notifier);
- if (err < 0) {
- dev_remove_pack(&lowpan_packet_type);
- lowpan_netlink_fini();
- }
+ if (err < 0)
+ goto out_pack;
+
+ return 0;
+
+out_pack:
+ dev_remove_pack(&lowpan_packet_type);
+ lowpan_netlink_fini();
+out_frag:
+ lowpan_net_frag_exit();
out:
return err;
}
static void __exit lowpan_cleanup_module(void)
{
- struct lowpan_fragment *frame, *tframe;
-
lowpan_netlink_fini();
dev_remove_pack(&lowpan_packet_type);
- unregister_netdevice_notifier(&lowpan_dev_notifier);
+ lowpan_net_frag_exit();
- /* Now 6lowpan packet_type is removed, so no new fragments are
- * expected on RX, therefore that's the time to clean incomplete
- * fragments.
- */
- spin_lock_bh(&flist_lock);
- list_for_each_entry_safe(frame, tframe, &lowpan_fragments, list) {
- del_timer_sync(&frame->timer);
- list_del(&frame->list);
- dev_kfree_skb(frame->skb);
- kfree(frame);
- }
- spin_unlock_bh(&flist_lock);
+ unregister_netdevice_notifier(&lowpan_dev_notifier);
}
module_init(lowpan_init_module);
diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig
index 9c9879d5ea64..8af1330b3137 100644
--- a/net/ieee802154/Kconfig
+++ b/net/ieee802154/Kconfig
@@ -15,7 +15,7 @@ config IEEE802154_6LOWPAN
depends on IEEE802154 && IPV6
select 6LOWPAN_IPHC
---help---
- IPv6 compression over IEEE 802.15.4.
+ IPv6 compression over IEEE 802.15.4.
config 6LOWPAN_IPHC
tristate
diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile
index e8f05885ced6..bf1b51497a41 100644
--- a/net/ieee802154/Makefile
+++ b/net/ieee802154/Makefile
@@ -2,5 +2,9 @@ obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o
obj-$(CONFIG_IEEE802154_6LOWPAN) += 6lowpan.o
obj-$(CONFIG_6LOWPAN_IPHC) += 6lowpan_iphc.o
-ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o
+6lowpan-y := 6lowpan_rtnl.o reassembly.o
+ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o \
+ header_ops.o
af_802154-y := af_ieee802154.o raw.o dgram.o
+
+ccflags-y += -D__CHECK_ENDIAN__
diff --git a/net/ieee802154/af802154.h b/net/ieee802154/af802154.h
index b1ec52537522..8330a09bfc95 100644
--- a/net/ieee802154/af802154.h
+++ b/net/ieee802154/af802154.h
@@ -25,12 +25,13 @@
#define AF802154_H
struct sk_buff;
-struct net_devce;
+struct net_device;
+struct ieee802154_addr;
extern struct proto ieee802154_raw_prot;
extern struct proto ieee802154_dgram_prot;
void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb);
int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb);
struct net_device *ieee802154_get_dev(struct net *net,
- struct ieee802154_addr *addr);
+ const struct ieee802154_addr *addr);
#endif
diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c
index 40e606f3788f..351d9a94ec2f 100644
--- a/net/ieee802154/af_ieee802154.c
+++ b/net/ieee802154/af_ieee802154.c
@@ -43,25 +43,27 @@
/*
* Utility function for families
*/
-struct net_device *ieee802154_get_dev(struct net *net,
- struct ieee802154_addr *addr)
+struct net_device*
+ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr)
{
struct net_device *dev = NULL;
struct net_device *tmp;
- u16 pan_id, short_addr;
+ __le16 pan_id, short_addr;
+ u8 hwaddr[IEEE802154_ADDR_LEN];
- switch (addr->addr_type) {
+ switch (addr->mode) {
case IEEE802154_ADDR_LONG:
+ ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr);
rcu_read_lock();
- dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, addr->hwaddr);
+ dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr);
if (dev)
dev_hold(dev);
rcu_read_unlock();
break;
case IEEE802154_ADDR_SHORT:
- if (addr->pan_id == 0xffff ||
- addr->short_addr == IEEE802154_ADDR_UNDEF ||
- addr->short_addr == 0xffff)
+ if (addr->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST) ||
+ addr->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) ||
+ addr->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST))
break;
rtnl_lock();
@@ -86,7 +88,7 @@ struct net_device *ieee802154_get_dev(struct net *net,
break;
default:
pr_warning("Unsupported ieee802154 address type: %d\n",
- addr->addr_type);
+ addr->mode);
break;
}
@@ -326,7 +328,7 @@ drop:
static struct packet_type ieee802154_packet_type = {
- .type = __constant_htons(ETH_P_IEEE802154),
+ .type = htons(ETH_P_IEEE802154),
.func = ieee802154_rcv,
};
diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c
index 1846c1fe0d06..786437bc0c08 100644
--- a/net/ieee802154/dgram.c
+++ b/net/ieee802154/dgram.c
@@ -73,10 +73,10 @@ static int dgram_init(struct sock *sk)
{
struct dgram_sock *ro = dgram_sk(sk);
- ro->dst_addr.addr_type = IEEE802154_ADDR_LONG;
- ro->dst_addr.pan_id = 0xffff;
+ ro->dst_addr.mode = IEEE802154_ADDR_LONG;
+ ro->dst_addr.pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST);
ro->want_ack = 1;
- memset(&ro->dst_addr.hwaddr, 0xff, sizeof(ro->dst_addr.hwaddr));
+ memset(&ro->dst_addr.extended_addr, 0xff, IEEE802154_ADDR_LEN);
return 0;
}
@@ -88,6 +88,7 @@ static void dgram_close(struct sock *sk, long timeout)
static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len)
{
struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr;
+ struct ieee802154_addr haddr;
struct dgram_sock *ro = dgram_sk(sk);
int err = -EINVAL;
struct net_device *dev;
@@ -102,7 +103,8 @@ static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len)
if (addr->family != AF_IEEE802154)
goto out;
- dev = ieee802154_get_dev(sock_net(sk), &addr->addr);
+ ieee802154_addr_from_sa(&haddr, &addr->addr);
+ dev = ieee802154_get_dev(sock_net(sk), &haddr);
if (!dev) {
err = -ENODEV;
goto out;
@@ -113,7 +115,7 @@ static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len)
goto out_put;
}
- memcpy(&ro->src_addr, &addr->addr, sizeof(struct ieee802154_addr));
+ ro->src_addr = haddr;
ro->bound = 1;
err = 0;
@@ -149,8 +151,7 @@ static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg)
* of this packet since that is all
* that will be read.
*/
- /* FIXME: parse the header for more correct value */
- amount = skb->len - (3+8+8);
+ amount = skb->len - ieee802154_hdr_length(skb);
}
spin_unlock_bh(&sk->sk_receive_queue.lock);
return put_user(amount, (int __user *)arg);
@@ -181,7 +182,7 @@ static int dgram_connect(struct sock *sk, struct sockaddr *uaddr,
goto out;
}
- memcpy(&ro->dst_addr, &addr->addr, sizeof(struct ieee802154_addr));
+ ieee802154_addr_from_sa(&ro->dst_addr, &addr->addr);
out:
release_sock(sk);
@@ -194,8 +195,8 @@ static int dgram_disconnect(struct sock *sk, int flags)
lock_sock(sk);
- ro->dst_addr.addr_type = IEEE802154_ADDR_LONG;
- memset(&ro->dst_addr.hwaddr, 0xff, sizeof(ro->dst_addr.hwaddr));
+ ro->dst_addr.mode = IEEE802154_ADDR_LONG;
+ memset(&ro->dst_addr.extended_addr, 0xff, IEEE802154_ADDR_LEN);
release_sock(sk);
@@ -232,7 +233,7 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk,
if (size > mtu) {
pr_debug("size = %Zu, mtu = %u\n", size, mtu);
- err = -EINVAL;
+ err = -EMSGSIZE;
goto out_dev;
}
@@ -312,7 +313,7 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk,
if (saddr) {
saddr->family = AF_IEEE802154;
- saddr->addr = mac_cb(skb)->sa;
+ ieee802154_addr_to_sa(&saddr->addr, &mac_cb(skb)->source);
*addr_len = sizeof(*saddr);
}
@@ -328,6 +329,10 @@ out:
static int dgram_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (!skb)
+ return NET_RX_DROP;
+
if (sock_queue_rcv_skb(sk, skb) < 0) {
kfree_skb(skb);
return NET_RX_DROP;
@@ -336,40 +341,43 @@ static int dgram_rcv_skb(struct sock *sk, struct sk_buff *skb)
return NET_RX_SUCCESS;
}
-static inline int ieee802154_match_sock(u8 *hw_addr, u16 pan_id,
- u16 short_addr, struct dgram_sock *ro)
+static inline bool
+ieee802154_match_sock(__le64 hw_addr, __le16 pan_id, __le16 short_addr,
+ struct dgram_sock *ro)
{
if (!ro->bound)
- return 1;
+ return true;
- if (ro->src_addr.addr_type == IEEE802154_ADDR_LONG &&
- !memcmp(ro->src_addr.hwaddr, hw_addr, IEEE802154_ADDR_LEN))
- return 1;
+ if (ro->src_addr.mode == IEEE802154_ADDR_LONG &&
+ hw_addr == ro->src_addr.extended_addr)
+ return true;
- if (ro->src_addr.addr_type == IEEE802154_ADDR_SHORT &&
- pan_id == ro->src_addr.pan_id &&
- short_addr == ro->src_addr.short_addr)
- return 1;
+ if (ro->src_addr.mode == IEEE802154_ADDR_SHORT &&
+ pan_id == ro->src_addr.pan_id &&
+ short_addr == ro->src_addr.short_addr)
+ return true;
- return 0;
+ return false;
}
int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb)
{
struct sock *sk, *prev = NULL;
int ret = NET_RX_SUCCESS;
- u16 pan_id, short_addr;
+ __le16 pan_id, short_addr;
+ __le64 hw_addr;
/* Data frame processing */
BUG_ON(dev->type != ARPHRD_IEEE802154);
pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev);
+ hw_addr = ieee802154_devaddr_from_raw(dev->dev_addr);
read_lock(&dgram_lock);
sk_for_each(sk, &dgram_head) {
- if (ieee802154_match_sock(dev->dev_addr, pan_id, short_addr,
- dgram_sk(sk))) {
+ if (ieee802154_match_sock(hw_addr, pan_id, short_addr,
+ dgram_sk(sk))) {
if (prev) {
struct sk_buff *clone;
clone = skb_clone(skb, GFP_ATOMIC);
diff --git a/net/ieee802154/header_ops.c b/net/ieee802154/header_ops.c
new file mode 100644
index 000000000000..bed42a48408c
--- /dev/null
+++ b/net/ieee802154/header_ops.c
@@ -0,0 +1,287 @@
+/*
+ * Copyright (C) 2014 Fraunhofer ITWM
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Written by:
+ * Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
+ */
+
+#include <net/mac802154.h>
+#include <net/ieee802154.h>
+#include <net/ieee802154_netdev.h>
+
+static int
+ieee802154_hdr_push_addr(u8 *buf, const struct ieee802154_addr *addr,
+ bool omit_pan)
+{
+ int pos = 0;
+
+ if (addr->mode == IEEE802154_ADDR_NONE)
+ return 0;
+
+ if (!omit_pan) {
+ memcpy(buf + pos, &addr->pan_id, 2);
+ pos += 2;
+ }
+
+ switch (addr->mode) {
+ case IEEE802154_ADDR_SHORT:
+ memcpy(buf + pos, &addr->short_addr, 2);
+ pos += 2;
+ break;
+
+ case IEEE802154_ADDR_LONG:
+ memcpy(buf + pos, &addr->extended_addr, IEEE802154_ADDR_LEN);
+ pos += IEEE802154_ADDR_LEN;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return pos;
+}
+
+static int
+ieee802154_hdr_push_sechdr(u8 *buf, const struct ieee802154_sechdr *hdr)
+{
+ int pos = 5;
+
+ memcpy(buf, hdr, 1);
+ memcpy(buf + 1, &hdr->frame_counter, 4);
+
+ switch (hdr->key_id_mode) {
+ case IEEE802154_SCF_KEY_IMPLICIT:
+ return pos;
+
+ case IEEE802154_SCF_KEY_INDEX:
+ break;
+
+ case IEEE802154_SCF_KEY_SHORT_INDEX:
+ memcpy(buf + pos, &hdr->short_src, 4);
+ pos += 4;
+ break;
+
+ case IEEE802154_SCF_KEY_HW_INDEX:
+ memcpy(buf + pos, &hdr->extended_src, IEEE802154_ADDR_LEN);
+ pos += IEEE802154_ADDR_LEN;
+ break;
+ }
+
+ buf[pos++] = hdr->key_id;
+
+ return pos;
+}
+
+int
+ieee802154_hdr_push(struct sk_buff *skb, const struct ieee802154_hdr *hdr)
+{
+ u8 buf[MAC802154_FRAME_HARD_HEADER_LEN];
+ int pos = 2;
+ int rc;
+ struct ieee802154_hdr_fc fc = hdr->fc;
+
+ buf[pos++] = hdr->seq;
+
+ fc.dest_addr_mode = hdr->dest.mode;
+
+ rc = ieee802154_hdr_push_addr(buf + pos, &hdr->dest, false);
+ if (rc < 0)
+ return -EINVAL;
+ pos += rc;
+
+ fc.source_addr_mode = hdr->source.mode;
+
+ if (hdr->source.pan_id == hdr->dest.pan_id &&
+ hdr->dest.mode != IEEE802154_ADDR_NONE)
+ fc.intra_pan = true;
+
+ rc = ieee802154_hdr_push_addr(buf + pos, &hdr->source, fc.intra_pan);
+ if (rc < 0)
+ return -EINVAL;
+ pos += rc;
+
+ if (fc.security_enabled) {
+ fc.version = 1;
+
+ rc = ieee802154_hdr_push_sechdr(buf + pos, &hdr->sec);
+ if (rc < 0)
+ return -EINVAL;
+
+ pos += rc;
+ }
+
+ memcpy(buf, &fc, 2);
+
+ memcpy(skb_push(skb, pos), buf, pos);
+
+ return pos;
+}
+EXPORT_SYMBOL_GPL(ieee802154_hdr_push);
+
+static int
+ieee802154_hdr_get_addr(const u8 *buf, int mode, bool omit_pan,
+ struct ieee802154_addr *addr)
+{
+ int pos = 0;
+
+ addr->mode = mode;
+
+ if (mode == IEEE802154_ADDR_NONE)
+ return 0;
+
+ if (!omit_pan) {
+ memcpy(&addr->pan_id, buf + pos, 2);
+ pos += 2;
+ }
+
+ if (mode == IEEE802154_ADDR_SHORT) {
+ memcpy(&addr->short_addr, buf + pos, 2);
+ return pos + 2;
+ } else {
+ memcpy(&addr->extended_addr, buf + pos, IEEE802154_ADDR_LEN);
+ return pos + IEEE802154_ADDR_LEN;
+ }
+}
+
+static int ieee802154_hdr_addr_len(int mode, bool omit_pan)
+{
+ int pan_len = omit_pan ? 0 : 2;
+
+ switch (mode) {
+ case IEEE802154_ADDR_NONE: return 0;
+ case IEEE802154_ADDR_SHORT: return 2 + pan_len;
+ case IEEE802154_ADDR_LONG: return IEEE802154_ADDR_LEN + pan_len;
+ default: return -EINVAL;
+ }
+}
+
+static int
+ieee802154_hdr_get_sechdr(const u8 *buf, struct ieee802154_sechdr *hdr)
+{
+ int pos = 5;
+
+ memcpy(hdr, buf, 1);
+ memcpy(&hdr->frame_counter, buf + 1, 4);
+
+ switch (hdr->key_id_mode) {
+ case IEEE802154_SCF_KEY_IMPLICIT:
+ return pos;
+
+ case IEEE802154_SCF_KEY_INDEX:
+ break;
+
+ case IEEE802154_SCF_KEY_SHORT_INDEX:
+ memcpy(&hdr->short_src, buf + pos, 4);
+ pos += 4;
+ break;
+
+ case IEEE802154_SCF_KEY_HW_INDEX:
+ memcpy(&hdr->extended_src, buf + pos, IEEE802154_ADDR_LEN);
+ pos += IEEE802154_ADDR_LEN;
+ break;
+ }
+
+ hdr->key_id = buf[pos++];
+
+ return pos;
+}
+
+static int ieee802154_hdr_sechdr_len(u8 sc)
+{
+ switch (IEEE802154_SCF_KEY_ID_MODE(sc)) {
+ case IEEE802154_SCF_KEY_IMPLICIT: return 5;
+ case IEEE802154_SCF_KEY_INDEX: return 6;
+ case IEEE802154_SCF_KEY_SHORT_INDEX: return 10;
+ case IEEE802154_SCF_KEY_HW_INDEX: return 14;
+ default: return -EINVAL;
+ }
+}
+
+static int ieee802154_hdr_minlen(const struct ieee802154_hdr *hdr)
+{
+ int dlen, slen;
+
+ dlen = ieee802154_hdr_addr_len(hdr->fc.dest_addr_mode, false);
+ slen = ieee802154_hdr_addr_len(hdr->fc.source_addr_mode,
+ hdr->fc.intra_pan);
+
+ if (slen < 0 || dlen < 0)
+ return -EINVAL;
+
+ return 3 + dlen + slen + hdr->fc.security_enabled;
+}
+
+static int
+ieee802154_hdr_get_addrs(const u8 *buf, struct ieee802154_hdr *hdr)
+{
+ int pos = 0;
+
+ pos += ieee802154_hdr_get_addr(buf + pos, hdr->fc.dest_addr_mode,
+ false, &hdr->dest);
+ pos += ieee802154_hdr_get_addr(buf + pos, hdr->fc.source_addr_mode,
+ hdr->fc.intra_pan, &hdr->source);
+
+ if (hdr->fc.intra_pan)
+ hdr->source.pan_id = hdr->dest.pan_id;
+
+ return pos;
+}
+
+int
+ieee802154_hdr_pull(struct sk_buff *skb, struct ieee802154_hdr *hdr)
+{
+ int pos = 3, rc;
+
+ if (!pskb_may_pull(skb, 3))
+ return -EINVAL;
+
+ memcpy(hdr, skb->data, 3);
+
+ rc = ieee802154_hdr_minlen(hdr);
+ if (rc < 0 || !pskb_may_pull(skb, rc))
+ return -EINVAL;
+
+ pos += ieee802154_hdr_get_addrs(skb->data + pos, hdr);
+
+ if (hdr->fc.security_enabled) {
+ int want = pos + ieee802154_hdr_sechdr_len(skb->data[pos]);
+
+ if (!pskb_may_pull(skb, want))
+ return -EINVAL;
+
+ pos += ieee802154_hdr_get_sechdr(skb->data + pos, &hdr->sec);
+ }
+
+ skb_pull(skb, pos);
+ return pos;
+}
+EXPORT_SYMBOL_GPL(ieee802154_hdr_pull);
+
+int
+ieee802154_hdr_peek_addrs(const struct sk_buff *skb, struct ieee802154_hdr *hdr)
+{
+ const u8 *buf = skb_mac_header(skb);
+ int pos = 3, rc;
+
+ if (buf + 3 > skb_tail_pointer(skb))
+ return -EINVAL;
+
+ memcpy(hdr, buf, 3);
+
+ rc = ieee802154_hdr_minlen(hdr);
+ if (rc < 0 || buf + rc > skb_tail_pointer(skb))
+ return -EINVAL;
+
+ pos += ieee802154_hdr_get_addrs(buf + pos, hdr);
+ return pos;
+}
+EXPORT_SYMBOL_GPL(ieee802154_hdr_peek_addrs);
diff --git a/net/ieee802154/ieee802154.h b/net/ieee802154/ieee802154.h
index cee4425b9956..6cbc8965be91 100644
--- a/net/ieee802154/ieee802154.h
+++ b/net/ieee802154/ieee802154.h
@@ -53,6 +53,7 @@ int ieee802154_list_phy(struct sk_buff *skb, struct genl_info *info);
int ieee802154_dump_phy(struct sk_buff *skb, struct netlink_callback *cb);
int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info);
int ieee802154_del_iface(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_set_phyparams(struct sk_buff *skb, struct genl_info *info);
enum ieee802154_mcgrp_ids {
IEEE802154_COORD_MCGRP,
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 43f1b2bf469f..67c151bf4b91 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -115,6 +115,7 @@ static const struct genl_ops ieee8021154_ops[] = {
ieee802154_dump_phy),
IEEE802154_OP(IEEE802154_ADD_IFACE, ieee802154_add_iface),
IEEE802154_OP(IEEE802154_DEL_IFACE, ieee802154_del_iface),
+ IEEE802154_OP(IEEE802154_SET_PHYPARAMS, ieee802154_set_phyparams),
/* see nl-mac.c */
IEEE802154_OP(IEEE802154_ASSOCIATE_REQ, ieee802154_associate_req),
IEEE802154_OP(IEEE802154_ASSOCIATE_RESP, ieee802154_associate_resp),
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index ba5c1e002f37..bda8dba4f993 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -39,6 +39,26 @@
#include "ieee802154.h"
+static int nla_put_hwaddr(struct sk_buff *msg, int type, __le64 hwaddr)
+{
+ return nla_put_u64(msg, type, swab64((__force u64)hwaddr));
+}
+
+static __le64 nla_get_hwaddr(const struct nlattr *nla)
+{
+ return ieee802154_devaddr_from_raw(nla_data(nla));
+}
+
+static int nla_put_shortaddr(struct sk_buff *msg, int type, __le16 addr)
+{
+ return nla_put_u16(msg, type, le16_to_cpu(addr));
+}
+
+static __le16 nla_get_shortaddr(const struct nlattr *nla)
+{
+ return cpu_to_le16(nla_get_u16(nla));
+}
+
int ieee802154_nl_assoc_indic(struct net_device *dev,
struct ieee802154_addr *addr, u8 cap)
{
@@ -46,7 +66,7 @@ int ieee802154_nl_assoc_indic(struct net_device *dev,
pr_debug("%s\n", __func__);
- if (addr->addr_type != IEEE802154_ADDR_LONG) {
+ if (addr->mode != IEEE802154_ADDR_LONG) {
pr_err("%s: received non-long source address!\n", __func__);
return -EINVAL;
}
@@ -59,8 +79,8 @@ int ieee802154_nl_assoc_indic(struct net_device *dev,
nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
nla_put(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN,
dev->dev_addr) ||
- nla_put(msg, IEEE802154_ATTR_SRC_HW_ADDR, IEEE802154_ADDR_LEN,
- addr->hwaddr) ||
+ nla_put_hwaddr(msg, IEEE802154_ATTR_SRC_HW_ADDR,
+ addr->extended_addr) ||
nla_put_u8(msg, IEEE802154_ATTR_CAPABILITY, cap))
goto nla_put_failure;
@@ -72,7 +92,7 @@ nla_put_failure:
}
EXPORT_SYMBOL(ieee802154_nl_assoc_indic);
-int ieee802154_nl_assoc_confirm(struct net_device *dev, u16 short_addr,
+int ieee802154_nl_assoc_confirm(struct net_device *dev, __le16 short_addr,
u8 status)
{
struct sk_buff *msg;
@@ -87,7 +107,7 @@ int ieee802154_nl_assoc_confirm(struct net_device *dev, u16 short_addr,
nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
nla_put(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN,
dev->dev_addr) ||
- nla_put_u16(msg, IEEE802154_ATTR_SHORT_ADDR, short_addr) ||
+ nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, short_addr) ||
nla_put_u8(msg, IEEE802154_ATTR_STATUS, status))
goto nla_put_failure;
return ieee802154_nl_mcast(msg, IEEE802154_COORD_MCGRP);
@@ -114,13 +134,13 @@ int ieee802154_nl_disassoc_indic(struct net_device *dev,
nla_put(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN,
dev->dev_addr))
goto nla_put_failure;
- if (addr->addr_type == IEEE802154_ADDR_LONG) {
- if (nla_put(msg, IEEE802154_ATTR_SRC_HW_ADDR, IEEE802154_ADDR_LEN,
- addr->hwaddr))
+ if (addr->mode == IEEE802154_ADDR_LONG) {
+ if (nla_put_hwaddr(msg, IEEE802154_ATTR_SRC_HW_ADDR,
+ addr->extended_addr))
goto nla_put_failure;
} else {
- if (nla_put_u16(msg, IEEE802154_ATTR_SRC_SHORT_ADDR,
- addr->short_addr))
+ if (nla_put_shortaddr(msg, IEEE802154_ATTR_SRC_SHORT_ADDR,
+ addr->short_addr))
goto nla_put_failure;
}
if (nla_put_u8(msg, IEEE802154_ATTR_REASON, reason))
@@ -157,8 +177,8 @@ nla_put_failure:
}
EXPORT_SYMBOL(ieee802154_nl_disassoc_confirm);
-int ieee802154_nl_beacon_indic(struct net_device *dev,
- u16 panid, u16 coord_addr)
+int ieee802154_nl_beacon_indic(struct net_device *dev, __le16 panid,
+ __le16 coord_addr)
{
struct sk_buff *msg;
@@ -172,8 +192,9 @@ int ieee802154_nl_beacon_indic(struct net_device *dev,
nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
nla_put(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN,
dev->dev_addr) ||
- nla_put_u16(msg, IEEE802154_ATTR_COORD_SHORT_ADDR, coord_addr) ||
- nla_put_u16(msg, IEEE802154_ATTR_COORD_PAN_ID, panid))
+ nla_put_shortaddr(msg, IEEE802154_ATTR_COORD_SHORT_ADDR,
+ coord_addr) ||
+ nla_put_shortaddr(msg, IEEE802154_ATTR_COORD_PAN_ID, panid))
goto nla_put_failure;
return ieee802154_nl_mcast(msg, IEEE802154_COORD_MCGRP);
@@ -243,6 +264,7 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid,
{
void *hdr;
struct wpan_phy *phy;
+ __le16 short_addr, pan_id;
pr_debug("%s\n", __func__);
@@ -254,15 +276,16 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid,
phy = ieee802154_mlme_ops(dev)->get_phy(dev);
BUG_ON(!phy);
+ short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev);
+ pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
+
if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) ||
nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) ||
nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
nla_put(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN,
dev->dev_addr) ||
- nla_put_u16(msg, IEEE802154_ATTR_SHORT_ADDR,
- ieee802154_mlme_ops(dev)->get_short_addr(dev)) ||
- nla_put_u16(msg, IEEE802154_ATTR_PAN_ID,
- ieee802154_mlme_ops(dev)->get_pan_id(dev)))
+ nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, short_addr) ||
+ nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, pan_id))
goto nla_put_failure;
wpan_phy_put(phy);
return genlmsg_end(msg, hdr);
@@ -322,16 +345,16 @@ int ieee802154_associate_req(struct sk_buff *skb, struct genl_info *info)
goto out;
if (info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]) {
- addr.addr_type = IEEE802154_ADDR_LONG;
- nla_memcpy(addr.hwaddr,
- info->attrs[IEEE802154_ATTR_COORD_HW_ADDR],
- IEEE802154_ADDR_LEN);
+ addr.mode = IEEE802154_ADDR_LONG;
+ addr.extended_addr = nla_get_hwaddr(
+ info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]);
} else {
- addr.addr_type = IEEE802154_ADDR_SHORT;
- addr.short_addr = nla_get_u16(
+ addr.mode = IEEE802154_ADDR_SHORT;
+ addr.short_addr = nla_get_shortaddr(
info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]);
}
- addr.pan_id = nla_get_u16(info->attrs[IEEE802154_ATTR_COORD_PAN_ID]);
+ addr.pan_id = nla_get_shortaddr(
+ info->attrs[IEEE802154_ATTR_COORD_PAN_ID]);
if (info->attrs[IEEE802154_ATTR_PAGE])
page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]);
@@ -365,14 +388,13 @@ int ieee802154_associate_resp(struct sk_buff *skb, struct genl_info *info)
if (!ieee802154_mlme_ops(dev)->assoc_resp)
goto out;
- addr.addr_type = IEEE802154_ADDR_LONG;
- nla_memcpy(addr.hwaddr, info->attrs[IEEE802154_ATTR_DEST_HW_ADDR],
- IEEE802154_ADDR_LEN);
+ addr.mode = IEEE802154_ADDR_LONG;
+ addr.extended_addr = nla_get_hwaddr(
+ info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]);
addr.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
-
ret = ieee802154_mlme_ops(dev)->assoc_resp(dev, &addr,
- nla_get_u16(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]),
+ nla_get_shortaddr(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]),
nla_get_u8(info->attrs[IEEE802154_ATTR_STATUS]));
out:
@@ -398,13 +420,12 @@ int ieee802154_disassociate_req(struct sk_buff *skb, struct genl_info *info)
goto out;
if (info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]) {
- addr.addr_type = IEEE802154_ADDR_LONG;
- nla_memcpy(addr.hwaddr,
- info->attrs[IEEE802154_ATTR_DEST_HW_ADDR],
- IEEE802154_ADDR_LEN);
+ addr.mode = IEEE802154_ADDR_LONG;
+ addr.extended_addr = nla_get_hwaddr(
+ info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]);
} else {
- addr.addr_type = IEEE802154_ADDR_SHORT;
- addr.short_addr = nla_get_u16(
+ addr.mode = IEEE802154_ADDR_SHORT;
+ addr.short_addr = nla_get_shortaddr(
info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]);
}
addr.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
@@ -449,10 +470,11 @@ int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
if (!ieee802154_mlme_ops(dev)->start_req)
goto out;
- addr.addr_type = IEEE802154_ADDR_SHORT;
- addr.short_addr = nla_get_u16(
+ addr.mode = IEEE802154_ADDR_SHORT;
+ addr.short_addr = nla_get_shortaddr(
info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]);
- addr.pan_id = nla_get_u16(info->attrs[IEEE802154_ATTR_COORD_PAN_ID]);
+ addr.pan_id = nla_get_shortaddr(
+ info->attrs[IEEE802154_ATTR_COORD_PAN_ID]);
channel = nla_get_u8(info->attrs[IEEE802154_ATTR_CHANNEL]);
bcn_ord = nla_get_u8(info->attrs[IEEE802154_ATTR_BCN_ORD]);
@@ -467,7 +489,7 @@ int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
page = 0;
- if (addr.short_addr == IEEE802154_ADDR_BROADCAST) {
+ if (addr.short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) {
ieee802154_nl_start_confirm(dev, IEEE802154_NO_SHORT_ADDRESS);
dev_put(dev);
return -EINVAL;
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index 89b265aea151..222310a07762 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -55,7 +55,15 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid,
mutex_lock(&phy->pib_lock);
if (nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) ||
nla_put_u8(msg, IEEE802154_ATTR_PAGE, phy->current_page) ||
- nla_put_u8(msg, IEEE802154_ATTR_CHANNEL, phy->current_channel))
+ nla_put_u8(msg, IEEE802154_ATTR_CHANNEL, phy->current_channel) ||
+ nla_put_s8(msg, IEEE802154_ATTR_TXPOWER, phy->transmit_power) ||
+ nla_put_u8(msg, IEEE802154_ATTR_LBT_ENABLED, phy->lbt) ||
+ nla_put_u8(msg, IEEE802154_ATTR_CCA_MODE, phy->cca_mode) ||
+ nla_put_s32(msg, IEEE802154_ATTR_CCA_ED_LEVEL, phy->cca_ed_level) ||
+ nla_put_u8(msg, IEEE802154_ATTR_CSMA_RETRIES, phy->csma_retries) ||
+ nla_put_u8(msg, IEEE802154_ATTR_CSMA_MIN_BE, phy->min_be) ||
+ nla_put_u8(msg, IEEE802154_ATTR_CSMA_MAX_BE, phy->max_be) ||
+ nla_put_s8(msg, IEEE802154_ATTR_FRAME_RETRIES, phy->frame_retries))
goto nla_put_failure;
for (i = 0; i < 32; i++) {
if (phy->channels_supported[i])
@@ -354,3 +362,193 @@ out_dev:
return rc;
}
+
+static int phy_set_txpower(struct wpan_phy *phy, struct genl_info *info)
+{
+ int txpower = nla_get_s8(info->attrs[IEEE802154_ATTR_TXPOWER]);
+ int rc;
+
+ rc = phy->set_txpower(phy, txpower);
+ if (rc < 0)
+ return rc;
+
+ phy->transmit_power = txpower;
+
+ return 0;
+}
+
+static int phy_set_lbt(struct wpan_phy *phy, struct genl_info *info)
+{
+ u8 on = !!nla_get_u8(info->attrs[IEEE802154_ATTR_LBT_ENABLED]);
+ int rc;
+
+ rc = phy->set_lbt(phy, on);
+ if (rc < 0)
+ return rc;
+
+ phy->lbt = on;
+
+ return 0;
+}
+
+static int phy_set_cca_mode(struct wpan_phy *phy, struct genl_info *info)
+{
+ u8 mode = nla_get_u8(info->attrs[IEEE802154_ATTR_CCA_MODE]);
+ int rc;
+
+ if (mode > 3)
+ return -EINVAL;
+
+ rc = phy->set_cca_mode(phy, mode);
+ if (rc < 0)
+ return rc;
+
+ phy->cca_mode = mode;
+
+ return 0;
+}
+
+static int phy_set_cca_ed_level(struct wpan_phy *phy, struct genl_info *info)
+{
+ s32 level = nla_get_s32(info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]);
+ int rc;
+
+ rc = phy->set_cca_ed_level(phy, level);
+ if (rc < 0)
+ return rc;
+
+ phy->cca_ed_level = level;
+
+ return 0;
+}
+
+static int phy_set_csma_params(struct wpan_phy *phy, struct genl_info *info)
+{
+ int rc;
+ u8 min_be = phy->min_be;
+ u8 max_be = phy->max_be;
+ u8 retries = phy->csma_retries;
+
+ if (info->attrs[IEEE802154_ATTR_CSMA_RETRIES])
+ retries = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_RETRIES]);
+ if (info->attrs[IEEE802154_ATTR_CSMA_MIN_BE])
+ min_be = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_MIN_BE]);
+ if (info->attrs[IEEE802154_ATTR_CSMA_MAX_BE])
+ max_be = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_MAX_BE]);
+
+ if (retries > 5 || max_be < 3 || max_be > 8 || min_be > max_be)
+ return -EINVAL;
+
+ rc = phy->set_csma_params(phy, min_be, max_be, retries);
+ if (rc < 0)
+ return rc;
+
+ phy->min_be = min_be;
+ phy->max_be = max_be;
+ phy->csma_retries = retries;
+
+ return 0;
+}
+
+static int phy_set_frame_retries(struct wpan_phy *phy, struct genl_info *info)
+{
+ s8 retries = nla_get_s8(info->attrs[IEEE802154_ATTR_FRAME_RETRIES]);
+ int rc;
+
+ if (retries < -1 || retries > 7)
+ return -EINVAL;
+
+ rc = phy->set_frame_retries(phy, retries);
+ if (rc < 0)
+ return rc;
+
+ phy->frame_retries = retries;
+
+ return 0;
+}
+
+int ieee802154_set_phyparams(struct sk_buff *skb, struct genl_info *info)
+{
+ struct wpan_phy *phy;
+ const char *name;
+ int rc = -ENOTSUPP;
+
+ pr_debug("%s\n", __func__);
+
+ if (!info->attrs[IEEE802154_ATTR_PHY_NAME] &&
+ !info->attrs[IEEE802154_ATTR_LBT_ENABLED] &&
+ !info->attrs[IEEE802154_ATTR_CCA_MODE] &&
+ !info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL] &&
+ !info->attrs[IEEE802154_ATTR_CSMA_RETRIES] &&
+ !info->attrs[IEEE802154_ATTR_CSMA_MIN_BE] &&
+ !info->attrs[IEEE802154_ATTR_CSMA_MAX_BE] &&
+ !info->attrs[IEEE802154_ATTR_FRAME_RETRIES])
+ return -EINVAL;
+
+ name = nla_data(info->attrs[IEEE802154_ATTR_PHY_NAME]);
+ if (name[nla_len(info->attrs[IEEE802154_ATTR_PHY_NAME]) - 1] != '\0')
+ return -EINVAL; /* phy name should be null-terminated */
+
+ phy = wpan_phy_find(name);
+ if (!phy)
+ return -ENODEV;
+
+ if ((!phy->set_txpower && info->attrs[IEEE802154_ATTR_TXPOWER]) ||
+ (!phy->set_lbt && info->attrs[IEEE802154_ATTR_LBT_ENABLED]) ||
+ (!phy->set_cca_mode && info->attrs[IEEE802154_ATTR_CCA_MODE]) ||
+ (!phy->set_cca_ed_level &&
+ info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]))
+ goto out;
+
+ mutex_lock(&phy->pib_lock);
+
+ if (info->attrs[IEEE802154_ATTR_TXPOWER]) {
+ rc = phy_set_txpower(phy, info);
+ if (rc < 0)
+ goto error;
+ }
+
+ if (info->attrs[IEEE802154_ATTR_LBT_ENABLED]) {
+ rc = phy_set_lbt(phy, info);
+ if (rc < 0)
+ goto error;
+ }
+
+ if (info->attrs[IEEE802154_ATTR_CCA_MODE]) {
+ rc = phy_set_cca_mode(phy, info);
+ if (rc < 0)
+ goto error;
+ }
+
+ if (info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) {
+ rc = phy_set_cca_ed_level(phy, info);
+ if (rc < 0)
+ goto error;
+ }
+
+ if (info->attrs[IEEE802154_ATTR_CSMA_RETRIES] ||
+ info->attrs[IEEE802154_ATTR_CSMA_MIN_BE] ||
+ info->attrs[IEEE802154_ATTR_CSMA_MAX_BE]) {
+ rc = phy_set_csma_params(phy, info);
+ if (rc < 0)
+ goto error;
+ }
+
+ if (info->attrs[IEEE802154_ATTR_FRAME_RETRIES]) {
+ rc = phy_set_frame_retries(phy, info);
+ if (rc < 0)
+ goto error;
+ }
+
+ mutex_unlock(&phy->pib_lock);
+
+ wpan_phy_put(phy);
+
+ return 0;
+
+error:
+ mutex_unlock(&phy->pib_lock);
+out:
+ wpan_phy_put(phy);
+ return rc;
+}
diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c
index 6adda4d46f95..fd7be5e45cef 100644
--- a/net/ieee802154/nl_policy.c
+++ b/net/ieee802154/nl_policy.c
@@ -52,5 +52,15 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = {
[IEEE802154_ATTR_DURATION] = { .type = NLA_U8, },
[IEEE802154_ATTR_ED_LIST] = { .len = 27 },
[IEEE802154_ATTR_CHANNEL_PAGE_LIST] = { .len = 32 * 4, },
+
+ [IEEE802154_ATTR_TXPOWER] = { .type = NLA_S8, },
+ [IEEE802154_ATTR_LBT_ENABLED] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_CCA_MODE] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_CCA_ED_LEVEL] = { .type = NLA_S32, },
+ [IEEE802154_ATTR_CSMA_RETRIES] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_CSMA_MIN_BE] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_CSMA_MAX_BE] = { .type = NLA_U8, },
+
+ [IEEE802154_ATTR_FRAME_RETRIES] = { .type = NLA_S8, },
};
diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c
index 41f538b8e59c..74d54fae33d7 100644
--- a/net/ieee802154/raw.c
+++ b/net/ieee802154/raw.c
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <net/sock.h>
#include <net/af_ieee802154.h>
+#include <net/ieee802154_netdev.h>
#include "af802154.h"
@@ -55,21 +56,24 @@ static void raw_close(struct sock *sk, long timeout)
sk_common_release(sk);
}
-static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int len)
+static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len)
{
- struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr;
+ struct ieee802154_addr addr;
+ struct sockaddr_ieee802154 *uaddr = (struct sockaddr_ieee802154 *)_uaddr;
int err = 0;
struct net_device *dev = NULL;
- if (len < sizeof(*addr))
+ if (len < sizeof(*uaddr))
return -EINVAL;
- if (addr->family != AF_IEEE802154)
+ uaddr = (struct sockaddr_ieee802154 *)_uaddr;
+ if (uaddr->family != AF_IEEE802154)
return -EINVAL;
lock_sock(sk);
- dev = ieee802154_get_dev(sock_net(sk), &addr->addr);
+ ieee802154_addr_from_sa(&addr, &uaddr->addr);
+ dev = ieee802154_get_dev(sock_net(sk), &addr);
if (!dev) {
err = -ENODEV;
goto out;
@@ -209,6 +213,10 @@ out:
static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (!skb)
+ return NET_RX_DROP;
+
if (sock_queue_rcv_skb(sk, skb) < 0) {
kfree_skb(skb);
return NET_RX_DROP;
diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c
new file mode 100644
index 000000000000..ef2d54372b13
--- /dev/null
+++ b/net/ieee802154/reassembly.c
@@ -0,0 +1,571 @@
+/* 6LoWPAN fragment reassembly
+ *
+ *
+ * Authors:
+ * Alexander Aring <aar@pengutronix.de>
+ *
+ * Based on: net/ipv6/reassembly.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "6LoWPAN: " fmt
+
+#include <linux/net.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include <net/ieee802154_netdev.h>
+#include <net/6lowpan.h>
+#include <net/ipv6.h>
+#include <net/inet_frag.h>
+
+#include "reassembly.h"
+
+struct lowpan_frag_info {
+ __be16 d_tag;
+ u16 d_size;
+ u8 d_offset;
+};
+
+struct lowpan_frag_info *lowpan_cb(struct sk_buff *skb)
+{
+ return (struct lowpan_frag_info *)skb->cb;
+}
+
+static struct inet_frags lowpan_frags;
+
+static int lowpan_frag_reasm(struct lowpan_frag_queue *fq,
+ struct sk_buff *prev, struct net_device *dev);
+
+static unsigned int lowpan_hash_frag(__be16 tag, u16 d_size,
+ const struct ieee802154_addr *saddr,
+ const struct ieee802154_addr *daddr)
+{
+ u32 c;
+
+ net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd));
+ c = jhash_3words(ieee802154_addr_hash(saddr),
+ ieee802154_addr_hash(daddr),
+ (__force u32)(tag + (d_size << 16)),
+ lowpan_frags.rnd);
+
+ return c & (INETFRAGS_HASHSZ - 1);
+}
+
+static unsigned int lowpan_hashfn(struct inet_frag_queue *q)
+{
+ struct lowpan_frag_queue *fq;
+
+ fq = container_of(q, struct lowpan_frag_queue, q);
+ return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr);
+}
+
+static bool lowpan_frag_match(struct inet_frag_queue *q, void *a)
+{
+ struct lowpan_frag_queue *fq;
+ struct lowpan_create_arg *arg = a;
+
+ fq = container_of(q, struct lowpan_frag_queue, q);
+ return fq->tag == arg->tag && fq->d_size == arg->d_size &&
+ ieee802154_addr_equal(&fq->saddr, arg->src) &&
+ ieee802154_addr_equal(&fq->daddr, arg->dst);
+}
+
+static void lowpan_frag_init(struct inet_frag_queue *q, void *a)
+{
+ struct lowpan_frag_queue *fq;
+ struct lowpan_create_arg *arg = a;
+
+ fq = container_of(q, struct lowpan_frag_queue, q);
+
+ fq->tag = arg->tag;
+ fq->d_size = arg->d_size;
+ fq->saddr = *arg->src;
+ fq->daddr = *arg->dst;
+}
+
+static void lowpan_frag_expire(unsigned long data)
+{
+ struct frag_queue *fq;
+ struct net *net;
+
+ fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+ net = container_of(fq->q.net, struct net, ieee802154_lowpan.frags);
+
+ spin_lock(&fq->q.lock);
+
+ if (fq->q.last_in & INET_FRAG_COMPLETE)
+ goto out;
+
+ inet_frag_kill(&fq->q, &lowpan_frags);
+out:
+ spin_unlock(&fq->q.lock);
+ inet_frag_put(&fq->q, &lowpan_frags);
+}
+
+static inline struct lowpan_frag_queue *
+fq_find(struct net *net, const struct lowpan_frag_info *frag_info,
+ const struct ieee802154_addr *src,
+ const struct ieee802154_addr *dst)
+{
+ struct inet_frag_queue *q;
+ struct lowpan_create_arg arg;
+ unsigned int hash;
+
+ arg.tag = frag_info->d_tag;
+ arg.d_size = frag_info->d_size;
+ arg.src = src;
+ arg.dst = dst;
+
+ read_lock(&lowpan_frags.lock);
+ hash = lowpan_hash_frag(frag_info->d_tag, frag_info->d_size, src, dst);
+
+ q = inet_frag_find(&net->ieee802154_lowpan.frags,
+ &lowpan_frags, &arg, hash);
+ if (IS_ERR_OR_NULL(q)) {
+ inet_frag_maybe_warn_overflow(q, pr_fmt());
+ return NULL;
+ }
+ return container_of(q, struct lowpan_frag_queue, q);
+}
+
+static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
+ struct sk_buff *skb, const u8 frag_type)
+{
+ struct sk_buff *prev, *next;
+ struct net_device *dev;
+ int end, offset;
+
+ if (fq->q.last_in & INET_FRAG_COMPLETE)
+ goto err;
+
+ offset = lowpan_cb(skb)->d_offset << 3;
+ end = lowpan_cb(skb)->d_size;
+
+ /* Is this the final fragment? */
+ if (offset + skb->len == end) {
+ /* If we already have some bits beyond end
+ * or have different end, the segment is corrupted.
+ */
+ if (end < fq->q.len ||
+ ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len))
+ goto err;
+ fq->q.last_in |= INET_FRAG_LAST_IN;
+ fq->q.len = end;
+ } else {
+ if (end > fq->q.len) {
+ /* Some bits beyond end -> corruption. */
+ if (fq->q.last_in & INET_FRAG_LAST_IN)
+ goto err;
+ fq->q.len = end;
+ }
+ }
+
+ /* Find out which fragments are in front and at the back of us
+ * in the chain of fragments so far. We must know where to put
+ * this fragment, right?
+ */
+ prev = fq->q.fragments_tail;
+ if (!prev || lowpan_cb(prev)->d_offset < lowpan_cb(skb)->d_offset) {
+ next = NULL;
+ goto found;
+ }
+ prev = NULL;
+ for (next = fq->q.fragments; next != NULL; next = next->next) {
+ if (lowpan_cb(next)->d_offset >= lowpan_cb(skb)->d_offset)
+ break; /* bingo! */
+ prev = next;
+ }
+
+found:
+ /* Insert this fragment in the chain of fragments. */
+ skb->next = next;
+ if (!next)
+ fq->q.fragments_tail = skb;
+ if (prev)
+ prev->next = skb;
+ else
+ fq->q.fragments = skb;
+
+ dev = skb->dev;
+ if (dev)
+ skb->dev = NULL;
+
+ fq->q.stamp = skb->tstamp;
+ if (frag_type == LOWPAN_DISPATCH_FRAG1) {
+ /* Calculate uncomp. 6lowpan header to estimate full size */
+ fq->q.meat += lowpan_uncompress_size(skb, NULL);
+ fq->q.last_in |= INET_FRAG_FIRST_IN;
+ } else {
+ fq->q.meat += skb->len;
+ }
+ add_frag_mem_limit(&fq->q, skb->truesize);
+
+ if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ fq->q.meat == fq->q.len) {
+ int res;
+ unsigned long orefdst = skb->_skb_refdst;
+
+ skb->_skb_refdst = 0UL;
+ res = lowpan_frag_reasm(fq, prev, dev);
+ skb->_skb_refdst = orefdst;
+ return res;
+ }
+
+ inet_frag_lru_move(&fq->q);
+ return -1;
+err:
+ kfree_skb(skb);
+ return -1;
+}
+
+/* Check if this packet is complete.
+ * Returns NULL on failure by any reason, and pointer
+ * to current nexthdr field in reassembled frame.
+ *
+ * It is called with locked fq, and caller must check that
+ * queue is eligible for reassembly i.e. it is not COMPLETE,
+ * the last and the first frames arrived and all the bits are here.
+ */
+static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
+ struct net_device *dev)
+{
+ struct sk_buff *fp, *head = fq->q.fragments;
+ int sum_truesize;
+
+ inet_frag_kill(&fq->q, &lowpan_frags);
+
+ /* Make the one we just received the head. */
+ if (prev) {
+ head = prev->next;
+ fp = skb_clone(head, GFP_ATOMIC);
+
+ if (!fp)
+ goto out_oom;
+
+ fp->next = head->next;
+ if (!fp->next)
+ fq->q.fragments_tail = fp;
+ prev->next = fp;
+
+ skb_morph(head, fq->q.fragments);
+ head->next = fq->q.fragments->next;
+
+ consume_skb(fq->q.fragments);
+ fq->q.fragments = head;
+ }
+
+ /* Head of list must not be cloned. */
+ if (skb_unclone(head, GFP_ATOMIC))
+ goto out_oom;
+
+ /* If the first fragment is fragmented itself, we split
+ * it to two chunks: the first with data and paged part
+ * and the second, holding only fragments.
+ */
+ if (skb_has_frag_list(head)) {
+ struct sk_buff *clone;
+ int i, plen = 0;
+
+ clone = alloc_skb(0, GFP_ATOMIC);
+ if (!clone)
+ goto out_oom;
+ clone->next = head->next;
+ head->next = clone;
+ skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+ skb_frag_list_init(head);
+ for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+ plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
+ clone->len = head->data_len - plen;
+ clone->data_len = clone->len;
+ head->data_len -= clone->len;
+ head->len -= clone->len;
+ add_frag_mem_limit(&fq->q, clone->truesize);
+ }
+
+ WARN_ON(head == NULL);
+
+ sum_truesize = head->truesize;
+ for (fp = head->next; fp;) {
+ bool headstolen;
+ int delta;
+ struct sk_buff *next = fp->next;
+
+ sum_truesize += fp->truesize;
+ if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
+ kfree_skb_partial(fp, headstolen);
+ } else {
+ if (!skb_shinfo(head)->frag_list)
+ skb_shinfo(head)->frag_list = fp;
+ head->data_len += fp->len;
+ head->len += fp->len;
+ head->truesize += fp->truesize;
+ }
+ fp = next;
+ }
+ sub_frag_mem_limit(&fq->q, sum_truesize);
+
+ head->next = NULL;
+ head->dev = dev;
+ head->tstamp = fq->q.stamp;
+
+ fq->q.fragments = NULL;
+ fq->q.fragments_tail = NULL;
+
+ return 1;
+out_oom:
+ net_dbg_ratelimited("lowpan_frag_reasm: no memory for reassembly\n");
+ return -1;
+}
+
+static int lowpan_get_frag_info(struct sk_buff *skb, const u8 frag_type,
+ struct lowpan_frag_info *frag_info)
+{
+ bool fail;
+ u8 pattern = 0, low = 0;
+
+ fail = lowpan_fetch_skb(skb, &pattern, 1);
+ fail |= lowpan_fetch_skb(skb, &low, 1);
+ frag_info->d_size = (pattern & 7) << 8 | low;
+ fail |= lowpan_fetch_skb(skb, &frag_info->d_tag, 2);
+
+ if (frag_type == LOWPAN_DISPATCH_FRAGN) {
+ fail |= lowpan_fetch_skb(skb, &frag_info->d_offset, 1);
+ } else {
+ skb_reset_network_header(skb);
+ frag_info->d_offset = 0;
+ }
+
+ if (unlikely(fail))
+ return -EIO;
+
+ return 0;
+}
+
+int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type)
+{
+ struct lowpan_frag_queue *fq;
+ struct net *net = dev_net(skb->dev);
+ struct lowpan_frag_info *frag_info = lowpan_cb(skb);
+ struct ieee802154_addr source, dest;
+ int err;
+
+ source = mac_cb(skb)->source;
+ dest = mac_cb(skb)->dest;
+
+ err = lowpan_get_frag_info(skb, frag_type, frag_info);
+ if (err < 0)
+ goto err;
+
+ if (frag_info->d_size > net->ieee802154_lowpan.max_dsize)
+ goto err;
+
+ inet_frag_evictor(&net->ieee802154_lowpan.frags, &lowpan_frags, false);
+
+ fq = fq_find(net, frag_info, &source, &dest);
+ if (fq != NULL) {
+ int ret;
+ spin_lock(&fq->q.lock);
+ ret = lowpan_frag_queue(fq, skb, frag_type);
+ spin_unlock(&fq->q.lock);
+
+ inet_frag_put(&fq->q, &lowpan_frags);
+ return ret;
+ }
+
+err:
+ kfree_skb(skb);
+ return -1;
+}
+EXPORT_SYMBOL(lowpan_frag_rcv);
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table lowpan_frags_ns_ctl_table[] = {
+ {
+ .procname = "6lowpanfrag_high_thresh",
+ .data = &init_net.ieee802154_lowpan.frags.high_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "6lowpanfrag_low_thresh",
+ .data = &init_net.ieee802154_lowpan.frags.low_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "6lowpanfrag_time",
+ .data = &init_net.ieee802154_lowpan.frags.timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "6lowpanfrag_max_datagram_size",
+ .data = &init_net.ieee802154_lowpan.max_dsize,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ { }
+};
+
+static struct ctl_table lowpan_frags_ctl_table[] = {
+ {
+ .procname = "6lowpanfrag_secret_interval",
+ .data = &lowpan_frags.secret_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ { }
+};
+
+static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
+{
+ struct ctl_table *table;
+ struct ctl_table_header *hdr;
+
+ table = lowpan_frags_ns_ctl_table;
+ if (!net_eq(net, &init_net)) {
+ table = kmemdup(table, sizeof(lowpan_frags_ns_ctl_table),
+ GFP_KERNEL);
+ if (table == NULL)
+ goto err_alloc;
+
+ table[0].data = &net->ieee802154_lowpan.frags.high_thresh;
+ table[1].data = &net->ieee802154_lowpan.frags.low_thresh;
+ table[2].data = &net->ieee802154_lowpan.frags.timeout;
+ table[3].data = &net->ieee802154_lowpan.max_dsize;
+
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ table[0].procname = NULL;
+ }
+
+ hdr = register_net_sysctl(net, "net/ieee802154/6lowpan", table);
+ if (hdr == NULL)
+ goto err_reg;
+
+ net->ieee802154_lowpan.sysctl.frags_hdr = hdr;
+ return 0;
+
+err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(table);
+err_alloc:
+ return -ENOMEM;
+}
+
+static void __net_exit lowpan_frags_ns_sysctl_unregister(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = net->ieee802154_lowpan.sysctl.frags_hdr->ctl_table_arg;
+ unregister_net_sysctl_table(net->ieee802154_lowpan.sysctl.frags_hdr);
+ if (!net_eq(net, &init_net))
+ kfree(table);
+}
+
+static struct ctl_table_header *lowpan_ctl_header;
+
+static int lowpan_frags_sysctl_register(void)
+{
+ lowpan_ctl_header = register_net_sysctl(&init_net,
+ "net/ieee802154/6lowpan",
+ lowpan_frags_ctl_table);
+ return lowpan_ctl_header == NULL ? -ENOMEM : 0;
+}
+
+static void lowpan_frags_sysctl_unregister(void)
+{
+ unregister_net_sysctl_table(lowpan_ctl_header);
+}
+#else
+static inline int lowpan_frags_ns_sysctl_register(struct net *net)
+{
+ return 0;
+}
+
+static inline void lowpan_frags_ns_sysctl_unregister(struct net *net)
+{
+}
+
+static inline int lowpan_frags_sysctl_register(void)
+{
+ return 0;
+}
+
+static inline void lowpan_frags_sysctl_unregister(void)
+{
+}
+#endif
+
+static int __net_init lowpan_frags_init_net(struct net *net)
+{
+ net->ieee802154_lowpan.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ net->ieee802154_lowpan.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ net->ieee802154_lowpan.frags.timeout = IPV6_FRAG_TIMEOUT;
+ net->ieee802154_lowpan.max_dsize = 0xFFFF;
+
+ inet_frags_init_net(&net->ieee802154_lowpan.frags);
+
+ return lowpan_frags_ns_sysctl_register(net);
+}
+
+static void __net_exit lowpan_frags_exit_net(struct net *net)
+{
+ lowpan_frags_ns_sysctl_unregister(net);
+ inet_frags_exit_net(&net->ieee802154_lowpan.frags, &lowpan_frags);
+}
+
+static struct pernet_operations lowpan_frags_ops = {
+ .init = lowpan_frags_init_net,
+ .exit = lowpan_frags_exit_net,
+};
+
+int __init lowpan_net_frag_init(void)
+{
+ int ret;
+
+ ret = lowpan_frags_sysctl_register();
+ if (ret)
+ return ret;
+
+ ret = register_pernet_subsys(&lowpan_frags_ops);
+ if (ret)
+ goto err_pernet;
+
+ lowpan_frags.hashfn = lowpan_hashfn;
+ lowpan_frags.constructor = lowpan_frag_init;
+ lowpan_frags.destructor = NULL;
+ lowpan_frags.skb_free = NULL;
+ lowpan_frags.qsize = sizeof(struct frag_queue);
+ lowpan_frags.match = lowpan_frag_match;
+ lowpan_frags.frag_expire = lowpan_frag_expire;
+ lowpan_frags.secret_interval = 10 * 60 * HZ;
+ inet_frags_init(&lowpan_frags);
+
+ return ret;
+err_pernet:
+ lowpan_frags_sysctl_unregister();
+ return ret;
+}
+
+void lowpan_net_frag_exit(void)
+{
+ inet_frags_fini(&lowpan_frags);
+ lowpan_frags_sysctl_unregister();
+ unregister_pernet_subsys(&lowpan_frags_ops);
+}
diff --git a/net/ieee802154/reassembly.h b/net/ieee802154/reassembly.h
new file mode 100644
index 000000000000..74e4a7c98191
--- /dev/null
+++ b/net/ieee802154/reassembly.h
@@ -0,0 +1,41 @@
+#ifndef __IEEE802154_6LOWPAN_REASSEMBLY_H__
+#define __IEEE802154_6LOWPAN_REASSEMBLY_H__
+
+#include <net/inet_frag.h>
+
+struct lowpan_create_arg {
+ __be16 tag;
+ u16 d_size;
+ const struct ieee802154_addr *src;
+ const struct ieee802154_addr *dst;
+};
+
+/* Equivalent of ipv4 struct ip
+ */
+struct lowpan_frag_queue {
+ struct inet_frag_queue q;
+
+ __be16 tag;
+ u16 d_size;
+ struct ieee802154_addr saddr;
+ struct ieee802154_addr daddr;
+};
+
+static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a)
+{
+ switch (a->mode) {
+ case IEEE802154_ADDR_LONG:
+ return (((__force u64)a->extended_addr) >> 32) ^
+ (((__force u64)a->extended_addr) & 0xffffffff);
+ case IEEE802154_ADDR_SHORT:
+ return (__force u32)(a->short_addr);
+ default:
+ return 0;
+ }
+}
+
+int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type);
+void lowpan_net_frag_exit(void);
+int lowpan_net_frag_init(void);
+
+#endif /* __IEEE802154_6LOWPAN_REASSEMBLY_H__ */
diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c
index 4dd37615a749..edd0962d55f9 100644
--- a/net/ieee802154/wpan-class.c
+++ b/net/ieee802154/wpan-class.c
@@ -44,9 +44,7 @@ static DEVICE_ATTR_RO(name);
MASTER_SHOW(current_channel, "%d");
MASTER_SHOW(current_page, "%d");
-MASTER_SHOW_COMPLEX(transmit_power, "%d +- %d dB",
- ((signed char) (phy->transmit_power << 2)) >> 2,
- (phy->transmit_power >> 6) ? (phy->transmit_power >> 6) * 3 : 1);
+MASTER_SHOW(transmit_power, "%d +- 1 dB");
MASTER_SHOW(cca_mode, "%d");
static ssize_t channels_supported_show(struct device *dev,
@@ -171,6 +169,12 @@ struct wpan_phy *wpan_phy_alloc(size_t priv_size)
phy->current_channel = -1; /* not initialised */
phy->current_page = 0; /* for compatibility */
+ /* defaults per 802.15.4-2011 */
+ phy->min_be = 3;
+ phy->max_be = 5;
+ phy->csma_retries = 4;
+ phy->frame_retries = -1; /* for compatibility, actual default is 3 */
+
return phy;
out:
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index f8c49ce5b283..f032688d20d3 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -55,4 +55,4 @@ obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
- xfrm4_output.o
+ xfrm4_output.o xfrm4_protocol.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ecd2c3f245ce..8c54870db792 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1296,8 +1296,11 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
segs = ERR_PTR(-EPROTONOSUPPORT);
- /* Note : following gso_segment() might change skb->encapsulation */
- udpfrag = !skb->encapsulation && proto == IPPROTO_UDP;
+ if (skb->encapsulation &&
+ skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP))
+ udpfrag = proto == IPPROTO_UDP && encap;
+ else
+ udpfrag = proto == IPPROTO_UDP && !skb->encapsulation;
ops = rcu_dereference(inet_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment))
@@ -1502,9 +1505,9 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
bhptr = per_cpu_ptr(mib[0], cpu);
syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
do {
- start = u64_stats_fetch_begin_bh(syncp);
+ start = u64_stats_fetch_begin_irq(syncp);
v = *(((u64 *) bhptr) + offt);
- } while (u64_stats_fetch_retry_bh(syncp, start));
+ } while (u64_stats_fetch_retry_irq(syncp, start));
res += v;
}
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 717902669d2f..a2afa89513a0 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -155,6 +155,10 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
struct iphdr *iph, *top_iph;
struct ip_auth_hdr *ah;
struct ah_data *ahp;
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ int sglists = 0;
+ struct scatterlist *seqhisg;
ahp = x->data;
ahash = ahp->ahash;
@@ -167,14 +171,19 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
ah = ip_auth_hdr(skb);
ihl = ip_hdrlen(skb);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists = 1;
+ seqhi_len = sizeof(*seqhi);
+ }
err = -ENOMEM;
- iph = ah_alloc_tmp(ahash, nfrags, ihl);
+ iph = ah_alloc_tmp(ahash, nfrags + sglists, ihl + seqhi_len);
if (!iph)
goto out;
-
- icv = ah_tmp_icv(ahash, iph, ihl);
+ seqhi = (__be32 *)((char *)iph + ihl);
+ icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
+ seqhisg = sg + nfrags;
memset(ah->auth_data, 0, ahp->icv_trunc_len);
@@ -210,10 +219,15 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
ah->spi = x->id.spi;
ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ sg_init_table(sg, nfrags + sglists);
+ skb_to_sgvec_nomark(skb, sg, 0, skb->len);
- ahash_request_set_crypt(req, sg, icv, skb->len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ /* Attach seqhi sg right after packet payload */
+ *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ sg_set_buf(seqhisg, seqhi, seqhi_len);
+ }
+ ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah_output_done, skb);
AH_SKB_CB(skb)->tmp = iph;
@@ -295,6 +309,10 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
struct ip_auth_hdr *ah;
struct ah_data *ahp;
int err = -ENOMEM;
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ int sglists = 0;
+ struct scatterlist *seqhisg;
if (!pskb_may_pull(skb, sizeof(*ah)))
goto out;
@@ -335,14 +353,22 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
iph = ip_hdr(skb);
ihl = ip_hdrlen(skb);
- work_iph = ah_alloc_tmp(ahash, nfrags, ihl + ahp->icv_trunc_len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists = 1;
+ seqhi_len = sizeof(*seqhi);
+ }
+
+ work_iph = ah_alloc_tmp(ahash, nfrags + sglists, ihl +
+ ahp->icv_trunc_len + seqhi_len);
if (!work_iph)
goto out;
- auth_data = ah_tmp_auth(work_iph, ihl);
+ seqhi = (__be32 *)((char *)work_iph + ihl);
+ auth_data = ah_tmp_auth(seqhi, seqhi_len);
icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
+ seqhisg = sg + nfrags;
memcpy(work_iph, iph, ihl);
memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
@@ -361,10 +387,15 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
skb_push(skb, ihl);
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ sg_init_table(sg, nfrags + sglists);
+ skb_to_sgvec_nomark(skb, sg, 0, skb->len);
- ahash_request_set_crypt(req, sg, icv, skb->len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ /* Attach seqhi sg right after packet payload */
+ *seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
+ sg_set_buf(seqhisg, seqhi, seqhi_len);
+ }
+ ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah_input_done, skb);
AH_SKB_CB(skb)->tmp = work_iph;
@@ -397,7 +428,7 @@ out:
return err;
}
-static void ah4_err(struct sk_buff *skb, u32 info)
+static int ah4_err(struct sk_buff *skb, u32 info)
{
struct net *net = dev_net(skb->dev);
const struct iphdr *iph = (const struct iphdr *)skb->data;
@@ -407,23 +438,25 @@ static void ah4_err(struct sk_buff *skb, u32 info)
switch (icmp_hdr(skb)->type) {
case ICMP_DEST_UNREACH:
if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
- return;
+ return 0;
case ICMP_REDIRECT:
break;
default:
- return;
+ return 0;
}
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
ah->spi, IPPROTO_AH, AF_INET);
if (!x)
- return;
+ return 0;
if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0);
else
ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0);
xfrm_state_put(x);
+
+ return 0;
}
static int ah_init_state(struct xfrm_state *x)
@@ -505,6 +538,10 @@ static void ah_destroy(struct xfrm_state *x)
kfree(ahp);
}
+static int ah4_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
static const struct xfrm_type ah_type =
{
@@ -518,11 +555,12 @@ static const struct xfrm_type ah_type =
.output = ah_output
};
-static const struct net_protocol ah4_protocol = {
+static struct xfrm4_protocol ah4_protocol = {
.handler = xfrm4_rcv,
+ .input_handler = xfrm_input,
+ .cb_handler = ah4_rcv_cb,
.err_handler = ah4_err,
- .no_policy = 1,
- .netns_ok = 1,
+ .priority = 0,
};
static int __init ah4_init(void)
@@ -531,7 +569,7 @@ static int __init ah4_init(void)
pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet_add_protocol(&ah4_protocol, IPPROTO_AH) < 0) {
+ if (xfrm4_protocol_register(&ah4_protocol, IPPROTO_AH) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&ah_type, AF_INET);
return -EAGAIN;
@@ -541,7 +579,7 @@ static int __init ah4_init(void)
static void __exit ah4_fini(void)
{
- if (inet_del_protocol(&ah4_protocol, IPPROTO_AH) < 0)
+ if (xfrm4_protocol_deregister(&ah4_protocol, IPPROTO_AH) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&ah_type, AF_INET) < 0)
pr_info("%s: can't remove xfrm type\n", __func__);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ac2dff3c2c1c..bdbf68bb2e2d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1443,7 +1443,8 @@ static size_t inet_nlmsg_size(void)
+ nla_total_size(4) /* IFA_LOCAL */
+ nla_total_size(4) /* IFA_BROADCAST */
+ nla_total_size(IFNAMSIZ) /* IFA_LABEL */
- + nla_total_size(4); /* IFA_FLAGS */
+ + nla_total_size(4) /* IFA_FLAGS */
+ + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
}
static inline u32 cstamp_delta(unsigned long cstamp)
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 7785b28061ac..360b565918c4 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -473,7 +473,7 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
net_adj) & ~(blksize - 1)) + net_adj - 2;
}
-static void esp4_err(struct sk_buff *skb, u32 info)
+static int esp4_err(struct sk_buff *skb, u32 info)
{
struct net *net = dev_net(skb->dev);
const struct iphdr *iph = (const struct iphdr *)skb->data;
@@ -483,23 +483,25 @@ static void esp4_err(struct sk_buff *skb, u32 info)
switch (icmp_hdr(skb)->type) {
case ICMP_DEST_UNREACH:
if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
- return;
+ return 0;
case ICMP_REDIRECT:
break;
default:
- return;
+ return 0;
}
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
esph->spi, IPPROTO_ESP, AF_INET);
if (!x)
- return;
+ return 0;
if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0);
else
ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0);
xfrm_state_put(x);
+
+ return 0;
}
static void esp_destroy(struct xfrm_state *x)
@@ -672,6 +674,11 @@ error:
return err;
}
+static int esp4_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type esp_type =
{
.description = "ESP4",
@@ -685,11 +692,12 @@ static const struct xfrm_type esp_type =
.output = esp_output
};
-static const struct net_protocol esp4_protocol = {
+static struct xfrm4_protocol esp4_protocol = {
.handler = xfrm4_rcv,
+ .input_handler = xfrm_input,
+ .cb_handler = esp4_rcv_cb,
.err_handler = esp4_err,
- .no_policy = 1,
- .netns_ok = 1,
+ .priority = 0,
};
static int __init esp4_init(void)
@@ -698,7 +706,7 @@ static int __init esp4_init(void)
pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet_add_protocol(&esp4_protocol, IPPROTO_ESP) < 0) {
+ if (xfrm4_protocol_register(&esp4_protocol, IPPROTO_ESP) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&esp_type, AF_INET);
return -EAGAIN;
@@ -708,7 +716,7 @@ static int __init esp4_init(void)
static void __exit esp4_fini(void)
{
- if (inet_del_protocol(&esp4_protocol, IPPROTO_ESP) < 0)
+ if (xfrm4_protocol_deregister(&esp4_protocol, IPPROTO_ESP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&esp_type, AF_INET) < 0)
pr_info("%s: can't remove xfrm type\n", __func__);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index bb075fc9a14f..3b01959bf4bb 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -208,7 +208,7 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
}
work = frag_mem_limit(nf) - nf->low_thresh;
- while (work > 0) {
+ while (work > 0 || force) {
spin_lock(&nf->lru_lock);
if (list_empty(&nf->lru_list)) {
@@ -278,9 +278,10 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
atomic_inc(&qp->refcnt);
hlist_add_head(&qp->list, &hb->chain);
+ inet_frag_lru_add(nf, qp);
spin_unlock(&hb->chain_lock);
read_unlock(&f->lock);
- inet_frag_lru_add(nf, qp);
+
return qp;
}
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index e9f1217a8afd..be8abe73bb9f 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -39,6 +39,71 @@
#include <net/route.h>
#include <net/xfrm.h>
+static bool ip_may_fragment(const struct sk_buff *skb)
+{
+ return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
+ !skb->local_df;
+}
+
+static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu || skb->local_df)
+ return false;
+
+ if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+ return false;
+
+ return true;
+}
+
+static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb)
+{
+ unsigned int mtu;
+
+ if (skb->local_df || !skb_is_gso(skb))
+ return false;
+
+ mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);
+
+ /* if seglen > mtu, do software segmentation for IP fragmentation on
+ * output. DF bit cannot be set since ip_forward would have sent
+ * icmp error.
+ */
+ return skb_gso_network_seglen(skb) > mtu;
+}
+
+/* called if GSO skb needs to be fragmented on forward */
+static int ip_forward_finish_gso(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ netdev_features_t features;
+ struct sk_buff *segs;
+ int ret = 0;
+
+ features = netif_skb_dev_features(skb, dst->dev);
+ segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+ if (IS_ERR(segs)) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+
+ consume_skb(skb);
+
+ do {
+ struct sk_buff *nskb = segs->next;
+ int err;
+
+ segs->next = NULL;
+ err = dst_output(segs);
+
+ if (err && ret == 0)
+ ret = err;
+ segs = nskb;
+ } while (segs);
+
+ return ret;
+}
+
static int ip_forward_finish(struct sk_buff *skb)
{
struct ip_options *opt = &(IPCB(skb)->opt);
@@ -49,6 +114,9 @@ static int ip_forward_finish(struct sk_buff *skb)
if (unlikely(opt->optlen))
ip_forward_options(skb);
+ if (ip_gso_exceeds_dst_mtu(skb))
+ return ip_forward_finish_gso(skb);
+
return dst_output(skb);
}
@@ -59,6 +127,10 @@ int ip_forward(struct sk_buff *skb)
struct rtable *rt; /* Route we use */
struct ip_options *opt = &(IPCB(skb)->opt);
+ /* that should never happen */
+ if (skb->pkt_type != PACKET_HOST)
+ goto drop;
+
if (skb_warn_if_lro(skb))
goto drop;
@@ -68,9 +140,6 @@ int ip_forward(struct sk_buff *skb)
if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb))
return NET_RX_SUCCESS;
- if (skb->pkt_type != PACKET_HOST)
- goto drop;
-
skb_forward_csum(skb);
/*
@@ -91,8 +160,7 @@ int ip_forward(struct sk_buff *skb)
IPCB(skb)->flags |= IPSKB_FORWARDED;
mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
- if (unlikely(skb->len > mtu && !skb_is_gso(skb) &&
- (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
+ if (!ip_may_fragment(skb) && ip_exceeds_mtu(skb, mtu)) {
IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8971780aec7c..1a0755fea491 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -422,9 +422,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
-#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
- to->nf_trace = from->nf_trace;
-#endif
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
to->ipvs_property = from->ipvs_property;
#endif
@@ -449,7 +446,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
__be16 not_last_frag;
struct rtable *rt = skb_rtable(skb);
int err = 0;
- bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
dev = rt->dst.dev;
@@ -459,7 +455,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
iph = ip_hdr(skb);
- mtu = ip_dst_mtu_maybe_forward(&rt->dst, forwarding);
+ mtu = ip_skb_dst_mtu(skb);
if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) ||
(IPCB(skb)->frag_max_size &&
IPCB(skb)->frag_max_size > mtu))) {
@@ -825,8 +821,7 @@ static int __ip_append_data(struct sock *sk,
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
- maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ?
- mtu : 0xFFFF;
+ maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu;
if (cork->length + length > maxnonfragsize - fragheaderlen) {
ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -1149,8 +1144,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
- maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ?
- mtu : 0xFFFF;
+ maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu;
if (cork->length + size > maxnonfragsize - fragheaderlen) {
ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -1311,8 +1305,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
* to fragment the frame generated here. No matter, what transforms
* how transforms change size of the packet, it will come out.
*/
- if (inet->pmtudisc < IP_PMTUDISC_DO)
- skb->local_df = 1;
+ skb->local_df = ip_sk_local_df(sk);
/* DF bit is set when we want to see DF on outgoing frames.
* If local_df is set too, we still allow to fragment this frame
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 580dd96666e0..64741b938632 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -186,7 +186,8 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
}
EXPORT_SYMBOL(ip_cmsg_recv);
-int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
+int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc,
+ bool allow_ipv6)
{
int err, val;
struct cmsghdr *cmsg;
@@ -194,6 +195,22 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
if (!CMSG_OK(msg, cmsg))
return -EINVAL;
+#if defined(CONFIG_IPV6)
+ if (allow_ipv6 &&
+ cmsg->cmsg_level == SOL_IPV6 &&
+ cmsg->cmsg_type == IPV6_PKTINFO) {
+ struct in6_pktinfo *src_info;
+
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(*src_info)))
+ return -EINVAL;
+ src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
+ if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
+ return -EINVAL;
+ ipc->oif = src_info->ipi6_ifindex;
+ ipc->addr = src_info->ipi6_addr.s6_addr32[3];
+ continue;
+ }
+#endif
if (cmsg->cmsg_level != SOL_IP)
continue;
switch (cmsg->cmsg_type) {
@@ -626,7 +643,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
inet->nodefrag = val ? 1 : 0;
break;
case IP_MTU_DISCOVER:
- if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_INTERFACE)
+ if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
goto e_inval;
inet->pmtudisc = val;
break;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index bd28f386bd02..66aaf506fbef 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -93,83 +93,32 @@ static void tunnel_dst_reset(struct ip_tunnel *t)
tunnel_dst_set(t, NULL);
}
-static void tunnel_dst_reset_all(struct ip_tunnel *t)
+void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
int i;
for_each_possible_cpu(i)
__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
}
+EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
-static struct dst_entry *tunnel_dst_get(struct ip_tunnel *t)
+static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
{
struct dst_entry *dst;
rcu_read_lock();
dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
- if (dst)
+ if (dst) {
+ if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
+ rcu_read_unlock();
+ tunnel_dst_reset(t);
+ return NULL;
+ }
dst_hold(dst);
- rcu_read_unlock();
- return dst;
-}
-
-static struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie)
-{
- struct dst_entry *dst = tunnel_dst_get(t);
-
- if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
- tunnel_dst_reset(t);
- return NULL;
}
-
- return dst;
-}
-
-/* Often modified stats are per cpu, other are shared (netdev->stats) */
-struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *tot)
-{
- int i;
-
- for_each_possible_cpu(i) {
- const struct pcpu_sw_netstats *tstats =
- per_cpu_ptr(dev->tstats, i);
- u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
- unsigned int start;
-
- do {
- start = u64_stats_fetch_begin_bh(&tstats->syncp);
- rx_packets = tstats->rx_packets;
- tx_packets = tstats->tx_packets;
- rx_bytes = tstats->rx_bytes;
- tx_bytes = tstats->tx_bytes;
- } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
-
- tot->rx_packets += rx_packets;
- tot->tx_packets += tx_packets;
- tot->rx_bytes += rx_bytes;
- tot->tx_bytes += tx_bytes;
- }
-
- tot->multicast = dev->stats.multicast;
-
- tot->rx_crc_errors = dev->stats.rx_crc_errors;
- tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
- tot->rx_length_errors = dev->stats.rx_length_errors;
- tot->rx_frame_errors = dev->stats.rx_frame_errors;
- tot->rx_errors = dev->stats.rx_errors;
-
- tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
- tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
- tot->tx_dropped = dev->stats.tx_dropped;
- tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
- tot->tx_errors = dev->stats.tx_errors;
-
- tot->collisions = dev->stats.collisions;
-
- return tot;
+ rcu_read_unlock();
+ return (struct rtable *)dst;
}
-EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
__be16 flags, __be32 key)
@@ -286,13 +235,17 @@ static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
{
unsigned int h;
__be32 remote;
+ __be32 i_key = parms->i_key;
if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
remote = parms->iph.daddr;
else
remote = 0;
- h = ip_tunnel_hash(parms->i_key, remote);
+ if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
+ i_key = 0;
+
+ h = ip_tunnel_hash(i_key, remote);
return &itn->tunnels[h];
}
@@ -449,7 +402,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
fbt = netdev_priv(itn->fb_tunnel_dev);
dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
if (IS_ERR(dev))
- return NULL;
+ return ERR_CAST(dev);
dev->mtu = ip_tunnel_bind_dev(dev);
@@ -584,7 +537,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
struct flowi4 fl4;
u8 tos, ttl;
__be16 df;
- struct rtable *rt = NULL; /* Route to the other host */
+ struct rtable *rt; /* Route to the other host */
unsigned int max_headroom; /* The extra header space needed */
__be32 dst;
int err;
@@ -657,8 +610,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
- if (connected)
- rt = (struct rtable *)tunnel_dst_check(tunnel, 0);
+ rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;
if (!rt) {
rt = ip_route_output_key(tunnel->net, &fl4);
@@ -766,7 +718,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
if (set_mtu)
dev->mtu = mtu;
}
- tunnel_dst_reset_all(t);
+ ip_tunnel_dst_reset_all(t);
netdev_state_change(dev);
}
@@ -803,9 +755,13 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
- if (!t && (cmd == SIOCADDTUNNEL))
+ if (!t && (cmd == SIOCADDTUNNEL)) {
t = ip_tunnel_create(net, itn, p);
-
+ if (IS_ERR(t)) {
+ err = PTR_ERR(t);
+ break;
+ }
+ }
if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
if (t != NULL) {
if (t->dev != dev) {
@@ -832,8 +788,9 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
if (t) {
err = 0;
ip_tunnel_update(itn, t, dev, p, true);
- } else
- err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+ } else {
+ err = -ENOENT;
+ }
break;
case SIOCDELTUNNEL:
@@ -1048,19 +1005,13 @@ int ip_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct iphdr *iph = &tunnel->parms.iph;
- int i, err;
+ int err;
dev->destructor = ip_tunnel_dev_free;
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *ipt_stats;
- ipt_stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&ipt_stats->syncp);
- }
-
tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
if (!tunnel->dst_cache) {
free_percpu(dev->tstats);
@@ -1095,7 +1046,7 @@ void ip_tunnel_uninit(struct net_device *dev)
if (itn->fb_tunnel_dev != dev)
ip_tunnel_del(netdev_priv(dev));
- tunnel_dst_reset_all(tunnel);
+ ip_tunnel_dst_reset_all(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 6156f4ef5e91..b86f0a37fa7c 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -108,7 +108,6 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
nf_reset(skb);
secpath_reset(skb);
skb_clear_hash_if_not_l4(skb);
- skb_dst_drop(skb);
skb->vlan_tci = 0;
skb_set_queue_mapping(skb, 0);
skb->pkt_type = PACKET_HOST;
@@ -148,3 +147,49 @@ error:
return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(iptunnel_handle_offloads);
+
+/* Often modified stats are per cpu, other are shared (netdev->stats) */
+struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *tot)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ const struct pcpu_sw_netstats *tstats =
+ per_cpu_ptr(dev->tstats, i);
+ u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&tstats->syncp);
+ rx_packets = tstats->rx_packets;
+ tx_packets = tstats->tx_packets;
+ rx_bytes = tstats->rx_bytes;
+ tx_bytes = tstats->tx_bytes;
+ } while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
+
+ tot->rx_packets += rx_packets;
+ tot->tx_packets += tx_packets;
+ tot->rx_bytes += rx_bytes;
+ tot->tx_bytes += tx_bytes;
+ }
+
+ tot->multicast = dev->stats.multicast;
+
+ tot->rx_crc_errors = dev->stats.rx_crc_errors;
+ tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
+ tot->rx_length_errors = dev->stats.rx_length_errors;
+ tot->rx_frame_errors = dev->stats.rx_frame_errors;
+ tot->rx_errors = dev->stats.rx_errors;
+
+ tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
+ tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
+ tot->tx_dropped = dev->stats.tx_dropped;
+ tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
+ tot->tx_errors = dev->stats.tx_errors;
+
+ tot->collisions = dev->stats.collisions;
+
+ return tot;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 48eafae51769..687ddef4e574 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -34,6 +34,7 @@
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>
+#include <linux/icmpv6.h>
#include <net/sock.h>
#include <net/ip.h>
@@ -49,8 +50,8 @@ static struct rtnl_link_ops vti_link_ops __read_mostly;
static int vti_net_id __read_mostly;
static int vti_tunnel_init(struct net_device *dev);
-/* We dont digest the packet therefore let the packet pass */
-static int vti_rcv(struct sk_buff *skb)
+static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
+ int encap_type)
{
struct ip_tunnel *tunnel;
const struct iphdr *iph = ip_hdr(skb);
@@ -60,79 +61,120 @@ static int vti_rcv(struct sk_buff *skb)
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
iph->saddr, iph->daddr, 0);
if (tunnel != NULL) {
- struct pcpu_sw_netstats *tstats;
- u32 oldmark = skb->mark;
- int ret;
-
-
- /* temporarily mark the skb with the tunnel o_key, to
- * only match policies with this mark.
- */
- skb->mark = be32_to_cpu(tunnel->parms.o_key);
- ret = xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb);
- skb->mark = oldmark;
- if (!ret)
- return -1;
-
- tstats = this_cpu_ptr(tunnel->dev->tstats);
- u64_stats_update_begin(&tstats->syncp);
- tstats->rx_packets++;
- tstats->rx_bytes += skb->len;
- u64_stats_update_end(&tstats->syncp);
-
- secpath_reset(skb);
- skb->dev = tunnel->dev;
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto drop;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
+ skb->mark = be32_to_cpu(tunnel->parms.i_key);
+
+ return xfrm_input(skb, nexthdr, spi, encap_type);
+ }
+
+ return -EINVAL;
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int vti_rcv(struct sk_buff *skb)
+{
+ XFRM_SPI_SKB_CB(skb)->family = AF_INET;
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+
+ return vti_input(skb, ip_hdr(skb)->protocol, 0, 0);
+}
+
+static int vti_rcv_cb(struct sk_buff *skb, int err)
+{
+ unsigned short family;
+ struct net_device *dev;
+ struct pcpu_sw_netstats *tstats;
+ struct xfrm_state *x;
+ struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4;
+
+ if (!tunnel)
return 1;
+
+ dev = tunnel->dev;
+
+ if (err) {
+ dev->stats.rx_errors++;
+ dev->stats.rx_dropped++;
+
+ return 0;
}
- return -1;
+ x = xfrm_input_state(skb);
+ family = x->inner_mode->afinfo->family;
+
+ if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+ return -EPERM;
+
+ skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(skb->dev)));
+ skb->dev = dev;
+
+ tstats = this_cpu_ptr(dev->tstats);
+
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
+
+ return 0;
}
-/* This function assumes it is being called from dev_queue_xmit()
- * and that skb is filled properly by that function.
- */
+static bool vti_state_check(const struct xfrm_state *x, __be32 dst, __be32 src)
+{
+ xfrm_address_t *daddr = (xfrm_address_t *)&dst;
+ xfrm_address_t *saddr = (xfrm_address_t *)&src;
-static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+ /* if there is no transform then this tunnel is not functional.
+ * Or if the xfrm is not mode tunnel.
+ */
+ if (!x || x->props.mode != XFRM_MODE_TUNNEL ||
+ x->props.family != AF_INET)
+ return false;
+
+ if (!dst)
+ return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET);
+
+ if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET))
+ return false;
+
+ return true;
+}
+
+static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
+ struct flowi *fl)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct iphdr *tiph = &tunnel->parms.iph;
- u8 tos;
- struct rtable *rt; /* Route to the other host */
+ struct ip_tunnel_parm *parms = &tunnel->parms;
+ struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev; /* Device to other host */
- struct iphdr *old_iph = ip_hdr(skb);
- __be32 dst = tiph->daddr;
- struct flowi4 fl4;
int err;
- if (skb->protocol != htons(ETH_P_IP))
- goto tx_error;
-
- tos = old_iph->tos;
+ if (!dst) {
+ dev->stats.tx_carrier_errors++;
+ goto tx_error_icmp;
+ }
- memset(&fl4, 0, sizeof(fl4));
- flowi4_init_output(&fl4, tunnel->parms.link,
- be32_to_cpu(tunnel->parms.o_key), RT_TOS(tos),
- RT_SCOPE_UNIVERSE,
- IPPROTO_IPIP, 0,
- dst, tiph->saddr, 0, 0);
- rt = ip_route_output_key(dev_net(dev), &fl4);
- if (IS_ERR(rt)) {
+ dst_hold(dst);
+ dst = xfrm_lookup(tunnel->net, dst, fl, NULL, 0);
+ if (IS_ERR(dst)) {
dev->stats.tx_carrier_errors++;
goto tx_error_icmp;
}
- /* if there is no transform then this tunnel is not functional.
- * Or if the xfrm is not mode tunnel.
- */
- if (!rt->dst.xfrm ||
- rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) {
+
+ if (!vti_state_check(dst->xfrm, parms->iph.daddr, parms->iph.saddr)) {
dev->stats.tx_carrier_errors++;
- ip_rt_put(rt);
+ dst_release(dst);
goto tx_error_icmp;
}
- tdev = rt->dst.dev;
+
+ tdev = dst->dev;
if (tdev == dev) {
- ip_rt_put(rt);
+ dst_release(dst);
dev->stats.collisions++;
goto tx_error;
}
@@ -146,10 +188,8 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
tunnel->err_count = 0;
}
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
- nf_reset(skb);
+ skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
+ skb_dst_set(skb, dst);
skb->dev = skb_dst(skb)->dev;
err = dst_output(skb);
@@ -166,6 +206,95 @@ tx_error:
return NETDEV_TX_OK;
}
+/* This function assumes it is being called from dev_queue_xmit()
+ * and that skb is filled properly by that function.
+ */
+static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+
+ skb->mark = be32_to_cpu(tunnel->parms.o_key);
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ xfrm_decode_session(skb, &fl, AF_INET);
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ break;
+ case htons(ETH_P_IPV6):
+ xfrm_decode_session(skb, &fl, AF_INET6);
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ break;
+ default:
+ dev->stats.tx_errors++;
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+
+ return vti_xmit(skb, dev, &fl);
+}
+
+static int vti4_err(struct sk_buff *skb, u32 info)
+{
+ __be32 spi;
+ struct xfrm_state *x;
+ struct ip_tunnel *tunnel;
+ struct ip_esp_hdr *esph;
+ struct ip_auth_hdr *ah ;
+ struct ip_comp_hdr *ipch;
+ struct net *net = dev_net(skb->dev);
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
+ int protocol = iph->protocol;
+ struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
+
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+ iph->daddr, iph->saddr, 0);
+ if (!tunnel)
+ return -1;
+
+ switch (protocol) {
+ case IPPROTO_ESP:
+ esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
+ spi = esph->spi;
+ break;
+ case IPPROTO_AH:
+ ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
+ spi = ah->spi;
+ break;
+ case IPPROTO_COMP:
+ ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
+ spi = htonl(ntohs(ipch->cpi));
+ break;
+ default:
+ return 0;
+ }
+
+ switch (icmp_hdr(skb)->type) {
+ case ICMP_DEST_UNREACH:
+ if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+ return 0;
+ case ICMP_REDIRECT:
+ break;
+ default:
+ return 0;
+ }
+
+ x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+ spi, protocol, AF_INET);
+ if (!x)
+ return 0;
+
+ if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+ ipv4_update_pmtu(skb, net, info, 0, 0, protocol, 0);
+ else
+ ipv4_redirect(skb, net, 0, 0, protocol, 0);
+ xfrm_state_put(x);
+
+ return 0;
+}
+
static int
vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
@@ -181,12 +310,13 @@ vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return -EINVAL;
}
+ p.i_flags |= VTI_ISVTI;
err = ip_tunnel_ioctl(dev, &p, cmd);
if (err)
return err;
if (cmd != SIOCDELTUNNEL) {
- p.i_flags |= GRE_KEY | VTI_ISVTI;
+ p.i_flags |= GRE_KEY;
p.o_flags |= GRE_KEY;
}
@@ -224,7 +354,6 @@ static int vti_tunnel_init(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->iflink = 0;
dev->addr_len = 4;
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->features |= NETIF_F_LLTX;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
@@ -241,9 +370,28 @@ static void __net_init vti_fb_tunnel_init(struct net_device *dev)
iph->ihl = 5;
}
-static struct xfrm_tunnel_notifier vti_handler __read_mostly = {
+static struct xfrm4_protocol vti_esp4_protocol __read_mostly = {
.handler = vti_rcv,
- .priority = 1,
+ .input_handler = vti_input,
+ .cb_handler = vti_rcv_cb,
+ .err_handler = vti4_err,
+ .priority = 100,
+};
+
+static struct xfrm4_protocol vti_ah4_protocol __read_mostly = {
+ .handler = vti_rcv,
+ .input_handler = vti_input,
+ .cb_handler = vti_rcv_cb,
+ .err_handler = vti4_err,
+ .priority = 100,
+};
+
+static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = {
+ .handler = vti_rcv,
+ .input_handler = vti_input,
+ .cb_handler = vti_rcv_cb,
+ .err_handler = vti4_err,
+ .priority = 100,
};
static int __net_init vti_init_net(struct net *net)
@@ -287,6 +435,8 @@ static void vti_netlink_parms(struct nlattr *data[],
if (!data)
return;
+ parms->i_flags = VTI_ISVTI;
+
if (data[IFLA_VTI_LINK])
parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
@@ -382,10 +532,31 @@ static int __init vti_init(void)
err = register_pernet_device(&vti_net_ops);
if (err < 0)
return err;
- err = xfrm4_mode_tunnel_input_register(&vti_handler);
+ err = xfrm4_protocol_register(&vti_esp4_protocol, IPPROTO_ESP);
+ if (err < 0) {
+ unregister_pernet_device(&vti_net_ops);
+ pr_info("vti init: can't register tunnel\n");
+
+ return err;
+ }
+
+ err = xfrm4_protocol_register(&vti_ah4_protocol, IPPROTO_AH);
+ if (err < 0) {
+ xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
+ unregister_pernet_device(&vti_net_ops);
+ pr_info("vti init: can't register tunnel\n");
+
+ return err;
+ }
+
+ err = xfrm4_protocol_register(&vti_ipcomp4_protocol, IPPROTO_COMP);
if (err < 0) {
+ xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
+ xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
unregister_pernet_device(&vti_net_ops);
pr_info("vti init: can't register tunnel\n");
+
+ return err;
}
err = rtnl_link_register(&vti_link_ops);
@@ -395,7 +566,9 @@ static int __init vti_init(void)
return err;
rtnl_link_failed:
- xfrm4_mode_tunnel_input_deregister(&vti_handler);
+ xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
+ xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
+ xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
unregister_pernet_device(&vti_net_ops);
return err;
}
@@ -403,8 +576,13 @@ rtnl_link_failed:
static void __exit vti_fini(void)
{
rtnl_link_unregister(&vti_link_ops);
- if (xfrm4_mode_tunnel_input_deregister(&vti_handler))
+ if (xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP))
pr_info("vti close: can't deregister tunnel\n");
+ if (xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH))
+ pr_info("vti close: can't deregister tunnel\n");
+ if (xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP))
+ pr_info("vti close: can't deregister tunnel\n");
+
unregister_pernet_device(&vti_net_ops);
}
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 826be4cb482a..c0855d50a3fa 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -23,7 +23,7 @@
#include <net/protocol.h>
#include <net/sock.h>
-static void ipcomp4_err(struct sk_buff *skb, u32 info)
+static int ipcomp4_err(struct sk_buff *skb, u32 info)
{
struct net *net = dev_net(skb->dev);
__be32 spi;
@@ -34,24 +34,26 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
switch (icmp_hdr(skb)->type) {
case ICMP_DEST_UNREACH:
if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
- return;
+ return 0;
case ICMP_REDIRECT:
break;
default:
- return;
+ return 0;
}
spi = htonl(ntohs(ipch->cpi));
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
spi, IPPROTO_COMP, AF_INET);
if (!x)
- return;
+ return 0;
if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0);
else
ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0);
xfrm_state_put(x);
+
+ return 0;
}
/* We always hold one tunnel user reference to indicate a tunnel */
@@ -147,6 +149,11 @@ out:
return err;
}
+static int ipcomp4_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type ipcomp_type = {
.description = "IPCOMP4",
.owner = THIS_MODULE,
@@ -157,11 +164,12 @@ static const struct xfrm_type ipcomp_type = {
.output = ipcomp_output
};
-static const struct net_protocol ipcomp4_protocol = {
+static struct xfrm4_protocol ipcomp4_protocol = {
.handler = xfrm4_rcv,
+ .input_handler = xfrm_input,
+ .cb_handler = ipcomp4_rcv_cb,
.err_handler = ipcomp4_err,
- .no_policy = 1,
- .netns_ok = 1,
+ .priority = 0,
};
static int __init ipcomp4_init(void)
@@ -170,7 +178,7 @@ static int __init ipcomp4_init(void)
pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet_add_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0) {
+ if (xfrm4_protocol_register(&ipcomp4_protocol, IPPROTO_COMP) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&ipcomp_type, AF_INET);
return -EAGAIN;
@@ -180,7 +188,7 @@ static int __init ipcomp4_init(void)
static void __exit ipcomp4_fini(void)
{
- if (inet_del_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0)
+ if (xfrm4_protocol_deregister(&ipcomp4_protocol, IPPROTO_COMP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&ipcomp_type, AF_INET) < 0)
pr_info("%s: can't remove xfrm type\n", __func__);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index efa1138fa523..b3e86ea7b71b 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -273,7 +273,7 @@ static int __init ic_open_devs(void)
msleep(1);
- if time_before(jiffies, next_msg)
+ if (time_before(jiffies, next_msg))
continue;
elapsed = jiffies_to_msecs(jiffies - start);
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index c3e0adea9c27..7ebd6e37875c 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -61,7 +61,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
skb_dst_set(skb, NULL);
dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0);
if (IS_ERR(dst))
- return PTR_ERR(dst);;
+ return PTR_ERR(dst);
skb_dst_set(skb, dst);
}
#endif
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 81c6910cfa92..a26ce035e3fa 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -61,6 +61,11 @@ config NFT_CHAIN_NAT_IPV4
packet transformations such as the source, destination address and
source and destination ports.
+config NFT_REJECT_IPV4
+ depends on NF_TABLES_IPV4
+ default NFT_REJECT
+ tristate
+
config NF_TABLES_ARP
depends on NF_TABLES
tristate "ARP nf_tables support"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index c16be9d58420..90b82405331e 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
+obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
# generic IP tables
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 9eea059dd621..574f7ebba0b6 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -229,7 +229,10 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
ret = nf_ct_expect_related(rtcp_exp);
if (ret == 0)
break;
- else if (ret != -EBUSY) {
+ else if (ret == -EBUSY) {
+ nf_ct_unexpect_related(rtp_exp);
+ continue;
+ } else if (ret < 0) {
nf_ct_unexpect_related(rtp_exp);
nated_port = 0;
break;
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index d551e31b416e..7c676671329d 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1198,8 +1198,8 @@ static int snmp_translate(struct nf_conn *ct,
map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
} else {
/* DNAT replies */
- map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip);
- map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
+ map.from = NOCT1(&ct->tuplehash[!dir].tuple.src.u3.ip);
+ map.to = NOCT1(&ct->tuplehash[dir].tuple.dst.u3.ip);
}
if (map.from == map.to)
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
new file mode 100644
index 000000000000..e79718a382f2
--- /dev/null
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Eric Leblond <eric@regit.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/icmp.h>
+#include <net/netfilter/ipv4/nf_reject.h>
+#include <net/netfilter/nft_reject.h>
+
+void nft_reject_ipv4_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_reject *priv = nft_expr_priv(expr);
+
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ nf_send_unreach(pkt->skb, priv->icmp_code);
+ break;
+ case NFT_REJECT_TCP_RST:
+ nf_send_reset(pkt->skb, pkt->ops->hooknum);
+ break;
+ }
+
+ data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+EXPORT_SYMBOL_GPL(nft_reject_ipv4_eval);
+
+static struct nft_expr_type nft_reject_ipv4_type;
+static const struct nft_expr_ops nft_reject_ipv4_ops = {
+ .type = &nft_reject_ipv4_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+ .eval = nft_reject_ipv4_eval,
+ .init = nft_reject_init,
+ .dump = nft_reject_dump,
+};
+
+static struct nft_expr_type nft_reject_ipv4_type __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .name = "reject",
+ .ops = &nft_reject_ipv4_ops,
+ .policy = nft_reject_policy,
+ .maxattr = NFTA_REJECT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_reject_ipv4_module_init(void)
+{
+ return nft_register_expr(&nft_reject_ipv4_type);
+}
+
+static void __exit nft_reject_ipv4_module_exit(void)
+{
+ nft_unregister_expr(&nft_reject_ipv4_type);
+}
+
+module_init(nft_reject_ipv4_module_init);
+module_exit(nft_reject_ipv4_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject");
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 2d11c094296e..f4b19e5dde54 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -727,7 +727,7 @@ static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
sock_tx_timestamp(sk, &ipc.tx_flags);
if (msg->msg_controllen) {
- err = ip_cmsg_send(sock_net(sk), msg, &ipc);
+ err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);
if (err)
return err;
if (ipc.opt)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index a6c8a80ec9d6..ad737fad6d8b 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -273,6 +273,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK),
SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE),
SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE),
+ SNMP_MIB_ITEM("TCPFastOpenActiveFail", LINUX_MIB_TCPFASTOPENACTIVEFAIL),
SNMP_MIB_ITEM("TCPFastOpenPassive", LINUX_MIB_TCPFASTOPENPASSIVE),
SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL),
SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW),
@@ -280,6 +281,11 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES),
SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS),
SNMP_MIB_ITEM("TCPAutoCorking", LINUX_MIB_TCPAUTOCORKING),
+ SNMP_MIB_ITEM("TCPFromZeroWindowAdv", LINUX_MIB_TCPFROMZEROWINDOWADV),
+ SNMP_MIB_ITEM("TCPToZeroWindowAdv", LINUX_MIB_TCPTOZEROWINDOWADV),
+ SNMP_MIB_ITEM("TCPWantZeroWindowAdv", LINUX_MIB_TCPWANTZEROWINDOWADV),
+ SNMP_MIB_ITEM("TCPSynRetrans", LINUX_MIB_TCPSYNRETRANS),
+ SNMP_MIB_ITEM("TCPOrigDataSent", LINUX_MIB_TCPORIGDATASENT),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index c04518f4850a..a9dbe58bdfe7 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -524,7 +524,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
ipc.oif = sk->sk_bound_dev_if;
if (msg->msg_controllen) {
- err = ip_cmsg_send(sock_net(sk), msg, &ipc);
+ err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);
if (err)
goto out;
if (ipc.opt)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 25071b48921c..11e4384daaf9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -697,7 +697,6 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
out_unlock:
spin_unlock_bh(&fnhe_lock);
- return;
}
static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
@@ -1597,6 +1596,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
+ RT_CACHE_STAT_INC(in_slow_tot);
rth->dst.input = ip_forward;
rth->dst.output = ip_output;
@@ -1695,10 +1695,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.daddr = daddr;
fl4.saddr = saddr;
err = fib_lookup(net, &fl4, &res);
- if (err != 0)
+ if (err != 0) {
+ if (!IN_DEV_FORWARD(in_dev))
+ err = -EHOSTUNREACH;
goto no_route;
-
- RT_CACHE_STAT_INC(in_slow_tot);
+ }
if (res.type == RTN_BROADCAST)
goto brd_input;
@@ -1712,8 +1713,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
goto local_input;
}
- if (!IN_DEV_FORWARD(in_dev))
+ if (!IN_DEV_FORWARD(in_dev)) {
+ err = -EHOSTUNREACH;
goto no_route;
+ }
if (res.type != RTN_UNICAST)
goto martian_destination;
@@ -1768,6 +1771,7 @@ local_input:
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
+ RT_CACHE_STAT_INC(in_slow_tot);
if (res.type == RTN_UNREACHABLE) {
rth->dst.input= ip_error;
rth->dst.error= -err;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4475b3bb494d..4bd6d52eeffb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -387,7 +387,7 @@ void tcp_init_sock(struct sock *sk)
INIT_LIST_HEAD(&tp->tsq_node);
icsk->icsk_rto = TCP_TIMEOUT_INIT;
- tp->mdev = TCP_TIMEOUT_INIT;
+ tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
/* So many TCP implementations out there (incorrectly) count the
* initial SYN frame in their delayed-ACK and congestion control
@@ -1044,7 +1044,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp)
}
}
-static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
+static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
+ int *copied, size_t size)
{
struct tcp_sock *tp = tcp_sk(sk);
int err, flags;
@@ -1059,11 +1060,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
if (unlikely(tp->fastopen_req == NULL))
return -ENOBUFS;
tp->fastopen_req->data = msg;
+ tp->fastopen_req->size = size;
flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
msg->msg_namelen, flags);
- *size = tp->fastopen_req->copied;
+ *copied = tp->fastopen_req->copied;
tcp_free_fastopen_req(tp);
return err;
}
@@ -1083,7 +1085,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
flags = msg->msg_flags;
if (flags & MSG_FASTOPEN) {
- err = tcp_sendmsg_fastopen(sk, msg, &copied_syn);
+ err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
if (err == -EINPROGRESS && copied_syn > 0)
goto out;
else if (err)
@@ -2229,7 +2231,7 @@ adjudge_to_death:
/* This is a (useful) BSD violating of the RFC. There is a
* problem with TCP as specified in that the other end could
* keep a socket open forever with no application left this end.
- * We use a 3 minute timeout (about the same as BSD) then kill
+ * We use a 1 minute timeout (about the same as BSD) then kill
* our end. If they send after that then tough - BUT: long enough
* that we won't make the old 4*rto = almost no time - whoops
* reset mistake.
@@ -2339,7 +2341,7 @@ int tcp_disconnect(struct sock *sk, int flags)
sk->sk_shutdown = 0;
sock_reset_flag(sk, SOCK_DONE);
- tp->srtt = 0;
+ tp->srtt_us = 0;
if ((tp->write_seq += tp->max_window + 2) == 0)
tp->write_seq = 1;
icsk->icsk_backoff = 0;
@@ -2783,8 +2785,8 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
- info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3;
- info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2;
+ info->tcpi_rtt = tp->srtt_us >> 3;
+ info->tcpi_rttvar = tp->mdev_us >> 2;
info->tcpi_snd_ssthresh = tp->snd_ssthresh;
info->tcpi_snd_cwnd = tp->snd_cwnd;
info->tcpi_advmss = tp->advmss;
@@ -2794,6 +2796,11 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
info->tcpi_rcv_space = tp->rcvq_space.space;
info->tcpi_total_retrans = tp->total_retrans;
+
+ info->tcpi_pacing_rate = sk->sk_pacing_rate != ~0U ?
+ sk->sk_pacing_rate : ~0ULL;
+ info->tcpi_max_pacing_rate = sk->sk_max_pacing_rate != ~0U ?
+ sk->sk_max_pacing_rate : ~0ULL;
}
EXPORT_SYMBOL_GPL(tcp_get_info);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index ad37bf18ae4b..2b9464c93b88 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -290,8 +290,7 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
left = tp->snd_cwnd - in_flight;
if (sk_can_gso(sk) &&
left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
- left * tp->mss_cache < sk->sk_gso_max_size &&
- left < sk->sk_gso_max_segs)
+ left < tp->xmit_size_goal_segs)
return true;
return left <= tcp_max_tso_deferred_mss(tp);
}
@@ -362,21 +361,12 @@ u32 tcp_reno_ssthresh(struct sock *sk)
}
EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
-/* Lower bound on congestion window with halving. */
-u32 tcp_reno_min_cwnd(const struct sock *sk)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
- return tp->snd_ssthresh/2;
-}
-EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
-
struct tcp_congestion_ops tcp_reno = {
.flags = TCP_CONG_NON_RESTRICTED,
.name = "reno",
.owner = THIS_MODULE,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_reno_cong_avoid,
- .min_cwnd = tcp_reno_min_cwnd,
};
/* Initial congestion control used (until SYN)
@@ -388,6 +378,5 @@ struct tcp_congestion_ops tcp_init_congestion_ops = {
.owner = THIS_MODULE,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_reno_cong_avoid,
- .min_cwnd = tcp_reno_min_cwnd,
};
EXPORT_SYMBOL_GPL(tcp_init_congestion_ops);
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 828e4c3ffbaf..8bf224516ba2 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -476,10 +476,6 @@ static int __init cubictcp_register(void)
/* divide by bic_scale and by constant Srtt (100ms) */
do_div(cube_factor, bic_scale * 10);
- /* hystart needs ms clock resolution */
- if (hystart && HZ < 1000)
- cubictcp.flags |= TCP_CONG_RTT_STAMP;
-
return tcp_register_congestion_control(&cubictcp);
}
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 8ed9305dfdf4..8b9e7bad77c0 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -162,7 +162,6 @@ static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
.init = hstcp_init,
.ssthresh = hstcp_ssthresh,
.cong_avoid = hstcp_cong_avoid,
- .min_cwnd = tcp_reno_min_cwnd,
.owner = THIS_MODULE,
.name = "highspeed"
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 478fe82611bf..a15a799bf768 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -21,7 +21,7 @@ struct hybla {
u32 rho2; /* Rho * Rho, integer part */
u32 rho_3ls; /* Rho parameter, <<3 */
u32 rho2_7ls; /* Rho^2, <<7 */
- u32 minrtt; /* Minimum smoothed round trip time value seen */
+ u32 minrtt_us; /* Minimum smoothed round trip time value seen */
};
/* Hybla reference round trip time (default= 1/40 sec = 25 ms), in ms */
@@ -35,7 +35,9 @@ static inline void hybla_recalc_param (struct sock *sk)
{
struct hybla *ca = inet_csk_ca(sk);
- ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8);
+ ca->rho_3ls = max_t(u32,
+ tcp_sk(sk)->srtt_us / (rtt0 * USEC_PER_MSEC),
+ 8U);
ca->rho = ca->rho_3ls >> 3;
ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1;
ca->rho2 = ca->rho2_7ls >> 7;
@@ -59,7 +61,7 @@ static void hybla_init(struct sock *sk)
hybla_recalc_param(sk);
/* set minimum rtt as this is the 1st ever seen */
- ca->minrtt = tp->srtt;
+ ca->minrtt_us = tp->srtt_us;
tp->snd_cwnd = ca->rho;
}
@@ -94,9 +96,9 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked,
int is_slowstart = 0;
/* Recalculate rho only if this srtt is the lowest */
- if (tp->srtt < ca->minrtt){
+ if (tp->srtt_us < ca->minrtt_us) {
hybla_recalc_param(sk);
- ca->minrtt = tp->srtt;
+ ca->minrtt_us = tp->srtt_us;
}
if (!tcp_is_cwnd_limited(sk, in_flight))
@@ -166,7 +168,6 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked,
static struct tcp_congestion_ops tcp_hybla __read_mostly = {
.init = hybla_init,
.ssthresh = tcp_reno_ssthresh,
- .min_cwnd = tcp_reno_min_cwnd,
.cong_avoid = hybla_cong_avoid,
.set_state = hybla_state,
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index e498a62b8f97..863d105e3015 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -325,10 +325,8 @@ static void tcp_illinois_info(struct sock *sk, u32 ext,
}
static struct tcp_congestion_ops tcp_illinois __read_mostly = {
- .flags = TCP_CONG_RTT_STAMP,
.init = tcp_illinois_init,
.ssthresh = tcp_illinois_ssthresh,
- .min_cwnd = tcp_reno_min_cwnd,
.cong_avoid = tcp_illinois_cong_avoid,
.set_state = tcp_illinois_state,
.get_info = tcp_illinois_info,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 65cf90e063d5..e1661f46fd19 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -667,10 +667,11 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
* To save cycles in the RFC 1323 implementation it was better to break
* it up into three procedures. -- erics
*/
-static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
+static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
{
struct tcp_sock *tp = tcp_sk(sk);
- long m = mrtt; /* RTT */
+ long m = mrtt_us; /* RTT */
+ u32 srtt = tp->srtt_us;
/* The following amusing code comes from Jacobson's
* article in SIGCOMM '88. Note that rtt and mdev
@@ -688,14 +689,12 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
* does not matter how to _calculate_ it. Seems, it was trap
* that VJ failed to avoid. 8)
*/
- if (m == 0)
- m = 1;
- if (tp->srtt != 0) {
- m -= (tp->srtt >> 3); /* m is now error in rtt est */
- tp->srtt += m; /* rtt = 7/8 rtt + 1/8 new */
+ if (srtt != 0) {
+ m -= (srtt >> 3); /* m is now error in rtt est */
+ srtt += m; /* rtt = 7/8 rtt + 1/8 new */
if (m < 0) {
m = -m; /* m is now abs(error) */
- m -= (tp->mdev >> 2); /* similar update on mdev */
+ m -= (tp->mdev_us >> 2); /* similar update on mdev */
/* This is similar to one of Eifel findings.
* Eifel blocks mdev updates when rtt decreases.
* This solution is a bit different: we use finer gain
@@ -707,27 +706,29 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
if (m > 0)
m >>= 3;
} else {
- m -= (tp->mdev >> 2); /* similar update on mdev */
+ m -= (tp->mdev_us >> 2); /* similar update on mdev */
}
- tp->mdev += m; /* mdev = 3/4 mdev + 1/4 new */
- if (tp->mdev > tp->mdev_max) {
- tp->mdev_max = tp->mdev;
- if (tp->mdev_max > tp->rttvar)
- tp->rttvar = tp->mdev_max;
+ tp->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */
+ if (tp->mdev_us > tp->mdev_max_us) {
+ tp->mdev_max_us = tp->mdev_us;
+ if (tp->mdev_max_us > tp->rttvar_us)
+ tp->rttvar_us = tp->mdev_max_us;
}
if (after(tp->snd_una, tp->rtt_seq)) {
- if (tp->mdev_max < tp->rttvar)
- tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2;
+ if (tp->mdev_max_us < tp->rttvar_us)
+ tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
tp->rtt_seq = tp->snd_nxt;
- tp->mdev_max = tcp_rto_min(sk);
+ tp->mdev_max_us = tcp_rto_min_us(sk);
}
} else {
/* no previous measure. */
- tp->srtt = m << 3; /* take the measured time to be rtt */
- tp->mdev = m << 1; /* make sure rto = 3*rtt */
- tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
+ srtt = m << 3; /* take the measured time to be rtt */
+ tp->mdev_us = m << 1; /* make sure rto = 3*rtt */
+ tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
+ tp->mdev_max_us = tp->rttvar_us;
tp->rtt_seq = tp->snd_nxt;
}
+ tp->srtt_us = max(1U, srtt);
}
/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
@@ -742,18 +743,12 @@ static void tcp_update_pacing_rate(struct sock *sk)
u64 rate;
/* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
- rate = (u64)tp->mss_cache * 2 * (HZ << 3);
+ rate = (u64)tp->mss_cache * 2 * (USEC_PER_SEC << 3);
rate *= max(tp->snd_cwnd, tp->packets_out);
- /* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3),
- * be conservative and assume srtt = 1 (125 us instead of 1.25 ms)
- * We probably need usec resolution in the future.
- * Note: This also takes care of possible srtt=0 case,
- * when tcp_rtt_estimator() was not yet called.
- */
- if (tp->srtt > 8 + 2)
- do_div(rate, tp->srtt);
+ if (likely(tp->srtt_us))
+ do_div(rate, tp->srtt_us);
/* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate
* without any lock. We want to make sure compiler wont store
@@ -1120,10 +1115,10 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
}
struct tcp_sacktag_state {
- int reord;
- int fack_count;
- int flag;
- s32 rtt; /* RTT measured by SACKing never-retransmitted data */
+ int reord;
+ int fack_count;
+ long rtt_us; /* RTT measured by SACKing never-retransmitted data */
+ int flag;
};
/* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1184,7 +1179,8 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
static u8 tcp_sacktag_one(struct sock *sk,
struct tcp_sacktag_state *state, u8 sacked,
u32 start_seq, u32 end_seq,
- int dup_sack, int pcount, u32 xmit_time)
+ int dup_sack, int pcount,
+ const struct skb_mstamp *xmit_time)
{
struct tcp_sock *tp = tcp_sk(sk);
int fack_count = state->fack_count;
@@ -1225,8 +1221,13 @@ static u8 tcp_sacktag_one(struct sock *sk,
if (!after(end_seq, tp->high_seq))
state->flag |= FLAG_ORIG_SACK_ACKED;
/* Pick the earliest sequence sacked for RTT */
- if (state->rtt < 0)
- state->rtt = tcp_time_stamp - xmit_time;
+ if (state->rtt_us < 0) {
+ struct skb_mstamp now;
+
+ skb_mstamp_get(&now);
+ state->rtt_us = skb_mstamp_us_delta(&now,
+ xmit_time);
+ }
}
if (sacked & TCPCB_LOST) {
@@ -1285,7 +1286,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
*/
tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
start_seq, end_seq, dup_sack, pcount,
- TCP_SKB_CB(skb)->when);
+ &skb->skb_mstamp);
if (skb == tp->lost_skb_hint)
tp->lost_cnt_hint += pcount;
@@ -1563,7 +1564,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
TCP_SKB_CB(skb)->end_seq,
dup_sack,
tcp_skb_pcount(skb),
- TCP_SKB_CB(skb)->when);
+ &skb->skb_mstamp);
if (!before(TCP_SKB_CB(skb)->seq,
tcp_highest_sack_seq(tp)))
@@ -1620,7 +1621,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
static int
tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
- u32 prior_snd_una, s32 *sack_rtt)
+ u32 prior_snd_una, long *sack_rtt_us)
{
struct tcp_sock *tp = tcp_sk(sk);
const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1638,7 +1639,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
state.flag = 0;
state.reord = tp->packets_out;
- state.rtt = -1;
+ state.rtt_us = -1L;
if (!tp->sacked_out) {
if (WARN_ON(tp->fackets_out))
@@ -1822,7 +1823,7 @@ out:
WARN_ON((int)tp->retrans_out < 0);
WARN_ON((int)tcp_packets_in_flight(tp) < 0);
#endif
- *sack_rtt = state.rtt;
+ *sack_rtt_us = state.rtt_us;
return state.flag;
}
@@ -1943,8 +1944,9 @@ void tcp_enter_loss(struct sock *sk, int how)
if (skb == tcp_send_head(sk))
break;
- if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
tp->undo_marker = 0;
+
TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
@@ -2032,10 +2034,12 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
* available, or RTO is scheduled to fire first.
*/
if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
- (flag & FLAG_ECE) || !tp->srtt)
+ (flag & FLAG_ECE) || !tp->srtt_us)
return false;
- delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2));
+ delay = max(usecs_to_jiffies(tp->srtt_us >> 5),
+ msecs_to_jiffies(2));
+
if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
return false;
@@ -2882,7 +2886,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
}
static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
- s32 seq_rtt, s32 sack_rtt)
+ long seq_rtt_us, long sack_rtt_us)
{
const struct tcp_sock *tp = tcp_sk(sk);
@@ -2892,10 +2896,10 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
* is acked (RFC6298).
*/
if (flag & FLAG_RETRANS_DATA_ACKED)
- seq_rtt = -1;
+ seq_rtt_us = -1L;
- if (seq_rtt < 0)
- seq_rtt = sack_rtt;
+ if (seq_rtt_us < 0)
+ seq_rtt_us = sack_rtt_us;
/* RTTM Rule: A TSecr value received in a segment is used to
* update the averaged RTT measurement only if the segment
@@ -2903,14 +2907,14 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
* left edge of the send window.
* See draft-ietf-tcplw-high-performance-00, section 3.3.
*/
- if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+ if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
flag & FLAG_ACKED)
- seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+ seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - tp->rx_opt.rcv_tsecr);
- if (seq_rtt < 0)
+ if (seq_rtt_us < 0)
return false;
- tcp_rtt_estimator(sk, seq_rtt);
+ tcp_rtt_estimator(sk, seq_rtt_us);
tcp_set_rto(sk);
/* RFC6298: only reset backoff on valid RTT measurement. */
@@ -2922,16 +2926,16 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
{
struct tcp_sock *tp = tcp_sk(sk);
- s32 seq_rtt = -1;
+ long seq_rtt_us = -1L;
if (synack_stamp && !tp->total_retrans)
- seq_rtt = tcp_time_stamp - synack_stamp;
+ seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - synack_stamp);
/* If the ACK acks both the SYNACK and the (Fast Open'd) data packets
* sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack()
*/
- if (!tp->srtt)
- tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
+ if (!tp->srtt_us)
+ tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L);
}
static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
@@ -3020,26 +3024,27 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
* arrived at the other end.
*/
static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
- u32 prior_snd_una, s32 sack_rtt)
+ u32 prior_snd_una, long sack_rtt_us)
{
- struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
- struct sk_buff *skb;
- u32 now = tcp_time_stamp;
+ struct skb_mstamp first_ackt, last_ackt, now;
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 prior_sacked = tp->sacked_out;
+ u32 reord = tp->packets_out;
bool fully_acked = true;
- int flag = 0;
+ long ca_seq_rtt_us = -1L;
+ long seq_rtt_us = -1L;
+ struct sk_buff *skb;
u32 pkts_acked = 0;
- u32 reord = tp->packets_out;
- u32 prior_sacked = tp->sacked_out;
- s32 seq_rtt = -1;
- s32 ca_seq_rtt = -1;
- ktime_t last_ackt = net_invalid_timestamp();
bool rtt_update;
+ int flag = 0;
+
+ first_ackt.v64 = 0;
while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
- u32 acked_pcount;
u8 sacked = scb->sacked;
+ u32 acked_pcount;
/* Determine how many packets and what bytes were acked, tso and else */
if (after(scb->end_seq, tp->snd_una)) {
@@ -3061,11 +3066,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tp->retrans_out -= acked_pcount;
flag |= FLAG_RETRANS_DATA_ACKED;
} else {
- ca_seq_rtt = now - scb->when;
- last_ackt = skb->tstamp;
- if (seq_rtt < 0) {
- seq_rtt = ca_seq_rtt;
- }
+ last_ackt = skb->skb_mstamp;
+ WARN_ON_ONCE(last_ackt.v64 == 0);
+ if (!first_ackt.v64)
+ first_ackt = last_ackt;
+
if (!(sacked & TCPCB_SACKED_ACKED))
reord = min(pkts_acked, reord);
if (!after(scb->end_seq, tp->high_seq))
@@ -3111,7 +3116,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
flag |= FLAG_SACK_RENEGING;
- rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt);
+ skb_mstamp_get(&now);
+ if (first_ackt.v64) {
+ seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
+ ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+ }
+
+ rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
if (flag & FLAG_ACKED) {
const struct tcp_congestion_ops *ca_ops
@@ -3139,25 +3150,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tp->fackets_out -= min(pkts_acked, tp->fackets_out);
- if (ca_ops->pkts_acked) {
- s32 rtt_us = -1;
-
- /* Is the ACK triggering packet unambiguous? */
- if (!(flag & FLAG_RETRANS_DATA_ACKED)) {
- /* High resolution needed and available? */
- if (ca_ops->flags & TCP_CONG_RTT_STAMP &&
- !ktime_equal(last_ackt,
- net_invalid_timestamp()))
- rtt_us = ktime_us_delta(ktime_get_real(),
- last_ackt);
- else if (ca_seq_rtt >= 0)
- rtt_us = jiffies_to_usecs(ca_seq_rtt);
- }
+ if (ca_ops->pkts_acked)
+ ca_ops->pkts_acked(sk, pkts_acked, ca_seq_rtt_us);
- ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
- }
- } else if (skb && rtt_update && sack_rtt >= 0 &&
- sack_rtt > (s32)(now - TCP_SKB_CB(skb)->when)) {
+ } else if (skb && rtt_update && sack_rtt_us >= 0 &&
+ sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
/* Do not re-arm RTO if the sack RTT is measured from data sent
* after when the head was last (re)transmitted. Otherwise the
* timeout may continue to extend in loss recovery.
@@ -3367,12 +3364,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
bool is_dupack = false;
- u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt;
+ u32 prior_in_flight;
u32 prior_fackets;
int prior_packets = tp->packets_out;
const int prior_unsacked = tp->packets_out - tp->sacked_out;
int acked = 0; /* Number of packets newly acked */
- s32 sack_rtt = -1;
+ long sack_rtt_us = -1L;
/* If the ack is older than previous acks
* then we can probably ignore it.
@@ -3430,7 +3427,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (TCP_SKB_CB(skb)->sacked)
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
- &sack_rtt);
+ &sack_rtt_us);
if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
flag |= FLAG_ECE;
@@ -3449,7 +3446,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
/* See if we can take anything off of the retransmit queue. */
acked = tp->packets_out;
- flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, sack_rtt);
+ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
+ sack_rtt_us);
acked -= tp->packets_out;
/* Advance cwnd if state allows */
@@ -3472,8 +3470,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (icsk->icsk_pending == ICSK_TIME_RETRANS)
tcp_schedule_loss_probe(sk);
- if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd)
- tcp_update_pacing_rate(sk);
+ tcp_update_pacing_rate(sk);
return 1;
no_queue:
@@ -3502,7 +3499,7 @@ old_ack:
*/
if (TCP_SKB_CB(skb)->sacked) {
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
- &sack_rtt);
+ &sack_rtt_us);
tcp_fastretrans_alert(sk, acked, prior_unsacked,
is_dupack, flag);
}
@@ -5398,9 +5395,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
break;
}
tcp_rearm_rto(sk);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL);
return true;
}
tp->syn_data_acked = tp->syn_data;
+ if (tp->syn_data_acked)
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
return false;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3cf976510497..4555244607d0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -435,7 +435,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
break;
icsk->icsk_backoff--;
- inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
+ inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) :
TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
tcp_bound_rto(sk);
@@ -854,8 +854,10 @@ static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
{
int res = tcp_v4_send_synack(sk, NULL, req, 0);
- if (!res)
+ if (!res) {
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+ }
return res;
}
@@ -878,8 +880,6 @@ bool tcp_syn_flood_action(struct sock *sk,
bool want_cookie = false;
struct listen_sock *lopt;
-
-
#ifdef CONFIG_SYN_COOKIES
if (sysctl_tcp_syncookies) {
msg = "Sending cookies";
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 991d62a2f9bb..c9aecae31327 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -315,11 +315,9 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us)
}
static struct tcp_congestion_ops tcp_lp __read_mostly = {
- .flags = TCP_CONG_RTT_STAMP,
.init = tcp_lp_init,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_lp_cong_avoid,
- .min_cwnd = tcp_reno_min_cwnd,
.pkts_acked = tcp_lp_pkts_acked,
.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index d547075d8300..dcaf72f10216 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -33,6 +33,11 @@ struct tcp_fastopen_metrics {
struct tcp_fastopen_cookie cookie;
};
+/* TCP_METRIC_MAX includes 2 extra fields for userspace compatibility
+ * Kernel only stores RTT and RTTVAR in usec resolution
+ */
+#define TCP_METRIC_MAX_KERNEL (TCP_METRIC_MAX - 2)
+
struct tcp_metrics_block {
struct tcp_metrics_block __rcu *tcpm_next;
struct inetpeer_addr tcpm_saddr;
@@ -41,7 +46,7 @@ struct tcp_metrics_block {
u32 tcpm_ts;
u32 tcpm_ts_stamp;
u32 tcpm_lock;
- u32 tcpm_vals[TCP_METRIC_MAX + 1];
+ u32 tcpm_vals[TCP_METRIC_MAX_KERNEL + 1];
struct tcp_fastopen_metrics tcpm_fastopen;
struct rcu_head rcu_head;
@@ -59,12 +64,6 @@ static u32 tcp_metric_get(struct tcp_metrics_block *tm,
return tm->tcpm_vals[idx];
}
-static u32 tcp_metric_get_jiffies(struct tcp_metrics_block *tm,
- enum tcp_metric_index idx)
-{
- return msecs_to_jiffies(tm->tcpm_vals[idx]);
-}
-
static void tcp_metric_set(struct tcp_metrics_block *tm,
enum tcp_metric_index idx,
u32 val)
@@ -72,13 +71,6 @@ static void tcp_metric_set(struct tcp_metrics_block *tm,
tm->tcpm_vals[idx] = val;
}
-static void tcp_metric_set_msecs(struct tcp_metrics_block *tm,
- enum tcp_metric_index idx,
- u32 val)
-{
- tm->tcpm_vals[idx] = jiffies_to_msecs(val);
-}
-
static bool addr_same(const struct inetpeer_addr *a,
const struct inetpeer_addr *b)
{
@@ -101,9 +93,11 @@ struct tcpm_hash_bucket {
static DEFINE_SPINLOCK(tcp_metrics_lock);
-static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst,
+static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+ const struct dst_entry *dst,
bool fastopen_clear)
{
+ u32 msval;
u32 val;
tm->tcpm_stamp = jiffies;
@@ -121,8 +115,11 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst,
val |= 1 << TCP_METRIC_REORDERING;
tm->tcpm_lock = val;
- tm->tcpm_vals[TCP_METRIC_RTT] = dst_metric_raw(dst, RTAX_RTT);
- tm->tcpm_vals[TCP_METRIC_RTTVAR] = dst_metric_raw(dst, RTAX_RTTVAR);
+ msval = dst_metric_raw(dst, RTAX_RTT);
+ tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
+
+ msval = dst_metric_raw(dst, RTAX_RTTVAR);
+ tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;
tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
@@ -384,7 +381,7 @@ void tcp_update_metrics(struct sock *sk)
dst_confirm(dst);
rcu_read_lock();
- if (icsk->icsk_backoff || !tp->srtt) {
+ if (icsk->icsk_backoff || !tp->srtt_us) {
/* This session failed to estimate rtt. Why?
* Probably, no packets returned in time. Reset our
* results.
@@ -399,8 +396,8 @@ void tcp_update_metrics(struct sock *sk)
if (!tm)
goto out_unlock;
- rtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT);
- m = rtt - tp->srtt;
+ rtt = tcp_metric_get(tm, TCP_METRIC_RTT);
+ m = rtt - tp->srtt_us;
/* If newly calculated rtt larger than stored one, store new
* one. Otherwise, use EWMA. Remember, rtt overestimation is
@@ -408,10 +405,10 @@ void tcp_update_metrics(struct sock *sk)
*/
if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) {
if (m <= 0)
- rtt = tp->srtt;
+ rtt = tp->srtt_us;
else
rtt -= (m >> 3);
- tcp_metric_set_msecs(tm, TCP_METRIC_RTT, rtt);
+ tcp_metric_set(tm, TCP_METRIC_RTT, rtt);
}
if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) {
@@ -422,16 +419,16 @@ void tcp_update_metrics(struct sock *sk)
/* Scale deviation to rttvar fixed point */
m >>= 1;
- if (m < tp->mdev)
- m = tp->mdev;
+ if (m < tp->mdev_us)
+ m = tp->mdev_us;
- var = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR);
+ var = tcp_metric_get(tm, TCP_METRIC_RTTVAR);
if (m >= var)
var = m;
else
var -= (var - m) >> 2;
- tcp_metric_set_msecs(tm, TCP_METRIC_RTTVAR, var);
+ tcp_metric_set(tm, TCP_METRIC_RTTVAR, var);
}
if (tcp_in_initial_slowstart(tp)) {
@@ -528,7 +525,7 @@ void tcp_init_metrics(struct sock *sk)
tp->reordering = val;
}
- crtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT);
+ crtt = tcp_metric_get(tm, TCP_METRIC_RTT);
rcu_read_unlock();
reset:
/* The initial RTT measurement from the SYN/SYN-ACK is not ideal
@@ -551,18 +548,20 @@ reset:
* to low value, and then abruptly stops to do it and starts to delay
* ACKs, wait for troubles.
*/
- if (crtt > tp->srtt) {
+ if (crtt > tp->srtt_us) {
/* Set RTO like tcp_rtt_estimator(), but from cached RTT. */
- crtt >>= 3;
+ crtt /= 8 * USEC_PER_MSEC;
inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk));
- } else if (tp->srtt == 0) {
+ } else if (tp->srtt_us == 0) {
/* RFC6298: 5.7 We've failed to get a valid RTT sample from
* 3WHS. This is most likely due to retransmission,
* including spurious one. Reset the RTO back to 3secs
* from the more aggressive 1sec to avoid more spurious
* retransmission.
*/
- tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK;
+ tp->rttvar_us = jiffies_to_usecs(TCP_TIMEOUT_FALLBACK);
+ tp->mdev_us = tp->mdev_max_us = tp->rttvar_us;
+
inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
}
/* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
@@ -809,10 +808,26 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS);
if (!nest)
goto nla_put_failure;
- for (i = 0; i < TCP_METRIC_MAX + 1; i++) {
- if (!tm->tcpm_vals[i])
+ for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
+ u32 val = tm->tcpm_vals[i];
+
+ if (!val)
continue;
- if (nla_put_u32(msg, i + 1, tm->tcpm_vals[i]) < 0)
+ if (i == TCP_METRIC_RTT) {
+ if (nla_put_u32(msg, TCP_METRIC_RTT_US + 1,
+ val) < 0)
+ goto nla_put_failure;
+ n++;
+ val = max(val / 1000, 1U);
+ }
+ if (i == TCP_METRIC_RTTVAR) {
+ if (nla_put_u32(msg, TCP_METRIC_RTTVAR_US + 1,
+ val) < 0)
+ goto nla_put_failure;
+ n++;
+ val = max(val / 1000, 1U);
+ }
+ if (nla_put_u32(msg, i + 1, val) < 0)
goto nla_put_failure;
n++;
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7a436c517e44..ca788ada5bd3 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -398,8 +398,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
tcp_init_wl(newtp, treq->rcv_isn);
- newtp->srtt = 0;
- newtp->mdev = TCP_TIMEOUT_INIT;
+ newtp->srtt_us = 0;
+ newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
newicsk->icsk_rto = TCP_TIMEOUT_INIT;
newtp->packets_out = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 03d26b85eab8..e0a7b33fe953 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -86,6 +86,9 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
tcp_rearm_rto(sk);
}
+
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
+ tcp_skb_pcount(skb));
}
/* SND.NXT, if window was not shrunk.
@@ -269,6 +272,7 @@ EXPORT_SYMBOL(tcp_select_initial_window);
static u16 tcp_select_window(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
+ u32 old_win = tp->rcv_wnd;
u32 cur_win = tcp_receive_window(tp);
u32 new_win = __tcp_select_window(sk);
@@ -281,6 +285,9 @@ static u16 tcp_select_window(struct sock *sk)
*
* Relax Will Robinson.
*/
+ if (new_win == 0)
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPWANTZEROWINDOWADV);
new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
}
tp->rcv_wnd = new_win;
@@ -298,8 +305,14 @@ static u16 tcp_select_window(struct sock *sk)
new_win >>= tp->rx_opt.rcv_wscale;
/* If we advertise zero window, disable fast path. */
- if (new_win == 0)
+ if (new_win == 0) {
tp->pred_flags = 0;
+ if (old_win)
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPTOZEROWINDOWADV);
+ } else if (old_win == 0) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV);
+ }
return new_win;
}
@@ -698,7 +711,8 @@ static void tcp_tsq_handler(struct sock *sk)
if ((1 << sk->sk_state) &
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
- tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
+ tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
+ 0, GFP_ATOMIC);
}
/*
* One tasklet per cpu tries to send more skbs.
@@ -766,6 +780,17 @@ void tcp_release_cb(struct sock *sk)
if (flags & (1UL << TCP_TSQ_DEFERRED))
tcp_tsq_handler(sk);
+ /* Here begins the tricky part :
+ * We are called from release_sock() with :
+ * 1) BH disabled
+ * 2) sk_lock.slock spinlock held
+ * 3) socket owned by us (sk->sk_lock.owned == 1)
+ *
+ * But following code is meant to be called from BH handlers,
+ * so we should keep BH disabled, but early release socket ownership
+ */
+ sock_release_ownership(sk);
+
if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
tcp_write_timer_handler(sk);
__sock_put(sk);
@@ -855,16 +880,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
if (clone_it) {
const struct sk_buff *fclone = skb + 1;
- /* If congestion control is doing timestamping, we must
- * take such a timestamp before we potentially clone/copy.
- */
- if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
- __net_timestamp(skb);
+ skb_mstamp_get(&skb->skb_mstamp);
if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
fclone->fclone == SKB_FCLONE_CLONE))
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
if (unlikely(skb_cloned(skb)))
skb = pskb_copy(skb, gfp_mask);
@@ -872,6 +893,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb = skb_clone(skb, gfp_mask);
if (unlikely(!skb))
return -ENOBUFS;
+ /* Our usage of tstamp should remain private */
+ skb->tstamp.tv64 = 0;
}
inet = inet_sk(sk);
@@ -1904,7 +1927,15 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (atomic_read(&sk->sk_wmem_alloc) > limit) {
set_bit(TSQ_THROTTLED, &tp->tsq_flags);
- break;
+ /* It is possible TX completion already happened
+ * before we set TSQ_THROTTLED, so we must
+ * test again the condition.
+ * We abuse smp_mb__after_clear_bit() because
+ * there is no smp_mb__after_set_bit() yet
+ */
+ smp_mb__after_clear_bit();
+ if (atomic_read(&sk->sk_wmem_alloc) > limit)
+ break;
}
limit = mss_now;
@@ -1955,7 +1986,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
u32 timeout, tlp_time_stamp, rto_time_stamp;
- u32 rtt = tp->srtt >> 3;
+ u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
return false;
@@ -1977,7 +2008,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
/* Schedule a loss probe in 2*RTT for SACK capable connections
* in Open state, that are either limited by cwnd or application.
*/
- if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out ||
+ if (sysctl_tcp_early_retrans < 3 || !tp->srtt_us || !tp->packets_out ||
!tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
return false;
@@ -2062,7 +2093,6 @@ rearm_timer:
if (likely(!err))
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPLOSSPROBES);
- return;
}
/* Push out any pending frames which were held back due to
@@ -2160,7 +2190,8 @@ u32 __tcp_select_window(struct sock *sk)
*/
int mss = icsk->icsk_ack.rcv_mss;
int free_space = tcp_space(sk);
- int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
+ int allowed_space = tcp_full_space(sk);
+ int full_space = min_t(int, tp->window_clamp, allowed_space);
int window;
if (mss > full_space)
@@ -2173,7 +2204,19 @@ u32 __tcp_select_window(struct sock *sk)
tp->rcv_ssthresh = min(tp->rcv_ssthresh,
4U * tp->advmss);
- if (free_space < mss)
+ /* free_space might become our new window, make sure we don't
+ * increase it due to wscale.
+ */
+ free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale);
+
+ /* if free space is less than mss estimate, or is below 1/16th
+ * of the maximum allowed, try to move to zero-window, else
+ * tcp_clamp_window() will grow rcv buf up to tcp_rmem[2], and
+ * new incoming data is dropped due to memory limits.
+ * With large window, mss test triggers way too late in order
+ * to announce zero window in time before rmem limit kicks in.
+ */
+ if (free_space < (allowed_space >> 4) || free_space < mss)
return 0;
}
@@ -2328,6 +2371,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
unsigned int cur_mss;
+ int err;
/* Inconslusive MTU probe */
if (icsk->icsk_mtup.probe_size) {
@@ -2391,11 +2435,15 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
skb_headroom(skb) >= 0xFFFF)) {
struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
GFP_ATOMIC);
- return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
- -ENOBUFS;
+ err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+ -ENOBUFS;
} else {
- return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+ err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
+
+ if (likely(!err))
+ TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+ return err;
}
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
@@ -2406,7 +2454,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
if (err == 0) {
/* Update global TCP statistics. */
TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
-
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
tp->total_retrans++;
#if FASTRETRANS_DEBUG > 0
@@ -2899,7 +2948,12 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
MAX_TCP_OPTION_SPACE;
- syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
+ space = min_t(size_t, space, fo->size);
+
+ /* limit to order-0 allocations */
+ space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
+
+ syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space,
sk->sk_allocation);
if (syn_data == NULL)
goto fallback;
@@ -2929,9 +2983,15 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
tcp_connect_queue_skb(sk, data);
fo->copied = data->len;
+ /* syn_data is about to be sent, we need to take current time stamps
+ * for the packets that are in write queue : SYN packet and DATA
+ */
+ skb_mstamp_get(&syn->skb_mstamp);
+ data->skb_mstamp = syn->skb_mstamp;
+
if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {
tp->syn_data = (fo->copied > 0);
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
goto done;
}
syn_data = NULL;
@@ -3019,8 +3079,9 @@ void tcp_send_delayed_ack(struct sock *sk)
* Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements
* directly.
*/
- if (tp->srtt) {
- int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
+ if (tp->srtt_us) {
+ int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3),
+ TCP_DELACK_MIN);
if (rtt < max_ato)
max_ato = rtt;
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 1f2d37613c9e..3b66610d4156 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -154,7 +154,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
p->snd_wnd = tp->snd_wnd;
p->rcv_wnd = tp->rcv_wnd;
p->ssthresh = tcp_current_ssthresh(sk);
- p->srtt = tp->srtt >> 3;
+ p->srtt = tp->srtt_us >> 3;
tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1);
}
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 19ea6c2951f3..0ac50836da4d 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -39,7 +39,6 @@ static u32 tcp_scalable_ssthresh(struct sock *sk)
static struct tcp_congestion_ops tcp_scalable __read_mostly = {
.ssthresh = tcp_scalable_ssthresh,
.cong_avoid = tcp_scalable_cong_avoid,
- .min_cwnd = tcp_reno_min_cwnd,
.owner = THIS_MODULE,
.name = "scalable",
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 64f0354c84c7..286227abed10 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -165,6 +165,9 @@ static int tcp_write_timeout(struct sock *sk)
dst_negative_advice(sk);
if (tp->syn_fastopen || tp->syn_data)
tcp_fastopen_cache_set(sk, 0, NULL, true);
+ if (tp->syn_data)
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENACTIVEFAIL);
}
retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
syn_set = true;
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 06cae62bf208..48539fff6357 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -306,11 +306,9 @@ void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
static struct tcp_congestion_ops tcp_vegas __read_mostly = {
- .flags = TCP_CONG_RTT_STAMP,
.init = tcp_vegas_init,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_vegas_cong_avoid,
- .min_cwnd = tcp_reno_min_cwnd,
.pkts_acked = tcp_vegas_pkts_acked,
.set_state = tcp_vegas_state,
.cwnd_event = tcp_vegas_cwnd_event,
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 326475a94865..1b8e28fcd7e1 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -203,7 +203,6 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
}
static struct tcp_congestion_ops tcp_veno __read_mostly = {
- .flags = TCP_CONG_RTT_STAMP,
.init = tcp_veno_init,
.ssthresh = tcp_veno_ssthresh,
.cong_avoid = tcp_veno_cong_avoid,
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 76a1e23259e1..b94a04ae2ed5 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -276,7 +276,6 @@ static struct tcp_congestion_ops tcp_westwood __read_mostly = {
.init = tcp_westwood_init,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_reno_cong_avoid,
- .min_cwnd = tcp_westwood_bw_rttmin,
.cwnd_event = tcp_westwood_event,
.get_info = tcp_westwood_info,
.pkts_acked = tcp_westwood_pkts_acked,
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 1a8d271f994d..5ede0e727945 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -227,11 +227,9 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) {
}
static struct tcp_congestion_ops tcp_yeah __read_mostly = {
- .flags = TCP_CONG_RTT_STAMP,
.init = tcp_yeah_init,
.ssthresh = tcp_yeah_ssthresh,
.cong_avoid = tcp_yeah_cong_avoid,
- .min_cwnd = tcp_reno_min_cwnd,
.set_state = tcp_vegas_state,
.cwnd_event = tcp_vegas_cwnd_event,
.get_info = tcp_vegas_get_info,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 77bd16fa9f34..4468e1adc094 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -931,7 +931,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
sock_tx_timestamp(sk, &ipc.tx_flags);
if (msg->msg_controllen) {
- err = ip_cmsg_send(sock_net(sk), msg, &ipc);
+ err = ip_cmsg_send(sock_net(sk), msg, &ipc,
+ sk->sk_family == AF_INET6);
if (err)
return err;
if (ipc.opt)
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 25f5cee3a08a..88b4023ecfcf 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -17,6 +17,8 @@
static DEFINE_SPINLOCK(udp_offload_lock);
static struct udp_offload_priv __rcu *udp_offload_base __read_mostly;
+#define udp_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&udp_offload_lock))
+
struct udp_offload_priv {
struct udp_offload *offload;
struct rcu_head rcu;
@@ -100,8 +102,7 @@ out:
int udp_add_offload(struct udp_offload *uo)
{
- struct udp_offload_priv __rcu **head = &udp_offload_base;
- struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_KERNEL);
+ struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC);
if (!new_offload)
return -ENOMEM;
@@ -109,8 +110,8 @@ int udp_add_offload(struct udp_offload *uo)
new_offload->offload = uo;
spin_lock(&udp_offload_lock);
- rcu_assign_pointer(new_offload->next, rcu_dereference(*head));
- rcu_assign_pointer(*head, new_offload);
+ new_offload->next = udp_offload_base;
+ rcu_assign_pointer(udp_offload_base, new_offload);
spin_unlock(&udp_offload_lock);
return 0;
@@ -130,12 +131,12 @@ void udp_del_offload(struct udp_offload *uo)
spin_lock(&udp_offload_lock);
- uo_priv = rcu_dereference(*head);
+ uo_priv = udp_deref_protected(*head);
for (; uo_priv != NULL;
- uo_priv = rcu_dereference(*head)) {
-
+ uo_priv = udp_deref_protected(*head)) {
if (uo_priv->offload == uo) {
- rcu_assign_pointer(*head, rcu_dereference(uo_priv->next));
+ rcu_assign_pointer(*head,
+ udp_deref_protected(uo_priv->next));
goto unlock;
}
head = &uo_priv->next;
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 1f12c8b45864..aac6197b7a71 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -37,15 +37,6 @@ drop:
return NET_RX_DROP;
}
-int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
- int encap_type)
-{
- XFRM_SPI_SKB_CB(skb)->family = AF_INET;
- XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
- return xfrm_input(skb, nexthdr, spi, encap_type);
-}
-EXPORT_SYMBOL(xfrm4_rcv_encap);
-
int xfrm4_transport_finish(struct sk_buff *skb, int async)
{
struct iphdr *iph = ip_hdr(skb);
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 31b18152528f..05f2b484954f 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -15,65 +15,6 @@
#include <net/ip.h>
#include <net/xfrm.h>
-/* Informational hook. The decap is still done here. */
-static struct xfrm_tunnel_notifier __rcu *rcv_notify_handlers __read_mostly;
-static DEFINE_MUTEX(xfrm4_mode_tunnel_input_mutex);
-
-int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel_notifier *handler)
-{
- struct xfrm_tunnel_notifier __rcu **pprev;
- struct xfrm_tunnel_notifier *t;
- int ret = -EEXIST;
- int priority = handler->priority;
-
- mutex_lock(&xfrm4_mode_tunnel_input_mutex);
-
- for (pprev = &rcv_notify_handlers;
- (t = rcu_dereference_protected(*pprev,
- lockdep_is_held(&xfrm4_mode_tunnel_input_mutex))) != NULL;
- pprev = &t->next) {
- if (t->priority > priority)
- break;
- if (t->priority == priority)
- goto err;
-
- }
-
- handler->next = *pprev;
- rcu_assign_pointer(*pprev, handler);
-
- ret = 0;
-
-err:
- mutex_unlock(&xfrm4_mode_tunnel_input_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(xfrm4_mode_tunnel_input_register);
-
-int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel_notifier *handler)
-{
- struct xfrm_tunnel_notifier __rcu **pprev;
- struct xfrm_tunnel_notifier *t;
- int ret = -ENOENT;
-
- mutex_lock(&xfrm4_mode_tunnel_input_mutex);
- for (pprev = &rcv_notify_handlers;
- (t = rcu_dereference_protected(*pprev,
- lockdep_is_held(&xfrm4_mode_tunnel_input_mutex))) != NULL;
- pprev = &t->next) {
- if (t == handler) {
- *pprev = handler->next;
- ret = 0;
- break;
- }
- }
- mutex_unlock(&xfrm4_mode_tunnel_input_mutex);
- synchronize_net();
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(xfrm4_mode_tunnel_input_deregister);
-
static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
{
struct iphdr *inner_iph = ipip_hdr(skb);
@@ -127,14 +68,8 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
return 0;
}
-#define for_each_input_rcu(head, handler) \
- for (handler = rcu_dereference(head); \
- handler != NULL; \
- handler = rcu_dereference(handler->next))
-
static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
{
- struct xfrm_tunnel_notifier *handler;
int err = -EINVAL;
if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP)
@@ -143,9 +78,6 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto out;
- for_each_input_rcu(rcv_notify_handlers, handler)
- handler->handler(skb);
-
err = skb_unclone(skb, GFP_ATOMIC);
if (err)
goto out;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index e1a63930a967..6156f68a1e90 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -325,6 +325,7 @@ void __init xfrm4_init(void)
xfrm4_state_init();
xfrm4_policy_init();
+ xfrm4_protocol_init();
#ifdef CONFIG_SYSCTL
register_pernet_subsys(&xfrm4_net_ops);
#endif
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
new file mode 100644
index 000000000000..7f7b243e8139
--- /dev/null
+++ b/net/ipv4/xfrm4_protocol.c
@@ -0,0 +1,286 @@
+/* xfrm4_protocol.c - Generic xfrm protocol multiplexer.
+ *
+ * Copyright (C) 2013 secunet Security Networks AG
+ *
+ * Author:
+ * Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * Based on:
+ * net/ipv4/tunnel4.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/skbuff.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+
+static struct xfrm4_protocol __rcu *esp4_handlers __read_mostly;
+static struct xfrm4_protocol __rcu *ah4_handlers __read_mostly;
+static struct xfrm4_protocol __rcu *ipcomp4_handlers __read_mostly;
+static DEFINE_MUTEX(xfrm4_protocol_mutex);
+
+static inline struct xfrm4_protocol __rcu **proto_handlers(u8 protocol)
+{
+ switch (protocol) {
+ case IPPROTO_ESP:
+ return &esp4_handlers;
+ case IPPROTO_AH:
+ return &ah4_handlers;
+ case IPPROTO_COMP:
+ return &ipcomp4_handlers;
+ }
+
+ return NULL;
+}
+
+#define for_each_protocol_rcu(head, handler) \
+ for (handler = rcu_dereference(head); \
+ handler != NULL; \
+ handler = rcu_dereference(handler->next)) \
+
+int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
+{
+ int ret;
+ struct xfrm4_protocol *handler;
+
+ for_each_protocol_rcu(*proto_handlers(protocol), handler)
+ if ((ret = handler->cb_handler(skb, err)) <= 0)
+ return ret;
+
+ return 0;
+}
+EXPORT_SYMBOL(xfrm4_rcv_cb);
+
+int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
+ int encap_type)
+{
+ int ret;
+ struct xfrm4_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
+ XFRM_SPI_SKB_CB(skb)->family = AF_INET;
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+
+ for_each_protocol_rcu(*proto_handlers(nexthdr), handler)
+ if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL)
+ return ret;
+
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+EXPORT_SYMBOL(xfrm4_rcv_encap);
+
+static int xfrm4_esp_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm4_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
+
+ for_each_protocol_rcu(esp4_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;
+
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+
+static void xfrm4_esp_err(struct sk_buff *skb, u32 info)
+{
+ struct xfrm4_protocol *handler;
+
+ for_each_protocol_rcu(esp4_handlers, handler)
+ if (!handler->err_handler(skb, info))
+ break;
+}
+
+static int xfrm4_ah_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm4_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
+
+ for_each_protocol_rcu(ah4_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;;
+
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+
+static void xfrm4_ah_err(struct sk_buff *skb, u32 info)
+{
+ struct xfrm4_protocol *handler;
+
+ for_each_protocol_rcu(ah4_handlers, handler)
+ if (!handler->err_handler(skb, info))
+ break;
+}
+
+static int xfrm4_ipcomp_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm4_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
+
+ for_each_protocol_rcu(ipcomp4_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;
+
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+
+static void xfrm4_ipcomp_err(struct sk_buff *skb, u32 info)
+{
+ struct xfrm4_protocol *handler;
+
+ for_each_protocol_rcu(ipcomp4_handlers, handler)
+ if (!handler->err_handler(skb, info))
+ break;
+}
+
+static const struct net_protocol esp4_protocol = {
+ .handler = xfrm4_esp_rcv,
+ .err_handler = xfrm4_esp_err,
+ .no_policy = 1,
+ .netns_ok = 1,
+};
+
+static const struct net_protocol ah4_protocol = {
+ .handler = xfrm4_ah_rcv,
+ .err_handler = xfrm4_ah_err,
+ .no_policy = 1,
+ .netns_ok = 1,
+};
+
+static const struct net_protocol ipcomp4_protocol = {
+ .handler = xfrm4_ipcomp_rcv,
+ .err_handler = xfrm4_ipcomp_err,
+ .no_policy = 1,
+ .netns_ok = 1,
+};
+
+static struct xfrm_input_afinfo xfrm4_input_afinfo = {
+ .family = AF_INET,
+ .owner = THIS_MODULE,
+ .callback = xfrm4_rcv_cb,
+};
+
+static inline const struct net_protocol *netproto(unsigned char protocol)
+{
+ switch (protocol) {
+ case IPPROTO_ESP:
+ return &esp4_protocol;
+ case IPPROTO_AH:
+ return &ah4_protocol;
+ case IPPROTO_COMP:
+ return &ipcomp4_protocol;
+ }
+
+ return NULL;
+}
+
+int xfrm4_protocol_register(struct xfrm4_protocol *handler,
+ unsigned char protocol)
+{
+ struct xfrm4_protocol __rcu **pprev;
+ struct xfrm4_protocol *t;
+ bool add_netproto = false;
+ int ret = -EEXIST;
+ int priority = handler->priority;
+
+ mutex_lock(&xfrm4_protocol_mutex);
+
+ if (!rcu_dereference_protected(*proto_handlers(protocol),
+ lockdep_is_held(&xfrm4_protocol_mutex)))
+ add_netproto = true;
+
+ for (pprev = proto_handlers(protocol);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&xfrm4_protocol_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t->priority < priority)
+ break;
+ if (t->priority == priority)
+ goto err;
+ }
+
+ handler->next = *pprev;
+ rcu_assign_pointer(*pprev, handler);
+
+ ret = 0;
+
+err:
+ mutex_unlock(&xfrm4_protocol_mutex);
+
+ if (add_netproto) {
+ if (inet_add_protocol(netproto(protocol), protocol)) {
+ pr_err("%s: can't add protocol\n", __func__);
+ ret = -EAGAIN;
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(xfrm4_protocol_register);
+
+int xfrm4_protocol_deregister(struct xfrm4_protocol *handler,
+ unsigned char protocol)
+{
+ struct xfrm4_protocol __rcu **pprev;
+ struct xfrm4_protocol *t;
+ int ret = -ENOENT;
+
+ mutex_lock(&xfrm4_protocol_mutex);
+
+ for (pprev = proto_handlers(protocol);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&xfrm4_protocol_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t == handler) {
+ *pprev = handler->next;
+ ret = 0;
+ break;
+ }
+ }
+
+ if (!rcu_dereference_protected(*proto_handlers(protocol),
+ lockdep_is_held(&xfrm4_protocol_mutex))) {
+ if (inet_del_protocol(netproto(protocol), protocol) < 0) {
+ pr_err("%s: can't remove protocol\n", __func__);
+ ret = -EAGAIN;
+ }
+ }
+
+ mutex_unlock(&xfrm4_protocol_mutex);
+
+ synchronize_net();
+
+ return ret;
+}
+EXPORT_SYMBOL(xfrm4_protocol_deregister);
+
+void __init xfrm4_protocol_init(void)
+{
+ xfrm_input_register_afinfo(&xfrm4_input_afinfo);
+}
+EXPORT_SYMBOL(xfrm4_protocol_init);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index d92e5586783e..438a73aa777c 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -138,6 +138,7 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION
config IPV6_VTI
tristate "Virtual (secure) IPv6: tunneling"
select IPV6_TUNNEL
+ select NET_IP_TUNNEL
depends on INET6_XFRM_MODE_TUNNEL
---help---
Tunneling means encapsulating data of one protocol type within
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 17bb830872db..2fe68364bb20 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -16,7 +16,7 @@ ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o
ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o
ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
- xfrm6_output.o
+ xfrm6_output.o xfrm6_protocol.o
ipv6-$(CONFIG_NETFILTER) += netfilter.o
ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
ipv6-$(CONFIG_PROC_FS) += proc.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ad235690684c..344e972426df 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1103,8 +1103,11 @@ retry:
* Lifetime is greater than REGEN_ADVANCE time units. In particular,
* an implementation must not create a temporary address with a zero
* Preferred Lifetime.
+ * Use age calculation as in addrconf_verify to avoid unnecessary
+ * temporary addresses being generated.
*/
- if (tmp_prefered_lft <= regen_advance) {
+ age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
+ if (tmp_prefered_lft <= regen_advance + age) {
in6_ifa_put(ifp);
in6_dev_put(idev);
ret = -1;
@@ -2783,6 +2786,8 @@ static void addrconf_gre_config(struct net_device *dev)
ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
addrconf_add_linklocal(idev, &addr);
+ else
+ addrconf_prefix_route(&addr, 64, dev, 0, 0);
}
#endif
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index b30ad3741b46..731e1e1722d9 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -6,7 +6,7 @@
*/
/*
* Author:
- * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
+ * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
*/
#include <linux/kernel.h>
@@ -22,14 +22,13 @@
#if 0
#define ADDRLABEL(x...) printk(x)
#else
-#define ADDRLABEL(x...) do { ; } while(0)
+#define ADDRLABEL(x...) do { ; } while (0)
#endif
/*
* Policy Table
*/
-struct ip6addrlbl_entry
-{
+struct ip6addrlbl_entry {
#ifdef CONFIG_NET_NS
struct net *lbl_net;
#endif
@@ -88,39 +87,39 @@ static const __net_initconst struct ip6addrlbl_init_table
{ /* ::/0 */
.prefix = &in6addr_any,
.label = 1,
- },{ /* fc00::/7 */
- .prefix = &(struct in6_addr){{{ 0xfc }}},
+ }, { /* fc00::/7 */
+ .prefix = &(struct in6_addr){ { { 0xfc } } } ,
.prefixlen = 7,
.label = 5,
- },{ /* fec0::/10 */
- .prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}},
+ }, { /* fec0::/10 */
+ .prefix = &(struct in6_addr){ { { 0xfe, 0xc0 } } },
.prefixlen = 10,
.label = 11,
- },{ /* 2002::/16 */
- .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}},
+ }, { /* 2002::/16 */
+ .prefix = &(struct in6_addr){ { { 0x20, 0x02 } } },
.prefixlen = 16,
.label = 2,
- },{ /* 3ffe::/16 */
- .prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}},
+ }, { /* 3ffe::/16 */
+ .prefix = &(struct in6_addr){ { { 0x3f, 0xfe } } },
.prefixlen = 16,
.label = 12,
- },{ /* 2001::/32 */
- .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}},
+ }, { /* 2001::/32 */
+ .prefix = &(struct in6_addr){ { { 0x20, 0x01 } } },
.prefixlen = 32,
.label = 6,
- },{ /* 2001:10::/28 */
- .prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}},
+ }, { /* 2001:10::/28 */
+ .prefix = &(struct in6_addr){ { { 0x20, 0x01, 0x00, 0x10 } } },
.prefixlen = 28,
.label = 7,
- },{ /* ::ffff:0:0 */
- .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}},
+ }, { /* ::ffff:0:0 */
+ .prefix = &(struct in6_addr){ { { [10] = 0xff, [11] = 0xff } } },
.prefixlen = 96,
.label = 4,
- },{ /* ::/96 */
+ }, { /* ::/96 */
.prefix = &in6addr_any,
.prefixlen = 96,
.label = 3,
- },{ /* ::1/128 */
+ }, { /* ::1/128 */
.prefix = &in6addr_loopback,
.prefixlen = 128,
.label = 0,
@@ -441,7 +440,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh)
if (label == IPV6_ADDR_LABEL_DEFAULT)
return -EINVAL;
- switch(nlh->nlmsg_type) {
+ switch (nlh->nlmsg_type) {
case RTM_NEWADDRLABEL:
if (ifal->ifal_index &&
!__dev_get_by_index(net, ifal->ifal_index))
@@ -505,12 +504,13 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
if (idx >= s_idx &&
net_eq(ip6addrlbl_net(p), net)) {
- if ((err = ip6addrlbl_fill(skb, p,
- ip6addrlbl_table.seq,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWADDRLABEL,
- NLM_F_MULTI)) <= 0)
+ err = ip6addrlbl_fill(skb, p,
+ ip6addrlbl_table.seq,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWADDRLABEL,
+ NLM_F_MULTI);
+ if (err <= 0)
break;
}
idx++;
@@ -527,7 +527,7 @@ static inline int ip6addrlbl_msgsize(void)
+ nla_total_size(4); /* IFAL_LABEL */
}
-static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh)
+static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(in_skb->sk);
struct ifaddrlblmsg *ifal;
@@ -568,7 +568,8 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh)
goto out;
}
- if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) {
+ skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL);
+ if (!skb) {
ip6addrlbl_put(p);
return -ENOBUFS;
}
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 81e496a2e008..72a4930bdc0a 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -346,6 +346,10 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
struct ip_auth_hdr *ah;
struct ah_data *ahp;
struct tmp_ext *iph_ext;
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ int sglists = 0;
+ struct scatterlist *seqhisg;
ahp = x->data;
ahash = ahp->ahash;
@@ -359,15 +363,22 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
if (extlen)
extlen += sizeof(*iph_ext);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists = 1;
+ seqhi_len = sizeof(*seqhi);
+ }
err = -ENOMEM;
- iph_base = ah_alloc_tmp(ahash, nfrags, IPV6HDR_BASELEN + extlen);
+ iph_base = ah_alloc_tmp(ahash, nfrags + sglists, IPV6HDR_BASELEN +
+ extlen + seqhi_len);
if (!iph_base)
goto out;
iph_ext = ah_tmp_ext(iph_base);
- icv = ah_tmp_icv(ahash, iph_ext, extlen);
+ seqhi = (__be32 *)((char *)iph_ext + extlen);
+ icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
+ seqhisg = sg + nfrags;
ah = ip_auth_hdr(skb);
memset(ah->auth_data, 0, ahp->icv_trunc_len);
@@ -411,10 +422,15 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
ah->spi = x->id.spi;
ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ sg_init_table(sg, nfrags + sglists);
+ skb_to_sgvec_nomark(skb, sg, 0, skb->len);
- ahash_request_set_crypt(req, sg, icv, skb->len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ /* Attach seqhi sg right after packet payload */
+ *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ sg_set_buf(seqhisg, seqhi, seqhi_len);
+ }
+ ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah6_output_done, skb);
AH_SKB_CB(skb)->tmp = iph_base;
@@ -514,6 +530,10 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
int nexthdr;
int nfrags;
int err = -ENOMEM;
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ int sglists = 0;
+ struct scatterlist *seqhisg;
if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
goto out;
@@ -550,14 +570,22 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
skb_push(skb, hdr_len);
- work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists = 1;
+ seqhi_len = sizeof(*seqhi);
+ }
+
+ work_iph = ah_alloc_tmp(ahash, nfrags + sglists, hdr_len +
+ ahp->icv_trunc_len + seqhi_len);
if (!work_iph)
goto out;
- auth_data = ah_tmp_auth(work_iph, hdr_len);
- icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len);
+ auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len);
+ seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len);
+ icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
+ seqhisg = sg + nfrags;
memcpy(work_iph, ip6h, hdr_len);
memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
@@ -572,10 +600,16 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
ip6h->flow_lbl[2] = 0;
ip6h->hop_limit = 0;
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ sg_init_table(sg, nfrags + sglists);
+ skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+
+ if (x->props.flags & XFRM_STATE_ESN) {
+ /* Attach seqhi sg right after packet payload */
+ *seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
+ sg_set_buf(seqhisg, seqhi, seqhi_len);
+ }
- ahash_request_set_crypt(req, sg, icv, skb->len);
+ ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah6_input_done, skb);
AH_SKB_CB(skb)->tmp = work_iph;
@@ -609,8 +643,8 @@ out:
return err;
}
-static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
+static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
@@ -619,17 +653,19 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
- return;
+ return 0;
x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);
if (!x)
- return;
+ return 0;
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static int ah6_init_state(struct xfrm_state *x)
@@ -714,6 +750,11 @@ static void ah6_destroy(struct xfrm_state *x)
kfree(ahp);
}
+static int ah6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type ah6_type =
{
.description = "AH6",
@@ -727,10 +768,11 @@ static const struct xfrm_type ah6_type =
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol ah6_protocol = {
+static struct xfrm6_protocol ah6_protocol = {
.handler = xfrm6_rcv,
+ .cb_handler = ah6_rcv_cb,
.err_handler = ah6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init ah6_init(void)
@@ -740,7 +782,7 @@ static int __init ah6_init(void)
return -EAGAIN;
}
- if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) {
+ if (xfrm6_protocol_register(&ah6_protocol, IPPROTO_AH) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&ah6_type, AF_INET6);
return -EAGAIN;
@@ -751,7 +793,7 @@ static int __init ah6_init(void)
static void __exit ah6_fini(void)
{
- if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0)
+ if (xfrm6_protocol_deregister(&ah6_protocol, IPPROTO_AH) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0)
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 6eef8a7e35f2..d15da1377149 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -421,8 +421,8 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
net_adj) & ~(blksize - 1)) + net_adj - 2;
}
-static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
+static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
@@ -431,18 +431,20 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
- return;
+ return 0;
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
esph->spi, IPPROTO_ESP, AF_INET6);
if (!x)
- return;
+ return 0;
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static void esp6_destroy(struct xfrm_state *x)
@@ -614,6 +616,11 @@ error:
return err;
}
+static int esp6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type esp6_type =
{
.description = "ESP6",
@@ -628,10 +635,11 @@ static const struct xfrm_type esp6_type =
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol esp6_protocol = {
- .handler = xfrm6_rcv,
+static struct xfrm6_protocol esp6_protocol = {
+ .handler = xfrm6_rcv,
+ .cb_handler = esp6_rcv_cb,
.err_handler = esp6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init esp6_init(void)
@@ -640,7 +648,7 @@ static int __init esp6_init(void)
pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) < 0) {
+ if (xfrm6_protocol_register(&esp6_protocol, IPPROTO_ESP) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&esp6_type, AF_INET6);
return -EAGAIN;
@@ -651,7 +659,7 @@ static int __init esp6_init(void)
static void __exit esp6_fini(void)
{
- if (inet6_del_protocol(&esp6_protocol, IPPROTO_ESP) < 0)
+ if (xfrm6_protocol_deregister(&esp6_protocol, IPPROTO_ESP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0)
pr_info("%s: can't remove xfrm type\n", __func__);
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 140748debc4a..8af3eb57f438 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -212,7 +212,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
found = (nexthdr == target);
if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
- if (target < 0)
+ if (target < 0 || found)
break;
return -ENOENT;
}
diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c
index cf77f3abfd06..447a7fbd1bb6 100644
--- a/net/ipv6/exthdrs_offload.c
+++ b/net/ipv6/exthdrs_offload.c
@@ -25,11 +25,11 @@ int __init ipv6_exthdrs_offload_init(void)
int ret;
ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING);
- if (!ret)
+ if (ret)
goto out;
ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS);
- if (!ret)
+ if (ret)
goto out_rt;
out:
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index f81f59686f21..f2610e157660 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -414,7 +414,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
addr_type = ipv6_addr_type(&hdr->daddr);
if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
- ipv6_anycast_destination(skb))
+ ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
saddr = &hdr->daddr;
/*
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index 72d198b8e4d2..ee7a97f510cb 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -79,7 +79,9 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
/* RFC 2460 section 8.1 says that we SHOULD log
this error. Well, it is reasonable.
*/
- LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
+ LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
+ &ipv6_hdr(skb)->saddr, ntohs(uh->source),
+ &ipv6_hdr(skb)->daddr, ntohs(uh->dest));
return 1;
}
if (skb->ip_summed == CHECKSUM_COMPLETE &&
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index dfa41bb4e0dc..0961b5ef866d 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -15,9 +15,7 @@
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
-#include <linux/if_arp.h>
#include <linux/in6.h>
-#include <linux/route.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
@@ -28,12 +26,8 @@
#include <net/sock.h>
#include <net/ipv6.h>
-#include <net/ndisc.h>
-#include <net/protocol.h>
-#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
-#include <net/icmp.h>
#include <net/transp_v6.h>
#include <asm/uaccess.h>
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index f3ffb43f59c0..c98338b81d30 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1454,7 +1454,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
static int ip6gre_tap_init(struct net_device *dev)
{
struct ip6_tnl *tunnel;
- int i;
tunnel = netdev_priv(dev);
@@ -1464,16 +1463,10 @@ static int ip6gre_tap_init(struct net_device *dev)
ip6gre_tnl_link_config(tunnel, 1);
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *ip6gre_tap_stats;
- ip6gre_tap_stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&ip6gre_tap_stats->syncp);
- }
-
return 0;
}
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 1e8683b135bb..59f95affceb0 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -89,7 +89,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
unsigned int unfrag_ip6hlen;
u8 *prevhdr;
int offset = 0;
- bool tunnel;
+ bool encap, udpfrag;
int nhoff;
if (unlikely(skb_shinfo(skb)->gso_type &
@@ -110,8 +110,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
goto out;
- tunnel = SKB_GSO_CB(skb)->encap_level > 0;
- if (tunnel)
+ encap = SKB_GSO_CB(skb)->encap_level > 0;
+ if (encap)
features = skb->dev->hw_enc_features & netif_skb_features(skb);
SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h);
@@ -121,6 +121,12 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
+ if (skb->encapsulation &&
+ skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP))
+ udpfrag = proto == IPPROTO_UDP && encap;
+ else
+ udpfrag = proto == IPPROTO_UDP && !skb->encapsulation;
+
ops = rcu_dereference(inet6_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment)) {
skb_reset_transport_header(skb);
@@ -133,13 +139,9 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
for (skb = segs; skb; skb = skb->next) {
ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h));
- if (tunnel) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
- }
skb->network_header = (u8 *)ipv6h - skb->head;
- if (!tunnel && proto == IPPROTO_UDP) {
+ if (udpfrag) {
unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen);
fptr->frag_off = htons(offset);
@@ -148,6 +150,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
offset += (ntohs(ipv6h->payload_len) -
sizeof(struct frag_hdr));
}
+ if (encap)
+ skb_reset_inner_headers(skb);
}
out:
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index ef02b26ccf81..90dd551fdd3c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -342,6 +342,20 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
return mtu;
}
+static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu || skb->local_df)
+ return false;
+
+ if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
+ return true;
+
+ if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+ return false;
+
+ return true;
+}
+
int ip6_forward(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -353,6 +367,9 @@ int ip6_forward(struct sk_buff *skb)
if (net->ipv6.devconf_all->forwarding == 0)
goto error;
+ if (skb->pkt_type != PACKET_HOST)
+ goto drop;
+
if (skb_warn_if_lro(skb))
goto drop;
@@ -362,9 +379,6 @@ int ip6_forward(struct sk_buff *skb)
goto drop;
}
- if (skb->pkt_type != PACKET_HOST)
- goto drop;
-
skb_forward_csum(skb);
/*
@@ -466,8 +480,7 @@ int ip6_forward(struct sk_buff *skb)
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
- (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
+ if (ip6_pkt_too_big(skb, mtu)) {
/* Again, force OUTPUT device used as source address */
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -517,9 +530,6 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
-#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
- to->nf_trace = from->nf_trace;
-#endif
skb_copy_secmark(to, from);
}
@@ -1221,8 +1231,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
sizeof(struct frag_hdr) : 0) +
rt->rt6i_nfheader_len;
- maxnonfragsize = (np->pmtudisc >= IPV6_PMTUDISC_DO) ?
- mtu : sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
+ if (ip6_sk_local_df(sk))
+ maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
+ else
+ maxnonfragsize = mtu;
/* dontfrag active */
if ((cork->length + length > mtu - headersize) && dontfrag &&
@@ -1530,8 +1542,7 @@ int ip6_push_pending_frames(struct sock *sk)
}
/* Allow local fragmentation. */
- if (np->pmtudisc < IPV6_PMTUDISC_DO)
- skb->local_df = 1;
+ skb->local_df = ip6_sk_local_df(sk);
*final_dst = fl6->daddr;
__skb_pull(skb, skb_network_header_len(skb));
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5db8d310f9c0..e1df691d78be 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -108,12 +108,12 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
per_cpu_ptr(dev->tstats, i);
do {
- start = u64_stats_fetch_begin_bh(&tstats->syncp);
+ start = u64_stats_fetch_begin_irq(&tstats->syncp);
tmp.rx_packets = tstats->rx_packets;
tmp.rx_bytes = tstats->rx_bytes;
tmp.tx_packets = tstats->tx_packets;
tmp.tx_bytes = tstats->tx_bytes;
- } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
+ } while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
sum.rx_packets += tmp.rx_packets;
sum.rx_bytes += tmp.rx_bytes;
@@ -1502,19 +1502,12 @@ static inline int
ip6_tnl_dev_init_gen(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- int i;
t->dev = dev;
t->net = dev_net(dev);
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
-
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *ip6_tnl_stats;
- ip6_tnl_stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&ip6_tnl_stats->syncp);
- }
return 0;
}
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 2d19272b8cee..b7c0f827140b 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -278,7 +278,6 @@ static void vti6_dev_uninit(struct net_device *dev)
RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
else
vti6_tnl_unlink(ip6n, t);
- ip6_tnl_dst_reset(t);
dev_put(dev);
}
@@ -288,11 +287,8 @@ static int vti6_rcv(struct sk_buff *skb)
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
rcu_read_lock();
-
if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
&ipv6h->daddr)) != NULL) {
- struct pcpu_sw_netstats *tstats;
-
if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) {
rcu_read_unlock();
goto discard;
@@ -309,27 +305,58 @@ static int vti6_rcv(struct sk_buff *skb)
goto discard;
}
- tstats = this_cpu_ptr(t->dev->tstats);
- u64_stats_update_begin(&tstats->syncp);
- tstats->rx_packets++;
- tstats->rx_bytes += skb->len;
- u64_stats_update_end(&tstats->syncp);
-
- skb->mark = 0;
- secpath_reset(skb);
- skb->dev = t->dev;
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
+ skb->mark = be32_to_cpu(t->parms.i_key);
rcu_read_unlock();
- return 0;
+
+ return xfrm6_rcv(skb);
}
rcu_read_unlock();
- return 1;
-
+ return -EINVAL;
discard:
kfree_skb(skb);
return 0;
}
+static int vti6_rcv_cb(struct sk_buff *skb, int err)
+{
+ unsigned short family;
+ struct net_device *dev;
+ struct pcpu_sw_netstats *tstats;
+ struct xfrm_state *x;
+ struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
+
+ if (!t)
+ return 1;
+
+ dev = t->dev;
+
+ if (err) {
+ dev->stats.rx_errors++;
+ dev->stats.rx_dropped++;
+
+ return 0;
+ }
+
+ x = xfrm_input_state(skb);
+ family = x->inner_mode->afinfo->family;
+
+ if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+ return -EPERM;
+
+ skb_scrub_packet(skb, !net_eq(t->net, dev_net(skb->dev)));
+ skb->dev = dev;
+
+ tstats = this_cpu_ptr(dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
+
+ return 0;
+}
+
/**
* vti6_addr_conflict - compare packet addresses to tunnel's own
* @t: the outgoing tunnel device
@@ -349,44 +376,56 @@ vti6_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}
+static bool vti6_state_check(const struct xfrm_state *x,
+ const struct in6_addr *dst,
+ const struct in6_addr *src)
+{
+ xfrm_address_t *daddr = (xfrm_address_t *)dst;
+ xfrm_address_t *saddr = (xfrm_address_t *)src;
+
+ /* if there is no transform then this tunnel is not functional.
+ * Or if the xfrm is not mode tunnel.
+ */
+ if (!x || x->props.mode != XFRM_MODE_TUNNEL ||
+ x->props.family != AF_INET6)
+ return false;
+
+ if (ipv6_addr_any(dst))
+ return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET6);
+
+ if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET6))
+ return false;
+
+ return true;
+}
+
/**
* vti6_xmit - send a packet
* @skb: the outgoing socket buffer
* @dev: the outgoing tunnel device
+ * @fl: the flow informations for the xfrm_lookup
**/
-static int vti6_xmit(struct sk_buff *skb, struct net_device *dev)
+static int
+vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
{
- struct net *net = dev_net(dev);
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->dev->stats;
- struct dst_entry *dst = NULL, *ndst = NULL;
- struct flowi6 fl6;
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev;
int err = -1;
- if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
- !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h))
- return err;
-
- dst = ip6_tnl_dst_check(t);
- if (!dst) {
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-
- ndst = ip6_route_output(net, NULL, &fl6);
+ if (!dst)
+ goto tx_err_link_failure;
- if (ndst->error)
- goto tx_err_link_failure;
- ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(&fl6), NULL, 0);
- if (IS_ERR(ndst)) {
- err = PTR_ERR(ndst);
- ndst = NULL;
- goto tx_err_link_failure;
- }
- dst = ndst;
+ dst_hold(dst);
+ dst = xfrm_lookup(t->net, dst, fl, NULL, 0);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ dst = NULL;
+ goto tx_err_link_failure;
}
- if (!dst->xfrm || dst->xfrm->props.mode != XFRM_MODE_TUNNEL)
+ if (!vti6_state_check(dst->xfrm, &t->parms.raddr, &t->parms.laddr))
goto tx_err_link_failure;
tdev = dst->dev;
@@ -398,14 +437,21 @@ static int vti6_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_err_dst_release;
}
+ skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
+ skb_dst_set(skb, dst);
+ skb->dev = skb_dst(skb)->dev;
- skb_dst_drop(skb);
- skb_dst_set_noref(skb, dst);
+ err = dst_output(skb);
+ if (net_xmit_eval(err) == 0) {
+ struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
- ip6tunnel_xmit(skb, dev);
- if (ndst) {
- dev->mtu = dst_mtu(ndst);
- ip6_tnl_dst_store(t, ndst);
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->tx_bytes += skb->len;
+ tstats->tx_packets++;
+ u64_stats_update_end(&tstats->syncp);
+ } else {
+ stats->tx_errors++;
+ stats->tx_aborted_errors++;
}
return 0;
@@ -413,7 +459,7 @@ tx_err_link_failure:
stats->tx_carrier_errors++;
dst_link_failure(skb);
tx_err_dst_release:
- dst_release(ndst);
+ dst_release(dst);
return err;
}
@@ -422,16 +468,33 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->dev->stats;
+ struct ipv6hdr *ipv6h;
+ struct flowi fl;
int ret;
+ memset(&fl, 0, sizeof(fl));
+ skb->mark = be32_to_cpu(t->parms.o_key);
+
switch (skb->protocol) {
case htons(ETH_P_IPV6):
- ret = vti6_xmit(skb, dev);
+ ipv6h = ipv6_hdr(skb);
+
+ if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
+ !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h))
+ goto tx_err;
+
+ xfrm_decode_session(skb, &fl, AF_INET6);
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ break;
+ case htons(ETH_P_IP):
+ xfrm_decode_session(skb, &fl, AF_INET);
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
break;
default:
goto tx_err;
}
+ ret = vti6_xmit(skb, dev, &fl);
if (ret < 0)
goto tx_err;
@@ -444,24 +507,66 @@ tx_err:
return NETDEV_TX_OK;
}
+static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ __be32 spi;
+ struct xfrm_state *x;
+ struct ip6_tnl *t;
+ struct ip_esp_hdr *esph;
+ struct ip_auth_hdr *ah;
+ struct ip_comp_hdr *ipch;
+ struct net *net = dev_net(skb->dev);
+ const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
+ int protocol = iph->nexthdr;
+
+ t = vti6_tnl_lookup(dev_net(skb->dev), &iph->daddr, &iph->saddr);
+ if (!t)
+ return -1;
+
+ switch (protocol) {
+ case IPPROTO_ESP:
+ esph = (struct ip_esp_hdr *)(skb->data + offset);
+ spi = esph->spi;
+ break;
+ case IPPROTO_AH:
+ ah = (struct ip_auth_hdr *)(skb->data + offset);
+ spi = ah->spi;
+ break;
+ case IPPROTO_COMP:
+ ipch = (struct ip_comp_hdr *)(skb->data + offset);
+ spi = htonl(ntohs(ipch->cpi));
+ break;
+ default:
+ return 0;
+ }
+
+ if (type != ICMPV6_PKT_TOOBIG &&
+ type != NDISC_REDIRECT)
+ return 0;
+
+ x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+ spi, protocol, AF_INET6);
+ if (!x)
+ return 0;
+
+ if (type == NDISC_REDIRECT)
+ ip6_redirect(skb, net, skb->dev->ifindex, 0);
+ else
+ ip6_update_pmtu(skb, net, info, 0, 0);
+ xfrm_state_put(x);
+
+ return 0;
+}
+
static void vti6_link_config(struct ip6_tnl *t)
{
- struct dst_entry *dst;
struct net_device *dev = t->dev;
struct __ip6_tnl_parm *p = &t->parms;
- struct flowi6 *fl6 = &t->fl.u.ip6;
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
- /* Set up flowi template */
- fl6->saddr = p->laddr;
- fl6->daddr = p->raddr;
- fl6->flowi6_oif = p->link;
- fl6->flowi6_mark = be32_to_cpu(p->i_key);
- fl6->flowi6_proto = p->proto;
- fl6->flowlabel = 0;
-
p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV |
IP6_TNL_F_CAP_PER_PACKET);
p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
@@ -472,28 +577,6 @@ static void vti6_link_config(struct ip6_tnl *t)
dev->flags &= ~IFF_POINTOPOINT;
dev->iflink = p->link;
-
- if (p->flags & IP6_TNL_F_CAP_XMIT) {
-
- dst = ip6_route_output(dev_net(dev), NULL, fl6);
- if (dst->error)
- return;
-
- dst = xfrm_lookup(dev_net(dev), dst, flowi6_to_flowi(fl6),
- NULL, 0);
- if (IS_ERR(dst))
- return;
-
- if (dst->dev) {
- dev->hard_header_len = dst->dev->hard_header_len;
-
- dev->mtu = dst_mtu(dst);
-
- if (dev->mtu < IPV6_MIN_MTU)
- dev->mtu = IPV6_MIN_MTU;
- }
- dst_release(dst);
- }
}
/**
@@ -720,7 +803,6 @@ static void vti6_dev_setup(struct net_device *dev)
t = netdev_priv(dev);
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
}
@@ -731,18 +813,12 @@ static void vti6_dev_setup(struct net_device *dev)
static inline int vti6_dev_init_gen(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- int i;
t->dev = dev;
t->net = dev_net(dev);
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *stats;
- stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&stats->syncp);
- }
return 0;
}
@@ -914,11 +990,6 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = {
.fill_info = vti6_fill_info,
};
-static struct xfrm_tunnel_notifier vti6_handler __read_mostly = {
- .handler = vti6_rcv,
- .priority = 1,
-};
-
static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n)
{
int h;
@@ -990,6 +1061,27 @@ static struct pernet_operations vti6_net_ops = {
.size = sizeof(struct vti6_net),
};
+static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
+static struct xfrm6_protocol vti_ah6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
+static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
/**
* vti6_tunnel_init - register protocol and reserve needed resources
*
@@ -1003,11 +1095,33 @@ static int __init vti6_tunnel_init(void)
if (err < 0)
goto out_pernet;
- err = xfrm6_mode_tunnel_input_register(&vti6_handler);
+ err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP);
if (err < 0) {
- pr_err("%s: can't register vti6\n", __func__);
+ unregister_pernet_device(&vti6_net_ops);
+ pr_err("%s: can't register vti6 protocol\n", __func__);
+
goto out;
}
+
+ err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH);
+ if (err < 0) {
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
+ unregister_pernet_device(&vti6_net_ops);
+ pr_err("%s: can't register vti6 protocol\n", __func__);
+
+ goto out;
+ }
+
+ err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP);
+ if (err < 0) {
+ xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
+ unregister_pernet_device(&vti6_net_ops);
+ pr_err("%s: can't register vti6 protocol\n", __func__);
+
+ goto out;
+ }
+
err = rtnl_link_register(&vti6_link_ops);
if (err < 0)
goto rtnl_link_failed;
@@ -1015,7 +1129,9 @@ static int __init vti6_tunnel_init(void)
return 0;
rtnl_link_failed:
- xfrm6_mode_tunnel_input_deregister(&vti6_handler);
+ xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP);
+ xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
out:
unregister_pernet_device(&vti6_net_ops);
out_pernet:
@@ -1028,8 +1144,12 @@ out_pernet:
static void __exit vti6_tunnel_cleanup(void)
{
rtnl_link_unregister(&vti6_link_ops);
- if (xfrm6_mode_tunnel_input_deregister(&vti6_handler))
- pr_info("%s: can't deregister vti6\n", __func__);
+ if (xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP))
+ pr_info("%s: can't deregister protocol\n", __func__);
+ if (xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH))
+ pr_info("%s: can't deregister protocol\n", __func__);
+ if (xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP))
+ pr_info("%s: can't deregister protocol\n", __func__);
unregister_pernet_device(&vti6_net_ops);
}
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index da9becb42e81..d1c793cffcb5 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -53,7 +53,7 @@
#include <linux/icmpv6.h>
#include <linux/mutex.h>
-static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
@@ -65,19 +65,21 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
- return;
+ return 0;
spi = htonl(ntohs(ipcomph->cpi));
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
spi, IPPROTO_COMP, AF_INET6);
if (!x)
- return;
+ return 0;
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
@@ -174,6 +176,11 @@ out:
return err;
}
+static int ipcomp6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type ipcomp6_type =
{
.description = "IPCOMP6",
@@ -186,11 +193,12 @@ static const struct xfrm_type ipcomp6_type =
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol ipcomp6_protocol =
+static struct xfrm6_protocol ipcomp6_protocol =
{
.handler = xfrm6_rcv,
+ .cb_handler = ipcomp6_rcv_cb,
.err_handler = ipcomp6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init ipcomp6_init(void)
@@ -199,7 +207,7 @@ static int __init ipcomp6_init(void)
pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
+ if (xfrm6_protocol_register(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&ipcomp6_type, AF_INET6);
return -EAGAIN;
@@ -209,7 +217,7 @@ static int __init ipcomp6_init(void)
static void __exit ipcomp6_fini(void)
{
- if (inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0)
+ if (xfrm6_protocol_deregister(&ipcomp6_protocol, IPPROTO_COMP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0)
pr_info("%s: can't remove xfrm type\n", __func__);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 0a00f449de5e..edb58aff4ae7 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -722,7 +722,7 @@ done:
case IPV6_MTU_DISCOVER:
if (optlen < sizeof(int))
goto e_inval;
- if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_INTERFACE)
+ if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
goto e_inval;
np->pmtudisc = val;
retv = 0;
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 35750df744dc..4bff1f297e39 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -50,6 +50,11 @@ config NFT_CHAIN_NAT_IPV6
packet transformations such as the source, destination address and
source and destination ports.
+config NFT_REJECT_IPV6
+ depends on NF_TABLES_IPV6
+ default NFT_REJECT
+ tristate
+
config IP6_NF_IPTABLES
tristate "IP6 tables support (required for filtering)"
depends on INET && IPV6
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index d1b4928f34f7..70d3dd66f2cd 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
+obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
new file mode 100644
index 000000000000..0bc19fa87821
--- /dev/null
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Eric Leblond <eric@regit.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_reject.h>
+#include <net/netfilter/ipv6/nf_reject.h>
+
+void nft_reject_ipv6_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_reject *priv = nft_expr_priv(expr);
+ struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
+
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ nf_send_unreach6(net, pkt->skb, priv->icmp_code,
+ pkt->ops->hooknum);
+ break;
+ case NFT_REJECT_TCP_RST:
+ nf_send_reset6(net, pkt->skb, pkt->ops->hooknum);
+ break;
+ }
+
+ data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+EXPORT_SYMBOL_GPL(nft_reject_ipv6_eval);
+
+static struct nft_expr_type nft_reject_ipv6_type;
+static const struct nft_expr_ops nft_reject_ipv6_ops = {
+ .type = &nft_reject_ipv6_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+ .eval = nft_reject_ipv6_eval,
+ .init = nft_reject_init,
+ .dump = nft_reject_dump,
+};
+
+static struct nft_expr_type nft_reject_ipv6_type __read_mostly = {
+ .family = NFPROTO_IPV6,
+ .name = "reject",
+ .ops = &nft_reject_ipv6_ops,
+ .policy = nft_reject_policy,
+ .maxattr = NFTA_REJECT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_reject_ipv6_module_init(void)
+{
+ return nft_register_expr(&nft_reject_ipv6_type);
+}
+
+static void __exit nft_reject_ipv6_module_exit(void)
+{
+ nft_unregister_expr(&nft_reject_ipv6_type);
+}
+
+module_init(nft_reject_ipv6_module_init);
+module_exit(nft_reject_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "reject");
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 827f795209cf..d1b35d377e62 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -13,7 +13,7 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
int old, new;
#if IS_ENABLED(CONFIG_IPV6)
- if (rt && !(rt->dst.flags & DST_NOPEER)) {
+ if (rt) {
struct inet_peer *peer;
struct net *net;
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index fb9beb78f00b..587bbdcb22b4 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -135,6 +135,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
fl6.flowi6_proto = IPPROTO_ICMPV6;
fl6.saddr = np->saddr;
fl6.daddr = *daddr;
+ fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_icmp_type = user_icmph.icmp6_type;
fl6.fl6_icmp_code = user_icmph.icmp6_code;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 11dac21e6586..fba54a407bb2 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1513,7 +1513,7 @@ int ip6_route_add(struct fib6_config *cfg)
if (!table)
goto out;
- rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
+ rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
if (!rt) {
err = -ENOMEM;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 3dfbcf1dcb1c..1693c8d885f0 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -475,6 +475,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
ipip6_tunnel_unlink(sitn, tunnel);
ipip6_tunnel_del_prl(tunnel, NULL);
}
+ ip_tunnel_dst_reset_all(tunnel);
dev_put(dev);
}
@@ -1082,6 +1083,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
t->parms.link = p->link;
ipip6_tunnel_bind_dev(t->dev);
}
+ ip_tunnel_dst_reset_all(t);
netdev_state_change(t->dev);
}
@@ -1112,6 +1114,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
t->ip6rd.relay_prefix = relay_prefix;
t->ip6rd.prefixlen = ip6rd->prefixlen;
t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen;
+ ip_tunnel_dst_reset_all(t);
netdev_state_change(t->dev);
return 0;
}
@@ -1271,6 +1274,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
break;
}
+ ip_tunnel_dst_reset_all(t);
netdev_state_change(dev);
break;
@@ -1326,6 +1330,9 @@ static const struct net_device_ops ipip6_netdev_ops = {
static void ipip6_dev_free(struct net_device *dev)
{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+
+ free_percpu(tunnel->dst_cache);
free_percpu(dev->tstats);
free_netdev(dev);
}
@@ -1356,7 +1363,6 @@ static void ipip6_tunnel_setup(struct net_device *dev)
static int ipip6_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- int i;
tunnel->dev = dev;
tunnel->net = dev_net(dev);
@@ -1365,14 +1371,14 @@ static int ipip6_tunnel_init(struct net_device *dev)
memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
ipip6_tunnel_bind_dev(dev);
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *ipip6_tunnel_stats;
- ipip6_tunnel_stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&ipip6_tunnel_stats->syncp);
+ tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
+ if (!tunnel->dst_cache) {
+ free_percpu(dev->tstats);
+ return -ENOMEM;
}
return 0;
@@ -1384,7 +1390,6 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
struct iphdr *iph = &tunnel->parms.iph;
struct net *net = dev_net(dev);
struct sit_net *sitn = net_generic(net, sit_net_id);
- int i;
tunnel->dev = dev;
tunnel->net = dev_net(dev);
@@ -1395,14 +1400,14 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
iph->ihl = 5;
iph->ttl = 64;
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *ipip6_fb_stats;
- ipip6_fb_stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&ipip6_fb_stats->syncp);
+ tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
+ if (!tunnel->dst_cache) {
+ free_percpu(dev->tstats);
+ return -ENOMEM;
}
dev_hold(dev);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 889079b2ea85..3277680186b4 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -501,8 +501,10 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
int res;
res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0);
- if (!res)
+ if (!res) {
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+ }
return res;
}
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index e7359f9eaa8d..b261ee8b83fc 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -113,7 +113,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
fptr->nexthdr = nexthdr;
fptr->reserved = 0;
- ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
+ fptr->identification = skb_shinfo(skb)->ip6_frag_id;
/* Fragment the skb. ipv6 header and the remaining fields of the
* fragment header are updated in ipv6_gso_segment()
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index cb04f7a16b5e..901ef6f8addc 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -18,65 +18,6 @@
#include <net/ipv6.h>
#include <net/xfrm.h>
-/* Informational hook. The decap is still done here. */
-static struct xfrm_tunnel_notifier __rcu *rcv_notify_handlers __read_mostly;
-static DEFINE_MUTEX(xfrm6_mode_tunnel_input_mutex);
-
-int xfrm6_mode_tunnel_input_register(struct xfrm_tunnel_notifier *handler)
-{
- struct xfrm_tunnel_notifier __rcu **pprev;
- struct xfrm_tunnel_notifier *t;
- int ret = -EEXIST;
- int priority = handler->priority;
-
- mutex_lock(&xfrm6_mode_tunnel_input_mutex);
-
- for (pprev = &rcv_notify_handlers;
- (t = rcu_dereference_protected(*pprev,
- lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL;
- pprev = &t->next) {
- if (t->priority > priority)
- break;
- if (t->priority == priority)
- goto err;
-
- }
-
- handler->next = *pprev;
- rcu_assign_pointer(*pprev, handler);
-
- ret = 0;
-
-err:
- mutex_unlock(&xfrm6_mode_tunnel_input_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_register);
-
-int xfrm6_mode_tunnel_input_deregister(struct xfrm_tunnel_notifier *handler)
-{
- struct xfrm_tunnel_notifier __rcu **pprev;
- struct xfrm_tunnel_notifier *t;
- int ret = -ENOENT;
-
- mutex_lock(&xfrm6_mode_tunnel_input_mutex);
- for (pprev = &rcv_notify_handlers;
- (t = rcu_dereference_protected(*pprev,
- lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL;
- pprev = &t->next) {
- if (t == handler) {
- *pprev = handler->next;
- ret = 0;
- break;
- }
- }
- mutex_unlock(&xfrm6_mode_tunnel_input_mutex);
- synchronize_net();
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_deregister);
-
static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
{
const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
@@ -130,7 +71,6 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
{
- struct xfrm_tunnel_notifier *handler;
int err = -EINVAL;
if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
@@ -138,9 +78,6 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto out;
- for_each_input_rcu(rcv_notify_handlers, handler)
- handler->handler(skb);
-
err = skb_unclone(skb, GFP_ATOMIC);
if (err)
goto out;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 5f8e128c512d..2a0bbda2c76a 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -389,11 +389,17 @@ int __init xfrm6_init(void)
if (ret)
goto out_policy;
+ ret = xfrm6_protocol_init();
+ if (ret)
+ goto out_state;
+
#ifdef CONFIG_SYSCTL
register_pernet_subsys(&xfrm6_net_ops);
#endif
out:
return ret;
+out_state:
+ xfrm6_state_fini();
out_policy:
xfrm6_policy_fini();
goto out;
@@ -404,6 +410,7 @@ void xfrm6_fini(void)
#ifdef CONFIG_SYSCTL
unregister_pernet_subsys(&xfrm6_net_ops);
#endif
+ xfrm6_protocol_fini();
xfrm6_policy_fini();
xfrm6_state_fini();
dst_entries_destroy(&xfrm6_dst_ops);
diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c
new file mode 100644
index 000000000000..6ab989c486f7
--- /dev/null
+++ b/net/ipv6/xfrm6_protocol.c
@@ -0,0 +1,270 @@
+/* xfrm6_protocol.c - Generic xfrm protocol multiplexer for ipv6.
+ *
+ * Copyright (C) 2013 secunet Security Networks AG
+ *
+ * Author:
+ * Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * Based on:
+ * net/ipv4/xfrm4_protocol.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/skbuff.h>
+#include <linux/icmpv6.h>
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+
+static struct xfrm6_protocol __rcu *esp6_handlers __read_mostly;
+static struct xfrm6_protocol __rcu *ah6_handlers __read_mostly;
+static struct xfrm6_protocol __rcu *ipcomp6_handlers __read_mostly;
+static DEFINE_MUTEX(xfrm6_protocol_mutex);
+
+static inline struct xfrm6_protocol __rcu **proto_handlers(u8 protocol)
+{
+ switch (protocol) {
+ case IPPROTO_ESP:
+ return &esp6_handlers;
+ case IPPROTO_AH:
+ return &ah6_handlers;
+ case IPPROTO_COMP:
+ return &ipcomp6_handlers;
+ }
+
+ return NULL;
+}
+
+#define for_each_protocol_rcu(head, handler) \
+ for (handler = rcu_dereference(head); \
+ handler != NULL; \
+ handler = rcu_dereference(handler->next)) \
+
+int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(*proto_handlers(protocol), handler)
+ if ((ret = handler->cb_handler(skb, err)) <= 0)
+ return ret;
+
+ return 0;
+}
+EXPORT_SYMBOL(xfrm6_rcv_cb);
+
+static int xfrm6_esp_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+ for_each_protocol_rcu(esp6_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+
+static void xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(esp6_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ break;
+}
+
+static int xfrm6_ah_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+ for_each_protocol_rcu(ah6_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+
+static void xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(ah6_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ break;
+}
+
+static int xfrm6_ipcomp_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+ for_each_protocol_rcu(ipcomp6_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+
+static void xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(ipcomp6_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ break;
+}
+
+static const struct inet6_protocol esp6_protocol = {
+ .handler = xfrm6_esp_rcv,
+ .err_handler = xfrm6_esp_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+static const struct inet6_protocol ah6_protocol = {
+ .handler = xfrm6_ah_rcv,
+ .err_handler = xfrm6_ah_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+static const struct inet6_protocol ipcomp6_protocol = {
+ .handler = xfrm6_ipcomp_rcv,
+ .err_handler = xfrm6_ipcomp_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+static struct xfrm_input_afinfo xfrm6_input_afinfo = {
+ .family = AF_INET6,
+ .owner = THIS_MODULE,
+ .callback = xfrm6_rcv_cb,
+};
+
+static inline const struct inet6_protocol *netproto(unsigned char protocol)
+{
+ switch (protocol) {
+ case IPPROTO_ESP:
+ return &esp6_protocol;
+ case IPPROTO_AH:
+ return &ah6_protocol;
+ case IPPROTO_COMP:
+ return &ipcomp6_protocol;
+ }
+
+ return NULL;
+}
+
+int xfrm6_protocol_register(struct xfrm6_protocol *handler,
+ unsigned char protocol)
+{
+ struct xfrm6_protocol __rcu **pprev;
+ struct xfrm6_protocol *t;
+ bool add_netproto = false;
+
+ int ret = -EEXIST;
+ int priority = handler->priority;
+
+ mutex_lock(&xfrm6_protocol_mutex);
+
+ if (!rcu_dereference_protected(*proto_handlers(protocol),
+ lockdep_is_held(&xfrm6_protocol_mutex)))
+ add_netproto = true;
+
+ for (pprev = proto_handlers(protocol);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&xfrm6_protocol_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t->priority < priority)
+ break;
+ if (t->priority == priority)
+ goto err;
+ }
+
+ handler->next = *pprev;
+ rcu_assign_pointer(*pprev, handler);
+
+ ret = 0;
+
+err:
+ mutex_unlock(&xfrm6_protocol_mutex);
+
+ if (add_netproto) {
+ if (inet6_add_protocol(netproto(protocol), protocol)) {
+ pr_err("%s: can't add protocol\n", __func__);
+ ret = -EAGAIN;
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(xfrm6_protocol_register);
+
+int xfrm6_protocol_deregister(struct xfrm6_protocol *handler,
+ unsigned char protocol)
+{
+ struct xfrm6_protocol __rcu **pprev;
+ struct xfrm6_protocol *t;
+ int ret = -ENOENT;
+
+ mutex_lock(&xfrm6_protocol_mutex);
+
+ for (pprev = proto_handlers(protocol);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&xfrm6_protocol_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t == handler) {
+ *pprev = handler->next;
+ ret = 0;
+ break;
+ }
+ }
+
+ if (!rcu_dereference_protected(*proto_handlers(protocol),
+ lockdep_is_held(&xfrm6_protocol_mutex))) {
+ if (inet6_del_protocol(netproto(protocol), protocol) < 0) {
+ pr_err("%s: can't remove protocol\n", __func__);
+ ret = -EAGAIN;
+ }
+ }
+
+ mutex_unlock(&xfrm6_protocol_mutex);
+
+ synchronize_net();
+
+ return ret;
+}
+EXPORT_SYMBOL(xfrm6_protocol_deregister);
+
+int __init xfrm6_protocol_init(void)
+{
+ return xfrm_input_register_afinfo(&xfrm6_input_afinfo);
+}
+
+void xfrm6_protocol_fini(void)
+{
+ xfrm_input_unregister_afinfo(&xfrm6_input_afinfo);
+}
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 994e28bfb32e..41e4e93cb3aa 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -52,18 +52,12 @@
#include <net/p8022.h>
#include <net/psnap.h>
#include <net/sock.h>
+#include <net/datalink.h>
#include <net/tcp_states.h>
+#include <net/net_namespace.h>
#include <asm/uaccess.h>
-#ifdef CONFIG_SYSCTL
-extern void ipx_register_sysctl(void);
-extern void ipx_unregister_sysctl(void);
-#else
-#define ipx_register_sysctl()
-#define ipx_unregister_sysctl()
-#endif
-
/* Configuration Variables */
static unsigned char ipxcfg_max_hops = 16;
static char ipxcfg_auto_select_primary;
@@ -84,15 +78,6 @@ DEFINE_SPINLOCK(ipx_interfaces_lock);
struct ipx_interface *ipx_primary_net;
struct ipx_interface *ipx_internal_net;
-extern int ipxrtr_add_route(__be32 network, struct ipx_interface *intrfc,
- unsigned char *node);
-extern void ipxrtr_del_routes(struct ipx_interface *intrfc);
-extern int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
- struct iovec *iov, size_t len, int noblock);
-extern int ipxrtr_route_skb(struct sk_buff *skb);
-extern struct ipx_route *ipxrtr_lookup(__be32 net);
-extern int ipxrtr_ioctl(unsigned int cmd, void __user *arg);
-
struct ipx_interface *ipx_interfaces_head(void)
{
struct ipx_interface *rc = NULL;
@@ -1383,6 +1368,7 @@ static int ipx_release(struct socket *sock)
goto out;
lock_sock(sk);
+ sk->sk_shutdown = SHUTDOWN_MASK;
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_state_change(sk);
@@ -1806,8 +1792,11 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
flags & MSG_DONTWAIT, &rc);
- if (!skb)
+ if (!skb) {
+ if (rc == -EAGAIN && (sk->sk_shutdown & RCV_SHUTDOWN))
+ rc = 0;
goto out;
+ }
ipx = ipx_hdr(skb);
copied = ntohs(ipx->ipx_pktsize) - sizeof(struct ipxhdr);
@@ -1937,6 +1926,26 @@ static int ipx_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long
}
#endif
+static int ipx_shutdown(struct socket *sock, int mode)
+{
+ struct sock *sk = sock->sk;
+
+ if (mode < SHUT_RD || mode > SHUT_RDWR)
+ return -EINVAL;
+ /* This maps:
+ * SHUT_RD (0) -> RCV_SHUTDOWN (1)
+ * SHUT_WR (1) -> SEND_SHUTDOWN (2)
+ * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
+ */
+ ++mode;
+
+ lock_sock(sk);
+ sk->sk_shutdown |= mode;
+ release_sock(sk);
+ sk->sk_state_change(sk);
+
+ return 0;
+}
/*
* Socket family declarations
@@ -1963,7 +1972,7 @@ static const struct proto_ops ipx_dgram_ops = {
.compat_ioctl = ipx_compat_ioctl,
#endif
.listen = sock_no_listen,
- .shutdown = sock_no_shutdown, /* FIXME: support shutdown */
+ .shutdown = ipx_shutdown,
.setsockopt = ipx_setsockopt,
.getsockopt = ipx_getsockopt,
.sendmsg = ipx_sendmsg,
@@ -1986,9 +1995,6 @@ static struct notifier_block ipx_dev_notifier = {
.notifier_call = ipxitf_device_event,
};
-extern struct datalink_proto *make_EII_client(void);
-extern void destroy_EII_client(struct datalink_proto *);
-
static const unsigned char ipx_8022_type = 0xE0;
static const unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 };
static const char ipx_EII_err_msg[] __initconst =
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
index 30f4519b092f..c1f03185c5e1 100644
--- a/net/ipx/ipx_route.c
+++ b/net/ipx/ipx_route.c
@@ -20,15 +20,11 @@ DEFINE_RWLOCK(ipx_routes_lock);
extern struct ipx_interface *ipx_internal_net;
-extern __be16 ipx_cksum(struct ipxhdr *packet, int length);
extern struct ipx_interface *ipxitf_find_using_net(__be32 net);
extern int ipxitf_demux_socket(struct ipx_interface *intrfc,
struct sk_buff *skb, int copy);
extern int ipxitf_demux_socket(struct ipx_interface *intrfc,
struct sk_buff *skb, int copy);
-extern int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb,
- char *node);
-extern struct ipx_interface *ipxitf_find_using_net(__be32 net);
struct ipx_route *ipxrtr_lookup(__be32 net)
{
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index c4b7218058b6..a5e03119107a 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1382,6 +1382,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
if (sk->sk_type == SOCK_STREAM) {
if (copied < rlen) {
IUCV_SKB_CB(skb)->offset = offset + copied;
+ skb_queue_head(&sk->sk_receive_queue, skb);
goto done;
}
}
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 1a04c1329362..12651b42aad8 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -365,6 +365,7 @@ static const u8 sadb_ext_min_len[] = {
[SADB_X_EXT_NAT_T_OA] = (u8) sizeof(struct sadb_address),
[SADB_X_EXT_SEC_CTX] = (u8) sizeof(struct sadb_x_sec_ctx),
[SADB_X_EXT_KMADDRESS] = (u8) sizeof(struct sadb_x_kmaddress),
+ [SADB_X_EXT_FILTER] = (u8) sizeof(struct sadb_x_filter),
};
/* Verify sadb_address_{len,prefixlen} against sa_family. */
@@ -1798,6 +1799,7 @@ static void pfkey_dump_sa_done(struct pfkey_sock *pfk)
static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
{
u8 proto;
+ struct xfrm_address_filter *filter = NULL;
struct pfkey_sock *pfk = pfkey_sk(sk);
if (pfk->dump.dump != NULL)
@@ -1807,11 +1809,27 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
if (proto == 0)
return -EINVAL;
+ if (ext_hdrs[SADB_X_EXT_FILTER - 1]) {
+ struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1];
+
+ filter = kmalloc(sizeof(*filter), GFP_KERNEL);
+ if (filter == NULL)
+ return -ENOMEM;
+
+ memcpy(&filter->saddr, &xfilter->sadb_x_filter_saddr,
+ sizeof(xfrm_address_t));
+ memcpy(&filter->daddr, &xfilter->sadb_x_filter_daddr,
+ sizeof(xfrm_address_t));
+ filter->family = xfilter->sadb_x_filter_family;
+ filter->splen = xfilter->sadb_x_filter_splen;
+ filter->dplen = xfilter->sadb_x_filter_dplen;
+ }
+
pfk->dump.msg_version = hdr->sadb_msg_version;
pfk->dump.msg_portid = hdr->sadb_msg_pid;
pfk->dump.dump = pfkey_dump_sa;
pfk->dump.done = pfkey_dump_sa_done;
- xfrm_state_walk_init(&pfk->dump.u.state, proto);
+ xfrm_state_walk_init(&pfk->dump.u.state, proto, filter);
return pfkey_do_dump(pfk);
}
@@ -3059,6 +3077,24 @@ static u32 get_acqseq(void)
return res;
}
+static bool pfkey_is_alive(const struct km_event *c)
+{
+ struct netns_pfkey *net_pfkey = net_generic(c->net, pfkey_net_id);
+ struct sock *sk;
+ bool is_alive = false;
+
+ rcu_read_lock();
+ sk_for_each_rcu(sk, &net_pfkey->table) {
+ if (pfkey_sk(sk)->registered) {
+ is_alive = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return is_alive;
+}
+
static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp)
{
struct sk_buff *skb;
@@ -3784,6 +3820,7 @@ static struct xfrm_mgr pfkeyv2_mgr =
.new_mapping = pfkey_send_new_mapping,
.notify_policy = pfkey_send_policy_notify,
.migrate = pfkey_send_migrate,
+ .is_alive = pfkey_is_alive,
};
static int __net_init pfkey_net_init(struct net *net)
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 735d0f60c83a..ab48d4192edd 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -112,7 +112,6 @@ struct l2tp_net {
spinlock_t l2tp_session_hlist_lock;
};
-static void l2tp_session_set_header_len(struct l2tp_session *session, int version);
static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
@@ -1131,7 +1130,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
/* Queue the packet to IP for output */
skb->local_df = 1;
#if IS_ENABLED(CONFIG_IPV6)
- if (skb->sk->sk_family == PF_INET6 && !tunnel->v4mapped)
+ if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped)
error = inet6_csk_xmit(skb, NULL);
else
#endif
@@ -1151,23 +1150,6 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
return 0;
}
-/* Automatically called when the skb is freed.
- */
-static void l2tp_sock_wfree(struct sk_buff *skb)
-{
- sock_put(skb->sk);
-}
-
-/* For data skbs that we transmit, we associate with the tunnel socket
- * but don't do accounting.
- */
-static inline void l2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
-{
- sock_hold(sk);
- skb->sk = sk;
- skb->destructor = l2tp_sock_wfree;
-}
-
#if IS_ENABLED(CONFIG_IPV6)
static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb,
int udp_len)
@@ -1221,7 +1203,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
return NET_XMIT_DROP;
}
- skb_orphan(skb);
/* Setup L2TP header */
session->build_header(session, __skb_push(skb, hdr_len));
@@ -1287,8 +1268,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
break;
}
- l2tp_skb_set_owner_w(skb, sk);
-
l2tp_xmit_core(session, skb, fl, data_len);
out_unlock:
bh_unlock_sock(sk);
@@ -1809,8 +1788,6 @@ void l2tp_session_free(struct l2tp_session *session)
}
kfree(session);
-
- return;
}
EXPORT_SYMBOL_GPL(l2tp_session_free);
@@ -1863,7 +1840,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_delete);
/* We come here whenever a session's send_seq, cookie_len or
* l2specific_len parameters are set.
*/
-static void l2tp_session_set_header_len(struct l2tp_session *session, int version)
+void l2tp_session_set_header_len(struct l2tp_session *session, int version)
{
if (version == L2TP_HDR_VER_2) {
session->hdr_len = 6;
@@ -1876,6 +1853,7 @@ static void l2tp_session_set_header_len(struct l2tp_session *session, int versio
}
}
+EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
{
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 1f01ba3435bc..3f93ccd6ba97 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -263,6 +263,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
int length, int (*payload_hook)(struct sk_buff *skb));
int l2tp_session_queue_purge(struct l2tp_session *session);
int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
+void l2tp_session_set_header_len(struct l2tp_session *session, int version);
int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb,
int hdr_len);
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 4cfd722e9153..bd7387adea9e 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -578,8 +578,10 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
if (info->attrs[L2TP_ATTR_RECV_SEQ])
session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
- if (info->attrs[L2TP_ATTR_SEND_SEQ])
+ if (info->attrs[L2TP_ATTR_SEND_SEQ]) {
session->send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]);
+ l2tp_session_set_header_len(session, session->tunnel->version);
+ }
if (info->attrs[L2TP_ATTR_LNS_MODE])
session->lns_mode = nla_get_u8(info->attrs[L2TP_ATTR_LNS_MODE]);
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index be5fadf34739..d276e2d4a589 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -254,12 +254,14 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
po = pppox_sk(sk);
ppp_input(&po->chan, skb);
} else {
- l2tp_info(session, PPPOL2TP_MSG_DATA, "%s: socket not bound\n",
- session->name);
+ l2tp_dbg(session, PPPOL2TP_MSG_DATA,
+ "%s: recv %d byte data frame, passing to L2TP socket\n",
+ session->name, data_len);
- /* Not bound. Nothing we can do, so discard. */
- atomic_long_inc(&session->stats.rx_errors);
- kfree_skb(skb);
+ if (sock_queue_rcv_skb(sk, skb) < 0) {
+ atomic_long_inc(&session->stats.rx_errors);
+ kfree_skb(skb);
+ }
}
return;
@@ -454,13 +456,11 @@ static void pppol2tp_session_close(struct l2tp_session *session)
BUG_ON(session->magic != L2TP_SESSION_MAGIC);
-
if (sock) {
inet_shutdown(sock, 2);
/* Don't let the session go away before our socket does */
l2tp_session_inc_refcount(session);
}
- return;
}
/* Really kill the session socket. (Called from sock_put() if
@@ -474,7 +474,6 @@ static void pppol2tp_session_destruct(struct sock *sk)
BUG_ON(session->magic != L2TP_SESSION_MAGIC);
l2tp_session_dec_refcount(session);
}
- return;
}
/* Called when the PPPoX socket (session) is closed.
@@ -1312,6 +1311,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
}
+ l2tp_session_set_header_len(session, session->tunnel->version);
l2tp_info(session, PPPOL2TP_MSG_CONTROL,
"%s: set send_seq=%d\n",
session->name, session->send_seq);
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 42c659229a09..bd1fd8ea5105 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -100,6 +100,12 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
}
max_bw = max(max_bw, width);
}
+
+ /* use the configured bandwidth in case of monitor interface */
+ sdata = rcu_dereference(local->monitor_sdata);
+ if (sdata && rcu_access_pointer(sdata->vif.chanctx_conf) == conf)
+ max_bw = max(max_bw, conf->def.width);
+
rcu_read_unlock();
return max_bw;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index be198f42f1f7..b8d331e7d883 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1054,7 +1054,8 @@ static void ieee80211_uninit(struct net_device *dev)
static u16 ieee80211_netdev_select_queue(struct net_device *dev,
struct sk_buff *skb,
- void *accel_priv)
+ void *accel_priv,
+ select_queue_fallback_t fallback)
{
return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb);
}
@@ -1072,7 +1073,8 @@ static const struct net_device_ops ieee80211_dataif_ops = {
static u16 ieee80211_monitor_select_queue(struct net_device *dev,
struct sk_buff *skb,
- void *accel_priv)
+ void *accel_priv,
+ select_queue_fallback_t fallback)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c
index 2802f9d9279d..ad8b377b4b9f 100644
--- a/net/mac80211/mesh_ps.c
+++ b/net/mac80211/mesh_ps.c
@@ -36,6 +36,7 @@ static struct sk_buff *mps_qos_null_get(struct sta_info *sta)
sdata->vif.addr);
nullfunc->frame_control = fc;
nullfunc->duration_id = 0;
+ nullfunc->seq_ctrl = 0;
/* no address resolution for this frame -> set addr 1 immediately */
memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN);
memset(skb_put(skb, 2), 0, 2); /* append QoS control field */
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index a023b432143b..137a192e64bc 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1206,6 +1206,7 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata,
memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN);
memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN);
memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN);
+ nullfunc->seq_ctrl = 0;
skb->priority = tid;
skb_set_queue_mapping(skb, ieee802_1d_to_ac[tid]);
diff --git a/net/mac802154/Makefile b/net/mac802154/Makefile
index 57cf5d1a2e4a..15d62df52182 100644
--- a/net/mac802154/Makefile
+++ b/net/mac802154/Makefile
@@ -1,2 +1,4 @@
obj-$(CONFIG_MAC802154) += mac802154.o
mac802154-objs := ieee802154_dev.o rx.o tx.o mac_cmd.o mib.o monitor.o wpan.o
+
+ccflags-y += -D__CHECK_ENDIAN__
diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c
index 52ae6646a411..10cdb091b775 100644
--- a/net/mac802154/ieee802154_dev.c
+++ b/net/mac802154/ieee802154_dev.c
@@ -27,6 +27,7 @@
#include <net/netlink.h>
#include <linux/nl802154.h>
#include <net/mac802154.h>
+#include <net/ieee802154_netdev.h>
#include <net/route.h>
#include <net/wpan-phy.h>
@@ -46,7 +47,9 @@ int mac802154_slave_open(struct net_device *dev)
}
if (ipriv->ops->ieee_addr) {
- res = ipriv->ops->ieee_addr(&ipriv->hw, dev->dev_addr);
+ __le64 addr = ieee802154_devaddr_from_raw(dev->dev_addr);
+
+ res = ipriv->ops->ieee_addr(&ipriv->hw, addr);
WARN_ON(res);
if (res)
goto err;
@@ -165,6 +168,67 @@ err:
return ERR_PTR(err);
}
+static int mac802154_set_txpower(struct wpan_phy *phy, int db)
+{
+ struct mac802154_priv *priv = wpan_phy_priv(phy);
+
+ if (!priv->ops->set_txpower)
+ return -ENOTSUPP;
+
+ return priv->ops->set_txpower(&priv->hw, db);
+}
+
+static int mac802154_set_lbt(struct wpan_phy *phy, bool on)
+{
+ struct mac802154_priv *priv = wpan_phy_priv(phy);
+
+ if (!priv->ops->set_lbt)
+ return -ENOTSUPP;
+
+ return priv->ops->set_lbt(&priv->hw, on);
+}
+
+static int mac802154_set_cca_mode(struct wpan_phy *phy, u8 mode)
+{
+ struct mac802154_priv *priv = wpan_phy_priv(phy);
+
+ if (!priv->ops->set_cca_mode)
+ return -ENOTSUPP;
+
+ return priv->ops->set_cca_mode(&priv->hw, mode);
+}
+
+static int mac802154_set_cca_ed_level(struct wpan_phy *phy, s32 level)
+{
+ struct mac802154_priv *priv = wpan_phy_priv(phy);
+
+ if (!priv->ops->set_cca_ed_level)
+ return -ENOTSUPP;
+
+ return priv->ops->set_cca_ed_level(&priv->hw, level);
+}
+
+static int mac802154_set_csma_params(struct wpan_phy *phy, u8 min_be,
+ u8 max_be, u8 retries)
+{
+ struct mac802154_priv *priv = wpan_phy_priv(phy);
+
+ if (!priv->ops->set_csma_params)
+ return -ENOTSUPP;
+
+ return priv->ops->set_csma_params(&priv->hw, min_be, max_be, retries);
+}
+
+static int mac802154_set_frame_retries(struct wpan_phy *phy, s8 retries)
+{
+ struct mac802154_priv *priv = wpan_phy_priv(phy);
+
+ if (!priv->ops->set_frame_retries)
+ return -ENOTSUPP;
+
+ return priv->ops->set_frame_retries(&priv->hw, retries);
+}
+
struct ieee802154_dev *
ieee802154_alloc_device(size_t priv_data_len, struct ieee802154_ops *ops)
{
@@ -242,6 +306,12 @@ int ieee802154_register_device(struct ieee802154_dev *dev)
priv->phy->add_iface = mac802154_add_iface;
priv->phy->del_iface = mac802154_del_iface;
+ priv->phy->set_txpower = mac802154_set_txpower;
+ priv->phy->set_lbt = mac802154_set_lbt;
+ priv->phy->set_cca_mode = mac802154_set_cca_mode;
+ priv->phy->set_cca_ed_level = mac802154_set_cca_ed_level;
+ priv->phy->set_csma_params = mac802154_set_csma_params;
+ priv->phy->set_frame_retries = mac802154_set_frame_retries;
rc = wpan_phy_register(priv->phy);
if (rc < 0)
diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h
index d48422e27110..4619486f1da2 100644
--- a/net/mac802154/mac802154.h
+++ b/net/mac802154/mac802154.h
@@ -76,6 +76,7 @@ struct mac802154_sub_if_data {
__le16 pan_id;
__le16 short_addr;
+ __le64 extended_addr;
u8 chan;
u8 page;
@@ -106,11 +107,11 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb,
u8 page, u8 chan);
/* MIB callbacks */
-void mac802154_dev_set_short_addr(struct net_device *dev, u16 val);
-u16 mac802154_dev_get_short_addr(const struct net_device *dev);
+void mac802154_dev_set_short_addr(struct net_device *dev, __le16 val);
+__le16 mac802154_dev_get_short_addr(const struct net_device *dev);
void mac802154_dev_set_ieee_addr(struct net_device *dev);
-u16 mac802154_dev_get_pan_id(const struct net_device *dev);
-void mac802154_dev_set_pan_id(struct net_device *dev, u16 val);
+__le16 mac802154_dev_get_pan_id(const struct net_device *dev);
+void mac802154_dev_set_pan_id(struct net_device *dev, __le16 val);
void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan);
u8 mac802154_dev_get_dsn(const struct net_device *dev);
diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c
index a99910d4d52f..15bac3358889 100644
--- a/net/mac802154/mac_cmd.c
+++ b/net/mac802154/mac_cmd.c
@@ -40,7 +40,7 @@ static int mac802154_mlme_start_req(struct net_device *dev,
u8 pan_coord, u8 blx,
u8 coord_realign)
{
- BUG_ON(addr->addr_type != IEEE802154_ADDR_SHORT);
+ BUG_ON(addr->mode != IEEE802154_ADDR_SHORT);
mac802154_dev_set_pan_id(dev, addr->pan_id);
mac802154_dev_set_short_addr(dev, addr->short_addr);
diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c
index 8ded97cf1c33..153bd1ddbfbb 100644
--- a/net/mac802154/mib.c
+++ b/net/mac802154/mib.c
@@ -24,7 +24,9 @@
#include <linux/if_arp.h>
#include <net/mac802154.h>
+#include <net/ieee802154_netdev.h>
#include <net/wpan-phy.h>
+#include <net/ieee802154_netdev.h>
#include "mac802154.h"
@@ -62,8 +64,6 @@ static void hw_addr_notify(struct work_struct *work)
pr_debug("failed changed mask %lx\n", nw->changed);
kfree(nw);
-
- return;
}
static void set_hw_addr_filt(struct net_device *dev, unsigned long changed)
@@ -79,11 +79,9 @@ static void set_hw_addr_filt(struct net_device *dev, unsigned long changed)
work->dev = dev;
work->changed = changed;
queue_work(priv->hw->dev_workqueue, &work->work);
-
- return;
}
-void mac802154_dev_set_short_addr(struct net_device *dev, u16 val)
+void mac802154_dev_set_short_addr(struct net_device *dev, __le16 val)
{
struct mac802154_sub_if_data *priv = netdev_priv(dev);
@@ -100,10 +98,10 @@ void mac802154_dev_set_short_addr(struct net_device *dev, u16 val)
}
}
-u16 mac802154_dev_get_short_addr(const struct net_device *dev)
+__le16 mac802154_dev_get_short_addr(const struct net_device *dev)
{
struct mac802154_sub_if_data *priv = netdev_priv(dev);
- u16 ret;
+ __le16 ret;
BUG_ON(dev->type != ARPHRD_IEEE802154);
@@ -119,19 +117,19 @@ void mac802154_dev_set_ieee_addr(struct net_device *dev)
struct mac802154_sub_if_data *priv = netdev_priv(dev);
struct mac802154_priv *mac = priv->hw;
+ priv->extended_addr = ieee802154_devaddr_from_raw(dev->dev_addr);
+
if (mac->ops->set_hw_addr_filt &&
- memcmp(mac->hw.hw_filt.ieee_addr,
- dev->dev_addr, IEEE802154_ADDR_LEN)) {
- memcpy(mac->hw.hw_filt.ieee_addr,
- dev->dev_addr, IEEE802154_ADDR_LEN);
+ mac->hw.hw_filt.ieee_addr != priv->extended_addr) {
+ mac->hw.hw_filt.ieee_addr = priv->extended_addr;
set_hw_addr_filt(dev, IEEE802515_AFILT_IEEEADDR_CHANGED);
}
}
-u16 mac802154_dev_get_pan_id(const struct net_device *dev)
+__le16 mac802154_dev_get_pan_id(const struct net_device *dev)
{
struct mac802154_sub_if_data *priv = netdev_priv(dev);
- u16 ret;
+ __le16 ret;
BUG_ON(dev->type != ARPHRD_IEEE802154);
@@ -142,7 +140,7 @@ u16 mac802154_dev_get_pan_id(const struct net_device *dev)
return ret;
}
-void mac802154_dev_set_pan_id(struct net_device *dev, u16 val)
+void mac802154_dev_set_pan_id(struct net_device *dev, __le16 val)
{
struct mac802154_sub_if_data *priv = netdev_priv(dev);
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index 38548ec2098f..03855b0677cc 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -80,7 +80,6 @@ mac802154_subif_rx(struct ieee802154_dev *hw, struct sk_buff *skb, u8 lqi)
mac802154_wpans_rx(priv, skb);
out:
dev_kfree_skb(skb);
- return;
}
static void mac802154_rx_worker(struct work_struct *work)
diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c
index 372d8a222b91..80cbee1a2f56 100644
--- a/net/mac802154/wpan.c
+++ b/net/mac802154/wpan.c
@@ -35,35 +35,6 @@
#include "mac802154.h"
-static inline int mac802154_fetch_skb_u8(struct sk_buff *skb, u8 *val)
-{
- if (unlikely(!pskb_may_pull(skb, 1)))
- return -EINVAL;
-
- *val = skb->data[0];
- skb_pull(skb, 1);
-
- return 0;
-}
-
-static inline int mac802154_fetch_skb_u16(struct sk_buff *skb, u16 *val)
-{
- if (unlikely(!pskb_may_pull(skb, 2)))
- return -EINVAL;
-
- *val = skb->data[0] | (skb->data[1] << 8);
- skb_pull(skb, 2);
-
- return 0;
-}
-
-static inline void mac802154_haddr_copy_swap(u8 *dest, const u8 *src)
-{
- int i;
- for (i = 0; i < IEEE802154_ADDR_LEN; i++)
- dest[IEEE802154_ADDR_LEN - i - 1] = src[i];
-}
-
static int
mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
@@ -76,19 +47,25 @@ mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
switch (cmd) {
case SIOCGIFADDR:
- if (priv->pan_id == IEEE802154_PANID_BROADCAST ||
- priv->short_addr == IEEE802154_ADDR_BROADCAST) {
+ {
+ u16 pan_id, short_addr;
+
+ pan_id = le16_to_cpu(priv->pan_id);
+ short_addr = le16_to_cpu(priv->short_addr);
+ if (pan_id == IEEE802154_PANID_BROADCAST ||
+ short_addr == IEEE802154_ADDR_BROADCAST) {
err = -EADDRNOTAVAIL;
break;
}
sa->family = AF_IEEE802154;
sa->addr.addr_type = IEEE802154_ADDR_SHORT;
- sa->addr.pan_id = priv->pan_id;
- sa->addr.short_addr = priv->short_addr;
+ sa->addr.pan_id = pan_id;
+ sa->addr.short_addr = short_addr;
err = 0;
break;
+ }
case SIOCSIFADDR:
dev_warn(&dev->dev,
"Using DEBUGing ioctl SIOCSIFADDR isn't recommened!\n");
@@ -101,8 +78,8 @@ mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
break;
}
- priv->pan_id = sa->addr.pan_id;
- priv->short_addr = sa->addr.short_addr;
+ priv->pan_id = cpu_to_le16(sa->addr.pan_id);
+ priv->short_addr = cpu_to_le16(sa->addr.short_addr);
err = 0;
break;
@@ -128,187 +105,70 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p)
static int mac802154_header_create(struct sk_buff *skb,
struct net_device *dev,
unsigned short type,
- const void *_daddr,
- const void *_saddr,
+ const void *daddr,
+ const void *saddr,
unsigned len)
{
- const struct ieee802154_addr *saddr = _saddr;
- const struct ieee802154_addr *daddr = _daddr;
- struct ieee802154_addr dev_addr;
+ struct ieee802154_hdr hdr;
struct mac802154_sub_if_data *priv = netdev_priv(dev);
- int pos = 2;
- u8 head[MAC802154_FRAME_HARD_HEADER_LEN];
- u16 fc;
+ int hlen;
if (!daddr)
return -EINVAL;
- head[pos++] = mac_cb(skb)->seq; /* DSN/BSN */
- fc = mac_cb_type(skb);
- if (mac_cb_is_ackreq(skb))
- fc |= IEEE802154_FC_ACK_REQ;
+ memset(&hdr.fc, 0, sizeof(hdr.fc));
+ hdr.fc.type = mac_cb_type(skb);
+ hdr.fc.security_enabled = mac_cb_is_secen(skb);
+ hdr.fc.ack_request = mac_cb_is_ackreq(skb);
if (!saddr) {
spin_lock_bh(&priv->mib_lock);
- if (priv->short_addr == IEEE802154_ADDR_BROADCAST ||
- priv->short_addr == IEEE802154_ADDR_UNDEF ||
- priv->pan_id == IEEE802154_PANID_BROADCAST) {
- dev_addr.addr_type = IEEE802154_ADDR_LONG;
- memcpy(dev_addr.hwaddr, dev->dev_addr,
- IEEE802154_ADDR_LEN);
+ if (priv->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST) ||
+ priv->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) ||
+ priv->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST)) {
+ hdr.source.mode = IEEE802154_ADDR_LONG;
+ hdr.source.extended_addr = priv->extended_addr;
} else {
- dev_addr.addr_type = IEEE802154_ADDR_SHORT;
- dev_addr.short_addr = priv->short_addr;
+ hdr.source.mode = IEEE802154_ADDR_SHORT;
+ hdr.source.short_addr = priv->short_addr;
}
- dev_addr.pan_id = priv->pan_id;
- saddr = &dev_addr;
+ hdr.source.pan_id = priv->pan_id;
spin_unlock_bh(&priv->mib_lock);
+ } else {
+ hdr.source = *(const struct ieee802154_addr *)saddr;
}
- if (daddr->addr_type != IEEE802154_ADDR_NONE) {
- fc |= (daddr->addr_type << IEEE802154_FC_DAMODE_SHIFT);
-
- head[pos++] = daddr->pan_id & 0xff;
- head[pos++] = daddr->pan_id >> 8;
-
- if (daddr->addr_type == IEEE802154_ADDR_SHORT) {
- head[pos++] = daddr->short_addr & 0xff;
- head[pos++] = daddr->short_addr >> 8;
- } else {
- mac802154_haddr_copy_swap(head + pos, daddr->hwaddr);
- pos += IEEE802154_ADDR_LEN;
- }
- }
-
- if (saddr->addr_type != IEEE802154_ADDR_NONE) {
- fc |= (saddr->addr_type << IEEE802154_FC_SAMODE_SHIFT);
-
- if ((saddr->pan_id == daddr->pan_id) &&
- (saddr->pan_id != IEEE802154_PANID_BROADCAST)) {
- /* PANID compression/intra PAN */
- fc |= IEEE802154_FC_INTRA_PAN;
- } else {
- head[pos++] = saddr->pan_id & 0xff;
- head[pos++] = saddr->pan_id >> 8;
- }
-
- if (saddr->addr_type == IEEE802154_ADDR_SHORT) {
- head[pos++] = saddr->short_addr & 0xff;
- head[pos++] = saddr->short_addr >> 8;
- } else {
- mac802154_haddr_copy_swap(head + pos, saddr->hwaddr);
- pos += IEEE802154_ADDR_LEN;
- }
- }
+ hdr.dest = *(const struct ieee802154_addr *)daddr;
- head[0] = fc;
- head[1] = fc >> 8;
+ hlen = ieee802154_hdr_push(skb, &hdr);
+ if (hlen < 0)
+ return -EINVAL;
- memcpy(skb_push(skb, pos), head, pos);
skb_reset_mac_header(skb);
- skb->mac_len = pos;
+ skb->mac_len = hlen;
+
+ if (hlen + len + 2 > dev->mtu)
+ return -EMSGSIZE;
- return pos;
+ return hlen;
}
static int
mac802154_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
- const u8 *hdr = skb_mac_header(skb);
- const u8 *tail = skb_tail_pointer(skb);
+ struct ieee802154_hdr hdr;
struct ieee802154_addr *addr = (struct ieee802154_addr *)haddr;
- u16 fc;
- int da_type;
-
- if (hdr + 3 > tail)
- goto malformed;
-
- fc = hdr[0] | (hdr[1] << 8);
-
- hdr += 3;
-
- da_type = IEEE802154_FC_DAMODE(fc);
- addr->addr_type = IEEE802154_FC_SAMODE(fc);
-
- switch (da_type) {
- case IEEE802154_ADDR_NONE:
- if (fc & IEEE802154_FC_INTRA_PAN)
- goto malformed;
- break;
- case IEEE802154_ADDR_LONG:
- if (fc & IEEE802154_FC_INTRA_PAN) {
- if (hdr + 2 > tail)
- goto malformed;
- addr->pan_id = hdr[0] | (hdr[1] << 8);
- hdr += 2;
- }
-
- if (hdr + IEEE802154_ADDR_LEN > tail)
- goto malformed;
-
- hdr += IEEE802154_ADDR_LEN;
- break;
- case IEEE802154_ADDR_SHORT:
- if (fc & IEEE802154_FC_INTRA_PAN) {
- if (hdr + 2 > tail)
- goto malformed;
- addr->pan_id = hdr[0] | (hdr[1] << 8);
- hdr += 2;
- }
-
- if (hdr + 2 > tail)
- goto malformed;
-
- hdr += 2;
- break;
- default:
- goto malformed;
+ if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) {
+ pr_debug("malformed packet\n");
+ return 0;
}
- switch (addr->addr_type) {
- case IEEE802154_ADDR_NONE:
- break;
- case IEEE802154_ADDR_LONG:
- if (!(fc & IEEE802154_FC_INTRA_PAN)) {
- if (hdr + 2 > tail)
- goto malformed;
- addr->pan_id = hdr[0] | (hdr[1] << 8);
- hdr += 2;
- }
-
- if (hdr + IEEE802154_ADDR_LEN > tail)
- goto malformed;
-
- mac802154_haddr_copy_swap(addr->hwaddr, hdr);
- hdr += IEEE802154_ADDR_LEN;
- break;
- case IEEE802154_ADDR_SHORT:
- if (!(fc & IEEE802154_FC_INTRA_PAN)) {
- if (hdr + 2 > tail)
- goto malformed;
- addr->pan_id = hdr[0] | (hdr[1] << 8);
- hdr += 2;
- }
-
- if (hdr + 2 > tail)
- goto malformed;
-
- addr->short_addr = hdr[0] | (hdr[1] << 8);
- hdr += 2;
- break;
- default:
- goto malformed;
- }
-
- return sizeof(struct ieee802154_addr);
-
-malformed:
- pr_debug("malformed packet\n");
- return 0;
+ *addr = hdr.source;
+ return sizeof(*addr);
}
static netdev_tx_t
@@ -382,8 +242,8 @@ void mac802154_wpan_setup(struct net_device *dev)
get_random_bytes(&priv->bsn, 1);
get_random_bytes(&priv->dsn, 1);
- priv->pan_id = IEEE802154_PANID_BROADCAST;
- priv->short_addr = IEEE802154_ADDR_BROADCAST;
+ priv->pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST);
+ priv->short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST);
}
static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb)
@@ -394,13 +254,18 @@ static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb)
static int
mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb)
{
+ __le16 span, sshort;
+
pr_debug("getting packet via slave interface %s\n", sdata->dev->name);
spin_lock_bh(&sdata->mib_lock);
- switch (mac_cb(skb)->da.addr_type) {
+ span = sdata->pan_id;
+ sshort = sdata->short_addr;
+
+ switch (mac_cb(skb)->dest.mode) {
case IEEE802154_ADDR_NONE:
- if (mac_cb(skb)->sa.addr_type != IEEE802154_ADDR_NONE)
+ if (mac_cb(skb)->dest.mode != IEEE802154_ADDR_NONE)
/* FIXME: check if we are PAN coordinator */
skb->pkt_type = PACKET_OTHERHOST;
else
@@ -408,23 +273,22 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb)
skb->pkt_type = PACKET_HOST;
break;
case IEEE802154_ADDR_LONG:
- if (mac_cb(skb)->da.pan_id != sdata->pan_id &&
- mac_cb(skb)->da.pan_id != IEEE802154_PANID_BROADCAST)
+ if (mac_cb(skb)->dest.pan_id != span &&
+ mac_cb(skb)->dest.pan_id != cpu_to_le16(IEEE802154_PANID_BROADCAST))
skb->pkt_type = PACKET_OTHERHOST;
- else if (!memcmp(mac_cb(skb)->da.hwaddr, sdata->dev->dev_addr,
- IEEE802154_ADDR_LEN))
+ else if (mac_cb(skb)->dest.extended_addr == sdata->extended_addr)
skb->pkt_type = PACKET_HOST;
else
skb->pkt_type = PACKET_OTHERHOST;
break;
case IEEE802154_ADDR_SHORT:
- if (mac_cb(skb)->da.pan_id != sdata->pan_id &&
- mac_cb(skb)->da.pan_id != IEEE802154_PANID_BROADCAST)
+ if (mac_cb(skb)->dest.pan_id != span &&
+ mac_cb(skb)->dest.pan_id != cpu_to_le16(IEEE802154_PANID_BROADCAST))
skb->pkt_type = PACKET_OTHERHOST;
- else if (mac_cb(skb)->da.short_addr == sdata->short_addr)
+ else if (mac_cb(skb)->dest.short_addr == sshort)
skb->pkt_type = PACKET_HOST;
- else if (mac_cb(skb)->da.short_addr ==
- IEEE802154_ADDR_BROADCAST)
+ else if (mac_cb(skb)->dest.short_addr ==
+ cpu_to_le16(IEEE802154_ADDR_BROADCAST))
skb->pkt_type = PACKET_BROADCAST;
else
skb->pkt_type = PACKET_OTHERHOST;
@@ -451,88 +315,82 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb)
}
}
-static int mac802154_parse_frame_start(struct sk_buff *skb)
+static void mac802154_print_addr(const char *name,
+ const struct ieee802154_addr *addr)
{
- u8 *head = skb->data;
- u16 fc;
+ if (addr->mode == IEEE802154_ADDR_NONE)
+ pr_debug("%s not present\n", name);
- if (mac802154_fetch_skb_u16(skb, &fc) ||
- mac802154_fetch_skb_u8(skb, &(mac_cb(skb)->seq)))
- goto err;
+ pr_debug("%s PAN ID: %04x\n", name, le16_to_cpu(addr->pan_id));
+ if (addr->mode == IEEE802154_ADDR_SHORT) {
+ pr_debug("%s is short: %04x\n", name,
+ le16_to_cpu(addr->short_addr));
+ } else {
+ u64 hw = swab64((__force u64) addr->extended_addr);
- pr_debug("fc: %04x dsn: %02x\n", fc, head[2]);
-
- mac_cb(skb)->flags = IEEE802154_FC_TYPE(fc);
- mac_cb(skb)->sa.addr_type = IEEE802154_FC_SAMODE(fc);
- mac_cb(skb)->da.addr_type = IEEE802154_FC_DAMODE(fc);
+ pr_debug("%s is hardware: %8phC\n", name, &hw);
+ }
+}
- if (fc & IEEE802154_FC_INTRA_PAN)
- mac_cb(skb)->flags |= MAC_CB_FLAG_INTRAPAN;
+static int mac802154_parse_frame_start(struct sk_buff *skb)
+{
+ int hlen;
+ struct ieee802154_hdr hdr;
- if (mac_cb(skb)->da.addr_type != IEEE802154_ADDR_NONE) {
- if (mac802154_fetch_skb_u16(skb, &(mac_cb(skb)->da.pan_id)))
- goto err;
+ hlen = ieee802154_hdr_pull(skb, &hdr);
+ if (hlen < 0)
+ return -EINVAL;
- /* source PAN id compression */
- if (mac_cb_is_intrapan(skb))
- mac_cb(skb)->sa.pan_id = mac_cb(skb)->da.pan_id;
+ skb->mac_len = hlen;
- pr_debug("dest PAN addr: %04x\n", mac_cb(skb)->da.pan_id);
+ pr_debug("fc: %04x dsn: %02x\n", le16_to_cpup((__le16 *)&hdr.fc),
+ hdr.seq);
- if (mac_cb(skb)->da.addr_type == IEEE802154_ADDR_SHORT) {
- u16 *da = &(mac_cb(skb)->da.short_addr);
+ mac_cb(skb)->flags = hdr.fc.type;
- if (mac802154_fetch_skb_u16(skb, da))
- goto err;
+ if (hdr.fc.ack_request)
+ mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ;
+ if (hdr.fc.security_enabled)
+ mac_cb(skb)->flags |= MAC_CB_FLAG_SECEN;
- pr_debug("destination address is short: %04x\n",
- mac_cb(skb)->da.short_addr);
- } else {
- if (!pskb_may_pull(skb, IEEE802154_ADDR_LEN))
- goto err;
+ mac802154_print_addr("destination", &hdr.dest);
+ mac802154_print_addr("source", &hdr.source);
- mac802154_haddr_copy_swap(mac_cb(skb)->da.hwaddr,
- skb->data);
- skb_pull(skb, IEEE802154_ADDR_LEN);
+ mac_cb(skb)->source = hdr.source;
+ mac_cb(skb)->dest = hdr.dest;
- pr_debug("destination address is hardware\n");
- }
- }
+ if (hdr.fc.security_enabled) {
+ u64 key;
- if (mac_cb(skb)->sa.addr_type != IEEE802154_ADDR_NONE) {
- /* non PAN-compression, fetch source address id */
- if (!(mac_cb_is_intrapan(skb))) {
- u16 *sa_pan = &(mac_cb(skb)->sa.pan_id);
+ pr_debug("seclevel %i\n", hdr.sec.level);
- if (mac802154_fetch_skb_u16(skb, sa_pan))
- goto err;
- }
-
- pr_debug("source PAN addr: %04x\n", mac_cb(skb)->da.pan_id);
-
- if (mac_cb(skb)->sa.addr_type == IEEE802154_ADDR_SHORT) {
- u16 *sa = &(mac_cb(skb)->sa.short_addr);
-
- if (mac802154_fetch_skb_u16(skb, sa))
- goto err;
+ switch (hdr.sec.key_id_mode) {
+ case IEEE802154_SCF_KEY_IMPLICIT:
+ pr_debug("implicit key\n");
+ break;
- pr_debug("source address is short: %04x\n",
- mac_cb(skb)->sa.short_addr);
- } else {
- if (!pskb_may_pull(skb, IEEE802154_ADDR_LEN))
- goto err;
+ case IEEE802154_SCF_KEY_INDEX:
+ pr_debug("key %02x\n", hdr.sec.key_id);
+ break;
- mac802154_haddr_copy_swap(mac_cb(skb)->sa.hwaddr,
- skb->data);
- skb_pull(skb, IEEE802154_ADDR_LEN);
+ case IEEE802154_SCF_KEY_SHORT_INDEX:
+ pr_debug("key %04x:%04x %02x\n",
+ le32_to_cpu(hdr.sec.short_src) >> 16,
+ le32_to_cpu(hdr.sec.short_src) & 0xffff,
+ hdr.sec.key_id);
+ break;
- pr_debug("source address is hardware\n");
+ case IEEE802154_SCF_KEY_HW_INDEX:
+ key = swab64((__force u64) hdr.sec.extended_src);
+ pr_debug("key source %8phC %02x\n", &key,
+ hdr.sec.key_id);
+ break;
}
+
+ return -EINVAL;
}
return 0;
-err:
- return -EINVAL;
}
void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index c37467562fd0..e9410d17619d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -513,7 +513,6 @@ config NFT_QUEUE
config NFT_REJECT
depends on NF_TABLES
- depends on NF_TABLES_IPV6 || !NF_TABLES_IPV6
default m if NETFILTER_ADVANCED=n
tristate "Netfilter nf_tables reject support"
help
@@ -521,6 +520,11 @@ config NFT_REJECT
explicitly deny and notify via TCP reset/ICMP informational errors
unallowed traffic.
+config NFT_REJECT_INET
+ depends on NF_TABLES_INET
+ default NFT_REJECT
+ tristate
+
config NFT_COMPAT
depends on NF_TABLES
depends on NETFILTER_XTABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index ee9c4de5f8ed..bffdad774da7 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -79,6 +79,7 @@ obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
obj-$(CONFIG_NFT_NAT) += nft_nat.o
obj-$(CONFIG_NFT_QUEUE) += nft_queue.o
obj-$(CONFIG_NFT_REJECT) += nft_reject.o
+obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o
obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o
obj-$(CONFIG_NFT_HASH) += nft_hash.o
obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig
index 44cd4f58adf0..2f7f5c32c6f9 100644
--- a/net/netfilter/ipset/Kconfig
+++ b/net/netfilter/ipset/Kconfig
@@ -61,6 +61,15 @@ config IP_SET_HASH_IP
To compile it as a module, choose M here. If unsure, say N.
+config IP_SET_HASH_IPMARK
+ tristate "hash:ip,mark set support"
+ depends on IP_SET
+ help
+ This option adds the hash:ip,mark set type support, by which one
+ can store IPv4/IPv6 address and mark pairs.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config IP_SET_HASH_IPPORT
tristate "hash:ip,port set support"
depends on IP_SET
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
index 44b2d38476fa..231f10196cb9 100644
--- a/net/netfilter/ipset/Makefile
+++ b/net/netfilter/ipset/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o
# hash types
obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o
+obj-$(CONFIG_IP_SET_HASH_IPMARK) += ip_set_hash_ipmark.o
obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o
obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o
obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index de770ec39e51..117208321f16 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -54,10 +54,10 @@ MODULE_DESCRIPTION("core IP set support");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
/* When the nfnl mutex is held: */
-#define nfnl_dereference(p) \
+#define ip_set_dereference(p) \
rcu_dereference_protected(p, 1)
-#define nfnl_set(inst, id) \
- nfnl_dereference((inst)->ip_set_list)[id]
+#define ip_set(inst, id) \
+ ip_set_dereference((inst)->ip_set_list)[id]
/*
* The set types are implemented in modules and registered set types
@@ -368,6 +368,8 @@ ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
if (tb[IPSET_ATTR_CADT_FLAGS])
cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+ if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
+ set->flags |= IPSET_CREATE_FLAG_FORCEADD;
for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
if (!add_extension(id, cadt_flags, tb))
continue;
@@ -510,7 +512,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
if (opt->dim < set->type->dimension ||
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
- return 0;
+ return -IPSET_ERR_TYPE_MISMATCH;
write_lock_bh(&set->lock);
ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
@@ -533,7 +535,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
if (opt->dim < set->type->dimension ||
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
- return 0;
+ return -IPSET_ERR_TYPE_MISMATCH;
write_lock_bh(&set->lock);
ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
@@ -640,7 +642,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index)
return IPSET_INVALID_ID;
nfnl_lock(NFNL_SUBSYS_IPSET);
- set = nfnl_set(inst, index);
+ set = ip_set(inst, index);
if (set)
__ip_set_get(set);
else
@@ -666,7 +668,7 @@ ip_set_nfnl_put(struct net *net, ip_set_id_t index)
nfnl_lock(NFNL_SUBSYS_IPSET);
if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */
- set = nfnl_set(inst, index);
+ set = ip_set(inst, index);
if (set != NULL)
__ip_set_put(set);
}
@@ -734,7 +736,7 @@ find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id)
*id = IPSET_INVALID_ID;
for (i = 0; i < inst->ip_set_max; i++) {
- set = nfnl_set(inst, i);
+ set = ip_set(inst, i);
if (set != NULL && STREQ(set->name, name)) {
*id = i;
break;
@@ -760,7 +762,7 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
*index = IPSET_INVALID_ID;
for (i = 0; i < inst->ip_set_max; i++) {
- s = nfnl_set(inst, i);
+ s = ip_set(inst, i);
if (s == NULL) {
if (*index == IPSET_INVALID_ID)
*index = i;
@@ -883,7 +885,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
if (!list)
goto cleanup;
/* nfnl mutex is held, both lists are valid */
- tmp = nfnl_dereference(inst->ip_set_list);
+ tmp = ip_set_dereference(inst->ip_set_list);
memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max);
rcu_assign_pointer(inst->ip_set_list, list);
/* Make sure all current packets have passed through */
@@ -900,7 +902,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
* Finally! Add our shiny new set to the list, and be done.
*/
pr_debug("create: '%s' created with index %u!\n", set->name, index);
- nfnl_set(inst, index) = set;
+ ip_set(inst, index) = set;
return ret;
@@ -925,10 +927,10 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
static void
ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index)
{
- struct ip_set *set = nfnl_set(inst, index);
+ struct ip_set *set = ip_set(inst, index);
pr_debug("set: %s\n", set->name);
- nfnl_set(inst, index) = NULL;
+ ip_set(inst, index) = NULL;
/* Must call it without holding any lock */
set->variant->destroy(set);
@@ -962,7 +964,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
read_lock_bh(&ip_set_ref_lock);
if (!attr[IPSET_ATTR_SETNAME]) {
for (i = 0; i < inst->ip_set_max; i++) {
- s = nfnl_set(inst, i);
+ s = ip_set(inst, i);
if (s != NULL && s->ref) {
ret = -IPSET_ERR_BUSY;
goto out;
@@ -970,7 +972,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
}
read_unlock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) {
- s = nfnl_set(inst, i);
+ s = ip_set(inst, i);
if (s != NULL)
ip_set_destroy_set(inst, i);
}
@@ -1020,7 +1022,7 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
if (!attr[IPSET_ATTR_SETNAME]) {
for (i = 0; i < inst->ip_set_max; i++) {
- s = nfnl_set(inst, i);
+ s = ip_set(inst, i);
if (s != NULL)
ip_set_flush_set(s);
}
@@ -1074,7 +1076,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
for (i = 0; i < inst->ip_set_max; i++) {
- s = nfnl_set(inst, i);
+ s = ip_set(inst, i);
if (s != NULL && STREQ(s->name, name2)) {
ret = -IPSET_ERR_EXIST_SETNAME2;
goto out;
@@ -1134,8 +1136,8 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
write_lock_bh(&ip_set_ref_lock);
swap(from->ref, to->ref);
- nfnl_set(inst, from_id) = to;
- nfnl_set(inst, to_id) = from;
+ ip_set(inst, from_id) = to;
+ ip_set(inst, to_id) = from;
write_unlock_bh(&ip_set_ref_lock);
return 0;
@@ -1157,7 +1159,7 @@ ip_set_dump_done(struct netlink_callback *cb)
struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET];
if (cb->args[IPSET_CB_ARG0]) {
pr_debug("release set %s\n",
- nfnl_set(inst, cb->args[IPSET_CB_INDEX])->name);
+ ip_set(inst, cb->args[IPSET_CB_INDEX])->name);
__ip_set_put_byindex(inst,
(ip_set_id_t) cb->args[IPSET_CB_INDEX]);
}
@@ -1254,7 +1256,7 @@ dump_last:
dump_type, dump_flags, cb->args[IPSET_CB_INDEX]);
for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) {
index = (ip_set_id_t) cb->args[IPSET_CB_INDEX];
- set = nfnl_set(inst, index);
+ set = ip_set(inst, index);
if (set == NULL) {
if (dump_type == DUMP_ONE) {
ret = -ENOENT;
@@ -1332,7 +1334,7 @@ next_set:
release_refcount:
/* If there was an error or set is done, release set */
if (ret || !cb->args[IPSET_CB_ARG0]) {
- pr_debug("release set %s\n", nfnl_set(inst, index)->name);
+ pr_debug("release set %s\n", ip_set(inst, index)->name);
__ip_set_put_byindex(inst, index);
cb->args[IPSET_CB_ARG0] = 0;
}
@@ -1887,7 +1889,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
find_set_and_id(inst, req_get->set.name, &id);
req_get->set.index = id;
if (id != IPSET_INVALID_ID)
- req_get->family = nfnl_set(inst, id)->family;
+ req_get->family = ip_set(inst, id)->family;
nfnl_unlock(NFNL_SUBSYS_IPSET);
goto copy;
}
@@ -1901,7 +1903,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
goto done;
}
nfnl_lock(NFNL_SUBSYS_IPSET);
- set = nfnl_set(inst, req_get->set.index);
+ set = ip_set(inst, req_get->set.index);
strncpy(req_get->set.name, set ? set->name : "",
IPSET_MAXNAMELEN);
nfnl_unlock(NFNL_SUBSYS_IPSET);
@@ -1945,7 +1947,6 @@ ip_set_net_init(struct net *net)
return -ENOMEM;
inst->is_deleted = 0;
rcu_assign_pointer(inst->ip_set_list, list);
- pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
return 0;
}
@@ -1960,7 +1961,7 @@ ip_set_net_exit(struct net *net)
inst->is_deleted = 1; /* flag for ip_set_nfnl_put */
for (i = 0; i < inst->ip_set_max; i++) {
- set = nfnl_set(inst, i);
+ set = ip_set(inst, i);
if (set != NULL)
ip_set_destroy_set(inst, i);
}
@@ -1996,6 +1997,7 @@ ip_set_init(void)
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
return ret;
}
+ pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL);
return 0;
}
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index be6932ad3a86..61c7fb052802 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -263,6 +263,9 @@ struct htype {
u32 maxelem; /* max elements in the hash */
u32 elements; /* current element (vs timeout) */
u32 initval; /* random jhash init value */
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ u32 markmask; /* markmask value for mark mask to store */
+#endif
struct timer_list gc; /* garbage collection when timeout enabled */
struct mtype_elem next; /* temporary storage for uadd */
#ifdef IP_SET_HASH_WITH_MULTI
@@ -454,6 +457,9 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b)
#ifdef IP_SET_HASH_WITH_NETMASK
x->netmask == y->netmask &&
#endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ x->markmask == y->markmask &&
+#endif
a->extensions == b->extensions;
}
@@ -627,6 +633,18 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
bool flag_exist = flags & IPSET_FLAG_EXIST;
u32 key, multi = 0;
+ if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set)) {
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh(h->table);
+ key = HKEY(value, h->initval, t->htable_bits);
+ n = hbucket(t,key);
+ if (n->pos) {
+ /* Choosing the first entry in the array to replace */
+ j = 0;
+ goto reuse_slot;
+ }
+ rcu_read_unlock_bh();
+ }
if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem)
/* FIXME: when set is full, we slow down here */
mtype_expire(set, h, NLEN(set->family), set->dsize);
@@ -908,6 +926,10 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask))
goto nla_put_failure;
#endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
+ goto nla_put_failure;
+#endif
if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
goto nla_put_failure;
@@ -1016,6 +1038,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
struct nlattr *tb[], u32 flags)
{
u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ u32 markmask;
+#endif
u8 hbits;
#ifdef IP_SET_HASH_WITH_NETMASK
u8 netmask;
@@ -1026,6 +1051,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
return -IPSET_ERR_INVALID_FAMILY;
+
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ markmask = 0xffffffff;
+#endif
#ifdef IP_SET_HASH_WITH_NETMASK
netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
pr_debug("Create set %s with family %s\n",
@@ -1034,6 +1063,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK) ||
+#endif
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
@@ -1057,6 +1089,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
return -IPSET_ERR_INVALID_NETMASK;
}
#endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ if (tb[IPSET_ATTR_MARKMASK]) {
+ markmask = ntohl(nla_get_u32(tb[IPSET_ATTR_MARKMASK]));
+
+ if ((markmask > 4294967295u) || markmask == 0)
+ return -IPSET_ERR_INVALID_MARKMASK;
+ }
+#endif
hsize = sizeof(*h);
#ifdef IP_SET_HASH_WITH_NETS
@@ -1071,6 +1111,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
#ifdef IP_SET_HASH_WITH_NETMASK
h->netmask = netmask;
#endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ h->markmask = markmask;
+#endif
get_random_bytes(&h->initval, sizeof(h->initval));
set->timeout = IPSET_NO_TIMEOUT;
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index e65fc2423d56..dd40607f878e 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -25,7 +25,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 Counters support */
-#define IPSET_TYPE_REV_MAX 2 /* Comments support */
+/* 2 Comments support */
+#define IPSET_TYPE_REV_MAX 3 /* Forceadd support */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
new file mode 100644
index 000000000000..4eff0a297254
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -0,0 +1,321 @@
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Copyright (C) 2013 Smoothwall Ltd. <vytas.dauksa@smoothwall.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:ip,mark type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+#define IPSET_TYPE_REV_MAX 1 /* Forceadd support */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Vytas Dauksa <vytas.dauksa@smoothwall.net>");
+IP_SET_MODULE_DESC("hash:ip,mark", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:ip,mark");
+
+/* Type specific function prefix */
+#define HTYPE hash_ipmark
+#define IP_SET_HASH_WITH_MARKMASK
+
+/* IPv4 variant */
+
+/* Member elements */
+struct hash_ipmark4_elem {
+ __be32 ip;
+ __u32 mark;
+};
+
+/* Common functions */
+
+static inline bool
+hash_ipmark4_data_equal(const struct hash_ipmark4_elem *ip1,
+ const struct hash_ipmark4_elem *ip2,
+ u32 *multi)
+{
+ return ip1->ip == ip2->ip &&
+ ip1->mark == ip2->mark;
+}
+
+static bool
+hash_ipmark4_data_list(struct sk_buff *skb,
+ const struct hash_ipmark4_elem *data)
+{
+ if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
+ nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return 1;
+}
+
+static inline void
+hash_ipmark4_data_next(struct hash_ipmark4_elem *next,
+ const struct hash_ipmark4_elem *d)
+{
+ next->ip = d->ip;
+}
+
+#define MTYPE hash_ipmark4
+#define PF 4
+#define HOST_MASK 32
+#define HKEY_DATALEN sizeof(struct hash_ipmark4_elem)
+#include "ip_set_hash_gen.h"
+
+static int
+hash_ipmark4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_ipmark *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmark4_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ e.mark = skb->mark;
+ e.mark &= h->markmask;
+
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ const struct hash_ipmark *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmark4_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip, ip_to = 0;
+ int ret;
+
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ return -IPSET_ERR_PROTOCOL;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) ||
+ ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK]));
+ e.mark &= h->markmask;
+
+ if (adt == IPSET_TEST ||
+ !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) {
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ ip_to = ip = ntohl(e.ip);
+ if (tb[IPSET_ATTR_IP_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+ if (ret)
+ return ret;
+ if (ip > ip_to)
+ swap(ip, ip_to);
+ } else if (tb[IPSET_ATTR_CIDR]) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+ if (!cidr || cidr > 32)
+ return -IPSET_ERR_INVALID_CIDR;
+ ip_set_mask_from_to(ip, ip_to, cidr);
+ }
+
+ if (retried)
+ ip = ntohl(h->next.ip);
+ for (; !before(ip_to, ip); ip++) {
+ e.ip = htonl(ip);
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+ else
+ ret = 0;
+ }
+ return ret;
+}
+
+/* IPv6 variant */
+
+struct hash_ipmark6_elem {
+ union nf_inet_addr ip;
+ __u32 mark;
+};
+
+/* Common functions */
+
+static inline bool
+hash_ipmark6_data_equal(const struct hash_ipmark6_elem *ip1,
+ const struct hash_ipmark6_elem *ip2,
+ u32 *multi)
+{
+ return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) &&
+ ip1->mark == ip2->mark;
+}
+
+static bool
+hash_ipmark6_data_list(struct sk_buff *skb,
+ const struct hash_ipmark6_elem *data)
+{
+ if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) ||
+ nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return 1;
+}
+
+static inline void
+hash_ipmark6_data_next(struct hash_ipmark4_elem *next,
+ const struct hash_ipmark6_elem *d)
+{
+}
+
+#undef MTYPE
+#undef PF
+#undef HOST_MASK
+#undef HKEY_DATALEN
+
+#define MTYPE hash_ipmark6
+#define PF 6
+#define HOST_MASK 128
+#define HKEY_DATALEN sizeof(struct hash_ipmark6_elem)
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
+
+
+static int
+hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_ipmark *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmark6_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ e.mark = skb->mark;
+ e.mark &= h->markmask;
+
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ const struct hash_ipmark *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmark6_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ int ret;
+
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ tb[IPSET_ATTR_IP_TO] ||
+ tb[IPSET_ATTR_CIDR]))
+ return -IPSET_ERR_PROTOCOL;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
+ ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK]));
+ e.mark &= h->markmask;
+
+ if (adt == IPSET_TEST) {
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+ else
+ ret = 0;
+
+ return ret;
+}
+
+static struct ip_set_type hash_ipmark_type __read_mostly = {
+ .name = "hash:ip,mark",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_MARK,
+ .dimension = IPSET_DIM_TWO,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create = hash_ipmark_create,
+ .create_policy = {
+ [IPSET_ATTR_MARKMASK] = { .type = NLA_U32 },
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_MARK] = { .type = NLA_U32 },
+ [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_ipmark_init(void)
+{
+ return ip_set_type_register(&hash_ipmark_type);
+}
+
+static void __exit
+hash_ipmark_fini(void)
+{
+ ip_set_type_unregister(&hash_ipmark_type);
+}
+
+module_init(hash_ipmark_init);
+module_exit(hash_ipmark_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 525a595dd1fe..7597b82a8b03 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -27,7 +27,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 SCTP and UDPLITE support added */
/* 2 Counters support added */
-#define IPSET_TYPE_REV_MAX 3 /* Comments support added */
+/* 3 Comments support added */
+#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index f5636631466e..672655ffd573 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -27,7 +27,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 SCTP and UDPLITE support added */
/* 2 Counters support added */
-#define IPSET_TYPE_REV_MAX 3 /* Comments support added */
+/* 3 Comments support added */
+#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 5d87fe8a41ff..7308d84f9277 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -29,7 +29,8 @@
/* 2 Range as input support for IPv4 added */
/* 3 nomatch flag support added */
/* 4 Counters support added */
-#define IPSET_TYPE_REV_MAX 5 /* Comments support added */
+/* 5 Comments support added */
+#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 8295cf4f9fdc..4c7d495783a3 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -26,7 +26,8 @@
/* 1 Range as input support for IPv4 added */
/* 2 nomatch flag support added */
/* 3 Counters support added */
-#define IPSET_TYPE_REV_MAX 4 /* Comments support added */
+/* 4 Comments support added */
+#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index b827a0f1f351..db2606805b35 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -27,7 +27,8 @@
/* 1 nomatch flag support added */
/* 2 /0 support added */
/* 3 Counters support added */
-#define IPSET_TYPE_REV_MAX 4 /* Comments support added */
+/* 4 Comments support added */
+#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 6226803fc490..3e99987e4bf2 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -24,7 +24,7 @@
#include <linux/netfilter/ipset/ip_set_hash.h>
#define IPSET_TYPE_REV_MIN 0
-#define IPSET_TYPE_REV_MAX 0
+#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
@@ -112,10 +112,10 @@ hash_netnet4_data_list(struct sk_buff *skb,
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -334,10 +334,10 @@ hash_netnet6_data_list(struct sk_buff *skb,
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 7097fb0141bf..1c645fbd09c7 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -28,7 +28,8 @@
/* 2 Range as input support for IPv4 added */
/* 3 nomatch flag support added */
/* 4 Counters support added */
-#define IPSET_TYPE_REV_MAX 5 /* Comments support added */
+/* 5 Comments support added */
+#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 703d1192a6a2..c0d2ba73f8b2 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -25,7 +25,8 @@
#include <linux/netfilter/ipset/ip_set_hash.h>
#define IPSET_TYPE_REV_MIN 0
-#define IPSET_TYPE_REV_MAX 0 /* Comments support added */
+/* 0 Comments support added */
+#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c
index 4f29fa97044b..04d15fdc99ee 100644
--- a/net/netfilter/ipset/pfxlen.c
+++ b/net/netfilter/ipset/pfxlen.c
@@ -7,8 +7,8 @@
#define E(a, b, c, d) \
{.ip6 = { \
- __constant_htonl(a), __constant_htonl(b), \
- __constant_htonl(c), __constant_htonl(d), \
+ htonl(a), htonl(b), \
+ htonl(c), htonl(d), \
} }
/*
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 59a1a85bcb3e..a8eb0a89326a 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -871,11 +871,11 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
cp->protocol = p->protocol;
ip_vs_addr_set(p->af, &cp->caddr, p->caddr);
cp->cport = p->cport;
- ip_vs_addr_set(p->af, &cp->vaddr, p->vaddr);
- cp->vport = p->vport;
- /* proto should only be IPPROTO_IP if d_addr is a fwmark */
+ /* proto should only be IPPROTO_IP if p->vaddr is a fwmark */
ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
- &cp->daddr, daddr);
+ &cp->vaddr, p->vaddr);
+ cp->vport = p->vport;
+ ip_vs_addr_set(p->af, &cp->daddr, daddr);
cp->dport = dport;
cp->flags = flags;
cp->fwmark = fwmark;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 35be035ee0ce..c42e83d2751c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2177,10 +2177,10 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
__u64 inbytes, outbytes;
do {
- start = u64_stats_fetch_begin_bh(&u->syncp);
+ start = u64_stats_fetch_begin_irq(&u->syncp);
inbytes = u->ustats.inbytes;
outbytes = u->ustats.outbytes;
- } while (u64_stats_fetch_retry_bh(&u->syncp, start));
+ } while (u64_stats_fetch_retry_irq(&u->syncp, start));
seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
i, u->ustats.conns, u->ustats.inpkts,
@@ -3580,7 +3580,7 @@ out:
}
-static const struct genl_ops ip_vs_genl_ops[] __read_mostly = {
+static const struct genl_ops ip_vs_genl_ops[] = {
{
.cmd = IPVS_CMD_NEW_SERVICE,
.flags = GENL_ADMIN_PERM,
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index ca056a331e60..547ff33c1efd 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -238,7 +238,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc)
spin_lock_bh(&svc->sched_lock);
tbl->dead = 1;
- for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
+ for (i = 0; i < IP_VS_LBLC_TAB_SIZE; i++) {
hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
ip_vs_lblc_del(en);
atomic_dec(&tbl->entries);
@@ -265,7 +265,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
unsigned long now = jiffies;
int i, j;
- for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
+ for (i = 0, j = tbl->rover; i < IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
spin_lock(&svc->sched_lock);
@@ -321,7 +321,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
if (goal > tbl->max_size/2)
goal = tbl->max_size/2;
- for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
+ for (i = 0, j = tbl->rover; i < IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
spin_lock(&svc->sched_lock);
@@ -340,7 +340,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
tbl->rover = j;
out:
- mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
+ mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
}
@@ -363,7 +363,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
/*
* Initialize the hash buckets
*/
- for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
+ for (i = 0; i < IP_VS_LBLC_TAB_SIZE; i++) {
INIT_HLIST_HEAD(&tbl->bucket[i]);
}
tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
@@ -536,8 +536,7 @@ out:
/*
* IPVS LBLC Scheduler structure
*/
-static struct ip_vs_scheduler ip_vs_lblc_scheduler =
-{
+static struct ip_vs_scheduler ip_vs_lblc_scheduler = {
.name = "lblc",
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 8824ed0ccc9c..6dba48efe01e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -60,8 +60,59 @@ int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
const struct nlattr *attr) __read_mostly;
EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);
-DEFINE_SPINLOCK(nf_conntrack_lock);
-EXPORT_SYMBOL_GPL(nf_conntrack_lock);
+__cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
+EXPORT_SYMBOL_GPL(nf_conntrack_locks);
+
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
+
+static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
+{
+ h1 %= CONNTRACK_LOCKS;
+ h2 %= CONNTRACK_LOCKS;
+ spin_unlock(&nf_conntrack_locks[h1]);
+ if (h1 != h2)
+ spin_unlock(&nf_conntrack_locks[h2]);
+}
+
+/* return true if we need to recompute hashes (in case hash table was resized) */
+static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
+ unsigned int h2, unsigned int sequence)
+{
+ h1 %= CONNTRACK_LOCKS;
+ h2 %= CONNTRACK_LOCKS;
+ if (h1 <= h2) {
+ spin_lock(&nf_conntrack_locks[h1]);
+ if (h1 != h2)
+ spin_lock_nested(&nf_conntrack_locks[h2],
+ SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock(&nf_conntrack_locks[h2]);
+ spin_lock_nested(&nf_conntrack_locks[h1],
+ SINGLE_DEPTH_NESTING);
+ }
+ if (read_seqcount_retry(&net->ct.generation, sequence)) {
+ nf_conntrack_double_unlock(h1, h2);
+ return true;
+ }
+ return false;
+}
+
+static void nf_conntrack_all_lock(void)
+{
+ int i;
+
+ for (i = 0; i < CONNTRACK_LOCKS; i++)
+ spin_lock_nested(&nf_conntrack_locks[i], i);
+}
+
+static void nf_conntrack_all_unlock(void)
+{
+ int i;
+
+ for (i = 0; i < CONNTRACK_LOCKS; i++)
+ spin_unlock(&nf_conntrack_locks[i]);
+}
unsigned int nf_conntrack_htable_size __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
@@ -192,6 +243,50 @@ clean_from_lists(struct nf_conn *ct)
nf_ct_remove_expectations(ct);
}
+/* must be called with local_bh_disable */
+static void nf_ct_add_to_dying_list(struct nf_conn *ct)
+{
+ struct ct_pcpu *pcpu;
+
+ /* add this conntrack to the (per cpu) dying list */
+ ct->cpu = smp_processor_id();
+ pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
+
+ spin_lock(&pcpu->lock);
+ hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+ &pcpu->dying);
+ spin_unlock(&pcpu->lock);
+}
+
+/* must be called with local_bh_disable */
+static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct)
+{
+ struct ct_pcpu *pcpu;
+
+ /* add this conntrack to the (per cpu) unconfirmed list */
+ ct->cpu = smp_processor_id();
+ pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
+
+ spin_lock(&pcpu->lock);
+ hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+ &pcpu->unconfirmed);
+ spin_unlock(&pcpu->lock);
+}
+
+/* must be called with local_bh_disable */
+static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
+{
+ struct ct_pcpu *pcpu;
+
+ /* We overload first tuple to link into unconfirmed or dying list.*/
+ pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
+
+ spin_lock(&pcpu->lock);
+ BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
+ hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+ spin_unlock(&pcpu->lock);
+}
+
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
@@ -203,9 +298,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
NF_CT_ASSERT(!timer_pending(&ct->timeout));
- /* To make sure we don't get any weird locking issues here:
- * destroy_conntrack() MUST NOT be called with a write lock
- * to nf_conntrack_lock!!! -HW */
rcu_read_lock();
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto && l4proto->destroy)
@@ -213,19 +305,18 @@ destroy_conntrack(struct nf_conntrack *nfct)
rcu_read_unlock();
- spin_lock_bh(&nf_conntrack_lock);
+ local_bh_disable();
/* Expectations will have been removed in clean_from_lists,
* except TFTP can create an expectation on the first packet,
* before connection is in the list, so we need to clean here,
- * too. */
+ * too.
+ */
nf_ct_remove_expectations(ct);
- /* We overload first tuple to link into unconfirmed or dying list.*/
- BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
- hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+ nf_ct_del_from_dying_or_unconfirmed_list(ct);
NF_CT_STAT_INC(net, delete);
- spin_unlock_bh(&nf_conntrack_lock);
+ local_bh_enable();
if (ct->master)
nf_ct_put(ct->master);
@@ -237,17 +328,28 @@ destroy_conntrack(struct nf_conntrack *nfct)
static void nf_ct_delete_from_lists(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
+ unsigned int hash, reply_hash;
+ u16 zone = nf_ct_zone(ct);
+ unsigned int sequence;
nf_ct_helper_destroy(ct);
- spin_lock_bh(&nf_conntrack_lock);
- /* Inside lock so preempt is disabled on module removal path.
- * Otherwise we can get spurious warnings. */
- NF_CT_STAT_INC(net, delete_list);
+
+ local_bh_disable();
+ do {
+ sequence = read_seqcount_begin(&net->ct.generation);
+ hash = hash_conntrack(net, zone,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ reply_hash = hash_conntrack(net, zone,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
+
clean_from_lists(ct);
- /* add this conntrack to the dying list */
- hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
- &net->ct.dying);
- spin_unlock_bh(&nf_conntrack_lock);
+ nf_conntrack_double_unlock(hash, reply_hash);
+
+ nf_ct_add_to_dying_list(ct);
+
+ NF_CT_STAT_INC(net, delete_list);
+ local_bh_enable();
}
static void death_by_event(unsigned long ul_conntrack)
@@ -312,12 +414,25 @@ static void death_by_timeout(unsigned long ul_conntrack)
nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
}
+static inline bool
+nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
+ const struct nf_conntrack_tuple *tuple,
+ u16 zone)
+{
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+ /* A conntrack can be recreated with the equal tuple,
+ * so we need to check that the conntrack is confirmed
+ */
+ return nf_ct_tuple_equal(tuple, &h->tuple) &&
+ nf_ct_zone(ct) == zone &&
+ nf_ct_is_confirmed(ct);
+}
+
/*
* Warning :
* - Caller must take a reference on returned object
* and recheck nf_ct_tuple_equal(tuple, &h->tuple)
- * OR
- * - Caller must lock nf_conntrack_lock before calling this function
*/
static struct nf_conntrack_tuple_hash *
____nf_conntrack_find(struct net *net, u16 zone,
@@ -333,8 +448,7 @@ ____nf_conntrack_find(struct net *net, u16 zone,
local_bh_disable();
begin:
hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
- if (nf_ct_tuple_equal(tuple, &h->tuple) &&
- nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) {
+ if (nf_ct_key_equal(h, tuple, zone)) {
NF_CT_STAT_INC(net, found);
local_bh_enable();
return h;
@@ -372,8 +486,7 @@ begin:
!atomic_inc_not_zero(&ct->ct_general.use)))
h = NULL;
else {
- if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple) ||
- nf_ct_zone(ct) != zone)) {
+ if (unlikely(!nf_ct_key_equal(h, tuple, zone))) {
nf_ct_put(ct);
goto begin;
}
@@ -395,32 +508,36 @@ EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
static void __nf_conntrack_hash_insert(struct nf_conn *ct,
unsigned int hash,
- unsigned int repl_hash)
+ unsigned int reply_hash)
{
struct net *net = nf_ct_net(ct);
hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
&net->ct.hash[hash]);
hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
- &net->ct.hash[repl_hash]);
+ &net->ct.hash[reply_hash]);
}
int
nf_conntrack_hash_check_insert(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
- unsigned int hash, repl_hash;
+ unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
u16 zone;
+ unsigned int sequence;
zone = nf_ct_zone(ct);
- hash = hash_conntrack(net, zone,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- repl_hash = hash_conntrack(net, zone,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- spin_lock_bh(&nf_conntrack_lock);
+ local_bh_disable();
+ do {
+ sequence = read_seqcount_begin(&net->ct.generation);
+ hash = hash_conntrack(net, zone,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ reply_hash = hash_conntrack(net, zone,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
/* See if there's one in the list already, including reverse */
hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
@@ -428,32 +545,57 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
&h->tuple) &&
zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
goto out;
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode)
+ hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
&h->tuple) &&
zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
goto out;
add_timer(&ct->timeout);
- nf_conntrack_get(&ct->ct_general);
- __nf_conntrack_hash_insert(ct, hash, repl_hash);
+ smp_wmb();
+ /* The caller holds a reference to this object */
+ atomic_set(&ct->ct_general.use, 2);
+ __nf_conntrack_hash_insert(ct, hash, reply_hash);
+ nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert);
- spin_unlock_bh(&nf_conntrack_lock);
-
+ local_bh_enable();
return 0;
out:
+ nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert_failed);
- spin_unlock_bh(&nf_conntrack_lock);
+ local_bh_enable();
return -EEXIST;
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
+/* deletion from this larval template list happens via nf_ct_put() */
+void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl)
+{
+ struct ct_pcpu *pcpu;
+
+ __set_bit(IPS_TEMPLATE_BIT, &tmpl->status);
+ __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
+ nf_conntrack_get(&tmpl->ct_general);
+
+ /* add this conntrack to the (per cpu) tmpl list */
+ local_bh_disable();
+ tmpl->cpu = smp_processor_id();
+ pcpu = per_cpu_ptr(nf_ct_net(tmpl)->ct.pcpu_lists, tmpl->cpu);
+
+ spin_lock(&pcpu->lock);
+ /* Overload tuple linked list to put us in template list. */
+ hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+ &pcpu->tmpl);
+ spin_unlock_bh(&pcpu->lock);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert);
+
/* Confirm a connection given skb; places it in hash table */
int
__nf_conntrack_confirm(struct sk_buff *skb)
{
- unsigned int hash, repl_hash;
+ unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
struct nf_conn_help *help;
@@ -462,6 +604,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
enum ip_conntrack_info ctinfo;
struct net *net;
u16 zone;
+ unsigned int sequence;
ct = nf_ct_get(skb, &ctinfo);
net = nf_ct_net(ct);
@@ -474,31 +617,37 @@ __nf_conntrack_confirm(struct sk_buff *skb)
return NF_ACCEPT;
zone = nf_ct_zone(ct);
- /* reuse the hash saved before */
- hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
- hash = hash_bucket(hash, net);
- repl_hash = hash_conntrack(net, zone,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ local_bh_disable();
+
+ do {
+ sequence = read_seqcount_begin(&net->ct.generation);
+ /* reuse the hash saved before */
+ hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
+ hash = hash_bucket(hash, net);
+ reply_hash = hash_conntrack(net, zone,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+ } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
/* We're not in hash table, and we refuse to set up related
- connections for unconfirmed conns. But packet copies and
- REJECT will give spurious warnings here. */
+ * connections for unconfirmed conns. But packet copies and
+ * REJECT will give spurious warnings here.
+ */
/* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
/* No external references means no one else could have
- confirmed us. */
+ * confirmed us.
+ */
NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
pr_debug("Confirming conntrack %p\n", ct);
-
- spin_lock_bh(&nf_conntrack_lock);
-
/* We have to check the DYING flag inside the lock to prevent
a race against nf_ct_get_next_corpse() possibly called from
user context, else we insert an already 'dead' hash, blocking
further use of that particular connection -JM */
if (unlikely(nf_ct_is_dying(ct))) {
- spin_unlock_bh(&nf_conntrack_lock);
+ nf_conntrack_double_unlock(hash, reply_hash);
+ local_bh_enable();
return NF_ACCEPT;
}
@@ -510,14 +659,13 @@ __nf_conntrack_confirm(struct sk_buff *skb)
&h->tuple) &&
zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
goto out;
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode)
+ hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
&h->tuple) &&
zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
goto out;
- /* Remove from unconfirmed list */
- hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+ nf_ct_del_from_dying_or_unconfirmed_list(ct);
/* Timer relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in
@@ -540,9 +688,10 @@ __nf_conntrack_confirm(struct sk_buff *skb)
* guarantee that no other CPU can find the conntrack before the above
* stores are visible.
*/
- __nf_conntrack_hash_insert(ct, hash, repl_hash);
+ __nf_conntrack_hash_insert(ct, hash, reply_hash);
+ nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert);
- spin_unlock_bh(&nf_conntrack_lock);
+ local_bh_enable();
help = nfct_help(ct);
if (help && help->helper)
@@ -553,8 +702,9 @@ __nf_conntrack_confirm(struct sk_buff *skb)
return NF_ACCEPT;
out:
+ nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert_failed);
- spin_unlock_bh(&nf_conntrack_lock);
+ local_bh_enable();
return NF_DROP;
}
EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
@@ -597,39 +747,48 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
/* There's a small race here where we may free a just-assured
connection. Too bad: we're in trouble anyway. */
-static noinline int early_drop(struct net *net, unsigned int hash)
+static noinline int early_drop(struct net *net, unsigned int _hash)
{
/* Use oldest entry, which is roughly LRU */
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct = NULL, *tmp;
struct hlist_nulls_node *n;
- unsigned int i, cnt = 0;
+ unsigned int i = 0, cnt = 0;
int dropped = 0;
+ unsigned int hash, sequence;
+ spinlock_t *lockp;
- rcu_read_lock();
- for (i = 0; i < net->ct.htable_size; i++) {
+ local_bh_disable();
+restart:
+ sequence = read_seqcount_begin(&net->ct.generation);
+ hash = hash_bucket(_hash, net);
+ for (; i < net->ct.htable_size; i++) {
+ lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
+ spin_lock(lockp);
+ if (read_seqcount_retry(&net->ct.generation, sequence)) {
+ spin_unlock(lockp);
+ goto restart;
+ }
hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash],
hnnode) {
tmp = nf_ct_tuplehash_to_ctrack(h);
- if (!test_bit(IPS_ASSURED_BIT, &tmp->status))
+ if (!test_bit(IPS_ASSURED_BIT, &tmp->status) &&
+ !nf_ct_is_dying(tmp) &&
+ atomic_inc_not_zero(&tmp->ct_general.use)) {
ct = tmp;
+ break;
+ }
cnt++;
}
- if (ct != NULL) {
- if (likely(!nf_ct_is_dying(ct) &&
- atomic_inc_not_zero(&ct->ct_general.use)))
- break;
- else
- ct = NULL;
- }
+ hash = (hash + 1) % net->ct.htable_size;
+ spin_unlock(lockp);
- if (cnt >= NF_CT_EVICTION_RANGE)
+ if (ct || cnt >= NF_CT_EVICTION_RANGE)
break;
- hash = (hash + 1) % net->ct.htable_size;
}
- rcu_read_unlock();
+ local_bh_enable();
if (!ct)
return dropped;
@@ -678,7 +837,7 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
if (nf_conntrack_max &&
unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
- if (!early_drop(net, hash_bucket(hash, net))) {
+ if (!early_drop(net, hash)) {
atomic_dec(&net->ct.count);
net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
return ERR_PTR(-ENOMEM);
@@ -720,11 +879,10 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
nf_ct_zone->id = zone;
}
#endif
- /*
- * changes to lookup keys must be done before setting refcnt to 1
+ /* Because we use RCU lookups, we set ct_general.use to zero before
+ * this is inserted in any list.
*/
- smp_wmb();
- atomic_set(&ct->ct_general.use, 1);
+ atomic_set(&ct->ct_general.use, 0);
return ct;
#ifdef CONFIG_NF_CONNTRACK_ZONES
@@ -748,6 +906,11 @@ void nf_conntrack_free(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
+ /* A freed object has refcnt == 0, that's
+ * the golden rule for SLAB_DESTROY_BY_RCU
+ */
+ NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
+
nf_ct_ext_destroy(ct);
nf_ct_ext_free(ct);
kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
@@ -771,7 +934,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
struct nf_conn_help *help;
struct nf_conntrack_tuple repl_tuple;
struct nf_conntrack_ecache *ecache;
- struct nf_conntrack_expect *exp;
+ struct nf_conntrack_expect *exp = NULL;
u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
struct nf_conn_timeout *timeout_ext;
unsigned int *timeouts;
@@ -815,39 +978,44 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
ecache ? ecache->expmask : 0,
GFP_ATOMIC);
- spin_lock_bh(&nf_conntrack_lock);
- exp = nf_ct_find_expectation(net, zone, tuple);
- if (exp) {
- pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
- ct, exp);
- /* Welcome, Mr. Bond. We've been expecting you... */
- __set_bit(IPS_EXPECTED_BIT, &ct->status);
- ct->master = exp->master;
- if (exp->helper) {
- help = nf_ct_helper_ext_add(ct, exp->helper,
- GFP_ATOMIC);
- if (help)
- rcu_assign_pointer(help->helper, exp->helper);
- }
+ local_bh_disable();
+ if (net->ct.expect_count) {
+ spin_lock(&nf_conntrack_expect_lock);
+ exp = nf_ct_find_expectation(net, zone, tuple);
+ if (exp) {
+ pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
+ ct, exp);
+ /* Welcome, Mr. Bond. We've been expecting you... */
+ __set_bit(IPS_EXPECTED_BIT, &ct->status);
+ /* exp->master safe, refcnt bumped in nf_ct_find_expectation */
+ ct->master = exp->master;
+ if (exp->helper) {
+ help = nf_ct_helper_ext_add(ct, exp->helper,
+ GFP_ATOMIC);
+ if (help)
+ rcu_assign_pointer(help->helper, exp->helper);
+ }
#ifdef CONFIG_NF_CONNTRACK_MARK
- ct->mark = exp->master->mark;
+ ct->mark = exp->master->mark;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
- ct->secmark = exp->master->secmark;
+ ct->secmark = exp->master->secmark;
#endif
- nf_conntrack_get(&ct->master->ct_general);
- NF_CT_STAT_INC(net, expect_new);
- } else {
+ NF_CT_STAT_INC(net, expect_new);
+ }
+ spin_unlock(&nf_conntrack_expect_lock);
+ }
+ if (!exp) {
__nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
NF_CT_STAT_INC(net, new);
}
- /* Overload tuple linked list to put us in unconfirmed list. */
- hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
- &net->ct.unconfirmed);
+ /* Now it is inserted into the unconfirmed list, bump refcount */
+ nf_conntrack_get(&ct->ct_general);
+ nf_ct_add_to_unconfirmed_list(ct);
- spin_unlock_bh(&nf_conntrack_lock);
+ local_bh_enable();
if (exp) {
if (exp->expectfn)
@@ -1217,27 +1385,42 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
struct hlist_nulls_node *n;
+ int cpu;
+ spinlock_t *lockp;
- spin_lock_bh(&nf_conntrack_lock);
for (; *bucket < net->ct.htable_size; (*bucket)++) {
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
- if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
- continue;
+ lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
+ local_bh_disable();
+ spin_lock(lockp);
+ if (*bucket < net->ct.htable_size) {
+ hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
+ if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
+ continue;
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (iter(ct, data))
+ goto found;
+ }
+ }
+ spin_unlock(lockp);
+ local_bh_enable();
+ }
+
+ for_each_possible_cpu(cpu) {
+ struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+ spin_lock_bh(&pcpu->lock);
+ hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
if (iter(ct, data))
- goto found;
+ set_bit(IPS_DYING_BIT, &ct->status);
}
+ spin_unlock_bh(&pcpu->lock);
}
- hlist_nulls_for_each_entry(h, n, &net->ct.unconfirmed, hnnode) {
- ct = nf_ct_tuplehash_to_ctrack(h);
- if (iter(ct, data))
- set_bit(IPS_DYING_BIT, &ct->status);
- }
- spin_unlock_bh(&nf_conntrack_lock);
return NULL;
found:
atomic_inc(&ct->ct_general.use);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock(lockp);
+ local_bh_enable();
return ct;
}
@@ -1286,14 +1469,19 @@ static void nf_ct_release_dying_list(struct net *net)
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
struct hlist_nulls_node *n;
+ int cpu;
- spin_lock_bh(&nf_conntrack_lock);
- hlist_nulls_for_each_entry(h, n, &net->ct.dying, hnnode) {
- ct = nf_ct_tuplehash_to_ctrack(h);
- /* never fails to remove them, no listeners at this point */
- nf_ct_kill(ct);
+ for_each_possible_cpu(cpu) {
+ struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+ spin_lock_bh(&pcpu->lock);
+ hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ /* never fails to remove them, no listeners at this point */
+ nf_ct_kill(ct);
+ }
+ spin_unlock_bh(&pcpu->lock);
}
- spin_unlock_bh(&nf_conntrack_lock);
}
static int untrack_refs(void)
@@ -1380,6 +1568,7 @@ i_see_dead_people:
kmem_cache_destroy(net->ct.nf_conntrack_cachep);
kfree(net->ct.slabname);
free_percpu(net->ct.stat);
+ free_percpu(net->ct.pcpu_lists);
}
}
@@ -1432,12 +1621,16 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
if (!hash)
return -ENOMEM;
+ local_bh_disable();
+ nf_conntrack_all_lock();
+ write_seqcount_begin(&init_net.ct.generation);
+
/* Lookups in the old hash might happen in parallel, which means we
* might get false negatives during connection lookup. New connections
* created because of a false negative won't make it into the hash
- * though since that required taking the lock.
+ * though since that required taking the locks.
*/
- spin_lock_bh(&nf_conntrack_lock);
+
for (i = 0; i < init_net.ct.htable_size; i++) {
while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
h = hlist_nulls_entry(init_net.ct.hash[i].first,
@@ -1454,7 +1647,10 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
init_net.ct.hash = hash;
- spin_unlock_bh(&nf_conntrack_lock);
+
+ write_seqcount_end(&init_net.ct.generation);
+ nf_conntrack_all_unlock();
+ local_bh_enable();
nf_ct_free_hashtable(old_hash, old_size);
return 0;
@@ -1476,7 +1672,10 @@ EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
int nf_conntrack_init_start(void)
{
int max_factor = 8;
- int ret, cpu;
+ int i, ret, cpu;
+
+ for (i = 0; i < CONNTRACK_LOCKS; i++)
+ spin_lock_init(&nf_conntrack_locks[i]);
/* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
* machine has 512 buckets. >= 1GB machines have 16384 buckets. */
@@ -1592,37 +1791,43 @@ void nf_conntrack_init_end(void)
int nf_conntrack_init_net(struct net *net)
{
- int ret;
+ int ret = -ENOMEM;
+ int cpu;
atomic_set(&net->ct.count, 0);
- INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL);
- INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL);
- INIT_HLIST_NULLS_HEAD(&net->ct.tmpl, TEMPLATE_NULLS_VAL);
- net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
- if (!net->ct.stat) {
- ret = -ENOMEM;
+
+ net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
+ if (!net->ct.pcpu_lists)
goto err_stat;
+
+ for_each_possible_cpu(cpu) {
+ struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+ spin_lock_init(&pcpu->lock);
+ INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL);
+ INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL);
+ INIT_HLIST_NULLS_HEAD(&pcpu->tmpl, TEMPLATE_NULLS_VAL);
}
+ net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
+ if (!net->ct.stat)
+ goto err_pcpu_lists;
+
net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
- if (!net->ct.slabname) {
- ret = -ENOMEM;
+ if (!net->ct.slabname)
goto err_slabname;
- }
net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname,
sizeof(struct nf_conn), 0,
SLAB_DESTROY_BY_RCU, NULL);
if (!net->ct.nf_conntrack_cachep) {
printk(KERN_ERR "Unable to create nf_conn slab cache\n");
- ret = -ENOMEM;
goto err_cache;
}
net->ct.htable_size = nf_conntrack_htable_size;
net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
if (!net->ct.hash) {
- ret = -ENOMEM;
printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
goto err_hash;
}
@@ -1664,6 +1869,8 @@ err_cache:
kfree(net->ct.slabname);
err_slabname:
free_percpu(net->ct.stat);
+err_pcpu_lists:
+ free_percpu(net->ct.pcpu_lists);
err_stat:
return ret;
}
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 4fd1ca94fd4a..f87e8f68ad45 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -66,9 +66,9 @@ static void nf_ct_expectation_timed_out(unsigned long ul_expect)
{
struct nf_conntrack_expect *exp = (void *)ul_expect;
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
nf_ct_unlink_expect(exp);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
nf_ct_expect_put(exp);
}
@@ -155,6 +155,18 @@ nf_ct_find_expectation(struct net *net, u16 zone,
if (!nf_ct_is_confirmed(exp->master))
return NULL;
+ /* Avoid race with other CPUs, that for exp->master ct, is
+ * about to invoke ->destroy(), or nf_ct_delete() via timeout
+ * or early_drop().
+ *
+ * The atomic_inc_not_zero() check tells: If that fails, we
+ * know that the ct is being destroyed. If it succeeds, we
+ * can be sure the ct cannot disappear underneath.
+ */
+ if (unlikely(nf_ct_is_dying(exp->master) ||
+ !atomic_inc_not_zero(&exp->master->ct_general.use)))
+ return NULL;
+
if (exp->flags & NF_CT_EXPECT_PERMANENT) {
atomic_inc(&exp->use);
return exp;
@@ -162,6 +174,8 @@ nf_ct_find_expectation(struct net *net, u16 zone,
nf_ct_unlink_expect(exp);
return exp;
}
+ /* Undo exp->master refcnt increase, if del_timer() failed */
+ nf_ct_put(exp->master);
return NULL;
}
@@ -177,12 +191,14 @@ void nf_ct_remove_expectations(struct nf_conn *ct)
if (!help)
return;
+ spin_lock_bh(&nf_conntrack_expect_lock);
hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
nf_ct_expect_put(exp);
}
}
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
@@ -217,12 +233,12 @@ static inline int expect_matches(const struct nf_conntrack_expect *a,
/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
nf_ct_expect_put(exp);
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
@@ -335,7 +351,7 @@ static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
(unsigned long)exp);
helper = rcu_dereference_protected(master_help->helper,
- lockdep_is_held(&nf_conntrack_lock));
+ lockdep_is_held(&nf_conntrack_expect_lock));
if (helper) {
exp->timeout.expires = jiffies +
helper->expect_policy[exp->class].timeout * HZ;
@@ -395,7 +411,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
}
/* Will be over limit? */
helper = rcu_dereference_protected(master_help->helper,
- lockdep_is_held(&nf_conntrack_lock));
+ lockdep_is_held(&nf_conntrack_expect_lock));
if (helper) {
p = &helper->expect_policy[expect->class];
if (p->max_expected &&
@@ -417,12 +433,12 @@ out:
return ret;
}
-int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
+int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
u32 portid, int report)
{
int ret;
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
ret = __nf_ct_expect_check(expect);
if (ret <= 0)
goto out;
@@ -430,11 +446,11 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
ret = nf_ct_expect_insert(expect);
if (ret < 0)
goto out;
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
return ret;
out:
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 70866d192efc..3a3a60b126e0 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1476,7 +1476,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
nf_ct_refresh(ct, skb, info->timeout * HZ);
/* Set expect timeout */
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
exp = find_expect(ct, &ct->tuplehash[dir].tuple.dst.u3,
info->sig_port[!dir]);
if (exp) {
@@ -1486,7 +1486,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
nf_ct_dump_tuple(&exp->tuple);
set_expect_timeout(exp, info->timeout);
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
return 0;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 974a2a4adefa..5b3eae7d4c9a 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -250,16 +250,14 @@ out:
}
EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper);
+/* appropiate ct lock protecting must be taken by caller */
static inline int unhelp(struct nf_conntrack_tuple_hash *i,
const struct nf_conntrack_helper *me)
{
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
struct nf_conn_help *help = nfct_help(ct);
- if (help && rcu_dereference_protected(
- help->helper,
- lockdep_is_held(&nf_conntrack_lock)
- ) == me) {
+ if (help && rcu_dereference_raw(help->helper) == me) {
nf_conntrack_event(IPCT_HELPER, ct);
RCU_INIT_POINTER(help->helper, NULL);
}
@@ -284,17 +282,17 @@ static LIST_HEAD(nf_ct_helper_expectfn_list);
void nf_ct_helper_expectfn_register(struct nf_ct_helper_expectfn *n)
{
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
list_add_rcu(&n->head, &nf_ct_helper_expectfn_list);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_register);
void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n)
{
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
list_del_rcu(&n->head);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister);
@@ -396,15 +394,17 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
const struct hlist_node *next;
const struct hlist_nulls_node *nn;
unsigned int i;
+ int cpu;
/* Get rid of expectations */
+ spin_lock_bh(&nf_conntrack_expect_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, next,
&net->ct.expect_hash[i], hnode) {
struct nf_conn_help *help = nfct_help(exp->master);
if ((rcu_dereference_protected(
help->helper,
- lockdep_is_held(&nf_conntrack_lock)
+ lockdep_is_held(&nf_conntrack_expect_lock)
) == me || exp->helper == me) &&
del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
@@ -412,14 +412,27 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
}
}
}
+ spin_unlock_bh(&nf_conntrack_expect_lock);
/* Get rid of expecteds, set helpers to NULL. */
- hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode)
- unhelp(h, me);
- for (i = 0; i < net->ct.htable_size; i++) {
- hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
+ for_each_possible_cpu(cpu) {
+ struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+ spin_lock_bh(&pcpu->lock);
+ hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
unhelp(h, me);
+ spin_unlock_bh(&pcpu->lock);
+ }
+ local_bh_disable();
+ for (i = 0; i < net->ct.htable_size; i++) {
+ spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+ if (i < net->ct.htable_size) {
+ hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
+ unhelp(h, me);
+ }
+ spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
}
+ local_bh_enable();
}
void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
@@ -437,10 +450,8 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
synchronize_rcu();
rtnl_lock();
- spin_lock_bh(&nf_conntrack_lock);
for_each_net(net)
__nf_conntrack_helper_unregister(me, net);
- spin_unlock_bh(&nf_conntrack_lock);
rtnl_unlock();
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index bb322d0beb48..ccc46fa5edbc 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -764,14 +764,23 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u_int8_t l3proto = nfmsg->nfgen_family;
int res;
+ spinlock_t *lockp;
+
#ifdef CONFIG_NF_CONNTRACK_MARK
const struct ctnetlink_dump_filter *filter = cb->data;
#endif
- spin_lock_bh(&nf_conntrack_lock);
last = (struct nf_conn *)cb->args[1];
+
+ local_bh_disable();
for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
restart:
+ lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
+ spin_lock(lockp);
+ if (cb->args[0] >= net->ct.htable_size) {
+ spin_unlock(lockp);
+ goto out;
+ }
hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]],
hnnode) {
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
@@ -803,16 +812,18 @@ restart:
if (res < 0) {
nf_conntrack_get(&ct->ct_general);
cb->args[1] = (unsigned long)ct;
+ spin_unlock(lockp);
goto out;
}
}
+ spin_unlock(lockp);
if (cb->args[1]) {
cb->args[1] = 0;
goto restart;
}
}
out:
- spin_unlock_bh(&nf_conntrack_lock);
+ local_bh_enable();
if (last)
nf_ct_put(last);
@@ -966,7 +977,6 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name,
return 0;
}
-#define __CTA_LABELS_MAX_LENGTH ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE)
static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
[CTA_TUPLE_ORIG] = { .type = NLA_NESTED },
[CTA_TUPLE_REPLY] = { .type = NLA_NESTED },
@@ -984,9 +994,9 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
[CTA_ZONE] = { .type = NLA_U16 },
[CTA_MARK_MASK] = { .type = NLA_U32 },
[CTA_LABELS] = { .type = NLA_BINARY,
- .len = __CTA_LABELS_MAX_LENGTH },
+ .len = NF_CT_LABELS_MAX_SIZE },
[CTA_LABELS_MASK] = { .type = NLA_BINARY,
- .len = __CTA_LABELS_MAX_LENGTH },
+ .len = NF_CT_LABELS_MAX_SIZE },
};
static int
@@ -1138,50 +1148,65 @@ static int ctnetlink_done_list(struct netlink_callback *cb)
}
static int
-ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb,
- struct hlist_nulls_head *list)
+ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying)
{
- struct nf_conn *ct, *last;
+ struct nf_conn *ct, *last = NULL;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u_int8_t l3proto = nfmsg->nfgen_family;
int res;
+ int cpu;
+ struct hlist_nulls_head *list;
+ struct net *net = sock_net(skb->sk);
if (cb->args[2])
return 0;
- spin_lock_bh(&nf_conntrack_lock);
- last = (struct nf_conn *)cb->args[1];
-restart:
- hlist_nulls_for_each_entry(h, n, list, hnnode) {
- ct = nf_ct_tuplehash_to_ctrack(h);
- if (l3proto && nf_ct_l3num(ct) != l3proto)
+ if (cb->args[0] == nr_cpu_ids)
+ return 0;
+
+ for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) {
+ struct ct_pcpu *pcpu;
+
+ if (!cpu_possible(cpu))
continue;
- if (cb->args[1]) {
- if (ct != last)
+
+ pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+ spin_lock_bh(&pcpu->lock);
+ last = (struct nf_conn *)cb->args[1];
+ list = dying ? &pcpu->dying : &pcpu->unconfirmed;
+restart:
+ hlist_nulls_for_each_entry(h, n, list, hnnode) {
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (l3proto && nf_ct_l3num(ct) != l3proto)
continue;
- cb->args[1] = 0;
- }
- rcu_read_lock();
- res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
- ct);
- rcu_read_unlock();
- if (res < 0) {
- nf_conntrack_get(&ct->ct_general);
- cb->args[1] = (unsigned long)ct;
- goto out;
+ if (cb->args[1]) {
+ if (ct != last)
+ continue;
+ cb->args[1] = 0;
+ }
+ rcu_read_lock();
+ res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+ ct);
+ rcu_read_unlock();
+ if (res < 0) {
+ nf_conntrack_get(&ct->ct_general);
+ cb->args[1] = (unsigned long)ct;
+ spin_unlock_bh(&pcpu->lock);
+ goto out;
+ }
}
+ if (cb->args[1]) {
+ cb->args[1] = 0;
+ goto restart;
+ } else
+ cb->args[2] = 1;
+ spin_unlock_bh(&pcpu->lock);
}
- if (cb->args[1]) {
- cb->args[1] = 0;
- goto restart;
- } else
- cb->args[2] = 1;
out:
- spin_unlock_bh(&nf_conntrack_lock);
if (last)
nf_ct_put(last);
@@ -1191,9 +1216,7 @@ out:
static int
ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
-
- return ctnetlink_dump_list(skb, cb, &net->ct.dying);
+ return ctnetlink_dump_list(skb, cb, true);
}
static int
@@ -1215,9 +1238,7 @@ ctnetlink_get_ct_dying(struct sock *ctnl, struct sk_buff *skb,
static int
ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
-
- return ctnetlink_dump_list(skb, cb, &net->ct.unconfirmed);
+ return ctnetlink_dump_list(skb, cb, false);
}
static int
@@ -1310,27 +1331,22 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
}
static int
-ctnetlink_change_nat(struct nf_conn *ct, const struct nlattr * const cda[])
+ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[])
{
#ifdef CONFIG_NF_NAT_NEEDED
int ret;
- if (cda[CTA_NAT_DST]) {
- ret = ctnetlink_parse_nat_setup(ct,
- NF_NAT_MANIP_DST,
- cda[CTA_NAT_DST]);
- if (ret < 0)
- return ret;
- }
- if (cda[CTA_NAT_SRC]) {
- ret = ctnetlink_parse_nat_setup(ct,
- NF_NAT_MANIP_SRC,
- cda[CTA_NAT_SRC]);
- if (ret < 0)
- return ret;
- }
- return 0;
+ ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_DST,
+ cda[CTA_NAT_DST]);
+ if (ret < 0)
+ return ret;
+
+ ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_SRC,
+ cda[CTA_NAT_SRC]);
+ return ret;
#else
+ if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC])
+ return 0;
return -EOPNOTSUPP;
#endif
}
@@ -1366,14 +1382,14 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[])
nf_ct_protonum(ct));
if (helper == NULL) {
#ifdef CONFIG_MODULES
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
if (request_module("nfct-helper-%s", helpname) < 0) {
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
return -EOPNOTSUPP;
}
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
nf_ct_protonum(ct));
if (helper)
@@ -1659,11 +1675,9 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
goto err2;
}
- if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
- err = ctnetlink_change_nat(ct, cda);
- if (err < 0)
- goto err2;
- }
+ err = ctnetlink_setup_nat(ct, cda);
+ if (err < 0)
+ goto err2;
nf_ct_acct_ext_add(ct, GFP_ATOMIC);
nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
@@ -1811,9 +1825,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
err = -EEXIST;
ct = nf_ct_tuplehash_to_ctrack(h);
if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
err = ctnetlink_change_conntrack(ct, cda);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
if (err == 0) {
nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
(1 << IPCT_ASSURED) |
@@ -2142,9 +2156,9 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
if (ret < 0)
return ret;
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
return ret;
}
@@ -2699,13 +2713,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
}
/* after list removal, usage count == 1 */
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid,
nlmsg_report(nlh));
nf_ct_expect_put(exp);
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
/* have to put what we 'get' above.
* after this line usage count == 0 */
nf_ct_expect_put(exp);
@@ -2714,7 +2728,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
struct nf_conn_help *m_help;
/* delete all expectations for this helper */
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, next,
&net->ct.expect_hash[i],
@@ -2729,10 +2743,10 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
}
}
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
} else {
/* This basically means we have to flush everything*/
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, next,
&net->ct.expect_hash[i],
@@ -2745,7 +2759,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
}
}
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
return 0;
@@ -2971,11 +2985,11 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
if (err < 0)
return err;
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
exp = __nf_ct_expect_find(net, zone, &tuple);
if (!exp) {
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
err = -ENOENT;
if (nlh->nlmsg_flags & NLM_F_CREATE) {
err = ctnetlink_create_expect(net, zone, cda,
@@ -2989,7 +3003,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
err = -EEXIST;
if (!(nlh->nlmsg_flags & NLM_F_EXCL))
err = ctnetlink_change_expect(exp, cda);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
return err;
}
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 466410eaa482..4c3ba1c8d682 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -800,7 +800,7 @@ static int refresh_signalling_expectation(struct nf_conn *ct,
struct hlist_node *next;
int found = 0;
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
if (exp->class != SIP_EXPECT_SIGNALLING ||
!nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) ||
@@ -815,7 +815,7 @@ static int refresh_signalling_expectation(struct nf_conn *ct,
found = 1;
break;
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
return found;
}
@@ -825,7 +825,7 @@ static void flush_expectations(struct nf_conn *ct, bool media)
struct nf_conntrack_expect *exp;
struct hlist_node *next;
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media)
continue;
@@ -836,7 +836,7 @@ static void flush_expectations(struct nf_conn *ct, bool media)
if (!media)
break;
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index d3f5cd6dd962..52ca952b802c 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -432,15 +432,15 @@ nf_nat_setup_info(struct nf_conn *ct,
}
EXPORT_SYMBOL(nf_nat_setup_info);
-unsigned int
-nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
+static unsigned int
+__nf_nat_alloc_null_binding(struct nf_conn *ct, enum nf_nat_manip_type manip)
{
/* Force range to this IP; let proto decide mapping for
* per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
* Use reply in case it's already been mangled (eg local packet).
*/
union nf_inet_addr ip =
- (HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
+ (manip == NF_NAT_MANIP_SRC ?
ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
struct nf_nat_range range = {
@@ -448,7 +448,13 @@ nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
.min_addr = ip,
.max_addr = ip,
};
- return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
+ return nf_nat_setup_info(ct, &range, manip);
+}
+
+unsigned int
+nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
+{
+ return __nf_nat_alloc_null_binding(ct, HOOK2MANIP(hooknum));
}
EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding);
@@ -702,9 +708,9 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
static int
nfnetlink_parse_nat(const struct nlattr *nat,
- const struct nf_conn *ct, struct nf_nat_range *range)
+ const struct nf_conn *ct, struct nf_nat_range *range,
+ const struct nf_nat_l3proto *l3proto)
{
- const struct nf_nat_l3proto *l3proto;
struct nlattr *tb[CTA_NAT_MAX+1];
int err;
@@ -714,38 +720,46 @@ nfnetlink_parse_nat(const struct nlattr *nat,
if (err < 0)
return err;
- rcu_read_lock();
- l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
- if (l3proto == NULL) {
- err = -EAGAIN;
- goto out;
- }
err = l3proto->nlattr_to_range(tb, range);
if (err < 0)
- goto out;
+ return err;
if (!tb[CTA_NAT_PROTO])
- goto out;
+ return 0;
- err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
-out:
- rcu_read_unlock();
- return err;
+ return nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
}
+/* This function is called under rcu_read_lock() */
static int
nfnetlink_parse_nat_setup(struct nf_conn *ct,
enum nf_nat_manip_type manip,
const struct nlattr *attr)
{
struct nf_nat_range range;
+ const struct nf_nat_l3proto *l3proto;
int err;
- err = nfnetlink_parse_nat(attr, ct, &range);
+ /* Should not happen, restricted to creating new conntracks
+ * via ctnetlink.
+ */
+ if (WARN_ON_ONCE(nf_nat_initialized(ct, manip)))
+ return -EEXIST;
+
+ /* Make sure that L3 NAT is there by when we call nf_nat_setup_info to
+ * attach the null binding, otherwise this may oops.
+ */
+ l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
+ if (l3proto == NULL)
+ return -EAGAIN;
+
+ /* No NAT information has been passed, allocate the null-binding */
+ if (attr == NULL)
+ return __nf_nat_alloc_null_binding(ct, manip);
+
+ err = nfnetlink_parse_nat(attr, ct, &range, l3proto);
if (err < 0)
return err;
- if (nf_nat_initialized(ct, manip))
- return -EEXIST;
return nf_nat_setup_info(ct, &range, manip);
}
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 9858e3e51a3a..52e20c9a46a5 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -363,9 +363,8 @@ static int __net_init synproxy_net_init(struct net *net)
goto err2;
if (!nfct_synproxy_ext_add(ct))
goto err2;
- __set_bit(IPS_TEMPLATE_BIT, &ct->status);
- __set_bit(IPS_CONFIRMED_BIT, &ct->status);
+ nf_conntrack_tmpl_insert(net, ct);
snet->tmpl = ct;
snet->stats = alloc_percpu(struct synproxy_stats);
@@ -390,7 +389,7 @@ static void __net_exit synproxy_net_exit(struct net *net)
{
struct synproxy_net *snet = synproxy_pernet(net);
- nf_conntrack_free(snet->tmpl);
+ nf_ct_put(snet->tmpl);
synproxy_proc_exit(net);
free_percpu(snet->stats);
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 117bbaaddde6..33045a562297 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -794,9 +794,8 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
if (chain->stats) {
- /* nfnl_lock is held, add some nfnl function for this, later */
struct nft_stats __percpu *oldstats =
- rcu_dereference_protected(chain->stats, 1);
+ nft_dereference(chain->stats);
rcu_assign_pointer(chain->stats, newstats);
synchronize_rcu();
@@ -1008,10 +1007,8 @@ notify:
return 0;
}
-static void nf_tables_rcu_chain_destroy(struct rcu_head *head)
+static void nf_tables_chain_destroy(struct nft_chain *chain)
{
- struct nft_chain *chain = container_of(head, struct nft_chain, rcu_head);
-
BUG_ON(chain->use > 0);
if (chain->flags & NFT_BASE_CHAIN) {
@@ -1045,7 +1042,7 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
if (IS_ERR(chain))
return PTR_ERR(chain);
- if (!list_empty(&chain->rules))
+ if (!list_empty(&chain->rules) || chain->use > 0)
return -EBUSY;
list_del(&chain->list);
@@ -1059,7 +1056,9 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
family);
/* Make sure all rule references are gone before this is released */
- call_rcu(&chain->rcu_head, nf_tables_rcu_chain_destroy);
+ synchronize_rcu();
+
+ nf_tables_chain_destroy(chain);
return 0;
}
@@ -1114,35 +1113,45 @@ void nft_unregister_expr(struct nft_expr_type *type)
}
EXPORT_SYMBOL_GPL(nft_unregister_expr);
-static const struct nft_expr_type *__nft_expr_type_get(struct nlattr *nla)
+static const struct nft_expr_type *__nft_expr_type_get(u8 family,
+ struct nlattr *nla)
{
const struct nft_expr_type *type;
list_for_each_entry(type, &nf_tables_expressions, list) {
- if (!nla_strcmp(nla, type->name))
+ if (!nla_strcmp(nla, type->name) &&
+ (!type->family || type->family == family))
return type;
}
return NULL;
}
-static const struct nft_expr_type *nft_expr_type_get(struct nlattr *nla)
+static const struct nft_expr_type *nft_expr_type_get(u8 family,
+ struct nlattr *nla)
{
const struct nft_expr_type *type;
if (nla == NULL)
return ERR_PTR(-EINVAL);
- type = __nft_expr_type_get(nla);
+ type = __nft_expr_type_get(family, nla);
if (type != NULL && try_module_get(type->owner))
return type;
#ifdef CONFIG_MODULES
if (type == NULL) {
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ request_module("nft-expr-%u-%.*s", family,
+ nla_len(nla), (char *)nla_data(nla));
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ if (__nft_expr_type_get(family, nla))
+ return ERR_PTR(-EAGAIN);
+
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
request_module("nft-expr-%.*s",
nla_len(nla), (char *)nla_data(nla));
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- if (__nft_expr_type_get(nla))
+ if (__nft_expr_type_get(family, nla))
return ERR_PTR(-EAGAIN);
}
#endif
@@ -1193,7 +1202,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
if (err < 0)
return err;
- type = nft_expr_type_get(tb[NFTA_EXPR_NAME]);
+ type = nft_expr_type_get(ctx->afi->family, tb[NFTA_EXPR_NAME]);
if (IS_ERR(type))
return PTR_ERR(type);
@@ -1244,10 +1253,11 @@ err1:
return err;
}
-static void nf_tables_expr_destroy(struct nft_expr *expr)
+static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
+ struct nft_expr *expr)
{
if (expr->ops->destroy)
- expr->ops->destroy(expr);
+ expr->ops->destroy(ctx, expr);
module_put(expr->ops->type->owner);
}
@@ -1286,6 +1296,8 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
[NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED },
[NFTA_RULE_COMPAT] = { .type = NLA_NESTED },
[NFTA_RULE_POSITION] = { .type = NLA_U64 },
+ [NFTA_RULE_USERDATA] = { .type = NLA_BINARY,
+ .len = NFT_USERDATA_MAXLEN },
};
static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
@@ -1338,6 +1350,10 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
}
nla_nest_end(skb, list);
+ if (rule->ulen &&
+ nla_put(skb, NFTA_RULE_USERDATA, rule->ulen, nft_userdata(rule)))
+ goto nla_put_failure;
+
return nlmsg_end(skb, nlh);
nla_put_failure:
@@ -1521,9 +1537,9 @@ err:
return err;
}
-static void nf_tables_rcu_rule_destroy(struct rcu_head *head)
+static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
+ struct nft_rule *rule)
{
- struct nft_rule *rule = container_of(head, struct nft_rule, rcu_head);
struct nft_expr *expr;
/*
@@ -1532,23 +1548,18 @@ static void nf_tables_rcu_rule_destroy(struct rcu_head *head)
*/
expr = nft_expr_first(rule);
while (expr->ops && expr != nft_expr_last(rule)) {
- nf_tables_expr_destroy(expr);
+ nf_tables_expr_destroy(ctx, expr);
expr = nft_expr_next(expr);
}
kfree(rule);
}
-static void nf_tables_rule_destroy(struct nft_rule *rule)
-{
- call_rcu(&rule->rcu_head, nf_tables_rcu_rule_destroy);
-}
-
#define NFT_RULE_MAXEXPRS 128
static struct nft_expr_info *info;
static struct nft_rule_trans *
-nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx)
+nf_tables_trans_add(struct nft_ctx *ctx, struct nft_rule *rule)
{
struct nft_rule_trans *rupd;
@@ -1556,11 +1567,8 @@ nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx)
if (rupd == NULL)
return NULL;
- rupd->chain = ctx->chain;
- rupd->table = ctx->table;
+ rupd->ctx = *ctx;
rupd->rule = rule;
- rupd->family = ctx->afi->family;
- rupd->nlh = ctx->nlh;
list_add_tail(&rupd->list, &ctx->net->nft.commit_list);
return rupd;
@@ -1580,7 +1588,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
struct nft_expr *expr;
struct nft_ctx ctx;
struct nlattr *tmp;
- unsigned int size, i, n;
+ unsigned int size, i, n, ulen = 0;
int err, rem;
bool create;
u64 handle, pos_handle;
@@ -1646,8 +1654,11 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
}
}
+ if (nla[NFTA_RULE_USERDATA])
+ ulen = nla_len(nla[NFTA_RULE_USERDATA]);
+
err = -ENOMEM;
- rule = kzalloc(sizeof(*rule) + size, GFP_KERNEL);
+ rule = kzalloc(sizeof(*rule) + size + ulen, GFP_KERNEL);
if (rule == NULL)
goto err1;
@@ -1655,6 +1666,10 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
rule->handle = handle;
rule->dlen = size;
+ rule->ulen = ulen;
+
+ if (ulen)
+ nla_memcpy(nft_userdata(rule), nla[NFTA_RULE_USERDATA], ulen);
expr = nft_expr_first(rule);
for (i = 0; i < n; i++) {
@@ -1667,7 +1682,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
if (nlh->nlmsg_flags & NLM_F_REPLACE) {
if (nft_rule_is_active_next(net, old_rule)) {
- repl = nf_tables_trans_add(old_rule, &ctx);
+ repl = nf_tables_trans_add(&ctx, old_rule);
if (repl == NULL) {
err = -ENOMEM;
goto err2;
@@ -1690,7 +1705,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
list_add_rcu(&rule->list, &chain->rules);
}
- if (nf_tables_trans_add(rule, &ctx) == NULL) {
+ if (nf_tables_trans_add(&ctx, rule) == NULL) {
err = -ENOMEM;
goto err3;
}
@@ -1705,7 +1720,7 @@ err3:
kfree(repl);
}
err2:
- nf_tables_rule_destroy(rule);
+ nf_tables_rule_destroy(&ctx, rule);
err1:
for (i = 0; i < n; i++) {
if (info[i].ops != NULL)
@@ -1719,7 +1734,7 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule)
{
/* You cannot delete the same rule twice */
if (nft_rule_is_active_next(ctx->net, rule)) {
- if (nf_tables_trans_add(rule, ctx) == NULL)
+ if (nf_tables_trans_add(ctx, rule) == NULL)
return -ENOMEM;
nft_rule_disactivate_next(ctx->net, rule);
return 0;
@@ -1809,29 +1824,36 @@ static int nf_tables_commit(struct sk_buff *skb)
synchronize_rcu();
list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- /* Delete this rule from the dirty list */
- list_del(&rupd->list);
-
/* This rule was inactive in the past and just became active.
* Clear the next bit of the genmask since its meaning has
* changed, now it is the future.
*/
if (nft_rule_is_active(net, rupd->rule)) {
nft_rule_clear(net, rupd->rule);
- nf_tables_rule_notify(skb, rupd->nlh, rupd->table,
- rupd->chain, rupd->rule,
- NFT_MSG_NEWRULE, 0,
- rupd->family);
+ nf_tables_rule_notify(skb, rupd->ctx.nlh,
+ rupd->ctx.table, rupd->ctx.chain,
+ rupd->rule, NFT_MSG_NEWRULE, 0,
+ rupd->ctx.afi->family);
+ list_del(&rupd->list);
kfree(rupd);
continue;
}
/* This rule is in the past, get rid of it */
list_del_rcu(&rupd->rule->list);
- nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain,
+ nf_tables_rule_notify(skb, rupd->ctx.nlh,
+ rupd->ctx.table, rupd->ctx.chain,
rupd->rule, NFT_MSG_DELRULE, 0,
- rupd->family);
- nf_tables_rule_destroy(rupd->rule);
+ rupd->ctx.afi->family);
+ }
+
+ /* Make sure we don't see any packet traversing old rules */
+ synchronize_rcu();
+
+ /* Now we can safely release unused old rules */
+ list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
+ nf_tables_rule_destroy(&rupd->ctx, rupd->rule);
+ list_del(&rupd->list);
kfree(rupd);
}
@@ -1844,20 +1866,26 @@ static int nf_tables_abort(struct sk_buff *skb)
struct nft_rule_trans *rupd, *tmp;
list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- /* Delete all rules from the dirty list */
- list_del(&rupd->list);
-
if (!nft_rule_is_active_next(net, rupd->rule)) {
nft_rule_clear(net, rupd->rule);
+ list_del(&rupd->list);
kfree(rupd);
continue;
}
/* This rule is inactive, get rid of it */
list_del_rcu(&rupd->rule->list);
- nf_tables_rule_destroy(rupd->rule);
+ }
+
+ /* Make sure we don't see any packet accessing aborted rules */
+ synchronize_rcu();
+
+ list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
+ nf_tables_rule_destroy(&rupd->ctx, rupd->rule);
+ list_del(&rupd->list);
kfree(rupd);
}
+
return 0;
}
@@ -1943,6 +1971,9 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
}
if (nla[NFTA_SET_TABLE] != NULL) {
+ if (afi == NULL)
+ return -EAFNOSUPPORT;
+
table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -1989,13 +2020,13 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
if (!sscanf(i->name, name, &tmp))
continue;
- if (tmp < 0 || tmp > BITS_PER_LONG * PAGE_SIZE)
+ if (tmp < 0 || tmp >= BITS_PER_BYTE * PAGE_SIZE)
continue;
set_bit(tmp, inuse);
}
- n = find_first_zero_bit(inuse, BITS_PER_LONG * PAGE_SIZE);
+ n = find_first_zero_bit(inuse, BITS_PER_BYTE * PAGE_SIZE);
free_page((unsigned long)inuse);
}
@@ -2411,8 +2442,7 @@ err1:
static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
{
list_del(&set->list);
- if (!(set->flags & NFT_SET_ANONYMOUS))
- nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
+ nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
set->ops->destroy(set);
module_put(set->ops->owner);
@@ -2428,6 +2458,8 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
struct nft_ctx ctx;
int err;
+ if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
+ return -EAFNOSUPPORT;
if (nla[NFTA_SET_TABLE] == NULL)
return -EINVAL;
@@ -2435,9 +2467,6 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
if (err < 0)
return err;
- if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
- return -EAFNOSUPPORT;
-
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
if (IS_ERR(set))
return PTR_ERR(set);
@@ -2723,6 +2752,9 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
if (nla[NFTA_SET_ELEM_DATA] == NULL &&
!(elem.flags & NFT_SET_ELEM_INTERVAL_END))
return -EINVAL;
+ if (nla[NFTA_SET_ELEM_DATA] != NULL &&
+ elem.flags & NFT_SET_ELEM_INTERVAL_END)
+ return -EINVAL;
} else {
if (nla[NFTA_SET_ELEM_DATA] != NULL)
return -EINVAL;
@@ -2977,6 +3009,9 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
const struct nft_set_iter *iter,
const struct nft_set_elem *elem)
{
+ if (elem->flags & NFT_SET_ELEM_INTERVAL_END)
+ return 0;
+
switch (elem->data.verdict) {
case NFT_JUMP:
case NFT_GOTO:
@@ -3151,9 +3186,16 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
switch (data->verdict) {
- case NF_ACCEPT:
- case NF_DROP:
- case NF_QUEUE:
+ default:
+ switch (data->verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ case NF_DROP:
+ case NF_QUEUE:
+ break;
+ default:
+ return -EINVAL;
+ }
+ /* fall through */
case NFT_CONTINUE:
case NFT_BREAK:
case NFT_RETURN:
@@ -3174,8 +3216,6 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
data->chain = chain;
desc->len = sizeof(data);
break;
- default:
- return -EINVAL;
}
desc->type = NFT_DATA_VERDICT;
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 0d879fcb8763..90998a6ff8b9 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -103,9 +103,9 @@ static struct nf_loginfo trace_loginfo = {
},
};
-static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
- const struct nft_chain *chain,
- int rulenum, enum nft_trace type)
+static void nft_trace_packet(const struct nft_pktinfo *pkt,
+ const struct nft_chain *chain,
+ int rulenum, enum nft_trace type)
{
struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 046aa13b4fea..e8138da4c14f 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -61,6 +61,14 @@ void nfnl_unlock(__u8 subsys_id)
}
EXPORT_SYMBOL_GPL(nfnl_unlock);
+#ifdef CONFIG_PROVE_LOCKING
+int lockdep_nfnl_is_held(u8 subsys_id)
+{
+ return lockdep_is_held(&table[subsys_id].mutex);
+}
+EXPORT_SYMBOL_GPL(lockdep_nfnl_is_held);
+#endif
+
int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n)
{
nfnl_lock(n->subsys_id);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index a155d19a225e..d292c8d286eb 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -28,8 +28,6 @@
#include <linux/proc_fs.h>
#include <linux/security.h>
#include <linux/list.h>
-#include <linux/jhash.h>
-#include <linux/random.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <net/netfilter/nf_log.h>
@@ -75,7 +73,6 @@ struct nfulnl_instance {
};
#define INSTANCE_BUCKETS 16
-static unsigned int hash_init;
static int nfnl_log_net_id __read_mostly;
@@ -1067,11 +1064,6 @@ static int __init nfnetlink_log_init(void)
{
int status = -ENOMEM;
- /* it's not really all that important to have a random value, so
- * we can do this from the init function, even if there hasn't
- * been that much entropy yet */
- get_random_bytes(&hash_init, sizeof(hash_init));
-
netlink_register_notifier(&nfulnl_rtnl_notifier);
status = nfnetlink_subsys_register(&nfulnl_subsys);
if (status < 0) {
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 82cb8236f8a1..8a779be832fb 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -192,7 +192,7 @@ err:
}
static void
-nft_target_destroy(const struct nft_expr *expr)
+nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
struct xt_target *target = expr->ops->data;
@@ -379,7 +379,7 @@ err:
}
static void
-nft_match_destroy(const struct nft_expr *expr)
+nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
struct xt_match *match = expr->ops->data;
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 917052e20602..bd0d41e69341 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -19,15 +19,15 @@
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_labels.h>
struct nft_ct {
enum nft_ct_keys key:8;
enum ip_conntrack_dir dir:8;
- union{
+ union {
enum nft_registers dreg:8;
enum nft_registers sreg:8;
};
- uint8_t family;
};
static void nft_ct_get_eval(const struct nft_expr *expr,
@@ -97,6 +97,26 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
goto err;
strncpy((char *)dest->data, helper->name, sizeof(dest->data));
return;
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case NFT_CT_LABELS: {
+ struct nf_conn_labels *labels = nf_ct_labels_find(ct);
+ unsigned int size;
+
+ if (!labels) {
+ memset(dest->data, 0, sizeof(dest->data));
+ return;
+ }
+
+ BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > sizeof(dest->data));
+ size = labels->words * sizeof(long);
+
+ memcpy(dest->data, labels->bits, size);
+ if (size < sizeof(dest->data))
+ memset(((char *) dest->data) + size, 0,
+ sizeof(dest->data) - size);
+ return;
+ }
+#endif
}
tuple = &ct->tuplehash[priv->dir].tuple;
@@ -221,11 +241,15 @@ static int nft_ct_init_validate_get(const struct nft_expr *expr,
#ifdef CONFIG_NF_CONNTRACK_SECMARK
case NFT_CT_SECMARK:
#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case NFT_CT_LABELS:
+#endif
case NFT_CT_EXPIRATION:
case NFT_CT_HELPER:
if (tb[NFTA_CT_DIRECTION] != NULL)
return -EINVAL;
break;
+ case NFT_CT_L3PROTOCOL:
case NFT_CT_PROTOCOL:
case NFT_CT_SRC:
case NFT_CT_DST:
@@ -291,16 +315,13 @@ static int nft_ct_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- priv->family = ctx->afi->family;
-
return 0;
}
-static void nft_ct_destroy(const struct nft_expr *expr)
+static void nft_ct_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
{
- struct nft_ct *priv = nft_expr_priv(expr);
-
- nft_ct_l3proto_module_put(priv->family);
+ nft_ct_l3proto_module_put(ctx->afi->family);
}
static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -311,8 +332,19 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
goto nla_put_failure;
- if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
- goto nla_put_failure;
+
+ switch (priv->key) {
+ case NFT_CT_PROTOCOL:
+ case NFT_CT_SRC:
+ case NFT_CT_DST:
+ case NFT_CT_PROTO_SRC:
+ case NFT_CT_PROTO_DST:
+ if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
+ goto nla_put_failure;
+ default:
+ break;
+ }
+
return 0;
nla_put_failure:
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 3d3f8fce10a5..3b1ad876d6b0 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -14,21 +14,34 @@
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/netlink.h>
+#include <linux/vmalloc.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
+#define NFT_HASH_MIN_SIZE 4
+
struct nft_hash {
- struct hlist_head *hash;
- unsigned int hsize;
+ struct nft_hash_table __rcu *tbl;
+};
+
+struct nft_hash_table {
+ unsigned int size;
+ unsigned int elements;
+ struct nft_hash_elem __rcu *buckets[];
};
struct nft_hash_elem {
- struct hlist_node hnode;
- struct nft_data key;
- struct nft_data data[];
+ struct nft_hash_elem __rcu *next;
+ struct nft_data key;
+ struct nft_data data[];
};
+#define nft_hash_for_each_entry(i, head) \
+ for (i = nft_dereference(head); i != NULL; i = nft_dereference(i->next))
+#define nft_hash_for_each_entry_rcu(i, head) \
+ for (i = rcu_dereference(head); i != NULL; i = rcu_dereference(i->next))
+
static u32 nft_hash_rnd __read_mostly;
static bool nft_hash_rnd_initted __read_mostly;
@@ -38,7 +51,7 @@ static unsigned int nft_hash_data(const struct nft_data *data,
unsigned int h;
h = jhash(data->data, len, nft_hash_rnd);
- return ((u64)h * hsize) >> 32;
+ return h & (hsize - 1);
}
static bool nft_hash_lookup(const struct nft_set *set,
@@ -46,11 +59,12 @@ static bool nft_hash_lookup(const struct nft_set *set,
struct nft_data *data)
{
const struct nft_hash *priv = nft_set_priv(set);
+ const struct nft_hash_table *tbl = rcu_dereference(priv->tbl);
const struct nft_hash_elem *he;
unsigned int h;
- h = nft_hash_data(key, priv->hsize, set->klen);
- hlist_for_each_entry(he, &priv->hash[h], hnode) {
+ h = nft_hash_data(key, tbl->size, set->klen);
+ nft_hash_for_each_entry_rcu(he, tbl->buckets[h]) {
if (nft_data_cmp(&he->key, key, set->klen))
continue;
if (set->flags & NFT_SET_MAP)
@@ -60,19 +74,148 @@ static bool nft_hash_lookup(const struct nft_set *set,
return false;
}
-static void nft_hash_elem_destroy(const struct nft_set *set,
- struct nft_hash_elem *he)
+static void nft_hash_tbl_free(const struct nft_hash_table *tbl)
{
- nft_data_uninit(&he->key, NFT_DATA_VALUE);
- if (set->flags & NFT_SET_MAP)
- nft_data_uninit(he->data, set->dtype);
- kfree(he);
+ if (is_vmalloc_addr(tbl))
+ vfree(tbl);
+ else
+ kfree(tbl);
+}
+
+static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets)
+{
+ struct nft_hash_table *tbl;
+ size_t size;
+
+ size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
+ tbl = kzalloc(size, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN);
+ if (tbl == NULL)
+ tbl = vzalloc(size);
+ if (tbl == NULL)
+ return NULL;
+ tbl->size = nbuckets;
+
+ return tbl;
+}
+
+static void nft_hash_chain_unzip(const struct nft_set *set,
+ const struct nft_hash_table *ntbl,
+ struct nft_hash_table *tbl, unsigned int n)
+{
+ struct nft_hash_elem *he, *last, *next;
+ unsigned int h;
+
+ he = nft_dereference(tbl->buckets[n]);
+ if (he == NULL)
+ return;
+ h = nft_hash_data(&he->key, ntbl->size, set->klen);
+
+ /* Find last element of first chain hashing to bucket h */
+ last = he;
+ nft_hash_for_each_entry(he, he->next) {
+ if (nft_hash_data(&he->key, ntbl->size, set->klen) != h)
+ break;
+ last = he;
+ }
+
+ /* Unlink first chain from the old table */
+ RCU_INIT_POINTER(tbl->buckets[n], last->next);
+
+ /* If end of chain reached, done */
+ if (he == NULL)
+ return;
+
+ /* Find first element of second chain hashing to bucket h */
+ next = NULL;
+ nft_hash_for_each_entry(he, he->next) {
+ if (nft_hash_data(&he->key, ntbl->size, set->klen) != h)
+ continue;
+ next = he;
+ break;
+ }
+
+ /* Link the two chains */
+ RCU_INIT_POINTER(last->next, next);
+}
+
+static int nft_hash_tbl_expand(const struct nft_set *set, struct nft_hash *priv)
+{
+ struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl;
+ struct nft_hash_elem *he;
+ unsigned int i, h;
+ bool complete;
+
+ ntbl = nft_hash_tbl_alloc(tbl->size * 2);
+ if (ntbl == NULL)
+ return -ENOMEM;
+
+ /* Link new table's buckets to first element in the old table
+ * hashing to the new bucket.
+ */
+ for (i = 0; i < ntbl->size; i++) {
+ h = i < tbl->size ? i : i - tbl->size;
+ nft_hash_for_each_entry(he, tbl->buckets[h]) {
+ if (nft_hash_data(&he->key, ntbl->size, set->klen) != i)
+ continue;
+ RCU_INIT_POINTER(ntbl->buckets[i], he);
+ break;
+ }
+ }
+ ntbl->elements = tbl->elements;
+
+ /* Publish new table */
+ rcu_assign_pointer(priv->tbl, ntbl);
+
+ /* Unzip interleaved hash chains */
+ do {
+ /* Wait for readers to use new table/unzipped chains */
+ synchronize_rcu();
+
+ complete = true;
+ for (i = 0; i < tbl->size; i++) {
+ nft_hash_chain_unzip(set, ntbl, tbl, i);
+ if (tbl->buckets[i] != NULL)
+ complete = false;
+ }
+ } while (!complete);
+
+ nft_hash_tbl_free(tbl);
+ return 0;
+}
+
+static int nft_hash_tbl_shrink(const struct nft_set *set, struct nft_hash *priv)
+{
+ struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl;
+ struct nft_hash_elem __rcu **pprev;
+ unsigned int i;
+
+ ntbl = nft_hash_tbl_alloc(tbl->size / 2);
+ if (ntbl == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < ntbl->size; i++) {
+ ntbl->buckets[i] = tbl->buckets[i];
+
+ for (pprev = &ntbl->buckets[i]; *pprev != NULL;
+ pprev = &nft_dereference(*pprev)->next)
+ ;
+ RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]);
+ }
+ ntbl->elements = tbl->elements;
+
+ /* Publish new table */
+ rcu_assign_pointer(priv->tbl, ntbl);
+ synchronize_rcu();
+
+ nft_hash_tbl_free(tbl);
+ return 0;
}
static int nft_hash_insert(const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_table *tbl = nft_dereference(priv->tbl);
struct nft_hash_elem *he;
unsigned int size, h;
@@ -91,33 +234,66 @@ static int nft_hash_insert(const struct nft_set *set,
if (set->flags & NFT_SET_MAP)
nft_data_copy(he->data, &elem->data);
- h = nft_hash_data(&he->key, priv->hsize, set->klen);
- hlist_add_head_rcu(&he->hnode, &priv->hash[h]);
+ h = nft_hash_data(&he->key, tbl->size, set->klen);
+ RCU_INIT_POINTER(he->next, tbl->buckets[h]);
+ rcu_assign_pointer(tbl->buckets[h], he);
+ tbl->elements++;
+
+ /* Expand table when exceeding 75% load */
+ if (tbl->elements > tbl->size / 4 * 3)
+ nft_hash_tbl_expand(set, priv);
+
return 0;
}
+static void nft_hash_elem_destroy(const struct nft_set *set,
+ struct nft_hash_elem *he)
+{
+ nft_data_uninit(&he->key, NFT_DATA_VALUE);
+ if (set->flags & NFT_SET_MAP)
+ nft_data_uninit(he->data, set->dtype);
+ kfree(he);
+}
+
static void nft_hash_remove(const struct nft_set *set,
const struct nft_set_elem *elem)
{
- struct nft_hash_elem *he = elem->cookie;
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_table *tbl = nft_dereference(priv->tbl);
+ struct nft_hash_elem *he, __rcu **pprev;
- hlist_del_rcu(&he->hnode);
+ pprev = elem->cookie;
+ he = nft_dereference((*pprev));
+
+ RCU_INIT_POINTER(*pprev, he->next);
+ synchronize_rcu();
kfree(he);
+ tbl->elements--;
+
+ /* Shrink table beneath 30% load */
+ if (tbl->elements < tbl->size * 3 / 10 &&
+ tbl->size > NFT_HASH_MIN_SIZE)
+ nft_hash_tbl_shrink(set, priv);
}
static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
{
const struct nft_hash *priv = nft_set_priv(set);
+ const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
+ struct nft_hash_elem __rcu * const *pprev;
struct nft_hash_elem *he;
unsigned int h;
- h = nft_hash_data(&elem->key, priv->hsize, set->klen);
- hlist_for_each_entry(he, &priv->hash[h], hnode) {
- if (nft_data_cmp(&he->key, &elem->key, set->klen))
+ h = nft_hash_data(&elem->key, tbl->size, set->klen);
+ pprev = &tbl->buckets[h];
+ nft_hash_for_each_entry(he, tbl->buckets[h]) {
+ if (nft_data_cmp(&he->key, &elem->key, set->klen)) {
+ pprev = &he->next;
continue;
+ }
- elem->cookie = he;
- elem->flags = 0;
+ elem->cookie = (void *)pprev;
+ elem->flags = 0;
if (set->flags & NFT_SET_MAP)
nft_data_copy(&elem->data, he->data);
return 0;
@@ -129,12 +305,13 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
struct nft_set_iter *iter)
{
const struct nft_hash *priv = nft_set_priv(set);
+ const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
const struct nft_hash_elem *he;
struct nft_set_elem elem;
unsigned int i;
- for (i = 0; i < priv->hsize; i++) {
- hlist_for_each_entry(he, &priv->hash[i], hnode) {
+ for (i = 0; i < tbl->size; i++) {
+ nft_hash_for_each_entry(he, tbl->buckets[i]) {
if (iter->count < iter->skip)
goto cont;
@@ -161,43 +338,35 @@ static int nft_hash_init(const struct nft_set *set,
const struct nlattr * const tb[])
{
struct nft_hash *priv = nft_set_priv(set);
- unsigned int cnt, i;
+ struct nft_hash_table *tbl;
if (unlikely(!nft_hash_rnd_initted)) {
get_random_bytes(&nft_hash_rnd, 4);
nft_hash_rnd_initted = true;
}
- /* Aim for a load factor of 0.75 */
- // FIXME: temporarily broken until we have set descriptions
- cnt = 100;
- cnt = cnt * 4 / 3;
-
- priv->hash = kcalloc(cnt, sizeof(struct hlist_head), GFP_KERNEL);
- if (priv->hash == NULL)
+ tbl = nft_hash_tbl_alloc(NFT_HASH_MIN_SIZE);
+ if (tbl == NULL)
return -ENOMEM;
- priv->hsize = cnt;
-
- for (i = 0; i < cnt; i++)
- INIT_HLIST_HEAD(&priv->hash[i]);
-
+ RCU_INIT_POINTER(priv->tbl, tbl);
return 0;
}
static void nft_hash_destroy(const struct nft_set *set)
{
const struct nft_hash *priv = nft_set_priv(set);
- const struct hlist_node *next;
- struct nft_hash_elem *elem;
+ const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
+ struct nft_hash_elem *he, *next;
unsigned int i;
- for (i = 0; i < priv->hsize; i++) {
- hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) {
- hlist_del(&elem->hnode);
- nft_hash_elem_destroy(set, elem);
+ for (i = 0; i < tbl->size; i++) {
+ for (he = nft_dereference(tbl->buckets[i]); he != NULL;
+ he = next) {
+ next = nft_dereference(he->next);
+ nft_hash_elem_destroy(set, he);
}
}
- kfree(priv->hash);
+ kfree(tbl);
}
static struct nft_set_ops nft_hash_ops __read_mostly = {
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index f169501f1ad4..810385eb7249 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -70,7 +70,8 @@ err1:
return err;
}
-static void nft_immediate_destroy(const struct nft_expr *expr)
+static void nft_immediate_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg));
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 5af790123ad8..10cfb156cdf4 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -23,7 +23,6 @@ static const char *nft_log_null_prefix = "";
struct nft_log {
struct nf_loginfo loginfo;
char *prefix;
- int family;
};
static void nft_log_eval(const struct nft_expr *expr,
@@ -33,7 +32,7 @@ static void nft_log_eval(const struct nft_expr *expr,
const struct nft_log *priv = nft_expr_priv(expr);
struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
- nf_log_packet(net, priv->family, pkt->ops->hooknum, pkt->skb, pkt->in,
+ nf_log_packet(net, pkt->ops->pf, pkt->ops->hooknum, pkt->skb, pkt->in,
pkt->out, &priv->loginfo, "%s", priv->prefix);
}
@@ -52,8 +51,6 @@ static int nft_log_init(const struct nft_ctx *ctx,
struct nf_loginfo *li = &priv->loginfo;
const struct nlattr *nla;
- priv->family = ctx->afi->family;
-
nla = tb[NFTA_LOG_PREFIX];
if (nla != NULL) {
priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL);
@@ -77,7 +74,8 @@ static int nft_log_init(const struct nft_ctx *ctx,
return 0;
}
-static void nft_log_destroy(const struct nft_expr *expr)
+static void nft_log_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
{
struct nft_log *priv = nft_expr_priv(expr);
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 8a6116b75b5a..7fd2bea8aa23 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -16,6 +16,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
struct nft_lookup {
struct nft_set *set;
@@ -88,11 +89,12 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
return 0;
}
-static void nft_lookup_destroy(const struct nft_expr *expr)
+static void nft_lookup_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
{
struct nft_lookup *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(NULL, priv->set, &priv->binding);
+ nf_tables_unbind_set(ctx, priv->set, &priv->binding);
}
static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index e8254ad2e5a9..425cf39af890 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -116,7 +116,7 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
skb->sk->sk_socket->file->f_cred->fsgid);
read_unlock_bh(&skb->sk->sk_callback_lock);
break;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
case NFT_META_RTCLASSID: {
const struct dst_entry *dst = skb_dst(skb);
@@ -199,7 +199,7 @@ static int nft_meta_init_validate_get(uint32_t key)
case NFT_META_OIFTYPE:
case NFT_META_SKUID:
case NFT_META_SKGID:
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
case NFT_META_RTCLASSID:
#endif
#ifdef CONFIG_NETWORK_SECMARK
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index d3b1ffe26181..a0195d28bcfc 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -31,8 +31,8 @@ struct nft_nat {
enum nft_registers sreg_addr_max:8;
enum nft_registers sreg_proto_min:8;
enum nft_registers sreg_proto_max:8;
- int family;
- enum nf_nat_manip_type type;
+ enum nf_nat_manip_type type:8;
+ u8 family;
};
static void nft_nat_eval(const struct nft_expr *expr,
@@ -88,6 +88,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_nat *priv = nft_expr_priv(expr);
+ u32 family;
int err;
if (tb[NFTA_NAT_TYPE] == NULL)
@@ -107,9 +108,12 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (tb[NFTA_NAT_FAMILY] == NULL)
return -EINVAL;
- priv->family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
- if (priv->family != AF_INET && priv->family != AF_INET6)
- return -EINVAL;
+ family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
+ if (family != AF_INET && family != AF_INET6)
+ return -EAFNOSUPPORT;
+ if (family != ctx->afi->family)
+ return -EOPNOTSUPP;
+ priv->family = family;
if (tb[NFTA_NAT_REG_ADDR_MIN]) {
priv->sreg_addr_min = ntohl(nla_get_be32(
@@ -202,13 +206,7 @@ static struct nft_expr_type nft_nat_type __read_mostly = {
static int __init nft_nat_module_init(void)
{
- int err;
-
- err = nft_register_expr(&nft_nat_type);
- if (err < 0)
- return err;
-
- return 0;
+ return nft_register_expr(&nft_nat_type);
}
static void __exit nft_nat_module_exit(void)
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index a2aeb318678f..85daa84bfdfe 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -135,7 +135,8 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
if (len == 0 || len > FIELD_SIZEOF(struct nft_data, data))
return ERR_PTR(-EINVAL);
- if (len <= 4 && IS_ALIGNED(offset, len) && base != NFT_PAYLOAD_LL_HEADER)
+ if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) &&
+ base != NFT_PAYLOAD_LL_HEADER)
return &nft_payload_fast_ops;
else
return &nft_payload_ops;
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index cbea473d69e9..e8ae2f6bf232 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -25,7 +25,6 @@ struct nft_queue {
u16 queuenum;
u16 queues_total;
u16 flags;
- u8 family;
};
static void nft_queue_eval(const struct nft_expr *expr,
@@ -43,7 +42,7 @@ static void nft_queue_eval(const struct nft_expr *expr,
queue = priv->queuenum + cpu % priv->queues_total;
} else {
queue = nfqueue_hash(pkt->skb, queue,
- priv->queues_total, priv->family,
+ priv->queues_total, pkt->ops->pf,
jhash_initval);
}
}
@@ -71,7 +70,6 @@ static int nft_queue_init(const struct nft_ctx *ctx,
return -EINVAL;
init_hashrandom(&jhash_initval);
- priv->family = ctx->afi->family;
priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM]));
if (tb[NFTA_QUEUE_TOTAL] != NULL)
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index ca0c1b231bfe..e21d69d13506 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -69,8 +69,10 @@ static void nft_rbtree_elem_destroy(const struct nft_set *set,
struct nft_rbtree_elem *rbe)
{
nft_data_uninit(&rbe->key, NFT_DATA_VALUE);
- if (set->flags & NFT_SET_MAP)
+ if (set->flags & NFT_SET_MAP &&
+ !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
nft_data_uninit(rbe->data, set->dtype);
+
kfree(rbe);
}
@@ -108,7 +110,8 @@ static int nft_rbtree_insert(const struct nft_set *set,
int err;
size = sizeof(*rbe);
- if (set->flags & NFT_SET_MAP)
+ if (set->flags & NFT_SET_MAP &&
+ !(elem->flags & NFT_SET_ELEM_INTERVAL_END))
size += sizeof(rbe->data[0]);
rbe = kzalloc(size, GFP_KERNEL);
@@ -117,7 +120,8 @@ static int nft_rbtree_insert(const struct nft_set *set,
rbe->flags = elem->flags;
nft_data_copy(&rbe->key, &elem->key);
- if (set->flags & NFT_SET_MAP)
+ if (set->flags & NFT_SET_MAP &&
+ !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
nft_data_copy(rbe->data, &elem->data);
err = __nft_rbtree_insert(set, rbe);
@@ -153,7 +157,8 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
parent = parent->rb_right;
else {
elem->cookie = rbe;
- if (set->flags & NFT_SET_MAP)
+ if (set->flags & NFT_SET_MAP &&
+ !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
nft_data_copy(&elem->data, rbe->data);
elem->flags = rbe->flags;
return 0;
@@ -177,7 +182,8 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
rbe = rb_entry(node, struct nft_rbtree_elem, node);
nft_data_copy(&elem.key, &rbe->key);
- if (set->flags & NFT_SET_MAP)
+ if (set->flags & NFT_SET_MAP &&
+ !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
nft_data_copy(&elem.data, rbe->data);
elem.flags = rbe->flags;
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index 5e204711d704..f3448c296446 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -16,65 +16,23 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-#include <net/icmp.h>
-#include <net/netfilter/ipv4/nf_reject.h>
+#include <net/netfilter/nft_reject.h>
-#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
-#include <net/netfilter/ipv6/nf_reject.h>
-#endif
-
-struct nft_reject {
- enum nft_reject_types type:8;
- u8 icmp_code;
- u8 family;
-};
-
-static void nft_reject_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
- const struct nft_pktinfo *pkt)
-{
- struct nft_reject *priv = nft_expr_priv(expr);
-#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
- struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
-#endif
- switch (priv->type) {
- case NFT_REJECT_ICMP_UNREACH:
- if (priv->family == NFPROTO_IPV4)
- nf_send_unreach(pkt->skb, priv->icmp_code);
-#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
- else if (priv->family == NFPROTO_IPV6)
- nf_send_unreach6(net, pkt->skb, priv->icmp_code,
- pkt->ops->hooknum);
-#endif
- break;
- case NFT_REJECT_TCP_RST:
- if (priv->family == NFPROTO_IPV4)
- nf_send_reset(pkt->skb, pkt->ops->hooknum);
-#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
- else if (priv->family == NFPROTO_IPV6)
- nf_send_reset6(net, pkt->skb, pkt->ops->hooknum);
-#endif
- break;
- }
-
- data[NFT_REG_VERDICT].verdict = NF_DROP;
-}
-
-static const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
+const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
[NFTA_REJECT_TYPE] = { .type = NLA_U32 },
[NFTA_REJECT_ICMP_CODE] = { .type = NLA_U8 },
};
+EXPORT_SYMBOL_GPL(nft_reject_policy);
-static int nft_reject_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+int nft_reject_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_reject *priv = nft_expr_priv(expr);
if (tb[NFTA_REJECT_TYPE] == NULL)
return -EINVAL;
- priv->family = ctx->afi->family;
priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE]));
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
@@ -89,8 +47,9 @@ static int nft_reject_init(const struct nft_ctx *ctx,
return 0;
}
+EXPORT_SYMBOL_GPL(nft_reject_init);
-static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
+int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_reject *priv = nft_expr_priv(expr);
@@ -109,37 +68,7 @@ static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
nla_put_failure:
return -1;
}
-
-static struct nft_expr_type nft_reject_type;
-static const struct nft_expr_ops nft_reject_ops = {
- .type = &nft_reject_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
- .eval = nft_reject_eval,
- .init = nft_reject_init,
- .dump = nft_reject_dump,
-};
-
-static struct nft_expr_type nft_reject_type __read_mostly = {
- .name = "reject",
- .ops = &nft_reject_ops,
- .policy = nft_reject_policy,
- .maxattr = NFTA_REJECT_MAX,
- .owner = THIS_MODULE,
-};
-
-static int __init nft_reject_module_init(void)
-{
- return nft_register_expr(&nft_reject_type);
-}
-
-static void __exit nft_reject_module_exit(void)
-{
- nft_unregister_expr(&nft_reject_type);
-}
-
-module_init(nft_reject_module_init);
-module_exit(nft_reject_module_exit);
+EXPORT_SYMBOL_GPL(nft_reject_dump);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_EXPR("reject");
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
new file mode 100644
index 000000000000..b718a52a4654
--- /dev/null
+++ b/net/netfilter/nft_reject_inet.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2014 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_reject.h>
+
+static void nft_reject_inet_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ switch (pkt->ops->pf) {
+ case NFPROTO_IPV4:
+ return nft_reject_ipv4_eval(expr, data, pkt);
+ case NFPROTO_IPV6:
+ return nft_reject_ipv6_eval(expr, data, pkt);
+ }
+}
+
+static struct nft_expr_type nft_reject_inet_type;
+static const struct nft_expr_ops nft_reject_inet_ops = {
+ .type = &nft_reject_inet_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+ .eval = nft_reject_inet_eval,
+ .init = nft_reject_init,
+ .dump = nft_reject_dump,
+};
+
+static struct nft_expr_type nft_reject_inet_type __read_mostly = {
+ .family = NFPROTO_INET,
+ .name = "reject",
+ .ops = &nft_reject_inet_ops,
+ .policy = nft_reject_policy,
+ .maxattr = NFTA_REJECT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_reject_inet_module_init(void)
+{
+ return nft_register_expr(&nft_reject_inet_type);
+}
+
+static void __exit nft_reject_inet_module_exit(void)
+{
+ nft_unregister_expr(&nft_reject_inet_type);
+}
+
+module_init(nft_reject_inet_module_init);
+module_exit(nft_reject_inet_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_AF_EXPR(1, "reject");
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
index 3228d7f24eb4..4973cbddc446 100644
--- a/net/netfilter/xt_AUDIT.c
+++ b/net/netfilter/xt_AUDIT.c
@@ -146,11 +146,11 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
if (par->family == NFPROTO_BRIDGE) {
switch (eth_hdr(skb)->h_proto) {
- case __constant_htons(ETH_P_IP):
+ case htons(ETH_P_IP):
audit_ip4(ab, skb);
break;
- case __constant_htons(ETH_P_IPV6):
+ case htons(ETH_P_IPV6):
audit_ip6(ab, skb);
break;
}
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 5929be622c5c..75747aecdebe 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -228,12 +228,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
goto err3;
}
- __set_bit(IPS_TEMPLATE_BIT, &ct->status);
- __set_bit(IPS_CONFIRMED_BIT, &ct->status);
-
- /* Overload tuple linked list to put us in template list. */
- hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
- &par->net->ct.tmpl);
+ nf_conntrack_tmpl_insert(par->net, ct);
out:
info->ct = ct;
return 0;
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index c40b2695633b..458464e7bd7a 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -19,6 +19,7 @@
#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/list.h>
+#include <linux/rbtree.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/skbuff.h>
@@ -31,6 +32,10 @@
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>
+#define CONNLIMIT_SLOTS 32
+#define CONNLIMIT_LOCK_SLOTS 32
+#define CONNLIMIT_GC_MAX_NODES 8
+
/* we will save the tuples of all connections we care about */
struct xt_connlimit_conn {
struct hlist_node node;
@@ -38,16 +43,26 @@ struct xt_connlimit_conn {
union nf_inet_addr addr;
};
+struct xt_connlimit_rb {
+ struct rb_node node;
+ struct hlist_head hhead; /* connections/hosts in same subnet */
+ union nf_inet_addr addr; /* search key */
+};
+
struct xt_connlimit_data {
- struct hlist_head iphash[256];
- spinlock_t lock;
+ struct rb_root climit_root4[CONNLIMIT_SLOTS];
+ struct rb_root climit_root6[CONNLIMIT_SLOTS];
+ spinlock_t locks[CONNLIMIT_LOCK_SLOTS];
};
static u_int32_t connlimit_rnd __read_mostly;
+static struct kmem_cache *connlimit_rb_cachep __read_mostly;
+static struct kmem_cache *connlimit_conn_cachep __read_mostly;
static inline unsigned int connlimit_iphash(__be32 addr)
{
- return jhash_1word((__force __u32)addr, connlimit_rnd) & 0xFF;
+ return jhash_1word((__force __u32)addr,
+ connlimit_rnd) % CONNLIMIT_SLOTS;
}
static inline unsigned int
@@ -60,7 +75,8 @@ connlimit_iphash6(const union nf_inet_addr *addr,
for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i)
res.ip6[i] = addr->ip6[i] & mask->ip6[i];
- return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6), connlimit_rnd) & 0xFF;
+ return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6),
+ connlimit_rnd) % CONNLIMIT_SLOTS;
}
static inline bool already_closed(const struct nf_conn *conn)
@@ -72,13 +88,14 @@ static inline bool already_closed(const struct nf_conn *conn)
return 0;
}
-static inline unsigned int
+static int
same_source_net(const union nf_inet_addr *addr,
const union nf_inet_addr *mask,
const union nf_inet_addr *u3, u_int8_t family)
{
if (family == NFPROTO_IPV4) {
- return (addr->ip & mask->ip) == (u3->ip & mask->ip);
+ return ntohl(addr->ip & mask->ip) -
+ ntohl(u3->ip & mask->ip);
} else {
union nf_inet_addr lh, rh;
unsigned int i;
@@ -88,89 +105,205 @@ same_source_net(const union nf_inet_addr *addr,
rh.ip6[i] = u3->ip6[i] & mask->ip6[i];
}
- return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6)) == 0;
+ return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6));
}
}
-static int count_them(struct net *net,
- struct xt_connlimit_data *data,
+static bool add_hlist(struct hlist_head *head,
const struct nf_conntrack_tuple *tuple,
- const union nf_inet_addr *addr,
- const union nf_inet_addr *mask,
- u_int8_t family)
+ const union nf_inet_addr *addr)
+{
+ struct xt_connlimit_conn *conn;
+
+ conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC);
+ if (conn == NULL)
+ return false;
+ conn->tuple = *tuple;
+ conn->addr = *addr;
+ hlist_add_head(&conn->node, head);
+ return true;
+}
+
+static unsigned int check_hlist(struct net *net,
+ struct hlist_head *head,
+ const struct nf_conntrack_tuple *tuple,
+ bool *addit)
{
const struct nf_conntrack_tuple_hash *found;
struct xt_connlimit_conn *conn;
struct hlist_node *n;
struct nf_conn *found_ct;
- struct hlist_head *hash;
- bool addit = true;
- int matches = 0;
-
- if (family == NFPROTO_IPV6)
- hash = &data->iphash[connlimit_iphash6(addr, mask)];
- else
- hash = &data->iphash[connlimit_iphash(addr->ip & mask->ip)];
+ unsigned int length = 0;
+ *addit = true;
rcu_read_lock();
/* check the saved connections */
- hlist_for_each_entry_safe(conn, n, hash, node) {
+ hlist_for_each_entry_safe(conn, n, head, node) {
found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE,
&conn->tuple);
- found_ct = NULL;
+ if (found == NULL) {
+ hlist_del(&conn->node);
+ kmem_cache_free(connlimit_conn_cachep, conn);
+ continue;
+ }
- if (found != NULL)
- found_ct = nf_ct_tuplehash_to_ctrack(found);
+ found_ct = nf_ct_tuplehash_to_ctrack(found);
- if (found_ct != NULL &&
- nf_ct_tuple_equal(&conn->tuple, tuple) &&
- !already_closed(found_ct))
+ if (nf_ct_tuple_equal(&conn->tuple, tuple)) {
/*
* Just to be sure we have it only once in the list.
* We should not see tuples twice unless someone hooks
* this into a table without "-p tcp --syn".
*/
- addit = false;
-
- if (found == NULL) {
- /* this one is gone */
- hlist_del(&conn->node);
- kfree(conn);
- continue;
- }
-
- if (already_closed(found_ct)) {
+ *addit = false;
+ } else if (already_closed(found_ct)) {
/*
* we do not care about connections which are
* closed already -> ditch it
*/
nf_ct_put(found_ct);
hlist_del(&conn->node);
- kfree(conn);
+ kmem_cache_free(connlimit_conn_cachep, conn);
continue;
}
- if (same_source_net(addr, mask, &conn->addr, family))
- /* same source network -> be counted! */
- ++matches;
nf_ct_put(found_ct);
+ length++;
}
rcu_read_unlock();
- if (addit) {
- /* save the new connection in our list */
- conn = kmalloc(sizeof(*conn), GFP_ATOMIC);
- if (conn == NULL)
- return -ENOMEM;
- conn->tuple = *tuple;
- conn->addr = *addr;
- hlist_add_head(&conn->node, hash);
- ++matches;
+ return length;
+}
+
+static void tree_nodes_free(struct rb_root *root,
+ struct xt_connlimit_rb *gc_nodes[],
+ unsigned int gc_count)
+{
+ struct xt_connlimit_rb *rbconn;
+
+ while (gc_count) {
+ rbconn = gc_nodes[--gc_count];
+ rb_erase(&rbconn->node, root);
+ kmem_cache_free(connlimit_rb_cachep, rbconn);
+ }
+}
+
+static unsigned int
+count_tree(struct net *net, struct rb_root *root,
+ const struct nf_conntrack_tuple *tuple,
+ const union nf_inet_addr *addr, const union nf_inet_addr *mask,
+ u8 family)
+{
+ struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES];
+ struct rb_node **rbnode, *parent;
+ struct xt_connlimit_rb *rbconn;
+ struct xt_connlimit_conn *conn;
+ unsigned int gc_count;
+ bool no_gc = false;
+
+ restart:
+ gc_count = 0;
+ parent = NULL;
+ rbnode = &(root->rb_node);
+ while (*rbnode) {
+ int diff;
+ bool addit;
+
+ rbconn = container_of(*rbnode, struct xt_connlimit_rb, node);
+
+ parent = *rbnode;
+ diff = same_source_net(addr, mask, &rbconn->addr, family);
+ if (diff < 0) {
+ rbnode = &((*rbnode)->rb_left);
+ } else if (diff > 0) {
+ rbnode = &((*rbnode)->rb_right);
+ } else {
+ /* same source network -> be counted! */
+ unsigned int count;
+ count = check_hlist(net, &rbconn->hhead, tuple, &addit);
+
+ tree_nodes_free(root, gc_nodes, gc_count);
+ if (!addit)
+ return count;
+
+ if (!add_hlist(&rbconn->hhead, tuple, addr))
+ return 0; /* hotdrop */
+
+ return count + 1;
+ }
+
+ if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
+ continue;
+
+ /* only used for GC on hhead, retval and 'addit' ignored */
+ check_hlist(net, &rbconn->hhead, tuple, &addit);
+ if (hlist_empty(&rbconn->hhead))
+ gc_nodes[gc_count++] = rbconn;
+ }
+
+ if (gc_count) {
+ no_gc = true;
+ tree_nodes_free(root, gc_nodes, gc_count);
+ /* tree_node_free before new allocation permits
+ * allocator to re-use newly free'd object.
+ *
+ * This is a rare event; in most cases we will find
+ * existing node to re-use. (or gc_count is 0).
+ */
+ goto restart;
+ }
+
+ /* no match, need to insert new node */
+ rbconn = kmem_cache_alloc(connlimit_rb_cachep, GFP_ATOMIC);
+ if (rbconn == NULL)
+ return 0;
+
+ conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC);
+ if (conn == NULL) {
+ kmem_cache_free(connlimit_rb_cachep, rbconn);
+ return 0;
+ }
+
+ conn->tuple = *tuple;
+ conn->addr = *addr;
+ rbconn->addr = *addr;
+
+ INIT_HLIST_HEAD(&rbconn->hhead);
+ hlist_add_head(&conn->node, &rbconn->hhead);
+
+ rb_link_node(&rbconn->node, parent, rbnode);
+ rb_insert_color(&rbconn->node, root);
+ return 1;
+}
+
+static int count_them(struct net *net,
+ struct xt_connlimit_data *data,
+ const struct nf_conntrack_tuple *tuple,
+ const union nf_inet_addr *addr,
+ const union nf_inet_addr *mask,
+ u_int8_t family)
+{
+ struct rb_root *root;
+ int count;
+ u32 hash;
+
+ if (family == NFPROTO_IPV6) {
+ hash = connlimit_iphash6(addr, mask);
+ root = &data->climit_root6[hash];
+ } else {
+ hash = connlimit_iphash(addr->ip & mask->ip);
+ root = &data->climit_root4[hash];
}
- return matches;
+ spin_lock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]);
+
+ count = count_tree(net, root, tuple, addr, mask, family);
+
+ spin_unlock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]);
+
+ return count;
}
static bool
@@ -183,7 +316,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
const struct nf_conntrack_tuple *tuple_ptr = &tuple;
enum ip_conntrack_info ctinfo;
const struct nf_conn *ct;
- int connections;
+ unsigned int connections;
ct = nf_ct_get(skb, &ctinfo);
if (ct != NULL)
@@ -202,12 +335,9 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
iph->daddr : iph->saddr;
}
- spin_lock_bh(&info->data->lock);
connections = count_them(net, info->data, tuple_ptr, &addr,
&info->mask, par->family);
- spin_unlock_bh(&info->data->lock);
-
- if (connections < 0)
+ if (connections == 0)
/* kmalloc failed, drop it entirely */
goto hotdrop;
@@ -247,29 +377,47 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
return -ENOMEM;
}
- spin_lock_init(&info->data->lock);
- for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i)
- INIT_HLIST_HEAD(&info->data->iphash[i]);
+ for (i = 0; i < ARRAY_SIZE(info->data->locks); ++i)
+ spin_lock_init(&info->data->locks[i]);
+
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
+ info->data->climit_root4[i] = RB_ROOT;
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
+ info->data->climit_root6[i] = RB_ROOT;
return 0;
}
-static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
+static void destroy_tree(struct rb_root *r)
{
- const struct xt_connlimit_info *info = par->matchinfo;
struct xt_connlimit_conn *conn;
+ struct xt_connlimit_rb *rbconn;
struct hlist_node *n;
- struct hlist_head *hash = info->data->iphash;
+ struct rb_node *node;
+
+ while ((node = rb_first(r)) != NULL) {
+ rbconn = container_of(node, struct xt_connlimit_rb, node);
+
+ rb_erase(node, r);
+
+ hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node)
+ kmem_cache_free(connlimit_conn_cachep, conn);
+
+ kmem_cache_free(connlimit_rb_cachep, rbconn);
+ }
+}
+
+static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ const struct xt_connlimit_info *info = par->matchinfo;
unsigned int i;
nf_ct_l3proto_module_put(par->family);
- for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) {
- hlist_for_each_entry_safe(conn, n, &hash[i], node) {
- hlist_del(&conn->node);
- kfree(conn);
- }
- }
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
+ destroy_tree(&info->data->climit_root4[i]);
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
+ destroy_tree(&info->data->climit_root6[i]);
kfree(info->data);
}
@@ -287,12 +435,37 @@ static struct xt_match connlimit_mt_reg __read_mostly = {
static int __init connlimit_mt_init(void)
{
- return xt_register_match(&connlimit_mt_reg);
+ int ret;
+
+ BUILD_BUG_ON(CONNLIMIT_LOCK_SLOTS > CONNLIMIT_SLOTS);
+ BUILD_BUG_ON((CONNLIMIT_SLOTS % CONNLIMIT_LOCK_SLOTS) != 0);
+
+ connlimit_conn_cachep = kmem_cache_create("xt_connlimit_conn",
+ sizeof(struct xt_connlimit_conn),
+ 0, 0, NULL);
+ if (!connlimit_conn_cachep)
+ return -ENOMEM;
+
+ connlimit_rb_cachep = kmem_cache_create("xt_connlimit_rb",
+ sizeof(struct xt_connlimit_rb),
+ 0, 0, NULL);
+ if (!connlimit_rb_cachep) {
+ kmem_cache_destroy(connlimit_conn_cachep);
+ return -ENOMEM;
+ }
+ ret = xt_register_match(&connlimit_mt_reg);
+ if (ret != 0) {
+ kmem_cache_destroy(connlimit_conn_cachep);
+ kmem_cache_destroy(connlimit_rb_cachep);
+ }
+ return ret;
}
static void __exit connlimit_mt_exit(void)
{
xt_unregister_match(&connlimit_mt_reg);
+ kmem_cache_destroy(connlimit_conn_cachep);
+ kmem_cache_destroy(connlimit_rb_cachep);
}
module_init(connlimit_mt_init);
diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c
index a4c7561698c5..89d53104c6b3 100644
--- a/net/netfilter/xt_ipcomp.c
+++ b/net/netfilter/xt_ipcomp.c
@@ -60,7 +60,7 @@ static bool comp_mt(const struct sk_buff *skb, struct xt_action_param *par)
}
return spi_match(compinfo->spis[0], compinfo->spis[1],
- ntohl(chdr->cpi << 16),
+ ntohs(chdr->cpi),
!!(compinfo->invflags & XT_IPCOMP_INV_SPI));
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index fdf51353cf78..c2d585c4f7c5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1460,7 +1460,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
if (nlk->netlink_bind && nlk->groups[0]) {
int i;
- for (i=0; i<nlk->ngroups; i++) {
+ for (i = 0; i < nlk->ngroups; i++) {
if (test_bit(i, nlk->groups))
nlk->netlink_bind(i);
}
@@ -1489,8 +1489,8 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
if (addr->sa_family != AF_NETLINK)
return -EINVAL;
- /* Only superuser is allowed to send multicasts */
- if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
+ if ((nladdr->nl_groups || nladdr->nl_pid) &&
+ !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
return -EPERM;
if (!nlk->portid)
@@ -2343,6 +2343,11 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
}
#endif
+ /* Record the max length of recvmsg() calls for future allocations */
+ nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len);
+ nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len,
+ 16384);
+
copied = data_skb->len;
if (len < copied) {
msg->msg_flags |= MSG_TRUNC;
@@ -2549,7 +2554,7 @@ __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int fla
struct nlmsghdr *nlh;
int size = nlmsg_msg_size(len);
- nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size));
+ nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_ALIGN(size));
nlh->nlmsg_type = type;
nlh->nlmsg_len = size;
nlh->nlmsg_flags = flags;
@@ -2587,7 +2592,27 @@ static int netlink_dump(struct sock *sk)
if (!netlink_rx_is_mmaped(sk) &&
atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
goto errout_skb;
- skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL);
+
+ /* NLMSG_GOODSIZE is small to avoid high order allocations being
+ * required, but it makes sense to _attempt_ a 16K bytes allocation
+ * to reduce number of system calls on dump operations, if user
+ * ever provided a big enough buffer.
+ */
+ if (alloc_size < nlk->max_recvmsg_len) {
+ skb = netlink_alloc_skb(sk,
+ nlk->max_recvmsg_len,
+ nlk->portid,
+ GFP_KERNEL |
+ __GFP_NOWARN |
+ __GFP_NORETRY);
+ /* available room should be exact amount to avoid MSG_TRUNC */
+ if (skb)
+ skb_reserve(skb, skb_tailroom(skb) -
+ nlk->max_recvmsg_len);
+ }
+ if (!skb)
+ skb = netlink_alloc_skb(sk, alloc_size, nlk->portid,
+ GFP_KERNEL);
if (!skb)
goto errout_skb;
netlink_skb_set_owner_r(skb, sk);
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index acbd774eeb7c..ed13a790b00e 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -31,6 +31,7 @@ struct netlink_sock {
u32 ngroups;
unsigned long *groups;
unsigned long state;
+ size_t max_recvmsg_len;
wait_queue_head_t wait;
bool cb_running;
struct netlink_callback cb;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index df4692826ead..c53fe0c9697c 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -55,6 +55,7 @@
#include "datapath.h"
#include "flow.h"
+#include "flow_table.h"
#include "flow_netlink.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"
@@ -160,7 +161,6 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
{
struct datapath *dp = container_of(rcu, struct datapath, rcu);
- ovs_flow_tbl_destroy(&dp->table);
free_percpu(dp->stats_percpu);
release_net(ovs_dp_get_net(dp));
kfree(dp->ports);
@@ -256,10 +256,10 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
out:
/* Update datapath statistics. */
- u64_stats_update_begin(&stats->sync);
+ u64_stats_update_begin(&stats->syncp);
(*stats_counter)++;
stats->n_mask_hit += n_mask_hit;
- u64_stats_update_end(&stats->sync);
+ u64_stats_update_end(&stats->syncp);
}
static struct genl_family dp_packet_genl_family = {
@@ -295,9 +295,9 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
err:
stats = this_cpu_ptr(dp->stats_percpu);
- u64_stats_update_begin(&stats->sync);
+ u64_stats_update_begin(&stats->syncp);
stats->n_lost++;
- u64_stats_update_end(&stats->sync);
+ u64_stats_update_end(&stats->syncp);
return err;
}
@@ -466,6 +466,14 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
skb_zerocopy(user_skb, skb, skb->len, hlen);
+ /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
+ if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
+ size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len;
+
+ if (plen > 0)
+ memset(skb_put(user_skb, plen), 0, plen);
+ }
+
((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
@@ -598,9 +606,9 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
do {
- start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
+ start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
local_stats = *percpu_stats;
- } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
+ } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
stats->n_hit += local_stats.n_hit;
stats->n_missed += local_stats.n_missed;
@@ -852,11 +860,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
goto err_unlock_ovs;
/* The unmasked key has to be the same for flow updates. */
- error = -EINVAL;
- if (!ovs_flow_cmp_unmasked_key(flow, &match)) {
- OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n");
+ if (!ovs_flow_cmp_unmasked_key(flow, &match))
goto err_unlock_ovs;
- }
/* Update actions. */
old_acts = ovsl_dereference(flow->sf_acts);
@@ -1079,6 +1084,7 @@ static size_t ovs_dp_cmd_msg_size(void)
msgsize += nla_total_size(IFNAMSIZ);
msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats));
+ msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
return msgsize;
}
@@ -1209,18 +1215,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (err)
goto err_free_dp;
- dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
+ dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
if (!dp->stats_percpu) {
err = -ENOMEM;
goto err_destroy_table;
}
- for_each_possible_cpu(i) {
- struct dp_stats_percpu *dpath_stats;
- dpath_stats = per_cpu_ptr(dp->stats_percpu, i);
- u64_stats_init(&dpath_stats->sync);
- }
-
dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
GFP_KERNEL);
if (!dp->ports) {
@@ -1279,7 +1279,7 @@ err_destroy_ports_array:
err_destroy_percpu:
free_percpu(dp->stats_percpu);
err_destroy_table:
- ovs_flow_tbl_destroy(&dp->table);
+ ovs_flow_tbl_destroy(&dp->table, false);
err_free_dp:
release_net(ovs_dp_get_net(dp));
kfree(dp);
@@ -1306,10 +1306,13 @@ static void __dp_destroy(struct datapath *dp)
list_del_rcu(&dp->list_node);
/* OVSP_LOCAL is datapath internal port. We need to make sure that
- * all port in datapath are destroyed first before freeing datapath.
+ * all ports in datapath are destroyed first before freeing datapath.
*/
ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
+ /* RCU destroy the flow table */
+ ovs_flow_tbl_destroy(&dp->table, true);
+
call_rcu(&dp->rcu, destroy_dp_rcu);
}
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 6be9fbb5e9cb..05317380fc03 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -55,7 +55,7 @@ struct dp_stats_percpu {
u64 n_missed;
u64 n_lost;
u64 n_mask_hit;
- struct u64_stats_sync sync;
+ struct u64_stats_sync syncp;
};
/**
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index c58a0fe3c889..3c268b3d71c3 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -153,29 +153,29 @@ static void rcu_free_flow_callback(struct rcu_head *rcu)
flow_free(flow);
}
-static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred)
-{
- if (!mask)
- return;
-
- BUG_ON(!mask->ref_count);
- mask->ref_count--;
-
- if (!mask->ref_count) {
- list_del_rcu(&mask->list);
- if (deferred)
- kfree_rcu(mask, rcu);
- else
- kfree(mask);
- }
-}
-
void ovs_flow_free(struct sw_flow *flow, bool deferred)
{
if (!flow)
return;
- flow_mask_del_ref(flow->mask, deferred);
+ if (flow->mask) {
+ struct sw_flow_mask *mask = flow->mask;
+
+ /* ovs-lock is required to protect mask-refcount and
+ * mask list.
+ */
+ ASSERT_OVSL();
+ BUG_ON(!mask->ref_count);
+ mask->ref_count--;
+
+ if (!mask->ref_count) {
+ list_del_rcu(&mask->list);
+ if (deferred)
+ kfree_rcu(mask, rcu);
+ else
+ kfree(mask);
+ }
+ }
if (deferred)
call_rcu(&flow->rcu, rcu_free_flow_callback);
@@ -188,26 +188,9 @@ static void free_buckets(struct flex_array *buckets)
flex_array_free(buckets);
}
+
static void __table_instance_destroy(struct table_instance *ti)
{
- int i;
-
- if (ti->keep_flows)
- goto skip_flows;
-
- for (i = 0; i < ti->n_buckets; i++) {
- struct sw_flow *flow;
- struct hlist_head *head = flex_array_get(ti->buckets, i);
- struct hlist_node *n;
- int ver = ti->node_ver;
-
- hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
- hlist_del(&flow->hash_node[ver]);
- ovs_flow_free(flow, false);
- }
- }
-
-skip_flows:
free_buckets(ti->buckets);
kfree(ti);
}
@@ -258,20 +241,38 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
static void table_instance_destroy(struct table_instance *ti, bool deferred)
{
+ int i;
+
if (!ti)
return;
+ if (ti->keep_flows)
+ goto skip_flows;
+
+ for (i = 0; i < ti->n_buckets; i++) {
+ struct sw_flow *flow;
+ struct hlist_head *head = flex_array_get(ti->buckets, i);
+ struct hlist_node *n;
+ int ver = ti->node_ver;
+
+ hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
+ hlist_del_rcu(&flow->hash_node[ver]);
+ ovs_flow_free(flow, deferred);
+ }
+ }
+
+skip_flows:
if (deferred)
call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
else
__table_instance_destroy(ti);
}
-void ovs_flow_tbl_destroy(struct flow_table *table)
+void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred)
{
struct table_instance *ti = ovsl_dereference(table->ti);
- table_instance_destroy(ti, false);
+ table_instance_destroy(ti, deferred);
}
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
@@ -504,16 +505,11 @@ static struct sw_flow_mask *mask_alloc(void)
mask = kmalloc(sizeof(*mask), GFP_KERNEL);
if (mask)
- mask->ref_count = 0;
+ mask->ref_count = 1;
return mask;
}
-static void mask_add_ref(struct sw_flow_mask *mask)
-{
- mask->ref_count++;
-}
-
static bool mask_equal(const struct sw_flow_mask *a,
const struct sw_flow_mask *b)
{
@@ -554,9 +550,11 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
mask->key = new->key;
mask->range = new->range;
list_add_rcu(&mask->list, &tbl->mask_list);
+ } else {
+ BUG_ON(!mask->ref_count);
+ mask->ref_count++;
}
- mask_add_ref(mask);
flow->mask = mask;
return 0;
}
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 1996e34c0fd8..baaeb101924d 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -60,7 +60,7 @@ void ovs_flow_free(struct sw_flow *, bool deferred);
int ovs_flow_tbl_init(struct flow_table *);
int ovs_flow_tbl_count(struct flow_table *table);
-void ovs_flow_tbl_destroy(struct flow_table *table);
+void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred);
int ovs_flow_tbl_flush(struct flow_table *flow_table);
int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 208dd9a26dd1..42c0f4a0b78c 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -121,7 +121,6 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
{
struct vport *vport;
size_t alloc_size;
- int i;
alloc_size = sizeof(struct vport);
if (priv_size) {
@@ -139,19 +138,12 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
vport->ops = ops;
INIT_HLIST_NODE(&vport->dp_hash_node);
- vport->percpu_stats = alloc_percpu(struct pcpu_sw_netstats);
+ vport->percpu_stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!vport->percpu_stats) {
kfree(vport);
return ERR_PTR(-ENOMEM);
}
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *vport_stats;
- vport_stats = per_cpu_ptr(vport->percpu_stats, i);
- u64_stats_init(&vport_stats->syncp);
- }
-
-
spin_lock_init(&vport->stats_lock);
return vport;
@@ -285,9 +277,9 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
do {
- start = u64_stats_fetch_begin_bh(&percpu_stats->syncp);
+ start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
local_stats = *percpu_stats;
- } while (u64_stats_fetch_retry_bh(&percpu_stats->syncp, start));
+ } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
stats->rx_bytes += local_stats.rx_bytes;
stats->rx_packets += local_stats.rx_packets;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 6a2bb37506c5..292304404fda 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -308,11 +308,27 @@ static bool packet_use_direct_xmit(const struct packet_sock *po)
return po->xmit == packet_direct_xmit;
}
-static u16 packet_pick_tx_queue(struct net_device *dev)
+static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
{
return (u16) raw_smp_processor_id() % dev->real_num_tx_queues;
}
+static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ u16 queue_index;
+
+ if (ops->ndo_select_queue) {
+ queue_index = ops->ndo_select_queue(dev, skb, NULL,
+ __packet_pick_tx_queue);
+ queue_index = netdev_cap_txqueue(dev, queue_index);
+ } else {
+ queue_index = __packet_pick_tx_queue(dev, skb);
+ }
+
+ skb_set_queue_mapping(skb, queue_index);
+}
+
/* register_prot_hook must be invoked with the po->bind_lock held,
* or from a context in which asynchronous accesses to the packet
* socket is not possible (packet_create()).
@@ -2241,8 +2257,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
if (unlikely(!(dev->flags & IFF_UP)))
goto out_put;
- reserve = dev->hard_header_len;
-
+ reserve = dev->hard_header_len + VLAN_HLEN;
size_max = po->tx_ring.frame_size
- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
@@ -2269,8 +2284,19 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
goto out_status;
tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
- addr, hlen);
+ addr, hlen);
+ if (tp_len > dev->mtu + dev->hard_header_len) {
+ struct ethhdr *ehdr;
+ /* Earlier code assumed this would be a VLAN pkt,
+ * double-check this now that we have the actual
+ * packet in hand.
+ */
+ skb_reset_mac_header(skb);
+ ehdr = eth_hdr(skb);
+ if (ehdr->h_proto != htons(ETH_P_8021Q))
+ tp_len = -EMSGSIZE;
+ }
if (unlikely(tp_len < 0)) {
if (po->tp_loss) {
__packet_set_status(po, ph,
@@ -2285,7 +2311,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
}
}
- skb_set_queue_mapping(skb, packet_pick_tx_queue(dev));
+ packet_pick_tx_queue(dev, skb);
+
skb->destructor = tpacket_destruct_skb;
__packet_set_status(po, ph, TP_STATUS_SENDING);
packet_inc_pending(&po->tx_ring);
@@ -2499,7 +2526,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
skb->dev = dev;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
- skb_set_queue_mapping(skb, packet_pick_tx_queue(dev));
+
+ packet_pick_tx_queue(dev, skb);
if (po->has_vnet_hdr) {
if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
@@ -3786,7 +3814,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
*/
if (!tx_ring)
init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring);
- break;
+ break;
default:
break;
}
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index d1c3429b69ed..ec126f91276b 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -20,9 +20,8 @@ af-rxrpc-y := \
ar-skbuff.o \
ar-transport.o
-ifeq ($(CONFIG_PROC_FS),y)
-af-rxrpc-y += ar-proc.o
-endif
+af-rxrpc-$(CONFIG_PROC_FS) += ar-proc.o
+af-rxrpc-$(CONFIG_SYSCTL) += sysctl.o
obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index e61aa6001c65..7b1670489638 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -838,6 +838,12 @@ static int __init af_rxrpc_init(void)
goto error_key_type_s;
}
+ ret = rxrpc_sysctl_init();
+ if (ret < 0) {
+ printk(KERN_CRIT "RxRPC: Cannot register sysctls\n");
+ goto error_sysctls;
+ }
+
#ifdef CONFIG_PROC_FS
proc_create("rxrpc_calls", 0, init_net.proc_net, &rxrpc_call_seq_fops);
proc_create("rxrpc_conns", 0, init_net.proc_net,
@@ -845,6 +851,8 @@ static int __init af_rxrpc_init(void)
#endif
return 0;
+error_sysctls:
+ unregister_key_type(&key_type_rxrpc_s);
error_key_type_s:
unregister_key_type(&key_type_rxrpc);
error_key_type:
@@ -865,6 +873,7 @@ error_call_jar:
static void __exit af_rxrpc_exit(void)
{
_enter("");
+ rxrpc_sysctl_exit();
unregister_key_type(&key_type_rxrpc_s);
unregister_key_type(&key_type_rxrpc);
sock_unregister(PF_RXRPC);
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
index cd97a0ce48d8..c6be17a959a6 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/ar-ack.c
@@ -19,7 +19,49 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
-static unsigned int rxrpc_ack_defer = 1;
+/*
+ * How long to wait before scheduling ACK generation after seeing a
+ * packet with RXRPC_REQUEST_ACK set (in jiffies).
+ */
+unsigned rxrpc_requested_ack_delay = 1;
+
+/*
+ * How long to wait before scheduling an ACK with subtype DELAY (in jiffies).
+ *
+ * We use this when we've received new data packets. If those packets aren't
+ * all consumed within this time we will send a DELAY ACK if an ACK was not
+ * requested to let the sender know it doesn't need to resend.
+ */
+unsigned rxrpc_soft_ack_delay = 1 * HZ;
+
+/*
+ * How long to wait before scheduling an ACK with subtype IDLE (in jiffies).
+ *
+ * We use this when we've consumed some previously soft-ACK'd packets when
+ * further packets aren't immediately received to decide when to send an IDLE
+ * ACK let the other end know that it can free up its Tx buffer space.
+ */
+unsigned rxrpc_idle_ack_delay = 0.5 * HZ;
+
+/*
+ * Receive window size in packets. This indicates the maximum number of
+ * unconsumed received packets we're willing to retain in memory. Once this
+ * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further
+ * packets.
+ */
+unsigned rxrpc_rx_window_size = 32;
+
+/*
+ * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet
+ * made by gluing normal packets together that we're willing to handle.
+ */
+unsigned rxrpc_rx_mtu = 5692;
+
+/*
+ * The maximum number of fragments in a received jumbo packet that we tell the
+ * sender that we're willing to handle.
+ */
+unsigned rxrpc_rx_jumbo_max = 4;
static const char *rxrpc_acks(u8 reason)
{
@@ -82,24 +124,23 @@ void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
switch (ack_reason) {
case RXRPC_ACK_DELAY:
_debug("run delay timer");
- call->ack_timer.expires = jiffies + rxrpc_ack_timeout * HZ;
- add_timer(&call->ack_timer);
- return;
+ expiry = rxrpc_soft_ack_delay;
+ goto run_timer;
case RXRPC_ACK_IDLE:
if (!immediate) {
_debug("run defer timer");
- expiry = 1;
+ expiry = rxrpc_idle_ack_delay;
goto run_timer;
}
goto cancel_timer;
case RXRPC_ACK_REQUESTED:
- if (!rxrpc_ack_defer)
+ expiry = rxrpc_requested_ack_delay;
+ if (!expiry)
goto cancel_timer;
if (!immediate || serial == cpu_to_be32(1)) {
_debug("run defer timer");
- expiry = rxrpc_ack_defer;
goto run_timer;
}
@@ -1174,11 +1215,11 @@ send_ACK:
mtu = call->conn->trans->peer->if_mtu;
mtu -= call->conn->trans->peer->hdrsize;
ackinfo.maxMTU = htonl(mtu);
- ackinfo.rwind = htonl(32);
+ ackinfo.rwind = htonl(rxrpc_rx_window_size);
/* permit the peer to send us jumbo packets if it wants to */
- ackinfo.rxMTU = htonl(5692);
- ackinfo.jumbo_max = htonl(4);
+ ackinfo.rxMTU = htonl(rxrpc_rx_mtu);
+ ackinfo.jumbo_max = htonl(rxrpc_rx_jumbo_max);
hdr.serial = htonl(atomic_inc_return(&call->conn->serial));
_proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c
index a3bbb360a3f9..a9e05db0f5d5 100644
--- a/net/rxrpc/ar-call.c
+++ b/net/rxrpc/ar-call.c
@@ -12,10 +12,22 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/circ_buf.h>
+#include <linux/hashtable.h>
+#include <linux/spinlock_types.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include "ar-internal.h"
+/*
+ * Maximum lifetime of a call (in jiffies).
+ */
+unsigned rxrpc_max_call_lifetime = 60 * HZ;
+
+/*
+ * Time till dead call expires after last use (in jiffies).
+ */
+unsigned rxrpc_dead_call_expiry = 2 * HZ;
+
const char *const rxrpc_call_states[] = {
[RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq",
[RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl",
@@ -38,8 +50,6 @@ const char *const rxrpc_call_states[] = {
struct kmem_cache *rxrpc_call_jar;
LIST_HEAD(rxrpc_calls);
DEFINE_RWLOCK(rxrpc_call_lock);
-static unsigned int rxrpc_call_max_lifetime = 60;
-static unsigned int rxrpc_dead_call_timeout = 2;
static void rxrpc_destroy_call(struct work_struct *work);
static void rxrpc_call_life_expired(unsigned long _call);
@@ -47,6 +57,145 @@ static void rxrpc_dead_call_expired(unsigned long _call);
static void rxrpc_ack_time_expired(unsigned long _call);
static void rxrpc_resend_time_expired(unsigned long _call);
+static DEFINE_SPINLOCK(rxrpc_call_hash_lock);
+static DEFINE_HASHTABLE(rxrpc_call_hash, 10);
+
+/*
+ * Hash function for rxrpc_call_hash
+ */
+static unsigned long rxrpc_call_hashfunc(
+ u8 clientflag,
+ __be32 cid,
+ __be32 call_id,
+ __be32 epoch,
+ __be16 service_id,
+ sa_family_t proto,
+ void *localptr,
+ unsigned int addr_size,
+ const u8 *peer_addr)
+{
+ const u16 *p;
+ unsigned int i;
+ unsigned long key;
+ u32 hcid = ntohl(cid);
+
+ _enter("");
+
+ key = (unsigned long)localptr;
+ /* We just want to add up the __be32 values, so forcing the
+ * cast should be okay.
+ */
+ key += (__force u32)epoch;
+ key += (__force u16)service_id;
+ key += (__force u32)call_id;
+ key += (hcid & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT;
+ key += hcid & RXRPC_CHANNELMASK;
+ key += clientflag;
+ key += proto;
+ /* Step through the peer address in 16-bit portions for speed */
+ for (i = 0, p = (const u16 *)peer_addr; i < addr_size >> 1; i++, p++)
+ key += *p;
+ _leave(" key = 0x%lx", key);
+ return key;
+}
+
+/*
+ * Add a call to the hashtable
+ */
+static void rxrpc_call_hash_add(struct rxrpc_call *call)
+{
+ unsigned long key;
+ unsigned int addr_size = 0;
+
+ _enter("");
+ switch (call->proto) {
+ case AF_INET:
+ addr_size = sizeof(call->peer_ip.ipv4_addr);
+ break;
+ case AF_INET6:
+ addr_size = sizeof(call->peer_ip.ipv6_addr);
+ break;
+ default:
+ break;
+ }
+ key = rxrpc_call_hashfunc(call->in_clientflag, call->cid,
+ call->call_id, call->epoch,
+ call->service_id, call->proto,
+ call->conn->trans->local, addr_size,
+ call->peer_ip.ipv6_addr);
+ /* Store the full key in the call */
+ call->hash_key = key;
+ spin_lock(&rxrpc_call_hash_lock);
+ hash_add_rcu(rxrpc_call_hash, &call->hash_node, key);
+ spin_unlock(&rxrpc_call_hash_lock);
+ _leave("");
+}
+
+/*
+ * Remove a call from the hashtable
+ */
+static void rxrpc_call_hash_del(struct rxrpc_call *call)
+{
+ _enter("");
+ spin_lock(&rxrpc_call_hash_lock);
+ hash_del_rcu(&call->hash_node);
+ spin_unlock(&rxrpc_call_hash_lock);
+ _leave("");
+}
+
+/*
+ * Find a call in the hashtable and return it, or NULL if it
+ * isn't there.
+ */
+struct rxrpc_call *rxrpc_find_call_hash(
+ u8 clientflag,
+ __be32 cid,
+ __be32 call_id,
+ __be32 epoch,
+ __be16 service_id,
+ void *localptr,
+ sa_family_t proto,
+ const u8 *peer_addr)
+{
+ unsigned long key;
+ unsigned int addr_size = 0;
+ struct rxrpc_call *call = NULL;
+ struct rxrpc_call *ret = NULL;
+
+ _enter("");
+ switch (proto) {
+ case AF_INET:
+ addr_size = sizeof(call->peer_ip.ipv4_addr);
+ break;
+ case AF_INET6:
+ addr_size = sizeof(call->peer_ip.ipv6_addr);
+ break;
+ default:
+ break;
+ }
+
+ key = rxrpc_call_hashfunc(clientflag, cid, call_id, epoch,
+ service_id, proto, localptr, addr_size,
+ peer_addr);
+ hash_for_each_possible_rcu(rxrpc_call_hash, call, hash_node, key) {
+ if (call->hash_key == key &&
+ call->call_id == call_id &&
+ call->cid == cid &&
+ call->in_clientflag == clientflag &&
+ call->service_id == service_id &&
+ call->proto == proto &&
+ call->local == localptr &&
+ memcmp(call->peer_ip.ipv6_addr, peer_addr,
+ addr_size) == 0 &&
+ call->epoch == epoch) {
+ ret = call;
+ break;
+ }
+ }
+ _leave(" = %p", ret);
+ return ret;
+}
+
/*
* allocate a new call
*/
@@ -91,7 +240,7 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
call->rx_data_expect = 1;
call->rx_data_eaten = 0;
call->rx_first_oos = 0;
- call->ackr_win_top = call->rx_data_eaten + 1 + RXRPC_MAXACKS;
+ call->ackr_win_top = call->rx_data_eaten + 1 + rxrpc_rx_window_size;
call->creation_jif = jiffies;
return call;
}
@@ -128,11 +277,31 @@ static struct rxrpc_call *rxrpc_alloc_client_call(
return ERR_PTR(ret);
}
+ /* Record copies of information for hashtable lookup */
+ call->proto = rx->proto;
+ call->local = trans->local;
+ switch (call->proto) {
+ case AF_INET:
+ call->peer_ip.ipv4_addr =
+ trans->peer->srx.transport.sin.sin_addr.s_addr;
+ break;
+ case AF_INET6:
+ memcpy(call->peer_ip.ipv6_addr,
+ trans->peer->srx.transport.sin6.sin6_addr.in6_u.u6_addr8,
+ sizeof(call->peer_ip.ipv6_addr));
+ break;
+ }
+ call->epoch = call->conn->epoch;
+ call->service_id = call->conn->service_id;
+ call->in_clientflag = call->conn->in_clientflag;
+ /* Add the new call to the hashtable */
+ rxrpc_call_hash_add(call);
+
spin_lock(&call->conn->trans->peer->lock);
list_add(&call->error_link, &call->conn->trans->peer->error_targets);
spin_unlock(&call->conn->trans->peer->lock);
- call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ;
+ call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime;
add_timer(&call->lifetimer);
_leave(" = %p", call);
@@ -320,9 +489,12 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
parent = *p;
call = rb_entry(parent, struct rxrpc_call, conn_node);
- if (call_id < call->call_id)
+ /* The tree is sorted in order of the __be32 value without
+ * turning it into host order.
+ */
+ if ((__force u32)call_id < (__force u32)call->call_id)
p = &(*p)->rb_left;
- else if (call_id > call->call_id)
+ else if ((__force u32)call_id > (__force u32)call->call_id)
p = &(*p)->rb_right;
else
goto old_call;
@@ -347,9 +519,31 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
list_add_tail(&call->link, &rxrpc_calls);
write_unlock_bh(&rxrpc_call_lock);
+ /* Record copies of information for hashtable lookup */
+ call->proto = rx->proto;
+ call->local = conn->trans->local;
+ switch (call->proto) {
+ case AF_INET:
+ call->peer_ip.ipv4_addr =
+ conn->trans->peer->srx.transport.sin.sin_addr.s_addr;
+ break;
+ case AF_INET6:
+ memcpy(call->peer_ip.ipv6_addr,
+ conn->trans->peer->srx.transport.sin6.sin6_addr.in6_u.u6_addr8,
+ sizeof(call->peer_ip.ipv6_addr));
+ break;
+ default:
+ break;
+ }
+ call->epoch = conn->epoch;
+ call->service_id = conn->service_id;
+ call->in_clientflag = conn->in_clientflag;
+ /* Add the new call to the hashtable */
+ rxrpc_call_hash_add(call);
+
_net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
- call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ;
+ call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime;
add_timer(&call->lifetimer);
_leave(" = %p {%d} [new]", call, call->debug_id);
return call;
@@ -533,7 +727,7 @@ void rxrpc_release_call(struct rxrpc_call *call)
del_timer_sync(&call->resend_timer);
del_timer_sync(&call->ack_timer);
del_timer_sync(&call->lifetimer);
- call->deadspan.expires = jiffies + rxrpc_dead_call_timeout * HZ;
+ call->deadspan.expires = jiffies + rxrpc_dead_call_expiry;
add_timer(&call->deadspan);
_leave("");
@@ -665,6 +859,9 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call)
rxrpc_put_connection(call->conn);
}
+ /* Remove the call from the hash */
+ rxrpc_call_hash_del(call);
+
if (call->acks_window) {
_debug("kill Tx window %d",
CIRC_CNT(call->acks_head, call->acks_tail,
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index 7bf5b5b9e8b9..6631f4f1e39b 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -18,11 +18,15 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
+/*
+ * Time till a connection expires after last use (in seconds).
+ */
+unsigned rxrpc_connection_expiry = 10 * 60;
+
static void rxrpc_connection_reaper(struct work_struct *work);
LIST_HEAD(rxrpc_connections);
DEFINE_RWLOCK(rxrpc_connection_lock);
-static unsigned long rxrpc_connection_timeout = 10 * 60;
static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper);
/*
@@ -862,7 +866,7 @@ static void rxrpc_connection_reaper(struct work_struct *work)
spin_lock(&conn->trans->client_lock);
write_lock(&conn->trans->conn_lock);
- reap_time = conn->put_time + rxrpc_connection_timeout;
+ reap_time = conn->put_time + rxrpc_connection_expiry;
if (atomic_read(&conn->usage) > 0) {
;
@@ -916,7 +920,7 @@ void __exit rxrpc_destroy_all_connections(void)
{
_enter("");
- rxrpc_connection_timeout = 0;
+ rxrpc_connection_expiry = 0;
cancel_delayed_work(&rxrpc_connection_reap);
rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
index a9206087b4d7..db57458c824c 100644
--- a/net/rxrpc/ar-error.c
+++ b/net/rxrpc/ar-error.c
@@ -83,6 +83,7 @@ void rxrpc_UDP_error_report(struct sock *sk)
if (mtu == 0) {
/* they didn't give us a size, estimate one */
+ mtu = peer->if_mtu;
if (mtu > 1500) {
mtu >>= 1;
if (mtu < 1500)
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 529572f18d1f..73742647c135 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -25,8 +25,6 @@
#include <net/net_namespace.h>
#include "ar-internal.h"
-unsigned long rxrpc_ack_timeout = 1;
-
const char *rxrpc_pkts[] = {
"?00",
"DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG",
@@ -349,8 +347,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
* it */
if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
_proto("ACK Requested on %%%u", serial);
- rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial,
- !(sp->hdr.flags & RXRPC_MORE_PACKETS));
+ rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial, false);
}
switch (sp->hdr.type) {
@@ -526,36 +523,38 @@ protocol_error:
* post an incoming packet to the appropriate call/socket to deal with
* - must get rid of the sk_buff, either by freeing it or by queuing it
*/
-static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn,
+static void rxrpc_post_packet_to_call(struct rxrpc_call *call,
struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp;
- struct rxrpc_call *call;
- struct rb_node *p;
- __be32 call_id;
-
- _enter("%p,%p", conn, skb);
- read_lock_bh(&conn->lock);
+ _enter("%p,%p", call, skb);
sp = rxrpc_skb(skb);
- /* look at extant calls by channel number first */
- call = conn->channels[ntohl(sp->hdr.cid) & RXRPC_CHANNELMASK];
- if (!call || call->call_id != sp->hdr.callNumber)
- goto call_not_extant;
-
_debug("extant call [%d]", call->state);
- ASSERTCMP(call->conn, ==, conn);
read_lock(&call->state_lock);
switch (call->state) {
case RXRPC_CALL_LOCALLY_ABORTED:
- if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
+ if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events)) {
rxrpc_queue_call(call);
+ goto free_unlock;
+ }
case RXRPC_CALL_REMOTELY_ABORTED:
case RXRPC_CALL_NETWORK_ERROR:
case RXRPC_CALL_DEAD:
+ goto dead_call;
+ case RXRPC_CALL_COMPLETE:
+ case RXRPC_CALL_CLIENT_FINAL_ACK:
+ /* complete server call */
+ if (call->conn->in_clientflag)
+ goto dead_call;
+ /* resend last packet of a completed call */
+ _debug("final ack again");
+ rxrpc_get_call(call);
+ set_bit(RXRPC_CALL_ACK_FINAL, &call->events);
+ rxrpc_queue_call(call);
goto free_unlock;
default:
break;
@@ -563,7 +562,6 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn,
read_unlock(&call->state_lock);
rxrpc_get_call(call);
- read_unlock_bh(&conn->lock);
if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
sp->hdr.flags & RXRPC_JUMBO_PACKET)
@@ -574,78 +572,16 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn,
rxrpc_put_call(call);
goto done;
-call_not_extant:
- /* search the completed calls in case what we're dealing with is
- * there */
- _debug("call not extant");
-
- call_id = sp->hdr.callNumber;
- p = conn->calls.rb_node;
- while (p) {
- call = rb_entry(p, struct rxrpc_call, conn_node);
-
- if (call_id < call->call_id)
- p = p->rb_left;
- else if (call_id > call->call_id)
- p = p->rb_right;
- else
- goto found_completed_call;
- }
-
dead_call:
- /* it's a either a really old call that we no longer remember or its a
- * new incoming call */
- read_unlock_bh(&conn->lock);
-
- if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
- sp->hdr.seq == cpu_to_be32(1)) {
- _debug("incoming call");
- skb_queue_tail(&conn->trans->local->accept_queue, skb);
- rxrpc_queue_work(&conn->trans->local->acceptor);
- goto done;
- }
-
- _debug("dead call");
- skb->priority = RX_CALL_DEAD;
- rxrpc_reject_packet(conn->trans->local, skb);
- goto done;
-
- /* resend last packet of a completed call
- * - client calls may have been aborted or ACK'd
- * - server calls may have been aborted
- */
-found_completed_call:
- _debug("completed call");
-
- if (atomic_read(&call->usage) == 0)
- goto dead_call;
-
- /* synchronise any state changes */
- read_lock(&call->state_lock);
- ASSERTIFCMP(call->state != RXRPC_CALL_CLIENT_FINAL_ACK,
- call->state, >=, RXRPC_CALL_COMPLETE);
-
- if (call->state == RXRPC_CALL_LOCALLY_ABORTED ||
- call->state == RXRPC_CALL_REMOTELY_ABORTED ||
- call->state == RXRPC_CALL_DEAD) {
- read_unlock(&call->state_lock);
- goto dead_call;
- }
-
- if (call->conn->in_clientflag) {
- read_unlock(&call->state_lock);
- goto dead_call; /* complete server call */
+ if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) {
+ skb->priority = RX_CALL_DEAD;
+ rxrpc_reject_packet(call->conn->trans->local, skb);
+ goto unlock;
}
-
- _debug("final ack again");
- rxrpc_get_call(call);
- set_bit(RXRPC_CALL_ACK_FINAL, &call->events);
- rxrpc_queue_call(call);
-
free_unlock:
- read_unlock(&call->state_lock);
- read_unlock_bh(&conn->lock);
rxrpc_free_skb(skb);
+unlock:
+ read_unlock(&call->state_lock);
done:
_leave("");
}
@@ -664,17 +600,42 @@ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
rxrpc_queue_conn(conn);
}
+static struct rxrpc_connection *rxrpc_conn_from_local(struct rxrpc_local *local,
+ struct sk_buff *skb,
+ struct rxrpc_skb_priv *sp)
+{
+ struct rxrpc_peer *peer;
+ struct rxrpc_transport *trans;
+ struct rxrpc_connection *conn;
+
+ peer = rxrpc_find_peer(local, ip_hdr(skb)->saddr,
+ udp_hdr(skb)->source);
+ if (IS_ERR(peer))
+ goto cant_find_conn;
+
+ trans = rxrpc_find_transport(local, peer);
+ rxrpc_put_peer(peer);
+ if (!trans)
+ goto cant_find_conn;
+
+ conn = rxrpc_find_connection(trans, &sp->hdr);
+ rxrpc_put_transport(trans);
+ if (!conn)
+ goto cant_find_conn;
+
+ return conn;
+cant_find_conn:
+ return NULL;
+}
+
/*
* handle data received on the local endpoint
* - may be called in interrupt context
*/
void rxrpc_data_ready(struct sock *sk, int count)
{
- struct rxrpc_connection *conn;
- struct rxrpc_transport *trans;
struct rxrpc_skb_priv *sp;
struct rxrpc_local *local;
- struct rxrpc_peer *peer;
struct sk_buff *skb;
int ret;
@@ -749,27 +710,34 @@ void rxrpc_data_ready(struct sock *sk, int count)
(sp->hdr.callNumber == 0 || sp->hdr.seq == 0))
goto bad_message;
- peer = rxrpc_find_peer(local, ip_hdr(skb)->saddr, udp_hdr(skb)->source);
- if (IS_ERR(peer))
- goto cant_route_call;
+ if (sp->hdr.callNumber == 0) {
+ /* This is a connection-level packet. These should be
+ * fairly rare, so the extra overhead of looking them up the
+ * old-fashioned way doesn't really hurt */
+ struct rxrpc_connection *conn;
- trans = rxrpc_find_transport(local, peer);
- rxrpc_put_peer(peer);
- if (!trans)
- goto cant_route_call;
+ conn = rxrpc_conn_from_local(local, skb, sp);
+ if (!conn)
+ goto cant_route_call;
- conn = rxrpc_find_connection(trans, &sp->hdr);
- rxrpc_put_transport(trans);
- if (!conn)
- goto cant_route_call;
-
- _debug("CONN %p {%d}", conn, conn->debug_id);
-
- if (sp->hdr.callNumber == 0)
+ _debug("CONN %p {%d}", conn, conn->debug_id);
rxrpc_post_packet_to_conn(conn, skb);
- else
- rxrpc_post_packet_to_call(conn, skb);
- rxrpc_put_connection(conn);
+ rxrpc_put_connection(conn);
+ } else {
+ struct rxrpc_call *call;
+ u8 in_clientflag = 0;
+
+ if (sp->hdr.flags & RXRPC_CLIENT_INITIATED)
+ in_clientflag = RXRPC_CLIENT_INITIATED;
+ call = rxrpc_find_call_hash(in_clientflag, sp->hdr.cid,
+ sp->hdr.callNumber, sp->hdr.epoch,
+ sp->hdr.serviceId, local, AF_INET,
+ (u8 *)&ip_hdr(skb)->saddr);
+ if (call)
+ rxrpc_post_packet_to_call(call, skb);
+ else
+ goto cant_route_call;
+ }
rxrpc_put_local(local);
return;
@@ -790,8 +758,10 @@ cant_route_call:
skb->priority = RX_CALL_DEAD;
}
- _debug("reject");
- rxrpc_reject_packet(local, skb);
+ if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) {
+ _debug("reject type %d",sp->hdr.type);
+ rxrpc_reject_packet(local, skb);
+ }
rxrpc_put_local(local);
_leave(" [no call]");
return;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 5f43675ee1df..c831d44b0841 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -396,9 +396,20 @@ struct rxrpc_call {
#define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG)
unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1];
+ struct hlist_node hash_node;
+ unsigned long hash_key; /* Full hash key */
+ u8 in_clientflag; /* Copy of conn->in_clientflag for hashing */
+ struct rxrpc_local *local; /* Local endpoint. Used for hashing. */
+ sa_family_t proto; /* Frame protocol */
/* the following should all be in net order */
__be32 cid; /* connection ID + channel index */
__be32 call_id; /* call ID on connection */
+ __be32 epoch; /* epoch of this connection */
+ __be16 service_id; /* service ID */
+ union { /* Peer IP address for hashing */
+ __be32 ipv4_addr;
+ __u8 ipv6_addr[16]; /* Anticipates eventual IPv6 support */
+ } peer_ip;
};
/*
@@ -433,6 +444,13 @@ int rxrpc_reject_call(struct rxrpc_sock *);
/*
* ar-ack.c
*/
+extern unsigned rxrpc_requested_ack_delay;
+extern unsigned rxrpc_soft_ack_delay;
+extern unsigned rxrpc_idle_ack_delay;
+extern unsigned rxrpc_rx_window_size;
+extern unsigned rxrpc_rx_mtu;
+extern unsigned rxrpc_rx_jumbo_max;
+
void __rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool);
void rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool);
void rxrpc_process_call(struct work_struct *);
@@ -440,10 +458,14 @@ void rxrpc_process_call(struct work_struct *);
/*
* ar-call.c
*/
+extern unsigned rxrpc_max_call_lifetime;
+extern unsigned rxrpc_dead_call_expiry;
extern struct kmem_cache *rxrpc_call_jar;
extern struct list_head rxrpc_calls;
extern rwlock_t rxrpc_call_lock;
+struct rxrpc_call *rxrpc_find_call_hash(u8, __be32, __be32, __be32,
+ __be16, void *, sa_family_t, const u8 *);
struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *,
struct rxrpc_transport *,
struct rxrpc_conn_bundle *,
@@ -460,6 +482,7 @@ void __exit rxrpc_destroy_all_calls(void);
/*
* ar-connection.c
*/
+extern unsigned rxrpc_connection_expiry;
extern struct list_head rxrpc_connections;
extern rwlock_t rxrpc_connection_lock;
@@ -493,7 +516,6 @@ void rxrpc_UDP_error_handler(struct work_struct *);
/*
* ar-input.c
*/
-extern unsigned long rxrpc_ack_timeout;
extern const char *rxrpc_pkts[];
void rxrpc_data_ready(struct sock *, int);
@@ -504,6 +526,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *);
* ar-local.c
*/
extern rwlock_t rxrpc_local_lock;
+
struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *);
void rxrpc_put_local(struct rxrpc_local *);
void __exit rxrpc_destroy_all_locals(void);
@@ -522,7 +545,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t,
/*
* ar-output.c
*/
-extern int rxrpc_resend_timeout;
+extern unsigned rxrpc_resend_timeout;
int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *);
int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *,
@@ -572,6 +595,8 @@ void rxrpc_packet_destructor(struct sk_buff *);
/*
* ar-transport.c
*/
+extern unsigned rxrpc_transport_expiry;
+
struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *,
struct rxrpc_peer *, gfp_t);
void rxrpc_put_transport(struct rxrpc_transport *);
@@ -580,6 +605,17 @@ struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *,
struct rxrpc_peer *);
/*
+ * sysctl.c
+ */
+#ifdef CONFIG_SYSCTL
+extern int __init rxrpc_sysctl_init(void);
+extern void rxrpc_sysctl_exit(void);
+#else
+static inline int __init rxrpc_sysctl_init(void) { return 0; }
+static inline void rxrpc_sysctl_exit(void) {}
+#endif
+
+/*
* debug tracing
*/
extern unsigned int rxrpc_debug;
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index d0e8f1c1898a..0b4b9a79f5ab 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -18,7 +18,10 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
-int rxrpc_resend_timeout = 4;
+/*
+ * Time till packet resend (in jiffies).
+ */
+unsigned rxrpc_resend_timeout = 4 * HZ;
static int rxrpc_send_data(struct kiocb *iocb,
struct rxrpc_sock *rx,
@@ -487,7 +490,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
sp->need_resend = false;
- sp->resend_at = jiffies + rxrpc_resend_timeout * HZ;
+ sp->resend_at = jiffies + rxrpc_resend_timeout;
if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) {
_debug("run timer");
call->resend_timer.expires = sp->resend_at;
@@ -666,6 +669,7 @@ static int rxrpc_send_data(struct kiocb *iocb,
/* add the packet to the send queue if it's now full */
if (sp->remain <= 0 || (segment == 0 && !more)) {
struct rxrpc_connection *conn = call->conn;
+ uint32_t seq;
size_t pad;
/* pad out if we're using security */
@@ -678,11 +682,12 @@ static int rxrpc_send_data(struct kiocb *iocb,
memset(skb_put(skb, pad), 0, pad);
}
+ seq = atomic_inc_return(&call->sequence);
+
sp->hdr.epoch = conn->epoch;
sp->hdr.cid = call->cid;
sp->hdr.callNumber = call->call_id;
- sp->hdr.seq =
- htonl(atomic_inc_return(&call->sequence));
+ sp->hdr.seq = htonl(seq);
sp->hdr.serial =
htonl(atomic_inc_return(&conn->serial));
sp->hdr.type = RXRPC_PACKET_TYPE_DATA;
@@ -697,6 +702,8 @@ static int rxrpc_send_data(struct kiocb *iocb,
else if (CIRC_SPACE(call->acks_head, call->acks_tail,
call->acks_winsz) > 1)
sp->hdr.flags |= RXRPC_MORE_PACKETS;
+ if (more && seq & 1)
+ sp->hdr.flags |= RXRPC_REQUEST_ACK;
ret = rxrpc_secure_packet(
call, skb, skb->mark,
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index 34b5490dde65..e9aaa65c0778 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -180,16 +180,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
if (copy > len - copied)
copy = len - copied;
- if (skb->ip_summed == CHECKSUM_UNNECESSARY ||
- skb->ip_summed == CHECKSUM_PARTIAL) {
- ret = skb_copy_datagram_iovec(skb, offset,
- msg->msg_iov, copy);
- } else {
- ret = skb_copy_and_csum_datagram_iovec(skb, offset,
- msg->msg_iov);
- if (ret == -EINVAL)
- goto csum_copy_error;
- }
+ ret = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copy);
if (ret < 0)
goto copy_error;
@@ -348,20 +339,6 @@ copy_error:
_leave(" = %d", ret);
return ret;
-csum_copy_error:
- _debug("csum error");
- release_sock(&rx->sk);
- if (continue_call)
- rxrpc_put_call(continue_call);
- rxrpc_kill_skb(skb);
- if (!(flags & MSG_PEEK)) {
- if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
- BUG();
- }
- skb_kill_datagram(&rx->sk, skb, flags);
- rxrpc_put_call(call);
- return -EAGAIN;
-
wait_interrupted:
ret = sock_intr_errno(timeo);
wait_error:
diff --git a/net/rxrpc/ar-skbuff.c b/net/rxrpc/ar-skbuff.c
index de755e04d29c..4cfab49e329d 100644
--- a/net/rxrpc/ar-skbuff.c
+++ b/net/rxrpc/ar-skbuff.c
@@ -83,9 +83,14 @@ static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
rxrpc_request_final_ACK(call);
} else if (atomic_dec_and_test(&call->ackr_not_idle) &&
test_and_clear_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags)) {
+ /* We previously soft-ACK'd some received packets that have now
+ * been consumed, so send a hard-ACK if no more packets are
+ * immediately forthcoming to allow the transmitter to free up
+ * its Tx bufferage.
+ */
_debug("send Rx idle ACK");
__rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, sp->hdr.serial,
- true);
+ false);
}
spin_unlock_bh(&call->lock);
diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c
index 92df566930b9..1976dec84f29 100644
--- a/net/rxrpc/ar-transport.c
+++ b/net/rxrpc/ar-transport.c
@@ -17,11 +17,15 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
+/*
+ * Time after last use at which transport record is cleaned up.
+ */
+unsigned rxrpc_transport_expiry = 3600 * 24;
+
static void rxrpc_transport_reaper(struct work_struct *work);
static LIST_HEAD(rxrpc_transports);
static DEFINE_RWLOCK(rxrpc_transport_lock);
-static unsigned long rxrpc_transport_timeout = 3600 * 24;
static DECLARE_DELAYED_WORK(rxrpc_transport_reap, rxrpc_transport_reaper);
/*
@@ -235,7 +239,7 @@ static void rxrpc_transport_reaper(struct work_struct *work)
if (likely(atomic_read(&trans->usage) > 0))
continue;
- reap_time = trans->put_time + rxrpc_transport_timeout;
+ reap_time = trans->put_time + rxrpc_transport_expiry;
if (reap_time <= now)
list_move_tail(&trans->link, &graveyard);
else if (reap_time < earliest)
@@ -271,7 +275,7 @@ void __exit rxrpc_destroy_all_transports(void)
{
_enter("");
- rxrpc_transport_timeout = 0;
+ rxrpc_transport_expiry = 0;
cancel_delayed_work(&rxrpc_transport_reap);
rxrpc_queue_delayed_work(&rxrpc_transport_reap, 0);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
new file mode 100644
index 000000000000..50a98a910eb1
--- /dev/null
+++ b/net/rxrpc/sysctl.c
@@ -0,0 +1,146 @@
+/* sysctls for configuring RxRPC operating parameters
+ *
+ * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/sysctl.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static struct ctl_table_header *rxrpc_sysctl_reg_table;
+static const unsigned zero = 0;
+static const unsigned one = 1;
+static const unsigned four = 4;
+static const unsigned n_65535 = 65535;
+static const unsigned n_max_acks = RXRPC_MAXACKS;
+
+/*
+ * RxRPC operating parameters.
+ *
+ * See Documentation/networking/rxrpc.txt and the variable definitions for more
+ * information on the individual parameters.
+ */
+static struct ctl_table rxrpc_sysctl_table[] = {
+ /* Values measured in milliseconds */
+ {
+ .procname = "req_ack_delay",
+ .data = &rxrpc_requested_ack_delay,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ .extra1 = (void *)&zero,
+ },
+ {
+ .procname = "soft_ack_delay",
+ .data = &rxrpc_soft_ack_delay,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ .extra1 = (void *)&one,
+ },
+ {
+ .procname = "idle_ack_delay",
+ .data = &rxrpc_idle_ack_delay,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ .extra1 = (void *)&one,
+ },
+ {
+ .procname = "resend_timeout",
+ .data = &rxrpc_resend_timeout,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ .extra1 = (void *)&one,
+ },
+
+ /* Values measured in seconds but used in jiffies */
+ {
+ .procname = "max_call_lifetime",
+ .data = &rxrpc_max_call_lifetime,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ .extra1 = (void *)&one,
+ },
+ {
+ .procname = "dead_call_expiry",
+ .data = &rxrpc_dead_call_expiry,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ .extra1 = (void *)&one,
+ },
+
+ /* Values measured in seconds */
+ {
+ .procname = "connection_expiry",
+ .data = &rxrpc_connection_expiry,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&one,
+ },
+ {
+ .procname = "transport_expiry",
+ .data = &rxrpc_transport_expiry,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&one,
+ },
+
+ /* Non-time values */
+ {
+ .procname = "rx_window_size",
+ .data = &rxrpc_rx_window_size,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&one,
+ .extra2 = (void *)&n_max_acks,
+ },
+ {
+ .procname = "rx_mtu",
+ .data = &rxrpc_rx_mtu,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&one,
+ .extra1 = (void *)&n_65535,
+ },
+ {
+ .procname = "rx_jumbo_max",
+ .data = &rxrpc_rx_jumbo_max,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&one,
+ .extra2 = (void *)&four,
+ },
+
+ { }
+};
+
+int __init rxrpc_sysctl_init(void)
+{
+ rxrpc_sysctl_reg_table = register_net_sysctl(&init_net, "net/rxrpc",
+ rxrpc_sysctl_table);
+ if (!rxrpc_sysctl_reg_table)
+ return -ENOMEM;
+ return 0;
+}
+
+void rxrpc_sysctl_exit(void)
+{
+ if (rxrpc_sysctl_reg_table)
+ unregister_net_sysctl_table(rxrpc_sysctl_reg_table);
+}
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 72bdc7166345..8a5ba5add4bc 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -27,8 +27,11 @@
#include <net/act_api.h>
#include <net/netlink.h>
-void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
+void tcf_hash_destroy(struct tc_action *a)
{
+ struct tcf_common *p = a->priv;
+ struct tcf_hashinfo *hinfo = a->ops->hinfo;
+
spin_lock_bh(&hinfo->lock);
hlist_del(&p->tcfc_head);
spin_unlock_bh(&hinfo->lock);
@@ -42,18 +45,22 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
}
EXPORT_SYMBOL(tcf_hash_destroy);
-int tcf_hash_release(struct tcf_common *p, int bind,
- struct tcf_hashinfo *hinfo)
+int tcf_hash_release(struct tc_action *a, int bind)
{
+ struct tcf_common *p = a->priv;
int ret = 0;
if (p) {
if (bind)
p->tcfc_bindcnt--;
+ else if (p->tcfc_bindcnt > 0)
+ return -EPERM;
p->tcfc_refcnt--;
if (p->tcfc_bindcnt <= 0 && p->tcfc_refcnt <= 0) {
- tcf_hash_destroy(p, hinfo);
+ if (a->ops->cleanup)
+ a->ops->cleanup(a, bind);
+ tcf_hash_destroy(a);
ret = 1;
}
}
@@ -118,6 +125,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a)
struct tcf_common *p;
struct nlattr *nest;
int i = 0, n_i = 0;
+ int ret = -EINVAL;
nest = nla_nest_start(skb, a->order);
if (nest == NULL)
@@ -127,10 +135,13 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a)
for (i = 0; i < (hinfo->hmask + 1); i++) {
head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
hlist_for_each_entry_safe(p, n, head, tcfc_head) {
- if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) {
+ a->priv = p;
+ ret = tcf_hash_release(a, 0);
+ if (ret == ACT_P_DELETED) {
module_put(a->ops->owner);
n_i++;
- }
+ } else if (ret < 0)
+ goto nla_put_failure;
}
}
if (nla_put_u32(skb, TCA_FCNT, n_i))
@@ -140,7 +151,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a)
return n_i;
nla_put_failure:
nla_nest_cancel(skb, nest);
- return -EINVAL;
+ return ret;
}
static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
@@ -198,7 +209,7 @@ int tcf_hash_search(struct tc_action *a, u32 index)
}
EXPORT_SYMBOL(tcf_hash_search);
-struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind)
+int tcf_hash_check(u32 index, struct tc_action *a, int bind)
{
struct tcf_hashinfo *hinfo = a->ops->hinfo;
struct tcf_common *p = NULL;
@@ -207,19 +218,30 @@ struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind)
p->tcfc_bindcnt++;
p->tcfc_refcnt++;
a->priv = p;
+ return 1;
}
- return p;
+ return 0;
}
EXPORT_SYMBOL(tcf_hash_check);
-struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est,
- struct tc_action *a, int size, int bind)
+void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est)
+{
+ struct tcf_common *pc = a->priv;
+ if (est)
+ gen_kill_estimator(&pc->tcfc_bstats,
+ &pc->tcfc_rate_est);
+ kfree_rcu(pc, tcfc_rcu);
+}
+EXPORT_SYMBOL(tcf_hash_cleanup);
+
+int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
+ int size, int bind)
{
struct tcf_hashinfo *hinfo = a->ops->hinfo;
struct tcf_common *p = kzalloc(size, GFP_KERNEL);
if (unlikely(!p))
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
p->tcfc_refcnt = 1;
if (bind)
p->tcfc_bindcnt = 1;
@@ -234,17 +256,19 @@ struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est,
&p->tcfc_lock, est);
if (err) {
kfree(p);
- return ERR_PTR(err);
+ return err;
}
}
a->priv = (void *) p;
- return p;
+ return 0;
}
EXPORT_SYMBOL(tcf_hash_create);
-void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo)
+void tcf_hash_insert(struct tc_action *a)
{
+ struct tcf_common *p = a->priv;
+ struct tcf_hashinfo *hinfo = a->ops->hinfo;
unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
spin_lock_bh(&hinfo->lock);
@@ -256,12 +280,13 @@ EXPORT_SYMBOL(tcf_hash_insert);
static LIST_HEAD(act_base);
static DEFINE_RWLOCK(act_mod_lock);
-int tcf_register_action(struct tc_action_ops *act)
+int tcf_register_action(struct tc_action_ops *act, unsigned int mask)
{
struct tc_action_ops *a;
+ int err;
- /* Must supply act, dump, cleanup and init */
- if (!act->act || !act->dump || !act->cleanup || !act->init)
+ /* Must supply act, dump and init */
+ if (!act->act || !act->dump || !act->init)
return -EINVAL;
/* Supply defaults */
@@ -270,10 +295,21 @@ int tcf_register_action(struct tc_action_ops *act)
if (!act->walk)
act->walk = tcf_generic_walker;
+ act->hinfo = kmalloc(sizeof(struct tcf_hashinfo), GFP_KERNEL);
+ if (!act->hinfo)
+ return -ENOMEM;
+ err = tcf_hashinfo_init(act->hinfo, mask);
+ if (err) {
+ kfree(act->hinfo);
+ return err;
+ }
+
write_lock(&act_mod_lock);
list_for_each_entry(a, &act_base, head) {
if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
write_unlock(&act_mod_lock);
+ tcf_hashinfo_destroy(act->hinfo);
+ kfree(act->hinfo);
return -EEXIST;
}
}
@@ -292,6 +328,8 @@ int tcf_unregister_action(struct tc_action_ops *act)
list_for_each_entry(a, &act_base, head) {
if (a == act) {
list_del(&act->head);
+ tcf_hashinfo_destroy(act->hinfo);
+ kfree(act->hinfo);
err = 0;
break;
}
@@ -368,16 +406,21 @@ exec_done:
}
EXPORT_SYMBOL(tcf_action_exec);
-void tcf_action_destroy(struct list_head *actions, int bind)
+int tcf_action_destroy(struct list_head *actions, int bind)
{
struct tc_action *a, *tmp;
+ int ret = 0;
list_for_each_entry_safe(a, tmp, actions, list) {
- if (a->ops->cleanup(a, bind) == ACT_P_DELETED)
+ ret = tcf_hash_release(a, bind);
+ if (ret == ACT_P_DELETED)
module_put(a->ops->owner);
+ else if (ret < 0)
+ return ret;
list_del(&a->list);
kfree(a);
}
+ return ret;
}
int
@@ -642,6 +685,20 @@ act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
return rtnl_unicast(skb, net, portid);
}
+static struct tc_action *create_a(int i)
+{
+ struct tc_action *act;
+
+ act = kzalloc(sizeof(*act), GFP_KERNEL);
+ if (act == NULL) {
+ pr_debug("create_a: failed to alloc!\n");
+ return NULL;
+ }
+ act->order = i;
+ INIT_LIST_HEAD(&act->list);
+ return act;
+}
+
static struct tc_action *
tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid)
{
@@ -661,11 +718,10 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid)
index = nla_get_u32(tb[TCA_ACT_INDEX]);
err = -ENOMEM;
- a = kzalloc(sizeof(struct tc_action), GFP_KERNEL);
+ a = create_a(0);
if (a == NULL)
goto err_out;
- INIT_LIST_HEAD(&a->list);
err = -EINVAL;
a->ops = tc_lookup_action(tb[TCA_ACT_KIND]);
if (a->ops == NULL) /* could happen in batch of actions */
@@ -695,20 +751,6 @@ static void cleanup_a(struct list_head *actions)
}
}
-static struct tc_action *create_a(int i)
-{
- struct tc_action *act;
-
- act = kzalloc(sizeof(*act), GFP_KERNEL);
- if (act == NULL) {
- pr_debug("create_a: failed to alloc!\n");
- return NULL;
- }
- act->order = i;
- INIT_LIST_HEAD(&act->list);
- return act;
-}
-
static int tca_action_flush(struct net *net, struct nlattr *nla,
struct nlmsghdr *n, u32 portid)
{
@@ -720,18 +762,12 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
struct nlattr *nest;
struct nlattr *tb[TCA_ACT_MAX + 1];
struct nlattr *kind;
- struct tc_action *a = create_a(0);
+ struct tc_action a;
int err = -ENOMEM;
- if (a == NULL) {
- pr_debug("tca_action_flush: couldnt create tc_action\n");
- return err;
- }
-
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb) {
pr_debug("tca_action_flush: failed skb alloc\n");
- kfree(a);
return err;
}
@@ -743,8 +779,10 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
err = -EINVAL;
kind = tb[TCA_ACT_KIND];
- a->ops = tc_lookup_action(kind);
- if (a->ops == NULL) /*some idjot trying to flush unknown action */
+ memset(&a, 0, sizeof(struct tc_action));
+ INIT_LIST_HEAD(&a.list);
+ a.ops = tc_lookup_action(kind);
+ if (a.ops == NULL) /*some idjot trying to flush unknown action */
goto err_out;
nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0);
@@ -759,7 +797,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
if (nest == NULL)
goto out_module_put;
- err = a->ops->walk(skb, &dcb, RTM_DELACTION, a);
+ err = a.ops->walk(skb, &dcb, RTM_DELACTION, &a);
if (err < 0)
goto out_module_put;
if (err == 0)
@@ -769,8 +807,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
nlh->nlmsg_flags |= NLM_F_ROOT;
- module_put(a->ops->owner);
- kfree(a);
+ module_put(a.ops->owner);
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
if (err > 0)
@@ -779,11 +816,10 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
return err;
out_module_put:
- module_put(a->ops->owner);
+ module_put(a.ops->owner);
err_out:
noflush_out:
kfree_skb(skb);
- kfree(a);
return err;
}
@@ -805,7 +841,11 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
}
/* now do the delete */
- tcf_action_destroy(actions, 0);
+ ret = tcf_action_destroy(actions, 0);
+ if (ret < 0) {
+ kfree_skb(skb);
+ return ret;
+ }
ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 2210187c45c2..edbf40dac709 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -37,7 +37,6 @@
#include <net/tc_act/tc_csum.h>
#define CSUM_TAB_MASK 15
-static struct tcf_hashinfo csum_hash_info;
static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
[TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
@@ -48,7 +47,6 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
{
struct nlattr *tb[TCA_CSUM_MAX + 1];
struct tc_csum *parm;
- struct tcf_common *pc;
struct tcf_csum *p;
int ret = 0, err;
@@ -63,38 +61,31 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
return -EINVAL;
parm = nla_data(tb[TCA_CSUM_PARMS]);
- pc = tcf_hash_check(parm->index, a, bind);
- if (!pc) {
- pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
+ if (!tcf_hash_check(parm->index, a, bind)) {
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ if (ret)
+ return ret;
ret = ACT_P_CREATED;
} else {
if (bind)/* dont override defaults */
return 0;
- tcf_hash_release(pc, bind, a->ops->hinfo);
+ tcf_hash_release(a, bind);
if (!ovr)
return -EEXIST;
}
- p = to_tcf_csum(pc);
+ p = to_tcf_csum(a);
spin_lock_bh(&p->tcf_lock);
p->tcf_action = parm->action;
p->update_flags = parm->update_flags;
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, a->ops->hinfo);
+ tcf_hash_insert(a);
return ret;
}
-static int tcf_csum_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_csum *p = a->priv;
- return tcf_hash_release(&p->common, bind, &csum_hash_info);
-}
-
/**
* tcf_csum_skb_nextlayer - Get next layer pointer
* @skb: sk_buff to use
@@ -569,12 +560,10 @@ nla_put_failure:
static struct tc_action_ops act_csum_ops = {
.kind = "csum",
- .hinfo = &csum_hash_info,
.type = TCA_ACT_CSUM,
.owner = THIS_MODULE,
.act = tcf_csum,
.dump = tcf_csum_dump,
- .cleanup = tcf_csum_cleanup,
.init = tcf_csum_init,
};
@@ -583,11 +572,7 @@ MODULE_LICENSE("GPL");
static int __init csum_init_module(void)
{
- int err = tcf_hashinfo_init(&csum_hash_info, CSUM_TAB_MASK);
- if (err)
- return err;
-
- return tcf_register_action(&act_csum_ops);
+ return tcf_register_action(&act_csum_ops, CSUM_TAB_MASK);
}
static void __exit csum_cleanup_module(void)
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index a0eed30d5811..d6bcbd9f7791 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -24,7 +24,6 @@
#include <net/tc_act/tc_gact.h>
#define GACT_TAB_MASK 15
-static struct tcf_hashinfo gact_hash_info;
#ifdef CONFIG_GACT_PROB
static int gact_net_rand(struct tcf_gact *gact)
@@ -57,7 +56,6 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
struct nlattr *tb[TCA_GACT_MAX + 1];
struct tc_gact *parm;
struct tcf_gact *gact;
- struct tcf_common *pc;
int ret = 0;
int err;
#ifdef CONFIG_GACT_PROB
@@ -86,21 +84,20 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
}
#endif
- pc = tcf_hash_check(parm->index, a, bind);
- if (!pc) {
- pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
+ if (!tcf_hash_check(parm->index, a, bind)) {
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind);
+ if (ret)
+ return ret;
ret = ACT_P_CREATED;
} else {
if (bind)/* dont override defaults */
return 0;
- tcf_hash_release(pc, bind, a->ops->hinfo);
+ tcf_hash_release(a, bind);
if (!ovr)
return -EEXIST;
}
- gact = to_gact(pc);
+ gact = to_gact(a);
spin_lock_bh(&gact->tcf_lock);
gact->tcf_action = parm->action;
@@ -113,19 +110,10 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
#endif
spin_unlock_bh(&gact->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, a->ops->hinfo);
+ tcf_hash_insert(a);
return ret;
}
-static int tcf_gact_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_gact *gact = a->priv;
-
- if (gact)
- return tcf_hash_release(&gact->common, bind, a->ops->hinfo);
- return 0;
-}
-
static int tcf_gact(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -191,12 +179,10 @@ nla_put_failure:
static struct tc_action_ops act_gact_ops = {
.kind = "gact",
- .hinfo = &gact_hash_info,
.type = TCA_ACT_GACT,
.owner = THIS_MODULE,
.act = tcf_gact,
.dump = tcf_gact_dump,
- .cleanup = tcf_gact_cleanup,
.init = tcf_gact_init,
};
@@ -206,21 +192,17 @@ MODULE_LICENSE("GPL");
static int __init gact_init_module(void)
{
- int err = tcf_hashinfo_init(&gact_hash_info, GACT_TAB_MASK);
- if (err)
- return err;
#ifdef CONFIG_GACT_PROB
pr_info("GACT probability on\n");
#else
pr_info("GACT probability NOT on\n");
#endif
- return tcf_register_action(&act_gact_ops);
+ return tcf_register_action(&act_gact_ops, GACT_TAB_MASK);
}
static void __exit gact_cleanup_module(void)
{
tcf_unregister_action(&act_gact_ops);
- tcf_hashinfo_destroy(&gact_hash_info);
}
module_init(gact_init_module);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 0a6d62174027..8a64a0734aee 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -29,7 +29,6 @@
#define IPT_TAB_MASK 15
-static struct tcf_hashinfo ipt_hash_info;
static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook)
{
@@ -69,22 +68,12 @@ static void ipt_destroy_target(struct xt_entry_target *t)
module_put(par.target->me);
}
-static int tcf_ipt_release(struct tcf_ipt *ipt, int bind)
+static void tcf_ipt_release(struct tc_action *a, int bind)
{
- int ret = 0;
- if (ipt) {
- if (bind)
- ipt->tcf_bindcnt--;
- ipt->tcf_refcnt--;
- if (ipt->tcf_bindcnt <= 0 && ipt->tcf_refcnt <= 0) {
- ipt_destroy_target(ipt->tcfi_t);
- kfree(ipt->tcfi_tname);
- kfree(ipt->tcfi_t);
- tcf_hash_destroy(&ipt->common, &ipt_hash_info);
- ret = ACT_P_DELETED;
- }
- }
- return ret;
+ struct tcf_ipt *ipt = to_ipt(a);
+ ipt_destroy_target(ipt->tcfi_t);
+ kfree(ipt->tcfi_tname);
+ kfree(ipt->tcfi_t);
}
static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
@@ -99,7 +88,6 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
{
struct nlattr *tb[TCA_IPT_MAX + 1];
struct tcf_ipt *ipt;
- struct tcf_common *pc;
struct xt_entry_target *td, *t;
char *tname;
int ret = 0, err;
@@ -125,21 +113,20 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
if (tb[TCA_IPT_INDEX] != NULL)
index = nla_get_u32(tb[TCA_IPT_INDEX]);
- pc = tcf_hash_check(index, a, bind);
- if (!pc) {
- pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
+ if (!tcf_hash_check(index, a, bind) ) {
+ ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind);
+ if (ret)
+ return ret;
ret = ACT_P_CREATED;
} else {
if (bind)/* dont override defaults */
return 0;
- tcf_ipt_release(to_ipt(pc), bind);
+ tcf_hash_release(a, bind);
if (!ovr)
return -EEXIST;
}
- ipt = to_ipt(pc);
+ ipt = to_ipt(a);
hook = nla_get_u32(tb[TCA_IPT_HOOK]);
@@ -170,7 +157,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
ipt->tcfi_hook = hook;
spin_unlock_bh(&ipt->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, a->ops->hinfo);
+ tcf_hash_insert(a);
return ret;
err3:
@@ -178,21 +165,11 @@ err3:
err2:
kfree(tname);
err1:
- if (ret == ACT_P_CREATED) {
- if (est)
- gen_kill_estimator(&pc->tcfc_bstats,
- &pc->tcfc_rate_est);
- kfree_rcu(pc, tcfc_rcu);
- }
+ if (ret == ACT_P_CREATED)
+ tcf_hash_cleanup(a, est);
return err;
}
-static int tcf_ipt_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_ipt *ipt = a->priv;
- return tcf_ipt_release(ipt, bind);
-}
-
static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -284,23 +261,21 @@ nla_put_failure:
static struct tc_action_ops act_ipt_ops = {
.kind = "ipt",
- .hinfo = &ipt_hash_info,
.type = TCA_ACT_IPT,
.owner = THIS_MODULE,
.act = tcf_ipt,
.dump = tcf_ipt_dump,
- .cleanup = tcf_ipt_cleanup,
+ .cleanup = tcf_ipt_release,
.init = tcf_ipt_init,
};
static struct tc_action_ops act_xt_ops = {
.kind = "xt",
- .hinfo = &ipt_hash_info,
.type = TCA_ACT_XT,
.owner = THIS_MODULE,
.act = tcf_ipt,
.dump = tcf_ipt_dump,
- .cleanup = tcf_ipt_cleanup,
+ .cleanup = tcf_ipt_release,
.init = tcf_ipt_init,
};
@@ -311,20 +286,16 @@ MODULE_ALIAS("act_xt");
static int __init ipt_init_module(void)
{
- int ret1, ret2, err;
- err = tcf_hashinfo_init(&ipt_hash_info, IPT_TAB_MASK);
- if (err)
- return err;
+ int ret1, ret2;
- ret1 = tcf_register_action(&act_xt_ops);
+ ret1 = tcf_register_action(&act_xt_ops, IPT_TAB_MASK);
if (ret1 < 0)
printk("Failed to load xt action\n");
- ret2 = tcf_register_action(&act_ipt_ops);
+ ret2 = tcf_register_action(&act_ipt_ops, IPT_TAB_MASK);
if (ret2 < 0)
printk("Failed to load ipt action\n");
if (ret1 < 0 && ret2 < 0) {
- tcf_hashinfo_destroy(&ipt_hash_info);
return ret1;
} else
return 0;
@@ -334,7 +305,6 @@ static void __exit ipt_cleanup_module(void)
{
tcf_unregister_action(&act_xt_ops);
tcf_unregister_action(&act_ipt_ops);
- tcf_hashinfo_destroy(&ipt_hash_info);
}
module_init(ipt_init_module);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 0b2c6d39d396..4f912c0e225b 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -31,23 +31,13 @@
#define MIRRED_TAB_MASK 7
static LIST_HEAD(mirred_list);
-static struct tcf_hashinfo mirred_hash_info;
-static int tcf_mirred_release(struct tcf_mirred *m, int bind)
+static void tcf_mirred_release(struct tc_action *a, int bind)
{
- if (m) {
- if (bind)
- m->tcf_bindcnt--;
- m->tcf_refcnt--;
- if (!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
- list_del(&m->tcfm_list);
- if (m->tcfm_dev)
- dev_put(m->tcfm_dev);
- tcf_hash_destroy(&m->common, &mirred_hash_info);
- return 1;
- }
- }
- return 0;
+ struct tcf_mirred *m = to_mirred(a);
+ list_del(&m->tcfm_list);
+ if (m->tcfm_dev)
+ dev_put(m->tcfm_dev);
}
static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
@@ -61,7 +51,6 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct nlattr *tb[TCA_MIRRED_MAX + 1];
struct tc_mirred *parm;
struct tcf_mirred *m;
- struct tcf_common *pc;
struct net_device *dev;
int ret, ok_push = 0;
@@ -101,21 +90,20 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
dev = NULL;
}
- pc = tcf_hash_check(parm->index, a, bind);
- if (!pc) {
+ if (!tcf_hash_check(parm->index, a, bind)) {
if (dev == NULL)
return -EINVAL;
- pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*m), bind);
+ if (ret)
+ return ret;
ret = ACT_P_CREATED;
} else {
if (!ovr) {
- tcf_mirred_release(to_mirred(pc), bind);
+ tcf_hash_release(a, bind);
return -EEXIST;
}
}
- m = to_mirred(pc);
+ m = to_mirred(a);
spin_lock_bh(&m->tcf_lock);
m->tcf_action = parm->action;
@@ -131,21 +119,12 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
spin_unlock_bh(&m->tcf_lock);
if (ret == ACT_P_CREATED) {
list_add(&m->tcfm_list, &mirred_list);
- tcf_hash_insert(pc, a->ops->hinfo);
+ tcf_hash_insert(a);
}
return ret;
}
-static int tcf_mirred_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_mirred *m = a->priv;
-
- if (m)
- return tcf_mirred_release(m, bind);
- return 0;
-}
-
static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -254,12 +233,11 @@ static struct notifier_block mirred_device_notifier = {
static struct tc_action_ops act_mirred_ops = {
.kind = "mirred",
- .hinfo = &mirred_hash_info,
.type = TCA_ACT_MIRRED,
.owner = THIS_MODULE,
.act = tcf_mirred,
.dump = tcf_mirred_dump,
- .cleanup = tcf_mirred_cleanup,
+ .cleanup = tcf_mirred_release,
.init = tcf_mirred_init,
};
@@ -273,19 +251,13 @@ static int __init mirred_init_module(void)
if (err)
return err;
- err = tcf_hashinfo_init(&mirred_hash_info, MIRRED_TAB_MASK);
- if (err) {
- unregister_netdevice_notifier(&mirred_device_notifier);
- return err;
- }
pr_info("Mirror/redirect action on\n");
- return tcf_register_action(&act_mirred_ops);
+ return tcf_register_action(&act_mirred_ops, MIRRED_TAB_MASK);
}
static void __exit mirred_cleanup_module(void)
{
tcf_unregister_action(&act_mirred_ops);
- tcf_hashinfo_destroy(&mirred_hash_info);
unregister_netdevice_notifier(&mirred_device_notifier);
}
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 81f0404bb335..270a030d5fd0 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -31,8 +31,6 @@
#define NAT_TAB_MASK 15
-static struct tcf_hashinfo nat_hash_info;
-
static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
[TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) },
};
@@ -44,7 +42,6 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
struct tc_nat *parm;
int ret = 0, err;
struct tcf_nat *p;
- struct tcf_common *pc;
if (nla == NULL)
return -EINVAL;
@@ -57,20 +54,19 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
return -EINVAL;
parm = nla_data(tb[TCA_NAT_PARMS]);
- pc = tcf_hash_check(parm->index, a, bind);
- if (!pc) {
- pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
+ if (!tcf_hash_check(parm->index, a, bind)) {
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ if (ret)
+ return ret;
ret = ACT_P_CREATED;
} else {
if (bind)
return 0;
- tcf_hash_release(pc, bind, a->ops->hinfo);
+ tcf_hash_release(a, bind);
if (!ovr)
return -EEXIST;
}
- p = to_tcf_nat(pc);
+ p = to_tcf_nat(a);
spin_lock_bh(&p->tcf_lock);
p->old_addr = parm->old_addr;
@@ -82,18 +78,11 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, a->ops->hinfo);
+ tcf_hash_insert(a);
return ret;
}
-static int tcf_nat_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_nat *p = a->priv;
-
- return tcf_hash_release(&p->common, bind, &nat_hash_info);
-}
-
static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -293,12 +282,10 @@ nla_put_failure:
static struct tc_action_ops act_nat_ops = {
.kind = "nat",
- .hinfo = &nat_hash_info,
.type = TCA_ACT_NAT,
.owner = THIS_MODULE,
.act = tcf_nat,
.dump = tcf_nat_dump,
- .cleanup = tcf_nat_cleanup,
.init = tcf_nat_init,
};
@@ -307,16 +294,12 @@ MODULE_LICENSE("GPL");
static int __init nat_init_module(void)
{
- int err = tcf_hashinfo_init(&nat_hash_info, NAT_TAB_MASK);
- if (err)
- return err;
- return tcf_register_action(&act_nat_ops);
+ return tcf_register_action(&act_nat_ops, NAT_TAB_MASK);
}
static void __exit nat_cleanup_module(void)
{
tcf_unregister_action(&act_nat_ops);
- tcf_hashinfo_destroy(&nat_hash_info);
}
module_init(nat_init_module);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index be3f0f6875bb..5f9bcb2e080b 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -25,8 +25,6 @@
#define PEDIT_TAB_MASK 15
-static struct tcf_hashinfo pedit_hash_info;
-
static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
[TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) },
};
@@ -39,7 +37,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct tc_pedit *parm;
int ret = 0, err;
struct tcf_pedit *p;
- struct tcf_common *pc;
struct tc_pedit_key *keys = NULL;
int ksize;
@@ -57,26 +54,22 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
if (nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm) + ksize)
return -EINVAL;
- pc = tcf_hash_check(parm->index, a, bind);
- if (!pc) {
+ if (!tcf_hash_check(parm->index, a, bind)) {
if (!parm->nkeys)
return -EINVAL;
- pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
- p = to_pedit(pc);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ if (ret)
+ return ret;
+ p = to_pedit(a);
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL) {
- if (est)
- gen_kill_estimator(&pc->tcfc_bstats,
- &pc->tcfc_rate_est);
- kfree_rcu(pc, tcfc_rcu);
+ tcf_hash_cleanup(a, est);
return -ENOMEM;
}
ret = ACT_P_CREATED;
} else {
- p = to_pedit(pc);
- tcf_hash_release(pc, bind, a->ops->hinfo);
+ p = to_pedit(a);
+ tcf_hash_release(a, bind);
if (bind)
return 0;
if (!ovr)
@@ -100,22 +93,15 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
memcpy(p->tcfp_keys, parm->keys, ksize);
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, a->ops->hinfo);
+ tcf_hash_insert(a);
return ret;
}
-static int tcf_pedit_cleanup(struct tc_action *a, int bind)
+static void tcf_pedit_cleanup(struct tc_action *a, int bind)
{
struct tcf_pedit *p = a->priv;
-
- if (p) {
- struct tc_pedit_key *keys = p->tcfp_keys;
- if (tcf_hash_release(&p->common, bind, &pedit_hash_info)) {
- kfree(keys);
- return 1;
- }
- }
- return 0;
+ struct tc_pedit_key *keys = p->tcfp_keys;
+ kfree(keys);
}
static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
@@ -230,7 +216,6 @@ nla_put_failure:
static struct tc_action_ops act_pedit_ops = {
.kind = "pedit",
- .hinfo = &pedit_hash_info,
.type = TCA_ACT_PEDIT,
.owner = THIS_MODULE,
.act = tcf_pedit,
@@ -245,15 +230,11 @@ MODULE_LICENSE("GPL");
static int __init pedit_init_module(void)
{
- int err = tcf_hashinfo_init(&pedit_hash_info, PEDIT_TAB_MASK);
- if (err)
- return err;
- return tcf_register_action(&act_pedit_ops);
+ return tcf_register_action(&act_pedit_ops, PEDIT_TAB_MASK);
}
static void __exit pedit_cleanup_module(void)
{
- tcf_hashinfo_destroy(&pedit_hash_info);
tcf_unregister_action(&act_pedit_ops);
}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 1778209a332f..0566e4606a4a 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -41,7 +41,6 @@ struct tcf_police {
container_of(pc, struct tcf_police, common)
#define POL_TAB_MASK 15
-static struct tcf_hashinfo police_hash_info;
/* old policer structure from before tc actions */
struct tc_police_compat {
@@ -234,7 +233,7 @@ override:
police->tcfp_t_c = ktime_to_ns(ktime_get());
police->tcf_index = parm->index ? parm->index :
- tcf_hash_new_index(a->ops->hinfo);
+ tcf_hash_new_index(hinfo);
h = tcf_hash(police->tcf_index, POL_TAB_MASK);
spin_lock_bh(&hinfo->lock);
hlist_add_head(&police->tcf_head, &hinfo->htab[h]);
@@ -253,14 +252,6 @@ failure:
return err;
}
-static int tcf_act_police_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_police *p = a->priv;
- if (p)
- return tcf_hash_release(&p->common, bind, &police_hash_info);
- return 0;
-}
-
static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -357,12 +348,10 @@ MODULE_LICENSE("GPL");
static struct tc_action_ops act_police_ops = {
.kind = "police",
- .hinfo = &police_hash_info,
.type = TCA_ID_POLICE,
.owner = THIS_MODULE,
.act = tcf_act_police,
.dump = tcf_act_police_dump,
- .cleanup = tcf_act_police_cleanup,
.init = tcf_act_police_locate,
.walk = tcf_act_police_walker
};
@@ -370,19 +359,12 @@ static struct tc_action_ops act_police_ops = {
static int __init
police_init_module(void)
{
- int err = tcf_hashinfo_init(&police_hash_info, POL_TAB_MASK);
- if (err)
- return err;
- err = tcf_register_action(&act_police_ops);
- if (err)
- tcf_hashinfo_destroy(&police_hash_info);
- return err;
+ return tcf_register_action(&act_police_ops, POL_TAB_MASK);
}
static void __exit
police_cleanup_module(void)
{
- tcf_hashinfo_destroy(&police_hash_info);
tcf_unregister_action(&act_police_ops);
}
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 8ef2f1fcbfba..992c2317ce88 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -25,7 +25,6 @@
#include <net/tc_act/tc_defact.h>
#define SIMP_TAB_MASK 7
-static struct tcf_hashinfo simp_hash_info;
#define SIMP_MAX_DATA 32
static int tcf_simp(struct sk_buff *skb, const struct tc_action *a,
@@ -47,20 +46,10 @@ static int tcf_simp(struct sk_buff *skb, const struct tc_action *a,
return d->tcf_action;
}
-static int tcf_simp_release(struct tcf_defact *d, int bind)
+static void tcf_simp_release(struct tc_action *a, int bind)
{
- int ret = 0;
- if (d) {
- if (bind)
- d->tcf_bindcnt--;
- d->tcf_refcnt--;
- if (d->tcf_bindcnt <= 0 && d->tcf_refcnt <= 0) {
- kfree(d->tcfd_defdata);
- tcf_hash_destroy(&d->common, &simp_hash_info);
- ret = 1;
- }
- }
- return ret;
+ struct tcf_defact *d = to_defact(a);
+ kfree(d->tcfd_defdata);
}
static int alloc_defdata(struct tcf_defact *d, char *defdata)
@@ -94,7 +83,6 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct nlattr *tb[TCA_DEF_MAX + 1];
struct tc_defact *parm;
struct tcf_defact *d;
- struct tcf_common *pc;
char *defdata;
int ret = 0, err;
@@ -114,29 +102,25 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_DEF_PARMS]);
defdata = nla_data(tb[TCA_DEF_DATA]);
- pc = tcf_hash_check(parm->index, a, bind);
- if (!pc) {
- pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
+ if (!tcf_hash_check(parm->index, a, bind)) {
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind);
+ if (ret)
+ return ret;
- d = to_defact(pc);
+ d = to_defact(a);
ret = alloc_defdata(d, defdata);
if (ret < 0) {
- if (est)
- gen_kill_estimator(&pc->tcfc_bstats,
- &pc->tcfc_rate_est);
- kfree_rcu(pc, tcfc_rcu);
+ tcf_hash_cleanup(a, est);
return ret;
}
d->tcf_action = parm->action;
ret = ACT_P_CREATED;
} else {
- d = to_defact(pc);
+ d = to_defact(a);
if (bind)
return 0;
- tcf_simp_release(d, bind);
+ tcf_hash_release(a, bind);
if (!ovr)
return -EEXIST;
@@ -144,19 +128,10 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
}
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, a->ops->hinfo);
+ tcf_hash_insert(a);
return ret;
}
-static int tcf_simp_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_defact *d = a->priv;
-
- if (d)
- return tcf_simp_release(d, bind);
- return 0;
-}
-
static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
@@ -187,12 +162,11 @@ nla_put_failure:
static struct tc_action_ops act_simp_ops = {
.kind = "simple",
- .hinfo = &simp_hash_info,
.type = TCA_ACT_SIMP,
.owner = THIS_MODULE,
.act = tcf_simp,
.dump = tcf_simp_dump,
- .cleanup = tcf_simp_cleanup,
+ .cleanup = tcf_simp_release,
.init = tcf_simp_init,
};
@@ -202,23 +176,15 @@ MODULE_LICENSE("GPL");
static int __init simp_init_module(void)
{
- int err, ret;
- err = tcf_hashinfo_init(&simp_hash_info, SIMP_TAB_MASK);
- if (err)
- return err;
-
- ret = tcf_register_action(&act_simp_ops);
+ int ret;
+ ret = tcf_register_action(&act_simp_ops, SIMP_TAB_MASK);
if (!ret)
pr_info("Simple TC action Loaded\n");
- else
- tcf_hashinfo_destroy(&simp_hash_info);
-
return ret;
}
static void __exit simp_cleanup_module(void)
{
- tcf_hashinfo_destroy(&simp_hash_info);
tcf_unregister_action(&act_simp_ops);
}
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 98725080b5aa..fcfeeaf838be 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -28,7 +28,6 @@
#include <net/tc_act/tc_skbedit.h>
#define SKBEDIT_TAB_MASK 15
-static struct tcf_hashinfo skbedit_hash_info;
static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
@@ -65,7 +64,6 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
struct tc_skbedit *parm;
struct tcf_skbedit *d;
- struct tcf_common *pc;
u32 flags = 0, *priority = NULL, *mark = NULL;
u16 *queue_mapping = NULL;
int ret = 0, err;
@@ -100,19 +98,18 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
- pc = tcf_hash_check(parm->index, a, bind);
- if (!pc) {
- pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
+ if (!tcf_hash_check(parm->index, a, bind)) {
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind);
+ if (ret)
+ return ret;
- d = to_skbedit(pc);
+ d = to_skbedit(a);
ret = ACT_P_CREATED;
} else {
- d = to_skbedit(pc);
+ d = to_skbedit(a);
if (bind)
return 0;
- tcf_hash_release(pc, bind, a->ops->hinfo);
+ tcf_hash_release(a, bind);
if (!ovr)
return -EEXIST;
}
@@ -132,19 +129,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
spin_unlock_bh(&d->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, a->ops->hinfo);
+ tcf_hash_insert(a);
return ret;
}
-static int tcf_skbedit_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_skbedit *d = a->priv;
-
- if (d)
- return tcf_hash_release(&d->common, bind, &skbedit_hash_info);
- return 0;
-}
-
static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
@@ -186,12 +174,10 @@ nla_put_failure:
static struct tc_action_ops act_skbedit_ops = {
.kind = "skbedit",
- .hinfo = &skbedit_hash_info,
.type = TCA_ACT_SKBEDIT,
.owner = THIS_MODULE,
.act = tcf_skbedit,
.dump = tcf_skbedit_dump,
- .cleanup = tcf_skbedit_cleanup,
.init = tcf_skbedit_init,
};
@@ -201,15 +187,11 @@ MODULE_LICENSE("GPL");
static int __init skbedit_init_module(void)
{
- int err = tcf_hashinfo_init(&skbedit_hash_info, SKBEDIT_TAB_MASK);
- if (err)
- return err;
- return tcf_register_action(&act_skbedit_ops);
+ return tcf_register_action(&act_skbedit_ops, SKBEDIT_TAB_MASK);
}
static void __exit skbedit_cleanup_module(void)
{
- tcf_hashinfo_destroy(&skbedit_hash_info);
tcf_unregister_action(&act_skbedit_ops);
}
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index a366537f82c6..63a3ce75c02e 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -29,11 +29,11 @@
#include <net/act_api.h>
#include <net/pkt_cls.h>
-#define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *))
+#define HTSIZE 256
struct fw_head {
- struct fw_filter *ht[HTSIZE];
- u32 mask;
+ u32 mask;
+ struct fw_filter *ht[HTSIZE];
};
struct fw_filter {
@@ -46,30 +46,11 @@ struct fw_filter {
struct tcf_exts exts;
};
-static inline int fw_hash(u32 handle)
+static u32 fw_hash(u32 handle)
{
- if (HTSIZE == 4096)
- return ((handle >> 24) & 0xFFF) ^
- ((handle >> 12) & 0xFFF) ^
- (handle & 0xFFF);
- else if (HTSIZE == 2048)
- return ((handle >> 22) & 0x7FF) ^
- ((handle >> 11) & 0x7FF) ^
- (handle & 0x7FF);
- else if (HTSIZE == 1024)
- return ((handle >> 20) & 0x3FF) ^
- ((handle >> 10) & 0x3FF) ^
- (handle & 0x3FF);
- else if (HTSIZE == 512)
- return (handle >> 27) ^
- ((handle >> 18) & 0x1FF) ^
- ((handle >> 9) & 0x1FF) ^
- (handle & 0x1FF);
- else if (HTSIZE == 256) {
- u8 *t = (u8 *) &handle;
- return t[0] ^ t[1] ^ t[2] ^ t[3];
- } else
- return handle & (HTSIZE - 1);
+ handle ^= (handle >> 16);
+ handle ^= (handle >> 8);
+ return handle % HTSIZE;
}
static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 1313145e3b86..a0b84e0e22de 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -273,11 +273,12 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
void qdisc_list_add(struct Qdisc *q)
{
- struct Qdisc *root = qdisc_dev(q)->qdisc;
+ if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
+ struct Qdisc *root = qdisc_dev(q)->qdisc;
- WARN_ON_ONCE(root == &noop_qdisc);
- if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
+ WARN_ON_ONCE(root == &noop_qdisc);
list_add_tail(&q->list, &root->list);
+ }
}
EXPORT_SYMBOL(qdisc_list_add);
@@ -1303,6 +1304,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
struct gnet_dump d;
struct qdisc_size_table *stab;
+ cond_resched();
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
if (!nlh)
goto out_nlmsg_trim;
@@ -1434,9 +1436,9 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
s_idx = cb->args[0];
s_q_idx = q_idx = cb->args[1];
- rcu_read_lock();
idx = 0;
- for_each_netdev_rcu(net, dev) {
+ ASSERT_RTNL();
+ for_each_netdev(net, dev) {
struct netdev_queue *dev_queue;
if (idx < s_idx)
@@ -1459,8 +1461,6 @@ cont:
}
done:
- rcu_read_unlock();
-
cb->args[0] = idx;
cb->args[1] = q_idx;
@@ -1617,6 +1617,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
struct gnet_dump d;
const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
+ cond_resched();
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
if (!nlh)
goto out_nlmsg_trim;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 1f9c31411f19..8449b337f9e3 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -623,8 +623,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
if (nla_put_u32(skb, TCA_ATM_EXCESS, 0))
goto nla_put_failure;
}
- nla_nest_end(skb, nest);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
nla_nest_cancel(skb, nest);
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 2f80d01d42a6..ead526467cca 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1563,8 +1563,7 @@ static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
goto nla_put_failure;
if (cbq_dump_attr(skb, &q->link) < 0)
goto nla_put_failure;
- nla_nest_end(skb, nest);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
nla_nest_cancel(skb, nest);
@@ -1599,8 +1598,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
goto nla_put_failure;
if (cbq_dump_attr(skb, cl) < 0)
goto nla_put_failure;
- nla_nest_end(skb, nest);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
nla_nest_cancel(skb, nest);
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 08ef7a42c0e4..23c682b42f99 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -601,6 +601,7 @@ static int fq_resize(struct Qdisc *sch, u32 log)
{
struct fq_sched_data *q = qdisc_priv(sch);
struct rb_root *array;
+ void *old_fq_root;
u32 idx;
if (q->fq_root && log == q->fq_trees_log)
@@ -615,13 +616,19 @@ static int fq_resize(struct Qdisc *sch, u32 log)
for (idx = 0; idx < (1U << log); idx++)
array[idx] = RB_ROOT;
- if (q->fq_root) {
- fq_rehash(q, q->fq_root, q->fq_trees_log, array, log);
- fq_free(q->fq_root);
- }
+ sch_tree_lock(sch);
+
+ old_fq_root = q->fq_root;
+ if (old_fq_root)
+ fq_rehash(q, old_fq_root, q->fq_trees_log, array, log);
+
q->fq_root = array;
q->fq_trees_log = log;
+ sch_tree_unlock(sch);
+
+ fq_free(old_fq_root);
+
return 0;
}
@@ -697,9 +704,11 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
q->flow_refill_delay = usecs_to_jiffies(usecs_delay);
}
- if (!err)
+ if (!err) {
+ sch_tree_unlock(sch);
err = fq_resize(sch, fq_log);
-
+ sch_tree_lock(sch);
+ }
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = fq_dequeue(sch);
@@ -772,8 +781,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
goto nla_put_failure;
- nla_nest_end(skb, opts);
- return skb->len;
+ return nla_nest_end(skb, opts);
nla_put_failure:
return -1;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index ba5bc929eac7..0bf432c782c1 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -450,8 +450,7 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
q->flows_cnt))
goto nla_put_failure;
- nla_nest_end(skb, opts);
- return skb->len;
+ return nla_nest_end(skb, opts);
nla_put_failure:
return -1;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index c4075610502c..ec8aeaac1dd7 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1353,8 +1353,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
goto nla_put_failure;
if (hfsc_dump_curves(skb, cl) < 0)
goto nla_put_failure;
- nla_nest_end(skb, nest);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
nla_nest_cancel(skb, nest);
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 647680b1c625..edee03d922e2 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -691,8 +691,7 @@ static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT, q->hhf_non_hh_weight))
goto nla_put_failure;
- nla_nest_end(skb, opts);
- return skb->len;
+ return nla_nest_end(skb, opts);
nla_put_failure:
return -1;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 722e137df244..9f949abcacef 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1062,12 +1062,13 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
- spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
struct htb_sched *q = qdisc_priv(sch);
struct nlattr *nest;
struct tc_htb_glob gopt;
- spin_lock_bh(root_lock);
+ /* Its safe to not acquire qdisc lock. As we hold RTNL,
+ * no change can happen on the qdisc parameters.
+ */
gopt.direct_pkts = q->direct_pkts;
gopt.version = HTB_VER;
@@ -1081,13 +1082,10 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
goto nla_put_failure;
- nla_nest_end(skb, nest);
- spin_unlock_bh(root_lock);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
- spin_unlock_bh(root_lock);
nla_nest_cancel(skb, nest);
return -1;
}
@@ -1096,11 +1094,12 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
struct sk_buff *skb, struct tcmsg *tcm)
{
struct htb_class *cl = (struct htb_class *)arg;
- spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
struct nlattr *nest;
struct tc_htb_opt opt;
- spin_lock_bh(root_lock);
+ /* Its safe to not acquire qdisc lock. As we hold RTNL,
+ * no change can happen on the class parameters.
+ */
tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
tcm->tcm_handle = cl->common.classid;
if (!cl->level && cl->un.leaf.q)
@@ -1128,12 +1127,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
nla_put_u64(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps))
goto nla_put_failure;
- nla_nest_end(skb, nest);
- spin_unlock_bh(root_lock);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
- spin_unlock_bh(root_lock);
nla_nest_cancel(skb, nest);
return -1;
}
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index bce1665239b8..62871c14e1f9 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -100,8 +100,7 @@ static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
- nla_nest_end(skb, nest);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
nla_nest_cancel(skb, nest);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index de1059af6da1..f1669a00f571 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -117,6 +117,11 @@ struct netem_sched_data {
LOST_IN_BURST_PERIOD,
} _4_state_model;
+ enum {
+ GOOD_STATE = 1,
+ BAD_STATE,
+ } GE_state_model;
+
/* Correlated Loss Generation models */
struct clgstate {
/* state of the Markov chain */
@@ -272,15 +277,15 @@ static bool loss_gilb_ell(struct netem_sched_data *q)
struct clgstate *clg = &q->clg;
switch (clg->state) {
- case 1:
+ case GOOD_STATE:
if (prandom_u32() < clg->a1)
- clg->state = 2;
+ clg->state = BAD_STATE;
if (prandom_u32() < clg->a4)
return true;
break;
- case 2:
+ case BAD_STATE:
if (prandom_u32() < clg->a2)
- clg->state = 1;
+ clg->state = GOOD_STATE;
if (prandom_u32() > clg->a3)
return true;
}
@@ -689,9 +694,8 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
return 0;
}
-static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
+static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
{
- struct netem_sched_data *q = qdisc_priv(sch);
const struct tc_netem_corr *c = nla_data(attr);
init_crandom(&q->delay_cor, c->delay_corr);
@@ -699,27 +703,24 @@ static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
init_crandom(&q->dup_cor, c->dup_corr);
}
-static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
+static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr)
{
- struct netem_sched_data *q = qdisc_priv(sch);
const struct tc_netem_reorder *r = nla_data(attr);
q->reorder = r->probability;
init_crandom(&q->reorder_cor, r->correlation);
}
-static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
+static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr)
{
- struct netem_sched_data *q = qdisc_priv(sch);
const struct tc_netem_corrupt *r = nla_data(attr);
q->corrupt = r->probability;
init_crandom(&q->corrupt_cor, r->correlation);
}
-static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
+static void get_rate(struct netem_sched_data *q, const struct nlattr *attr)
{
- struct netem_sched_data *q = qdisc_priv(sch);
const struct tc_netem_rate *r = nla_data(attr);
q->rate = r->rate;
@@ -732,9 +733,8 @@ static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
q->cell_size_reciprocal = (struct reciprocal_value) { 0 };
}
-static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
+static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr)
{
- struct netem_sched_data *q = qdisc_priv(sch);
const struct nlattr *la;
int rem;
@@ -752,7 +752,7 @@ static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
q->loss_model = CLG_4_STATES;
- q->clg.state = 1;
+ q->clg.state = TX_IN_GAP_PERIOD;
q->clg.a1 = gi->p13;
q->clg.a2 = gi->p31;
q->clg.a3 = gi->p32;
@@ -770,7 +770,7 @@ static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
}
q->loss_model = CLG_GILB_ELL;
- q->clg.state = 1;
+ q->clg.state = GOOD_STATE;
q->clg.a1 = ge->p;
q->clg.a2 = ge->r;
q->clg.a3 = ge->h;
@@ -821,6 +821,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
struct netem_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_NETEM_MAX + 1];
struct tc_netem_qopt *qopt;
+ struct clgstate old_clg;
+ int old_loss_model = CLG_RANDOM;
int ret;
if (opt == NULL)
@@ -831,6 +833,33 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
if (ret < 0)
return ret;
+ /* backup q->clg and q->loss_model */
+ old_clg = q->clg;
+ old_loss_model = q->loss_model;
+
+ if (tb[TCA_NETEM_LOSS]) {
+ ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
+ if (ret) {
+ q->loss_model = old_loss_model;
+ return ret;
+ }
+ } else {
+ q->loss_model = CLG_RANDOM;
+ }
+
+ if (tb[TCA_NETEM_DELAY_DIST]) {
+ ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
+ if (ret) {
+ /* recover clg and loss_model, in case of
+ * q->clg and q->loss_model were modified
+ * in get_loss_clg()
+ */
+ q->clg = old_clg;
+ q->loss_model = old_loss_model;
+ return ret;
+ }
+ }
+
sch->limit = qopt->limit;
q->latency = qopt->latency;
@@ -848,22 +877,16 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
q->reorder = ~0;
if (tb[TCA_NETEM_CORR])
- get_correlation(sch, tb[TCA_NETEM_CORR]);
-
- if (tb[TCA_NETEM_DELAY_DIST]) {
- ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
- if (ret)
- return ret;
- }
+ get_correlation(q, tb[TCA_NETEM_CORR]);
if (tb[TCA_NETEM_REORDER])
- get_reorder(sch, tb[TCA_NETEM_REORDER]);
+ get_reorder(q, tb[TCA_NETEM_REORDER]);
if (tb[TCA_NETEM_CORRUPT])
- get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
+ get_corrupt(q, tb[TCA_NETEM_CORRUPT]);
if (tb[TCA_NETEM_RATE])
- get_rate(sch, tb[TCA_NETEM_RATE]);
+ get_rate(q, tb[TCA_NETEM_RATE]);
if (tb[TCA_NETEM_RATE64])
q->rate = max_t(u64, q->rate,
@@ -872,10 +895,6 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_NETEM_ECN])
q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
- q->loss_model = CLG_RANDOM;
- if (tb[TCA_NETEM_LOSS])
- ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
-
return ret;
}
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index a255d0200a59..fefeeb73f15f 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -15,6 +15,11 @@
*
* ECN support is added by Naeem Khademi <naeemk@ifi.uio.no>
* University of Oslo, Norway.
+ *
+ * References:
+ * IETF draft submission: http://tools.ietf.org/html/draft-pan-aqm-pie-00
+ * IEEE Conference on High Performance Switching and Routing 2013 :
+ * "PIE: A * Lightweight Control Scheme to Address the Bufferbloat Problem"
*/
#include <linux/module.h>
@@ -36,7 +41,7 @@ struct pie_params {
psched_time_t target; /* user specified target delay in pschedtime */
u32 tupdate; /* timer frequency (in jiffies) */
u32 limit; /* number of packets that can be enqueued */
- u32 alpha; /* alpha and beta are between -4 and 4 */
+ u32 alpha; /* alpha and beta are between 0 and 32 */
u32 beta; /* and are used for shift relative to 1 */
bool ecn; /* true if ecn is enabled */
bool bytemode; /* to scale drop early prob based on pkt size */
@@ -326,10 +331,16 @@ static void calculate_probability(struct Qdisc *sch)
if (qdelay == 0 && qlen != 0)
update_prob = false;
- /* Add ranges for alpha and beta, more aggressive for high dropping
- * mode and gentle steps for light dropping mode
- * In light dropping mode, take gentle steps; in medium dropping mode,
- * take medium steps; in high dropping mode, take big steps.
+ /* In the algorithm, alpha and beta are between 0 and 2 with typical
+ * value for alpha as 0.125. In this implementation, we use values 0-32
+ * passed from user space to represent this. Also, alpha and beta have
+ * unit of HZ and need to be scaled before they can used to update
+ * probability. alpha/beta are updated locally below by 1) scaling them
+ * appropriately 2) scaling down by 16 to come to 0-2 range.
+ * Please see paper for details.
+ *
+ * We scale alpha and beta differently depending on whether we are in
+ * light, medium or high dropping mode.
*/
if (q->vars.prob < MAX_PROB / 100) {
alpha =
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 1cb413fead89..18ff63433709 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -101,12 +101,11 @@
struct tbf_sched_data {
/* Parameters */
u32 limit; /* Maximal length of backlog: bytes */
+ u32 max_size;
s64 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */
s64 mtu;
- u32 max_size;
struct psched_ratecfg rate;
struct psched_ratecfg peak;
- bool peak_present;
/* Variables */
s64 tokens; /* Current number of B tokens */
@@ -222,6 +221,11 @@ static unsigned int tbf_drop(struct Qdisc *sch)
return len;
}
+static bool tbf_peak_present(const struct tbf_sched_data *q)
+{
+ return q->peak.rate_bytes_ps;
+}
+
static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
{
struct tbf_sched_data *q = qdisc_priv(sch);
@@ -238,7 +242,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
now = ktime_to_ns(ktime_get());
toks = min_t(s64, now - q->t_c, q->buffer);
- if (q->peak_present) {
+ if (tbf_peak_present(q)) {
ptoks = toks + q->ptokens;
if (ptoks > q->mtu)
ptoks = q->mtu;
@@ -334,18 +338,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
tb[TCA_TBF_PTAB]));
- if (q->qdisc != &noop_qdisc) {
- err = fifo_set_limit(q->qdisc, qopt->limit);
- if (err)
- goto done;
- } else if (qopt->limit > 0) {
- child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
- if (IS_ERR(child)) {
- err = PTR_ERR(child);
- goto done;
- }
- }
-
buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);
@@ -378,6 +370,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
} else {
max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
}
+ } else {
+ memset(&peak, 0, sizeof(peak));
}
if (max_size < psched_mtu(qdisc_dev(sch)))
@@ -390,6 +384,18 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
goto done;
}
+ if (q->qdisc != &noop_qdisc) {
+ err = fifo_set_limit(q->qdisc, qopt->limit);
+ if (err)
+ goto done;
+ } else if (qopt->limit > 0) {
+ child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
+ if (IS_ERR(child)) {
+ err = PTR_ERR(child);
+ goto done;
+ }
+ }
+
sch_tree_lock(sch);
if (child) {
qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
@@ -410,12 +416,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
q->ptokens = q->mtu;
memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
- if (qopt->peakrate.rate) {
- memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));
- q->peak_present = true;
- } else {
- q->peak_present = false;
- }
+ memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));
sch_tree_unlock(sch);
err = 0;
@@ -458,7 +459,7 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.limit = q->limit;
psched_ratecfg_getrate(&opt.rate, &q->rate);
- if (q->peak_present)
+ if (tbf_peak_present(q))
psched_ratecfg_getrate(&opt.peakrate, &q->peak);
else
memset(&opt.peakrate, 0, sizeof(opt.peakrate));
@@ -469,13 +470,12 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
if (q->rate.rate_bytes_ps >= (1ULL << 32) &&
nla_put_u64(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps))
goto nla_put_failure;
- if (q->peak_present &&
+ if (tbf_peak_present(q) &&
q->peak.rate_bytes_ps >= (1ULL << 32) &&
nla_put_u64(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps))
goto nla_put_failure;
- nla_nest_end(skb, nest);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
nla_nest_cancel(skb, nest);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 5ae609200674..4f6d6f9d1274 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1239,78 +1239,106 @@ void sctp_assoc_update(struct sctp_association *asoc,
}
/* Update the retran path for sending a retransmitted packet.
- * Round-robin through the active transports, else round-robin
- * through the inactive transports as this is the next best thing
- * we can try.
+ * See also RFC4960, 6.4. Multi-Homed SCTP Endpoints:
+ *
+ * When there is outbound data to send and the primary path
+ * becomes inactive (e.g., due to failures), or where the
+ * SCTP user explicitly requests to send data to an
+ * inactive destination transport address, before reporting
+ * an error to its ULP, the SCTP endpoint should try to send
+ * the data to an alternate active destination transport
+ * address if one exists.
+ *
+ * When retransmitting data that timed out, if the endpoint
+ * is multihomed, it should consider each source-destination
+ * address pair in its retransmission selection policy.
+ * When retransmitting timed-out data, the endpoint should
+ * attempt to pick the most divergent source-destination
+ * pair from the original source-destination pair to which
+ * the packet was transmitted.
+ *
+ * Note: Rules for picking the most divergent source-destination
+ * pair are an implementation decision and are not specified
+ * within this document.
+ *
+ * Our basic strategy is to round-robin transports in priorities
+ * according to sctp_state_prio_map[] e.g., if no such
+ * transport with state SCTP_ACTIVE exists, round-robin through
+ * SCTP_UNKNOWN, etc. You get the picture.
*/
-void sctp_assoc_update_retran_path(struct sctp_association *asoc)
+static const u8 sctp_trans_state_to_prio_map[] = {
+ [SCTP_ACTIVE] = 3, /* best case */
+ [SCTP_UNKNOWN] = 2,
+ [SCTP_PF] = 1,
+ [SCTP_INACTIVE] = 0, /* worst case */
+};
+
+static u8 sctp_trans_score(const struct sctp_transport *trans)
{
- struct sctp_transport *t, *next;
- struct list_head *head = &asoc->peer.transport_addr_list;
- struct list_head *pos;
+ return sctp_trans_state_to_prio_map[trans->state];
+}
- if (asoc->peer.transport_count == 1)
- return;
+static struct sctp_transport *sctp_trans_elect_best(struct sctp_transport *curr,
+ struct sctp_transport *best)
+{
+ if (best == NULL)
+ return curr;
- /* Find the next transport in a round-robin fashion. */
- t = asoc->peer.retran_path;
- pos = &t->transports;
- next = NULL;
+ return sctp_trans_score(curr) > sctp_trans_score(best) ? curr : best;
+}
- while (1) {
- /* Skip the head. */
- if (pos->next == head)
- pos = head->next;
- else
- pos = pos->next;
+void sctp_assoc_update_retran_path(struct sctp_association *asoc)
+{
+ struct sctp_transport *trans = asoc->peer.retran_path;
+ struct sctp_transport *trans_next = NULL;
- t = list_entry(pos, struct sctp_transport, transports);
+ /* We're done as we only have the one and only path. */
+ if (asoc->peer.transport_count == 1)
+ return;
+ /* If active_path and retran_path are the same and active,
+ * then this is the only active path. Use it.
+ */
+ if (asoc->peer.active_path == asoc->peer.retran_path &&
+ asoc->peer.active_path->state == SCTP_ACTIVE)
+ return;
- /* We have exhausted the list, but didn't find any
- * other active transports. If so, use the next
- * transport.
- */
- if (t == asoc->peer.retran_path) {
- t = next;
+ /* Iterate from retran_path's successor back to retran_path. */
+ for (trans = list_next_entry(trans, transports); 1;
+ trans = list_next_entry(trans, transports)) {
+ /* Manually skip the head element. */
+ if (&trans->transports == &asoc->peer.transport_addr_list)
+ continue;
+ if (trans->state == SCTP_UNCONFIRMED)
+ continue;
+ trans_next = sctp_trans_elect_best(trans, trans_next);
+ /* Active is good enough for immediate return. */
+ if (trans_next->state == SCTP_ACTIVE)
break;
- }
-
- /* Try to find an active transport. */
-
- if ((t->state == SCTP_ACTIVE) ||
- (t->state == SCTP_UNKNOWN)) {
+ /* We've reached the end, time to update path. */
+ if (trans == asoc->peer.retran_path)
break;
- } else {
- /* Keep track of the next transport in case
- * we don't find any active transport.
- */
- if (t->state != SCTP_UNCONFIRMED && !next)
- next = t;
- }
}
- if (t)
- asoc->peer.retran_path = t;
- else
- t = asoc->peer.retran_path;
+ asoc->peer.retran_path = trans_next;
- pr_debug("%s: association:%p addr:%pISpc\n", __func__, asoc,
- &t->ipaddr.sa);
+ pr_debug("%s: association:%p updated new path to addr:%pISpc\n",
+ __func__, asoc, &asoc->peer.retran_path->ipaddr.sa);
}
-/* Choose the transport for sending retransmit packet. */
-struct sctp_transport *sctp_assoc_choose_alter_transport(
- struct sctp_association *asoc, struct sctp_transport *last_sent_to)
+struct sctp_transport *
+sctp_assoc_choose_alter_transport(struct sctp_association *asoc,
+ struct sctp_transport *last_sent_to)
{
/* If this is the first time packet is sent, use the active path,
* else use the retran path. If the last packet was sent over the
* retran path, update the retran path and use it.
*/
- if (!last_sent_to)
+ if (last_sent_to == NULL) {
return asoc->peer.active_path;
- else {
+ } else {
if (last_sent_to == asoc->peer.retran_path)
sctp_assoc_update_retran_path(asoc);
+
return asoc->peer.retran_path;
}
}
@@ -1367,44 +1395,35 @@ static inline bool sctp_peer_needs_update(struct sctp_association *asoc)
return false;
}
-/* Increase asoc's rwnd by len and send any window update SACK if needed. */
-void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len)
+/* Update asoc's rwnd for the approximated state in the buffer,
+ * and check whether SACK needs to be sent.
+ */
+void sctp_assoc_rwnd_update(struct sctp_association *asoc, bool update_peer)
{
+ int rx_count;
struct sctp_chunk *sack;
struct timer_list *timer;
- if (asoc->rwnd_over) {
- if (asoc->rwnd_over >= len) {
- asoc->rwnd_over -= len;
- } else {
- asoc->rwnd += (len - asoc->rwnd_over);
- asoc->rwnd_over = 0;
- }
- } else {
- asoc->rwnd += len;
- }
+ if (asoc->ep->rcvbuf_policy)
+ rx_count = atomic_read(&asoc->rmem_alloc);
+ else
+ rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
- /* If we had window pressure, start recovering it
- * once our rwnd had reached the accumulated pressure
- * threshold. The idea is to recover slowly, but up
- * to the initial advertised window.
- */
- if (asoc->rwnd_press && asoc->rwnd >= asoc->rwnd_press) {
- int change = min(asoc->pathmtu, asoc->rwnd_press);
- asoc->rwnd += change;
- asoc->rwnd_press -= change;
- }
+ if ((asoc->base.sk->sk_rcvbuf - rx_count) > 0)
+ asoc->rwnd = (asoc->base.sk->sk_rcvbuf - rx_count) >> 1;
+ else
+ asoc->rwnd = 0;
- pr_debug("%s: asoc:%p rwnd increased by %d to (%u, %u) - %u\n",
- __func__, asoc, len, asoc->rwnd, asoc->rwnd_over,
- asoc->a_rwnd);
+ pr_debug("%s: asoc:%p rwnd=%u, rx_count=%d, sk_rcvbuf=%d\n",
+ __func__, asoc, asoc->rwnd, rx_count,
+ asoc->base.sk->sk_rcvbuf);
/* Send a window update SACK if the rwnd has increased by at least the
* minimum of the association's PMTU and half of the receive buffer.
* The algorithm used is similar to the one described in
* Section 4.2.3.3 of RFC 1122.
*/
- if (sctp_peer_needs_update(asoc)) {
+ if (update_peer && sctp_peer_needs_update(asoc)) {
asoc->a_rwnd = asoc->rwnd;
pr_debug("%s: sending window update SACK- asoc:%p rwnd:%u "
@@ -1426,45 +1445,6 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len)
}
}
-/* Decrease asoc's rwnd by len. */
-void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
-{
- int rx_count;
- int over = 0;
-
- if (unlikely(!asoc->rwnd || asoc->rwnd_over))
- pr_debug("%s: association:%p has asoc->rwnd:%u, "
- "asoc->rwnd_over:%u!\n", __func__, asoc,
- asoc->rwnd, asoc->rwnd_over);
-
- if (asoc->ep->rcvbuf_policy)
- rx_count = atomic_read(&asoc->rmem_alloc);
- else
- rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
-
- /* If we've reached or overflowed our receive buffer, announce
- * a 0 rwnd if rwnd would still be positive. Store the
- * the potential pressure overflow so that the window can be restored
- * back to original value.
- */
- if (rx_count >= asoc->base.sk->sk_rcvbuf)
- over = 1;
-
- if (asoc->rwnd >= len) {
- asoc->rwnd -= len;
- if (over) {
- asoc->rwnd_press += asoc->rwnd;
- asoc->rwnd = 0;
- }
- } else {
- asoc->rwnd_over = len - asoc->rwnd;
- asoc->rwnd = 0;
- }
-
- pr_debug("%s: asoc:%p rwnd decreased by %d to (%u, %u, %u)\n",
- __func__, asoc, len, asoc->rwnd, asoc->rwnd_over,
- asoc->rwnd_press);
-}
/* Build the bind address list for the association based on info from the
* local endpoint and the remote peer.
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0f6259a6a932..2b1738ef9394 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -662,6 +662,8 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
*/
sctp_v6_to_sk_daddr(&asoc->peer.primary_addr, newsk);
+ newsk->sk_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
+
sk_refcnt_debug_inc(newsk);
if (newsk->sk_prot->init(newsk)) {
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 632090b961c3..3a1767ef3201 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1421,8 +1421,8 @@ static void sctp_chunk_destroy(struct sctp_chunk *chunk)
BUG_ON(!list_empty(&chunk->list));
list_del_init(&chunk->transmitted_list);
- /* Free the chunk skb data and the SCTP_chunk stub itself. */
- dev_kfree_skb(chunk->skb);
+ consume_skb(chunk->skb);
+ consume_skb(chunk->auth_chunk);
SCTP_DBG_OBJCNT_DEC(chunk);
kmem_cache_free(sctp_chunk_cachep, chunk);
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index bd859154000e..5d6883ff00c3 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -495,11 +495,12 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
}
/* If the transport error count is greater than the pf_retrans
- * threshold, and less than pathmaxrtx, then mark this transport
- * as Partially Failed, ee SCTP Quick Failover Draft, secon 5.1,
- * point 1
+ * threshold, and less than pathmaxrtx, and if the current state
+ * is not SCTP_UNCONFIRMED, then mark this transport as Partially
+ * Failed, see SCTP Quick Failover Draft, section 5.1
*/
if ((transport->state != SCTP_PF) &&
+ (transport->state != SCTP_UNCONFIRMED) &&
(asoc->pf_retrans < transport->pathmaxrxt) &&
(transport->error_count > asoc->pf_retrans)) {
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 483dcd71b3c5..01e002430c85 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -758,6 +758,12 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
struct sctp_chunk auth;
sctp_ierror_t ret;
+ /* Make sure that we and the peer are AUTH capable */
+ if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) {
+ sctp_association_free(new_asoc);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ }
+
/* set-up our fake chunk so that we can process it */
auth.skb = chunk->auth_chunk;
auth.asoc = chunk->asoc;
@@ -768,10 +774,6 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
auth.transport = chunk->transport;
ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth);
-
- /* We can now safely free the auth_chunk clone */
- kfree_skb(chunk->auth_chunk);
-
if (ret != SCTP_IERROR_NO_ERROR) {
sctp_association_free(new_asoc);
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -6176,7 +6178,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
* PMTU. In cases, such as loopback, this might be a rather
* large spill over.
*/
- if ((!chunk->data_accepted) && (!asoc->rwnd || asoc->rwnd_over ||
+ if ((!chunk->data_accepted) && (!asoc->rwnd ||
(datalen > asoc->rwnd + asoc->frag_point))) {
/* If this is the next TSN, consider reneging to make
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9e91d6e5df63..981aaf8b6ace 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -64,6 +64,7 @@
#include <linux/crypto.h>
#include <linux/slab.h>
#include <linux/file.h>
+#include <linux/compat.h>
#include <net/ip.h>
#include <net/icmp.h>
@@ -1368,11 +1369,19 @@ static int sctp_setsockopt_connectx(struct sock *sk,
/*
* New (hopefully final) interface for the API.
* We use the sctp_getaddrs_old structure so that use-space library
- * can avoid any unnecessary allocations. The only defferent part
+ * can avoid any unnecessary allocations. The only different part
* is that we store the actual length of the address buffer into the
- * addrs_num structure member. That way we can re-use the existing
+ * addrs_num structure member. That way we can re-use the existing
* code.
*/
+#ifdef CONFIG_COMPAT
+struct compat_sctp_getaddrs_old {
+ sctp_assoc_t assoc_id;
+ s32 addr_num;
+ compat_uptr_t addrs; /* struct sockaddr * */
+};
+#endif
+
static int sctp_getsockopt_connectx3(struct sock *sk, int len,
char __user *optval,
int __user *optlen)
@@ -1381,16 +1390,30 @@ static int sctp_getsockopt_connectx3(struct sock *sk, int len,
sctp_assoc_t assoc_id = 0;
int err = 0;
- if (len < sizeof(param))
- return -EINVAL;
+#ifdef CONFIG_COMPAT
+ if (is_compat_task()) {
+ struct compat_sctp_getaddrs_old param32;
- if (copy_from_user(&param, optval, sizeof(param)))
- return -EFAULT;
+ if (len < sizeof(param32))
+ return -EINVAL;
+ if (copy_from_user(&param32, optval, sizeof(param32)))
+ return -EFAULT;
- err = __sctp_setsockopt_connectx(sk,
- (struct sockaddr __user *)param.addrs,
- param.addr_num, &assoc_id);
+ param.assoc_id = param32.assoc_id;
+ param.addr_num = param32.addr_num;
+ param.addrs = compat_ptr(param32.addrs);
+ } else
+#endif
+ {
+ if (len < sizeof(param))
+ return -EINVAL;
+ if (copy_from_user(&param, optval, sizeof(param)))
+ return -EFAULT;
+ }
+ err = __sctp_setsockopt_connectx(sk, (struct sockaddr __user *)
+ param.addrs, param.addr_num,
+ &assoc_id);
if (err == 0 || err == -EINPROGRESS) {
if (copy_to_user(optval, &assoc_id, sizeof(assoc_id)))
return -EFAULT;
@@ -2092,12 +2115,6 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
sctp_skb_pull(skb, copied);
skb_queue_head(&sk->sk_receive_queue, skb);
- /* When only partial message is copied to the user, increase
- * rwnd by that amount. If all the data in the skb is read,
- * rwnd is updated when the event is freed.
- */
- if (!sctp_ulpevent_is_notification(event))
- sctp_assoc_rwnd_increase(event->asoc, copied);
goto out;
} else if ((event->msg_flags & MSG_NOTIFICATION) ||
(event->msg_flags & MSG_EOR))
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 7135e617ab0f..35c8923b5554 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -151,6 +151,7 @@ static struct ctl_table sctp_net_table[] = {
},
{
.procname = "cookie_hmac_alg",
+ .data = &init_net.sctp.sctp_hmac_alg,
.maxlen = 8,
.mode = 0644,
.proc_handler = proc_sctp_do_hmac_alg,
@@ -401,15 +402,18 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write,
int sctp_sysctl_net_register(struct net *net)
{
- struct ctl_table *table;
- int i;
+ struct ctl_table *table = sctp_net_table;
+
+ if (!net_eq(net, &init_net)) {
+ int i;
- table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL);
- if (!table)
- return -ENOMEM;
+ table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
- for (i = 0; table[i].data; i++)
- table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp;
+ for (i = 0; table[i].data; i++)
+ table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp;
+ }
net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table);
return 0;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index d0810dc5f079..1d348d15b33d 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -652,5 +652,4 @@ void sctp_transport_immediate_rtx(struct sctp_transport *t)
if (!mod_timer(&t->T3_rtx_timer, jiffies + t->rto))
sctp_transport_hold(t);
}
- return;
}
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 85c64658bd0b..8d198ae03606 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -989,7 +989,7 @@ static void sctp_ulpevent_receive_data(struct sctp_ulpevent *event,
skb = sctp_event2skb(event);
/* Set the owner and charge rwnd for bytes received. */
sctp_ulpevent_set_owner(event, asoc);
- sctp_assoc_rwnd_decrease(asoc, skb_headlen(skb));
+ sctp_assoc_rwnd_update(asoc, false);
if (!skb->data_len)
return;
@@ -1011,6 +1011,7 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event)
{
struct sk_buff *skb, *frag;
unsigned int len;
+ struct sctp_association *asoc;
/* Current stack structures assume that the rcv buffer is
* per socket. For UDP style sockets this is not true as
@@ -1035,8 +1036,11 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event)
}
done:
- sctp_assoc_rwnd_increase(event->asoc, len);
+ asoc = event->asoc;
+ sctp_association_hold(asoc);
sctp_ulpevent_release_owner(event);
+ sctp_assoc_rwnd_update(asoc, true);
+ sctp_association_put(asoc);
}
static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event)
diff --git a/net/socket.c b/net/socket.c
index 879933aaed4c..f25eaa30b690 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -450,16 +450,17 @@ EXPORT_SYMBOL(sockfd_lookup);
static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
{
- struct file *file;
+ struct fd f = fdget(fd);
struct socket *sock;
*err = -EBADF;
- file = fget_light(fd, fput_needed);
- if (file) {
- sock = sock_from_file(file, err);
- if (sock)
+ if (f.file) {
+ sock = sock_from_file(f.file, err);
+ if (likely(sock)) {
+ *fput_needed = f.flags;
return sock;
- fput_light(file, *fput_needed);
+ }
+ fdput(f);
}
return NULL;
}
@@ -593,7 +594,7 @@ void sock_release(struct socket *sock)
}
if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
- printk(KERN_ERR "sock_release: fasync list not empty!\n");
+ pr_err("%s: fasync list not empty!\n", __func__);
if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
return;
@@ -1265,8 +1266,8 @@ int __sock_create(struct net *net, int family, int type, int protocol,
static int warned;
if (!warned) {
warned = 1;
- printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
- current->comm);
+ pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
+ current->comm);
}
family = PF_PACKET;
}
@@ -1985,6 +1986,10 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
{
if (copy_from_user(kmsg, umsg, sizeof(struct msghdr)))
return -EFAULT;
+
+ if (kmsg->msg_namelen < 0)
+ return -EINVAL;
+
if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
kmsg->msg_namelen = sizeof(struct sockaddr_storage);
return 0;
@@ -2595,8 +2600,7 @@ int sock_register(const struct net_proto_family *ops)
int err;
if (ops->family >= NPROTO) {
- printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
- NPROTO);
+ pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
return -ENOBUFS;
}
@@ -2610,7 +2614,7 @@ int sock_register(const struct net_proto_family *ops)
}
spin_unlock(&net_family_lock);
- printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
+ pr_info("NET: Registered protocol family %d\n", ops->family);
return err;
}
EXPORT_SYMBOL(sock_register);
@@ -2638,7 +2642,7 @@ void sock_unregister(int family)
synchronize_rcu();
- printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
+ pr_info("NET: Unregistered protocol family %d\n", family);
}
EXPORT_SYMBOL(sock_unregister);
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 6c0513a7f992..36e431ee1c90 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -108,6 +108,7 @@ struct gss_auth {
static DEFINE_SPINLOCK(pipe_version_lock);
static struct rpc_wait_queue pipe_version_rpc_waitqueue;
static DECLARE_WAIT_QUEUE_HEAD(pipe_version_waitqueue);
+static void gss_put_auth(struct gss_auth *gss_auth);
static void gss_free_ctx(struct gss_cl_ctx *);
static const struct rpc_pipe_ops gss_upcall_ops_v0;
@@ -320,6 +321,7 @@ gss_release_msg(struct gss_upcall_msg *gss_msg)
if (gss_msg->ctx != NULL)
gss_put_ctx(gss_msg->ctx);
rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue);
+ gss_put_auth(gss_msg->auth);
kfree(gss_msg);
}
@@ -498,9 +500,12 @@ gss_alloc_msg(struct gss_auth *gss_auth,
default:
err = gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name);
if (err)
- goto err_free_msg;
+ goto err_put_pipe_version;
};
+ kref_get(&gss_auth->kref);
return gss_msg;
+err_put_pipe_version:
+ put_pipe_version(gss_auth->net);
err_free_msg:
kfree(gss_msg);
err:
@@ -991,6 +996,8 @@ gss_create_new(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
if (gss_auth->service == 0)
goto err_put_mech;
+ if (!gssd_running(gss_auth->net))
+ goto err_put_mech;
auth = &gss_auth->rpc_auth;
auth->au_cslack = GSS_CRED_SLACK >> 2;
auth->au_rslack = GSS_VERF_SLACK >> 2;
@@ -1062,6 +1069,12 @@ gss_free_callback(struct kref *kref)
}
static void
+gss_put_auth(struct gss_auth *gss_auth)
+{
+ kref_put(&gss_auth->kref, gss_free_callback);
+}
+
+static void
gss_destroy(struct rpc_auth *auth)
{
struct gss_auth *gss_auth = container_of(auth,
@@ -1082,7 +1095,7 @@ gss_destroy(struct rpc_auth *auth)
gss_auth->gss_pipe[1] = NULL;
rpcauth_destroy_credcache(auth);
- kref_put(&gss_auth->kref, gss_free_callback);
+ gss_put_auth(gss_auth);
}
/*
@@ -1253,7 +1266,7 @@ gss_destroy_nullcred(struct rpc_cred *cred)
call_rcu(&cred->cr_rcu, gss_free_cred_callback);
if (ctx)
gss_put_ctx(ctx);
- kref_put(&gss_auth->kref, gss_free_callback);
+ gss_put_auth(gss_auth);
}
static void
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 890a29912d5a..e860d4f7ed2a 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -64,7 +64,6 @@ static void xprt_free_allocation(struct rpc_rqst *req)
free_page((unsigned long)xbufp->head[0].iov_base);
xbufp = &req->rq_snd_buf;
free_page((unsigned long)xbufp->head[0].iov_base);
- list_del(&req->rq_bc_pa_list);
kfree(req);
}
@@ -168,8 +167,10 @@ out_free:
/*
* Memory allocation failed, free the temporary list
*/
- list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list)
+ list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) {
+ list_del(&req->rq_bc_pa_list);
xprt_free_allocation(req);
+ }
dprintk("RPC: setup backchannel transport failed\n");
return -ENOMEM;
@@ -198,6 +199,7 @@ void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs)
xprt_dec_alloc_count(xprt, max_reqs);
list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) {
dprintk("RPC: req=%p\n", req);
+ list_del(&req->rq_bc_pa_list);
xprt_free_allocation(req);
if (--max_reqs == 0)
break;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 80a6640f329b..06c6ff0cb911 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -571,7 +571,7 @@ static void svc_check_conn_limits(struct svc_serv *serv)
}
}
-int svc_alloc_arg(struct svc_rqst *rqstp)
+static int svc_alloc_arg(struct svc_rqst *rqstp)
{
struct svc_serv *serv = rqstp->rq_server;
struct xdr_buf *arg;
@@ -612,7 +612,7 @@ int svc_alloc_arg(struct svc_rqst *rqstp)
return 0;
}
-struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
+static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
{
struct svc_xprt *xprt;
struct svc_pool *pool = rqstp->rq_pool;
@@ -691,7 +691,7 @@ struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
return xprt;
}
-void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
+static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
{
spin_lock_bh(&serv->sv_lock);
set_bit(XPT_TEMP, &newxpt->xpt_flags);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 817a1e523969..0addefca8e77 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -510,6 +510,7 @@ static int xs_nospace(struct rpc_task *task)
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ struct sock *sk = transport->inet;
int ret = -EAGAIN;
dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
@@ -527,7 +528,7 @@ static int xs_nospace(struct rpc_task *task)
* window size
*/
set_bit(SOCK_NOSPACE, &transport->sock->flags);
- transport->inet->sk_write_pending++;
+ sk->sk_write_pending++;
/* ...and wait for more buffer space */
xprt_wait_for_buffer_space(task, xs_nospace_callback);
}
@@ -537,6 +538,9 @@ static int xs_nospace(struct rpc_task *task)
}
spin_unlock_bh(&xprt->transport_lock);
+
+ /* Race breaker in case memory is freed before above code is called */
+ sk->sk_write_space(sk);
return ret;
}
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 60b00ab93d74..a74acf9ee804 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -37,6 +37,8 @@
#ifndef _TIPC_ADDR_H
#define _TIPC_ADDR_H
+#include "core.h"
+
#define TIPC_ZONE_MASK 0xff000000u
#define TIPC_CLUSTER_MASK 0xfffff000u
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index bf860d9e75af..e0feb7ef1469 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -356,9 +356,9 @@ static void bclink_peek_nack(struct tipc_msg *msg)
}
/*
- * tipc_bclink_send_msg - broadcast a packet to all nodes in cluster
+ * tipc_bclink_xmit - broadcast a packet to all nodes in cluster
*/
-int tipc_bclink_send_msg(struct sk_buff *buf)
+int tipc_bclink_xmit(struct sk_buff *buf)
{
int res;
@@ -370,7 +370,7 @@ int tipc_bclink_send_msg(struct sk_buff *buf)
goto exit;
}
- res = tipc_link_send_buf(bcl, buf);
+ res = __tipc_link_xmit(bcl, buf);
if (likely(res >= 0)) {
bclink_set_last_sent();
bcl->stats.queue_sz_counts++;
@@ -399,19 +399,18 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno)
*/
if (((seqno - tipc_own_addr) % TIPC_MIN_LINK_WIN) == 0) {
- tipc_link_send_proto_msg(
- node->active_links[node->addr & 1],
- STATE_MSG, 0, 0, 0, 0, 0);
+ tipc_link_proto_xmit(node->active_links[node->addr & 1],
+ STATE_MSG, 0, 0, 0, 0, 0);
bcl->stats.sent_acks++;
}
}
/**
- * tipc_bclink_recv_pkt - receive a broadcast packet, and deliver upwards
+ * tipc_bclink_rcv - receive a broadcast packet, and deliver upwards
*
* tipc_net_lock is read_locked, no other locks set
*/
-void tipc_bclink_recv_pkt(struct sk_buff *buf)
+void tipc_bclink_rcv(struct sk_buff *buf)
{
struct tipc_msg *msg = buf_msg(buf);
struct tipc_node *node;
@@ -468,7 +467,7 @@ receive:
spin_unlock_bh(&bc_lock);
tipc_node_unlock(node);
if (likely(msg_mcast(msg)))
- tipc_port_recv_mcast(buf, NULL);
+ tipc_port_mcast_rcv(buf, NULL);
else
kfree_skb(buf);
} else if (msg_user(msg) == MSG_BUNDLER) {
@@ -478,12 +477,12 @@ receive:
bcl->stats.recv_bundled += msg_msgcnt(msg);
spin_unlock_bh(&bc_lock);
tipc_node_unlock(node);
- tipc_link_recv_bundle(buf);
+ tipc_link_bundle_rcv(buf);
} else if (msg_user(msg) == MSG_FRAGMENTER) {
int ret;
- ret = tipc_link_recv_fragment(&node->bclink.reasm_head,
- &node->bclink.reasm_tail,
- &buf);
+ ret = tipc_link_frag_rcv(&node->bclink.reasm_head,
+ &node->bclink.reasm_tail,
+ &buf);
if (ret == LINK_REASM_ERROR)
goto unlock;
spin_lock_bh(&bc_lock);
@@ -503,7 +502,7 @@ receive:
bclink_accept_pkt(node, seqno);
spin_unlock_bh(&bc_lock);
tipc_node_unlock(node);
- tipc_named_recv(buf);
+ tipc_named_rcv(buf);
} else {
spin_lock_bh(&bc_lock);
bclink_accept_pkt(node, seqno);
@@ -785,7 +784,6 @@ void tipc_bclink_init(void)
bcl->owner = &bclink->node;
bcl->max_pkt = MAX_PKT_DEFAULT_MCAST;
tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT);
- spin_lock_init(&bcbearer->bearer.lock);
bcl->b_ptr = &bcbearer->bearer;
bcl->state = WORKING_WORKING;
strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 6ee587b469fd..a80ef54b818e 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -90,8 +90,8 @@ void tipc_bclink_add_node(u32 addr);
void tipc_bclink_remove_node(u32 addr);
struct tipc_node *tipc_bclink_retransmit_to(void);
void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked);
-int tipc_bclink_send_msg(struct sk_buff *buf);
-void tipc_bclink_recv_pkt(struct sk_buff *buf);
+int tipc_bclink_xmit(struct sk_buff *buf);
+void tipc_bclink_rcv(struct sk_buff *buf);
u32 tipc_bclink_get_last_sent(void);
u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr);
void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index a38c89969c68..7f1f95c57476 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -51,7 +51,7 @@ static struct tipc_media * const media_info_array[] = {
struct tipc_bearer tipc_bearers[MAX_BEARERS];
-static void bearer_disable(struct tipc_bearer *b_ptr);
+static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down);
/**
* tipc_media_find - locates specified media object by name
@@ -327,12 +327,10 @@ restart:
b_ptr->net_plane = bearer_id + 'A';
b_ptr->active = 1;
b_ptr->priority = priority;
- INIT_LIST_HEAD(&b_ptr->links);
- spin_lock_init(&b_ptr->lock);
res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr, disc_domain);
if (res) {
- bearer_disable(b_ptr);
+ bearer_disable(b_ptr, false);
pr_warn("Bearer <%s> rejected, discovery object creation failed\n",
name);
goto exit;
@@ -350,20 +348,9 @@ exit:
*/
static int tipc_reset_bearer(struct tipc_bearer *b_ptr)
{
- struct tipc_link *l_ptr;
- struct tipc_link *temp_l_ptr;
-
read_lock_bh(&tipc_net_lock);
pr_info("Resetting bearer <%s>\n", b_ptr->name);
- spin_lock_bh(&b_ptr->lock);
- list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
- struct tipc_node *n_ptr = l_ptr->owner;
-
- spin_lock_bh(&n_ptr->lock);
- tipc_link_reset(l_ptr);
- spin_unlock_bh(&n_ptr->lock);
- }
- spin_unlock_bh(&b_ptr->lock);
+ tipc_link_reset_list(b_ptr->identity);
read_unlock_bh(&tipc_net_lock);
return 0;
}
@@ -373,25 +360,14 @@ static int tipc_reset_bearer(struct tipc_bearer *b_ptr)
*
* Note: This routine assumes caller holds tipc_net_lock.
*/
-static void bearer_disable(struct tipc_bearer *b_ptr)
+static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down)
{
- struct tipc_link *l_ptr;
- struct tipc_link *temp_l_ptr;
- struct tipc_link_req *temp_req;
-
pr_info("Disabling bearer <%s>\n", b_ptr->name);
- spin_lock_bh(&b_ptr->lock);
b_ptr->media->disable_media(b_ptr);
- list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
- tipc_link_delete(l_ptr);
- }
- temp_req = b_ptr->link_req;
- b_ptr->link_req = NULL;
- spin_unlock_bh(&b_ptr->lock);
-
- if (temp_req)
- tipc_disc_delete(temp_req);
+ tipc_link_delete_list(b_ptr->identity, shutting_down);
+ if (b_ptr->link_req)
+ tipc_disc_delete(b_ptr->link_req);
memset(b_ptr, 0, sizeof(struct tipc_bearer));
}
@@ -406,7 +382,7 @@ int tipc_disable_bearer(const char *name)
pr_warn("Attempt to disable unknown bearer <%s>\n", name);
res = -EINVAL;
} else {
- bearer_disable(b_ptr);
+ bearer_disable(b_ptr, false);
res = 0;
}
write_unlock_bh(&tipc_net_lock);
@@ -599,7 +575,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
}
static struct packet_type tipc_packet_type __read_mostly = {
- .type = __constant_htons(ETH_P_TIPC),
+ .type = htons(ETH_P_TIPC),
.func = tipc_l2_rcv_msg,
};
@@ -610,8 +586,13 @@ static struct notifier_block notifier = {
int tipc_bearer_setup(void)
{
+ int err;
+
+ err = register_netdevice_notifier(&notifier);
+ if (err)
+ return err;
dev_add_pack(&tipc_packet_type);
- return register_netdevice_notifier(&notifier);
+ return 0;
}
void tipc_bearer_cleanup(void)
@@ -626,6 +607,6 @@ void tipc_bearer_stop(void)
for (i = 0; i < MAX_BEARERS; i++) {
if (tipc_bearers[i].active)
- bearer_disable(&tipc_bearers[i]);
+ bearer_disable(&tipc_bearers[i], true);
}
}
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 4f5db9ad5bf6..425dd8107a8f 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -107,10 +107,8 @@ struct tipc_media {
/**
* struct tipc_bearer - Generic TIPC bearer structure
- * @dev: ptr to associated network device
- * @usr_handle: pointer to additional media-specific information about bearer
+ * @media_ptr: pointer to additional media-specific information about bearer
* @mtu: max packet size bearer can support
- * @lock: spinlock for controlling access to bearer
* @addr: media-specific address associated with bearer
* @name: bearer name (format = media:interface)
* @media: ptr to media structure associated with bearer
@@ -120,7 +118,6 @@ struct tipc_media {
* @tolerance: default link tolerance for bearer
* @identity: array index of this bearer within TIPC bearer array
* @link_req: ptr to (optional) structure making periodic link setup requests
- * @links: list of non-congested links associated with bearer
* @active: non-zero if bearer structure is represents a bearer
* @net_plane: network plane ('A' through 'H') currently associated with bearer
* @nodes: indicates which nodes in cluster can be reached through bearer
@@ -134,7 +131,6 @@ struct tipc_bearer {
u32 mtu; /* initalized by media */
struct tipc_media_addr addr; /* initalized by media */
char name[TIPC_MAX_BEARER_NAME];
- spinlock_t lock;
struct tipc_media *media;
struct tipc_media_addr bcast_addr;
u32 priority;
@@ -142,7 +138,6 @@ struct tipc_bearer {
u32 tolerance;
u32 identity;
struct tipc_link_req *link_req;
- struct list_head links;
int active;
char net_plane;
struct tipc_node_map nodes;
diff --git a/net/tipc/config.c b/net/tipc/config.c
index c301a9a592d8..e6d721692ae0 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -181,7 +181,7 @@ static struct sk_buff *cfg_set_own_addr(void)
if (tipc_own_addr)
return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
" (cannot change node address once assigned)");
- tipc_core_start_net(addr);
+ tipc_net_start(addr);
return tipc_cfg_reply_none();
}
@@ -376,7 +376,6 @@ static void cfg_conn_msg_event(int conid, struct sockaddr_tipc *addr,
struct tipc_cfg_msg_hdr *req_hdr;
struct tipc_cfg_msg_hdr *rep_hdr;
struct sk_buff *rep_buf;
- int ret;
/* Validate configuration message header (ignore invalid message) */
req_hdr = (struct tipc_cfg_msg_hdr *)buf;
@@ -398,12 +397,8 @@ static void cfg_conn_msg_event(int conid, struct sockaddr_tipc *addr,
memcpy(rep_hdr, req_hdr, sizeof(*rep_hdr));
rep_hdr->tcm_len = htonl(rep_buf->len);
rep_hdr->tcm_flags &= htons(~TCM_F_REQUEST);
-
- ret = tipc_conn_sendmsg(&cfgsrv, conid, addr, rep_buf->data,
- rep_buf->len);
- if (ret < 0)
- pr_err("Sending cfg reply message failed, no memory\n");
-
+ tipc_conn_sendmsg(&cfgsrv, conid, addr, rep_buf->data,
+ rep_buf->len);
kfree_skb(rep_buf);
}
}
diff --git a/net/tipc/core.c b/net/tipc/core.c
index f9e88d8b04ca..e2491b341edb 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -1,7 +1,7 @@
/*
* net/tipc/core.c: TIPC module code
*
- * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2003-2006, 2013, Ericsson AB
* Copyright (c) 2005-2006, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -77,37 +77,13 @@ struct sk_buff *tipc_buf_acquire(u32 size)
}
/**
- * tipc_core_stop_net - shut down TIPC networking sub-systems
- */
-static void tipc_core_stop_net(void)
-{
- tipc_net_stop();
- tipc_bearer_cleanup();
-}
-
-/**
- * start_net - start TIPC networking sub-systems
- */
-int tipc_core_start_net(unsigned long addr)
-{
- int res;
-
- tipc_net_start(addr);
- res = tipc_bearer_setup();
- if (res < 0)
- goto err;
- return res;
-
-err:
- tipc_core_stop_net();
- return res;
-}
-
-/**
* tipc_core_stop - switch TIPC from SINGLE NODE to NOT RUNNING mode
*/
static void tipc_core_stop(void)
{
+ tipc_handler_stop();
+ tipc_net_stop();
+ tipc_bearer_cleanup();
tipc_netlink_stop();
tipc_cfg_stop();
tipc_subscr_stop();
@@ -122,30 +98,65 @@ static void tipc_core_stop(void)
*/
static int tipc_core_start(void)
{
- int res;
+ int err;
get_random_bytes(&tipc_random, sizeof(tipc_random));
- res = tipc_handler_start();
- if (!res)
- res = tipc_ref_table_init(tipc_max_ports, tipc_random);
- if (!res)
- res = tipc_nametbl_init();
- if (!res)
- res = tipc_netlink_start();
- if (!res)
- res = tipc_socket_init();
- if (!res)
- res = tipc_register_sysctl();
- if (!res)
- res = tipc_subscr_start();
- if (!res)
- res = tipc_cfg_init();
- if (res) {
- tipc_handler_stop();
- tipc_core_stop();
- }
- return res;
+ err = tipc_handler_start();
+ if (err)
+ goto out_handler;
+
+ err = tipc_ref_table_init(tipc_max_ports, tipc_random);
+ if (err)
+ goto out_reftbl;
+
+ err = tipc_nametbl_init();
+ if (err)
+ goto out_nametbl;
+
+ err = tipc_netlink_start();
+ if (err)
+ goto out_netlink;
+
+ err = tipc_socket_init();
+ if (err)
+ goto out_socket;
+
+ err = tipc_register_sysctl();
+ if (err)
+ goto out_sysctl;
+
+ err = tipc_subscr_start();
+ if (err)
+ goto out_subscr;
+
+ err = tipc_cfg_init();
+ if (err)
+ goto out_cfg;
+
+ err = tipc_bearer_setup();
+ if (err)
+ goto out_bearer;
+
+ return 0;
+out_bearer:
+ tipc_cfg_stop();
+out_cfg:
+ tipc_subscr_stop();
+out_subscr:
+ tipc_unregister_sysctl();
+out_sysctl:
+ tipc_socket_stop();
+out_socket:
+ tipc_netlink_stop();
+out_netlink:
+ tipc_nametbl_stop();
+out_nametbl:
+ tipc_ref_table_stop();
+out_reftbl:
+ tipc_handler_stop();
+out_handler:
+ return err;
}
static int __init tipc_init(void)
@@ -174,8 +185,6 @@ static int __init tipc_init(void)
static void __exit tipc_exit(void)
{
- tipc_handler_stop();
- tipc_core_stop_net();
tipc_core_stop();
pr_info("Deactivated\n");
}
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 1ff477b0450d..4dfe137587bb 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -90,7 +90,6 @@ extern int tipc_random __read_mostly;
/*
* Routines available to privileged subsystems
*/
-int tipc_core_start_net(unsigned long);
int tipc_handler_start(void);
void tipc_handler_stop(void);
int tipc_netlink_start(void);
@@ -192,6 +191,7 @@ static inline void k_term_timer(struct timer_list *timer)
struct tipc_skb_cb {
void *handle;
+ bool deferred;
};
#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0]))
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 412ff41b8611..fa94da6db3d4 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -110,11 +110,11 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr,
}
/**
- * tipc_disc_recv_msg - handle incoming link setup message (request or response)
+ * tipc_disc_rcv - handle incoming link setup message (request or response)
* @buf: buffer containing message
* @b_ptr: bearer that message arrived on
*/
-void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr)
+void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr)
{
struct tipc_node *n_ptr;
struct tipc_link *link;
diff --git a/net/tipc/discover.h b/net/tipc/discover.h
index 75b67c403aa3..b4fc962c3623 100644
--- a/net/tipc/discover.h
+++ b/net/tipc/discover.h
@@ -44,6 +44,6 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest,
void tipc_disc_delete(struct tipc_link_req *req);
void tipc_disc_add_dest(struct tipc_link_req *req);
void tipc_disc_remove_dest(struct tipc_link_req *req);
-void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr);
+void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr);
#endif
diff --git a/net/tipc/handler.c b/net/tipc/handler.c
index e4bc8a296744..1fabf160501f 100644
--- a/net/tipc/handler.c
+++ b/net/tipc/handler.c
@@ -58,7 +58,6 @@ unsigned int tipc_k_signal(Handler routine, unsigned long argument)
spin_lock_bh(&qitem_lock);
if (!handler_enabled) {
- pr_err("Signal request ignored by handler\n");
spin_unlock_bh(&qitem_lock);
return -ENOPROTOOPT;
}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index d4b5de41b682..a42f4a1d3cd1 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -77,19 +77,19 @@ static const char *link_unk_evt = "Unknown link event ";
static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr,
struct sk_buff *buf);
-static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf);
-static int tipc_link_tunnel_rcv(struct tipc_link **l_ptr,
+static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf);
+static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr,
struct sk_buff **buf);
static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance);
-static int link_send_sections_long(struct tipc_port *sender,
- struct iovec const *msg_sect,
- unsigned int len, u32 destnode);
+static int tipc_link_iovec_long_xmit(struct tipc_port *sender,
+ struct iovec const *msg_sect,
+ unsigned int len, u32 destnode);
static void link_state_event(struct tipc_link *l_ptr, u32 event);
static void link_reset_statistics(struct tipc_link *l_ptr);
static void link_print(struct tipc_link *l_ptr, const char *str);
-static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf);
-static void tipc_link_send_sync(struct tipc_link *l);
-static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf);
+static int tipc_link_frag_xmit(struct tipc_link *l_ptr, struct sk_buff *buf);
+static void tipc_link_sync_xmit(struct tipc_link *l);
+static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf);
/*
* Simple link routines
@@ -147,11 +147,6 @@ int tipc_link_is_active(struct tipc_link *l_ptr)
/**
* link_timeout - handle expiration of link timer
* @l_ptr: pointer to link
- *
- * This routine must not grab "tipc_net_lock" to avoid a potential deadlock conflict
- * with tipc_link_delete(). (There is no risk that the node will be deleted by
- * another thread because tipc_link_delete() always cancels the link timer before
- * tipc_node_delete() is called.)
*/
static void link_timeout(struct tipc_link *l_ptr)
{
@@ -213,8 +208,8 @@ static void link_set_timer(struct tipc_link *l_ptr, u32 time)
* Returns pointer to link.
*/
struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
- struct tipc_bearer *b_ptr,
- const struct tipc_media_addr *media_addr)
+ struct tipc_bearer *b_ptr,
+ const struct tipc_media_addr *media_addr)
{
struct tipc_link *l_ptr;
struct tipc_msg *msg;
@@ -279,41 +274,43 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
k_init_timer(&l_ptr->timer, (Handler)link_timeout,
(unsigned long)l_ptr);
- list_add_tail(&l_ptr->link_list, &b_ptr->links);
link_state_event(l_ptr, STARTING_EVT);
return l_ptr;
}
-/**
- * tipc_link_delete - delete a link
- * @l_ptr: pointer to link
- *
- * Note: 'tipc_net_lock' is write_locked, bearer is locked.
- * This routine must not grab the node lock until after link timer cancellation
- * to avoid a potential deadlock situation.
- */
-void tipc_link_delete(struct tipc_link *l_ptr)
-{
- if (!l_ptr) {
- pr_err("Attempt to delete non-existent link\n");
- return;
- }
- k_cancel_timer(&l_ptr->timer);
+void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down)
+{
+ struct tipc_link *l_ptr;
+ struct tipc_node *n_ptr;
- tipc_node_lock(l_ptr->owner);
- tipc_link_reset(l_ptr);
- tipc_node_detach_link(l_ptr->owner, l_ptr);
- tipc_link_purge_queues(l_ptr);
- list_del_init(&l_ptr->link_list);
- tipc_node_unlock(l_ptr->owner);
- k_term_timer(&l_ptr->timer);
- kfree(l_ptr);
+ list_for_each_entry(n_ptr, &tipc_node_list, list) {
+ spin_lock_bh(&n_ptr->lock);
+ l_ptr = n_ptr->links[bearer_id];
+ if (l_ptr) {
+ tipc_link_reset(l_ptr);
+ if (shutting_down || !tipc_node_is_up(n_ptr)) {
+ tipc_node_detach_link(l_ptr->owner, l_ptr);
+ tipc_link_reset_fragments(l_ptr);
+ spin_unlock_bh(&n_ptr->lock);
+
+ /* Nobody else can access this link now: */
+ del_timer_sync(&l_ptr->timer);
+ kfree(l_ptr);
+ } else {
+ /* Detach/delete when failover is finished: */
+ l_ptr->flags |= LINK_STOPPED;
+ spin_unlock_bh(&n_ptr->lock);
+ del_timer_sync(&l_ptr->timer);
+ }
+ continue;
+ }
+ spin_unlock_bh(&n_ptr->lock);
+ }
}
-
/**
* link_schedule_port - schedule port for deferred sending
* @l_ptr: pointer to link
@@ -330,8 +327,6 @@ static int link_schedule_port(struct tipc_link *l_ptr, u32 origport, u32 sz)
spin_lock_bh(&tipc_port_list_lock);
p_ptr = tipc_port_lock(origport);
if (p_ptr) {
- if (!p_ptr->wakeup)
- goto exit;
if (!list_empty(&p_ptr->wait_list))
goto exit;
p_ptr->congested = 1;
@@ -366,7 +361,7 @@ void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all)
list_del_init(&p_ptr->wait_list);
spin_lock_bh(p_ptr->lock);
p_ptr->congested = 0;
- p_ptr->wakeup(p_ptr);
+ tipc_port_wakeup(p_ptr);
win -= p_ptr->waiting_pkts;
spin_unlock_bh(p_ptr->lock);
}
@@ -461,6 +456,19 @@ void tipc_link_reset(struct tipc_link *l_ptr)
link_reset_statistics(l_ptr);
}
+void tipc_link_reset_list(unsigned int bearer_id)
+{
+ struct tipc_link *l_ptr;
+ struct tipc_node *n_ptr;
+
+ list_for_each_entry(n_ptr, &tipc_node_list, list) {
+ spin_lock_bh(&n_ptr->lock);
+ l_ptr = n_ptr->links[bearer_id];
+ if (l_ptr)
+ tipc_link_reset(l_ptr);
+ spin_unlock_bh(&n_ptr->lock);
+ }
+}
static void link_activate(struct tipc_link *l_ptr)
{
@@ -479,7 +487,10 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
struct tipc_link *other;
u32 cont_intv = l_ptr->continuity_interval;
- if (!l_ptr->started && (event != STARTING_EVT))
+ if (l_ptr->flags & LINK_STOPPED)
+ return;
+
+ if (!(l_ptr->flags & LINK_STARTED) && (event != STARTING_EVT))
return; /* Not yet. */
/* Check whether changeover is going on */
@@ -499,12 +510,12 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
if (l_ptr->next_in_no != l_ptr->checkpoint) {
l_ptr->checkpoint = l_ptr->next_in_no;
if (tipc_bclink_acks_missing(l_ptr->owner)) {
- tipc_link_send_proto_msg(l_ptr, STATE_MSG,
- 0, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG,
+ 0, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
} else if (l_ptr->max_pkt < l_ptr->max_pkt_target) {
- tipc_link_send_proto_msg(l_ptr, STATE_MSG,
- 1, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG,
+ 1, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
}
link_set_timer(l_ptr, cont_intv);
@@ -512,7 +523,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
}
l_ptr->state = WORKING_UNKNOWN;
l_ptr->fsm_msg_cnt = 0;
- tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
link_set_timer(l_ptr, cont_intv / 4);
break;
@@ -522,7 +533,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
tipc_link_reset(l_ptr);
l_ptr->state = RESET_RESET;
l_ptr->fsm_msg_cnt = 0;
- tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
+ 0, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
link_set_timer(l_ptr, cont_intv);
break;
@@ -544,7 +556,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
tipc_link_reset(l_ptr);
l_ptr->state = RESET_RESET;
l_ptr->fsm_msg_cnt = 0;
- tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
+ 0, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
link_set_timer(l_ptr, cont_intv);
break;
@@ -554,14 +567,14 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
l_ptr->fsm_msg_cnt = 0;
l_ptr->checkpoint = l_ptr->next_in_no;
if (tipc_bclink_acks_missing(l_ptr->owner)) {
- tipc_link_send_proto_msg(l_ptr, STATE_MSG,
- 0, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG,
+ 0, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
}
link_set_timer(l_ptr, cont_intv);
} else if (l_ptr->fsm_msg_cnt < l_ptr->abort_limit) {
- tipc_link_send_proto_msg(l_ptr, STATE_MSG,
- 1, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG,
+ 1, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
link_set_timer(l_ptr, cont_intv / 4);
} else { /* Link has failed */
@@ -570,8 +583,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
tipc_link_reset(l_ptr);
l_ptr->state = RESET_UNKNOWN;
l_ptr->fsm_msg_cnt = 0;
- tipc_link_send_proto_msg(l_ptr, RESET_MSG,
- 0, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, RESET_MSG,
+ 0, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
link_set_timer(l_ptr, cont_intv);
}
@@ -591,24 +604,25 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
l_ptr->state = WORKING_WORKING;
l_ptr->fsm_msg_cnt = 0;
link_activate(l_ptr);
- tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
if (l_ptr->owner->working_links == 1)
- tipc_link_send_sync(l_ptr);
+ tipc_link_sync_xmit(l_ptr);
link_set_timer(l_ptr, cont_intv);
break;
case RESET_MSG:
l_ptr->state = RESET_RESET;
l_ptr->fsm_msg_cnt = 0;
- tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 1, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
+ 1, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
link_set_timer(l_ptr, cont_intv);
break;
case STARTING_EVT:
- l_ptr->started = 1;
+ l_ptr->flags |= LINK_STARTED;
/* fall through */
case TIMEOUT_EVT:
- tipc_link_send_proto_msg(l_ptr, RESET_MSG, 0, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
link_set_timer(l_ptr, cont_intv);
break;
@@ -626,16 +640,17 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
l_ptr->state = WORKING_WORKING;
l_ptr->fsm_msg_cnt = 0;
link_activate(l_ptr);
- tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
if (l_ptr->owner->working_links == 1)
- tipc_link_send_sync(l_ptr);
+ tipc_link_sync_xmit(l_ptr);
link_set_timer(l_ptr, cont_intv);
break;
case RESET_MSG:
break;
case TIMEOUT_EVT:
- tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
+ 0, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
link_set_timer(l_ptr, cont_intv);
break;
@@ -721,11 +736,11 @@ static void link_add_chain_to_outqueue(struct tipc_link *l_ptr,
}
/*
- * tipc_link_send_buf() is the 'full path' for messages, called from
- * inside TIPC when the 'fast path' in tipc_send_buf
+ * tipc_link_xmit() is the 'full path' for messages, called from
+ * inside TIPC when the 'fast path' in tipc_send_xmit
* has failed, and from link_send()
*/
-int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf)
+int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf)
{
struct tipc_msg *msg = buf_msg(buf);
u32 size = msg_size(msg);
@@ -753,7 +768,7 @@ int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf)
/* Fragmentation needed ? */
if (size > max_packet)
- return link_send_long_buf(l_ptr, buf);
+ return tipc_link_frag_xmit(l_ptr, buf);
/* Packet can be queued or sent. */
if (likely(!link_congested(l_ptr))) {
@@ -797,11 +812,11 @@ int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf)
}
/*
- * tipc_link_send(): same as tipc_link_send_buf(), but the link to use has
- * not been selected yet, and the the owner node is not locked
+ * tipc_link_xmit(): same as __tipc_link_xmit(), but the link to use
+ * has not been selected yet, and the the owner node is not locked
* Called by TIPC internal users, e.g. the name distributor
*/
-int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector)
+int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector)
{
struct tipc_link *l_ptr;
struct tipc_node *n_ptr;
@@ -813,7 +828,7 @@ int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector)
tipc_node_lock(n_ptr);
l_ptr = n_ptr->active_links[selector & 1];
if (l_ptr)
- res = tipc_link_send_buf(l_ptr, buf);
+ res = __tipc_link_xmit(l_ptr, buf);
else
kfree_skb(buf);
tipc_node_unlock(n_ptr);
@@ -825,14 +840,14 @@ int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector)
}
/*
- * tipc_link_send_sync - synchronize broadcast link endpoints.
+ * tipc_link_sync_xmit - synchronize broadcast link endpoints.
*
* Give a newly added peer node the sequence number where it should
* start receiving and acking broadcast packets.
*
* Called with node locked
*/
-static void tipc_link_send_sync(struct tipc_link *l)
+static void tipc_link_sync_xmit(struct tipc_link *l)
{
struct sk_buff *buf;
struct tipc_msg *msg;
@@ -849,14 +864,14 @@ static void tipc_link_send_sync(struct tipc_link *l)
}
/*
- * tipc_link_recv_sync - synchronize broadcast link endpoints.
+ * tipc_link_sync_rcv - synchronize broadcast link endpoints.
* Receive the sequence number where we should start receiving and
* acking broadcast packets from a newly added peer node, and open
* up for reception of such packets.
*
* Called with node locked
*/
-static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf)
+static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf)
{
struct tipc_msg *msg = buf_msg(buf);
@@ -866,7 +881,7 @@ static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf)
}
/*
- * tipc_link_send_names - send name table entries to new neighbor
+ * tipc_link_names_xmit - send name table entries to new neighbor
*
* Send routine for bulk delivery of name table messages when contact
* with a new neighbor occurs. No link congestion checking is performed
@@ -874,7 +889,7 @@ static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf)
* small enough not to require fragmentation.
* Called without any locks held.
*/
-void tipc_link_send_names(struct list_head *message_list, u32 dest)
+void tipc_link_names_xmit(struct list_head *message_list, u32 dest)
{
struct tipc_node *n_ptr;
struct tipc_link *l_ptr;
@@ -909,13 +924,13 @@ void tipc_link_send_names(struct list_head *message_list, u32 dest)
}
/*
- * link_send_buf_fast: Entry for data messages where the
+ * tipc_link_xmit_fast: Entry for data messages where the
* destination link is known and the header is complete,
* inclusive total message length. Very time critical.
* Link is locked. Returns user data length.
*/
-static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf,
- u32 *used_max_pkt)
+static int tipc_link_xmit_fast(struct tipc_link *l_ptr, struct sk_buff *buf,
+ u32 *used_max_pkt)
{
struct tipc_msg *msg = buf_msg(buf);
int res = msg_data_sz(msg);
@@ -931,18 +946,18 @@ static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf,
else
*used_max_pkt = l_ptr->max_pkt;
}
- return tipc_link_send_buf(l_ptr, buf); /* All other cases */
+ return __tipc_link_xmit(l_ptr, buf); /* All other cases */
}
/*
- * tipc_link_send_sections_fast: Entry for messages where the
+ * tipc_link_iovec_xmit_fast: Entry for messages where the
* destination processor is known and the header is complete,
* except for total message length.
* Returns user data length or errno.
*/
-int tipc_link_send_sections_fast(struct tipc_port *sender,
- struct iovec const *msg_sect,
- unsigned int len, u32 destaddr)
+int tipc_link_iovec_xmit_fast(struct tipc_port *sender,
+ struct iovec const *msg_sect,
+ unsigned int len, u32 destaddr)
{
struct tipc_msg *hdr = &sender->phdr;
struct tipc_link *l_ptr;
@@ -968,8 +983,8 @@ again:
l_ptr = node->active_links[selector];
if (likely(l_ptr)) {
if (likely(buf)) {
- res = link_send_buf_fast(l_ptr, buf,
- &sender->max_pkt);
+ res = tipc_link_xmit_fast(l_ptr, buf,
+ &sender->max_pkt);
exit:
tipc_node_unlock(node);
read_unlock_bh(&tipc_net_lock);
@@ -995,24 +1010,21 @@ exit:
if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt)
goto again;
- return link_send_sections_long(sender, msg_sect, len,
- destaddr);
+ return tipc_link_iovec_long_xmit(sender, msg_sect,
+ len, destaddr);
}
tipc_node_unlock(node);
}
read_unlock_bh(&tipc_net_lock);
/* Couldn't find a link to the destination node */
- if (buf)
- return tipc_reject_msg(buf, TIPC_ERR_NO_NODE);
- if (res >= 0)
- return tipc_port_reject_sections(sender, hdr, msg_sect,
- len, TIPC_ERR_NO_NODE);
- return res;
+ kfree_skb(buf);
+ tipc_port_iovec_reject(sender, hdr, msg_sect, len, TIPC_ERR_NO_NODE);
+ return -ENETUNREACH;
}
/*
- * link_send_sections_long(): Entry for long messages where the
+ * tipc_link_iovec_long_xmit(): Entry for long messages where the
* destination node is known and the header is complete,
* inclusive total message length.
* Link and bearer congestion status have been checked to be ok,
@@ -1025,9 +1037,9 @@ exit:
*
* Returns user data length or errno.
*/
-static int link_send_sections_long(struct tipc_port *sender,
- struct iovec const *msg_sect,
- unsigned int len, u32 destaddr)
+static int tipc_link_iovec_long_xmit(struct tipc_port *sender,
+ struct iovec const *msg_sect,
+ unsigned int len, u32 destaddr)
{
struct tipc_link *l_ptr;
struct tipc_node *node;
@@ -1146,8 +1158,9 @@ error:
} else {
reject:
kfree_skb_list(buf_chain);
- return tipc_port_reject_sections(sender, hdr, msg_sect,
- len, TIPC_ERR_NO_NODE);
+ tipc_port_iovec_reject(sender, hdr, msg_sect, len,
+ TIPC_ERR_NO_NODE);
+ return -ENETUNREACH;
}
/* Append chain of fragments to send queue & send them */
@@ -1391,6 +1404,12 @@ static int link_recv_buf_validate(struct sk_buff *buf)
u32 hdr_size;
u32 min_hdr_size;
+ /* If this packet comes from the defer queue, the skb has already
+ * been validated
+ */
+ if (unlikely(TIPC_SKB_CB(buf)->deferred))
+ return 1;
+
if (unlikely(buf->len < MIN_H_SIZE))
return 0;
@@ -1435,7 +1454,6 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr)
u32 seq_no;
u32 ackd;
u32 released = 0;
- int type;
head = head->next;
buf->next = NULL;
@@ -1457,9 +1475,9 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr)
if (unlikely(msg_non_seq(msg))) {
if (msg_user(msg) == LINK_CONFIG)
- tipc_disc_recv_msg(buf, b_ptr);
+ tipc_disc_rcv(buf, b_ptr);
else
- tipc_bclink_recv_pkt(buf);
+ tipc_bclink_rcv(buf);
continue;
}
@@ -1483,7 +1501,7 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr)
if ((n_ptr->block_setup & WAIT_PEER_DOWN) &&
msg_user(msg) == LINK_PROTOCOL &&
(msg_type(msg) == RESET_MSG ||
- msg_type(msg) == ACTIVATE_MSG) &&
+ msg_type(msg) == ACTIVATE_MSG) &&
!msg_redundant_link(msg))
n_ptr->block_setup &= ~WAIT_PEER_DOWN;
@@ -1502,7 +1520,6 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr)
while ((crs != l_ptr->next_out) &&
less_eq(buf_seqno(crs), ackd)) {
struct sk_buff *next = crs->next;
-
kfree_skb(crs);
crs = next;
released++;
@@ -1515,18 +1532,19 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr)
/* Try sending any messages link endpoint has pending */
if (unlikely(l_ptr->next_out))
tipc_link_push_queue(l_ptr);
+
if (unlikely(!list_empty(&l_ptr->waiting_ports)))
tipc_link_wakeup_ports(l_ptr, 0);
+
if (unlikely(++l_ptr->unacked_window >= TIPC_MIN_LINK_WIN)) {
l_ptr->stats.sent_acks++;
- tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0);
}
- /* Now (finally!) process the incoming message */
-protocol_check:
+ /* Process the incoming packet */
if (unlikely(!link_working_working(l_ptr))) {
if (msg_user(msg) == LINK_PROTOCOL) {
- link_recv_proto_msg(l_ptr, buf);
+ tipc_link_proto_rcv(l_ptr, buf);
head = link_insert_deferred_queue(l_ptr, head);
tipc_node_unlock(n_ptr);
continue;
@@ -1555,67 +1573,65 @@ protocol_check:
l_ptr->next_in_no++;
if (unlikely(l_ptr->oldest_deferred_in))
head = link_insert_deferred_queue(l_ptr, head);
-deliver:
- if (likely(msg_isdata(msg))) {
- tipc_node_unlock(n_ptr);
- tipc_port_recv_msg(buf);
- continue;
+
+ /* Deliver packet/message to correct user: */
+ if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL)) {
+ if (!tipc_link_tunnel_rcv(n_ptr, &buf)) {
+ tipc_node_unlock(n_ptr);
+ continue;
+ }
+ msg = buf_msg(buf);
+ } else if (msg_user(msg) == MSG_FRAGMENTER) {
+ int rc;
+
+ l_ptr->stats.recv_fragments++;
+ rc = tipc_link_frag_rcv(&l_ptr->reasm_head,
+ &l_ptr->reasm_tail,
+ &buf);
+ if (rc == LINK_REASM_COMPLETE) {
+ l_ptr->stats.recv_fragmented++;
+ msg = buf_msg(buf);
+ } else {
+ if (rc == LINK_REASM_ERROR)
+ tipc_link_reset(l_ptr);
+ tipc_node_unlock(n_ptr);
+ continue;
+ }
}
+
switch (msg_user(msg)) {
- int ret;
+ case TIPC_LOW_IMPORTANCE:
+ case TIPC_MEDIUM_IMPORTANCE:
+ case TIPC_HIGH_IMPORTANCE:
+ case TIPC_CRITICAL_IMPORTANCE:
+ tipc_node_unlock(n_ptr);
+ tipc_port_rcv(buf);
+ continue;
case MSG_BUNDLER:
l_ptr->stats.recv_bundles++;
l_ptr->stats.recv_bundled += msg_msgcnt(msg);
tipc_node_unlock(n_ptr);
- tipc_link_recv_bundle(buf);
+ tipc_link_bundle_rcv(buf);
continue;
case NAME_DISTRIBUTOR:
n_ptr->bclink.recv_permitted = true;
tipc_node_unlock(n_ptr);
- tipc_named_recv(buf);
- continue;
- case BCAST_PROTOCOL:
- tipc_link_recv_sync(n_ptr, buf);
- tipc_node_unlock(n_ptr);
+ tipc_named_rcv(buf);
continue;
case CONN_MANAGER:
tipc_node_unlock(n_ptr);
- tipc_port_recv_proto_msg(buf);
- continue;
- case MSG_FRAGMENTER:
- l_ptr->stats.recv_fragments++;
- ret = tipc_link_recv_fragment(&l_ptr->reasm_head,
- &l_ptr->reasm_tail,
- &buf);
- if (ret == LINK_REASM_COMPLETE) {
- l_ptr->stats.recv_fragmented++;
- msg = buf_msg(buf);
- goto deliver;
- }
- if (ret == LINK_REASM_ERROR)
- tipc_link_reset(l_ptr);
- tipc_node_unlock(n_ptr);
+ tipc_port_proto_rcv(buf);
continue;
- case CHANGEOVER_PROTOCOL:
- type = msg_type(msg);
- if (tipc_link_tunnel_rcv(&l_ptr, &buf)) {
- msg = buf_msg(buf);
- seq_no = msg_seqno(msg);
- if (type == ORIGINAL_MSG)
- goto deliver;
- goto protocol_check;
- }
+ case BCAST_PROTOCOL:
+ tipc_link_sync_rcv(n_ptr, buf);
break;
default:
kfree_skb(buf);
- buf = NULL;
break;
}
tipc_node_unlock(n_ptr);
- tipc_net_route_msg(buf);
continue;
unlock_discard:
-
tipc_node_unlock(n_ptr);
discard:
kfree_skb(buf);
@@ -1682,7 +1698,7 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr,
u32 seq_no = buf_seqno(buf);
if (likely(msg_user(buf_msg(buf)) == LINK_PROTOCOL)) {
- link_recv_proto_msg(l_ptr, buf);
+ tipc_link_proto_rcv(l_ptr, buf);
return;
}
@@ -1703,8 +1719,9 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr,
&l_ptr->newest_deferred_in, buf)) {
l_ptr->deferred_inqueue_sz++;
l_ptr->stats.deferred_recv++;
+ TIPC_SKB_CB(buf)->deferred = true;
if ((l_ptr->deferred_inqueue_sz % 16) == 1)
- tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0);
} else
l_ptr->stats.duplicates++;
}
@@ -1712,9 +1729,8 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr,
/*
* Send protocol message to the other endpoint.
*/
-void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ,
- int probe_msg, u32 gap, u32 tolerance,
- u32 priority, u32 ack_mtu)
+void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg,
+ u32 gap, u32 tolerance, u32 priority, u32 ack_mtu)
{
struct sk_buff *buf = NULL;
struct tipc_msg *msg = l_ptr->pmsg;
@@ -1813,7 +1829,7 @@ void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ,
* Note that network plane id propagates through the network, and may
* change at any time. The node with lowest address rules
*/
-static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf)
+static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf)
{
u32 rec_gap = 0;
u32 max_pkt_info;
@@ -1932,8 +1948,8 @@ static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf)
msg_last_bcast(msg));
if (rec_gap || (msg_probe(msg))) {
- tipc_link_send_proto_msg(l_ptr, STATE_MSG,
- 0, rec_gap, 0, 0, max_pkt_ack);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, rec_gap, 0,
+ 0, max_pkt_ack);
}
if (msg_seq_gap(msg)) {
l_ptr->stats.recv_nacks++;
@@ -1972,7 +1988,7 @@ static void tipc_link_tunnel_xmit(struct tipc_link *l_ptr,
}
skb_copy_to_linear_data(buf, tunnel_hdr, INT_H_SIZE);
skb_copy_to_linear_data_offset(buf, INT_H_SIZE, msg, length);
- tipc_link_send_buf(tunnel, buf);
+ __tipc_link_xmit(tunnel, buf);
}
@@ -2005,7 +2021,7 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr)
if (buf) {
skb_copy_to_linear_data(buf, &tunnel_hdr, INT_H_SIZE);
msg_set_size(&tunnel_hdr, INT_H_SIZE);
- tipc_link_send_buf(tunnel, buf);
+ __tipc_link_xmit(tunnel, buf);
} else {
pr_warn("%sunable to send changeover msg\n",
link_co_err);
@@ -2039,7 +2055,7 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr)
}
}
-/* tipc_link_dup_send_queue(): A second link has become active. Tunnel a
+/* tipc_link_dup_queue_xmit(): A second link has become active. Tunnel a
* duplicate of the first link's send queue via the new link. This way, we
* are guaranteed that currently queued packets from a socket are delivered
* before future traffic from the same socket, even if this is using the
@@ -2048,7 +2064,7 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr)
* and sequence order is preserved per sender/receiver socket pair.
* Owner node is locked.
*/
-void tipc_link_dup_send_queue(struct tipc_link *l_ptr,
+void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr,
struct tipc_link *tunnel)
{
struct sk_buff *iter;
@@ -2078,7 +2094,7 @@ void tipc_link_dup_send_queue(struct tipc_link *l_ptr,
skb_copy_to_linear_data(outbuf, &tunnel_hdr, INT_H_SIZE);
skb_copy_to_linear_data_offset(outbuf, INT_H_SIZE, iter->data,
length);
- tipc_link_send_buf(tunnel, outbuf);
+ __tipc_link_xmit(tunnel, outbuf);
if (!tipc_link_is_up(l_ptr))
return;
iter = iter->next;
@@ -2105,89 +2121,114 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos)
return eb;
}
-/* tipc_link_tunnel_rcv(): Receive a tunneled packet, sent
- * via other link as result of a failover (ORIGINAL_MSG) or
- * a new active link (DUPLICATE_MSG). Failover packets are
- * returned to the active link for delivery upwards.
+
+
+/* tipc_link_dup_rcv(): Receive a tunnelled DUPLICATE_MSG packet.
+ * Owner node is locked.
+ */
+static void tipc_link_dup_rcv(struct tipc_link *l_ptr,
+ struct sk_buff *t_buf)
+{
+ struct sk_buff *buf;
+
+ if (!tipc_link_is_up(l_ptr))
+ return;
+
+ buf = buf_extract(t_buf, INT_H_SIZE);
+ if (buf == NULL) {
+ pr_warn("%sfailed to extract inner dup pkt\n", link_co_err);
+ return;
+ }
+
+ /* Add buffer to deferred queue, if applicable: */
+ link_handle_out_of_seq_msg(l_ptr, buf);
+}
+
+/* tipc_link_failover_rcv(): Receive a tunnelled ORIGINAL_MSG packet
* Owner node is locked.
*/
-static int tipc_link_tunnel_rcv(struct tipc_link **l_ptr,
- struct sk_buff **buf)
+static struct sk_buff *tipc_link_failover_rcv(struct tipc_link *l_ptr,
+ struct sk_buff *t_buf)
{
- struct sk_buff *tunnel_buf = *buf;
- struct tipc_link *dest_link;
+ struct tipc_msg *t_msg = buf_msg(t_buf);
+ struct sk_buff *buf = NULL;
struct tipc_msg *msg;
- struct tipc_msg *tunnel_msg = buf_msg(tunnel_buf);
- u32 msg_typ = msg_type(tunnel_msg);
- u32 msg_count = msg_msgcnt(tunnel_msg);
- u32 bearer_id = msg_bearer_id(tunnel_msg);
- if (bearer_id >= MAX_BEARERS)
- goto exit;
- dest_link = (*l_ptr)->owner->links[bearer_id];
- if (!dest_link)
- goto exit;
- if (dest_link == *l_ptr) {
- pr_err("Unexpected changeover message on link <%s>\n",
- (*l_ptr)->name);
- goto exit;
- }
- *l_ptr = dest_link;
- msg = msg_get_wrapped(tunnel_msg);
+ if (tipc_link_is_up(l_ptr))
+ tipc_link_reset(l_ptr);
- if (msg_typ == DUPLICATE_MSG) {
- if (less(msg_seqno(msg), mod(dest_link->next_in_no)))
- goto exit;
- *buf = buf_extract(tunnel_buf, INT_H_SIZE);
- if (*buf == NULL) {
- pr_warn("%sduplicate msg dropped\n", link_co_err);
+ /* First failover packet? */
+ if (l_ptr->exp_msg_count == START_CHANGEOVER)
+ l_ptr->exp_msg_count = msg_msgcnt(t_msg);
+
+ /* Should there be an inner packet? */
+ if (l_ptr->exp_msg_count) {
+ l_ptr->exp_msg_count--;
+ buf = buf_extract(t_buf, INT_H_SIZE);
+ if (buf == NULL) {
+ pr_warn("%sno inner failover pkt\n", link_co_err);
goto exit;
}
- kfree_skb(tunnel_buf);
- return 1;
- }
+ msg = buf_msg(buf);
- /* First original message ?: */
- if (tipc_link_is_up(dest_link)) {
- pr_info("%s<%s>, changeover initiated by peer\n", link_rst_msg,
- dest_link->name);
- tipc_link_reset(dest_link);
- dest_link->exp_msg_count = msg_count;
- if (!msg_count)
- goto exit;
- } else if (dest_link->exp_msg_count == START_CHANGEOVER) {
- dest_link->exp_msg_count = msg_count;
- if (!msg_count)
+ if (less(msg_seqno(msg), l_ptr->reset_checkpoint)) {
+ kfree_skb(buf);
+ buf = NULL;
goto exit;
+ }
+ if (msg_user(msg) == MSG_FRAGMENTER) {
+ l_ptr->stats.recv_fragments++;
+ tipc_link_frag_rcv(&l_ptr->reasm_head,
+ &l_ptr->reasm_tail,
+ &buf);
+ }
}
+exit:
+ if ((l_ptr->exp_msg_count == 0) && (l_ptr->flags & LINK_STOPPED)) {
+ tipc_node_detach_link(l_ptr->owner, l_ptr);
+ kfree(l_ptr);
+ }
+ return buf;
+}
+
+/* tipc_link_tunnel_rcv(): Receive a tunnelled packet, sent
+ * via other link as result of a failover (ORIGINAL_MSG) or
+ * a new active link (DUPLICATE_MSG). Failover packets are
+ * returned to the active link for delivery upwards.
+ * Owner node is locked.
+ */
+static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr,
+ struct sk_buff **buf)
+{
+ struct sk_buff *t_buf = *buf;
+ struct tipc_link *l_ptr;
+ struct tipc_msg *t_msg = buf_msg(t_buf);
+ u32 bearer_id = msg_bearer_id(t_msg);
+
+ *buf = NULL;
- /* Receive original message */
- if (dest_link->exp_msg_count == 0) {
- pr_warn("%sgot too many tunnelled messages\n", link_co_err);
+ if (bearer_id >= MAX_BEARERS)
goto exit;
- }
- dest_link->exp_msg_count--;
- if (less(msg_seqno(msg), dest_link->reset_checkpoint)) {
+
+ l_ptr = n_ptr->links[bearer_id];
+ if (!l_ptr)
goto exit;
- } else {
- *buf = buf_extract(tunnel_buf, INT_H_SIZE);
- if (*buf != NULL) {
- kfree_skb(tunnel_buf);
- return 1;
- } else {
- pr_warn("%soriginal msg dropped\n", link_co_err);
- }
- }
+
+ if (msg_type(t_msg) == DUPLICATE_MSG)
+ tipc_link_dup_rcv(l_ptr, t_buf);
+ else if (msg_type(t_msg) == ORIGINAL_MSG)
+ *buf = tipc_link_failover_rcv(l_ptr, t_buf);
+ else
+ pr_warn("%sunknown tunnel pkt received\n", link_co_err);
exit:
- *buf = NULL;
- kfree_skb(tunnel_buf);
- return 0;
+ kfree_skb(t_buf);
+ return *buf != NULL;
}
/*
* Bundler functionality:
*/
-void tipc_link_recv_bundle(struct sk_buff *buf)
+void tipc_link_bundle_rcv(struct sk_buff *buf)
{
u32 msgcount = msg_msgcnt(buf_msg(buf));
u32 pos = INT_H_SIZE;
@@ -2210,11 +2251,11 @@ void tipc_link_recv_bundle(struct sk_buff *buf)
*/
/*
- * link_send_long_buf: Entry for buffers needing fragmentation.
+ * tipc_link_frag_xmit: Entry for buffers needing fragmentation.
* The buffer is complete, inclusive total message length.
* Returns user data length.
*/
-static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf)
+static int tipc_link_frag_xmit(struct tipc_link *l_ptr, struct sk_buff *buf)
{
struct sk_buff *buf_chain = NULL;
struct sk_buff *buf_chain_tail = (struct sk_buff *)&buf_chain;
@@ -2277,12 +2318,11 @@ static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf)
return dsz;
}
-/*
- * tipc_link_recv_fragment(): Called with node lock on. Returns
+/* tipc_link_frag_rcv(): Called with node lock on. Returns
* the reassembled buffer if message is complete.
*/
-int tipc_link_recv_fragment(struct sk_buff **head, struct sk_buff **tail,
- struct sk_buff **fbuf)
+int tipc_link_frag_rcv(struct sk_buff **head, struct sk_buff **tail,
+ struct sk_buff **fbuf)
{
struct sk_buff *frag = *fbuf;
struct tipc_msg *msg = buf_msg(frag);
@@ -2296,6 +2336,7 @@ int tipc_link_recv_fragment(struct sk_buff **head, struct sk_buff **tail,
goto out_free;
*head = frag;
skb_frag_list_init(*head);
+ *fbuf = NULL;
return 0;
} else if (*head &&
skb_try_coalesce(*head, frag, &headstolen, &delta)) {
@@ -2315,10 +2356,12 @@ int tipc_link_recv_fragment(struct sk_buff **head, struct sk_buff **tail,
*tail = *head = NULL;
return LINK_REASM_COMPLETE;
}
+ *fbuf = NULL;
return 0;
out_free:
pr_warn_ratelimited("Link unable to reassemble fragmented message\n");
kfree_skb(*fbuf);
+ *fbuf = NULL;
return LINK_REASM_ERROR;
}
@@ -2352,35 +2395,40 @@ void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window)
l_ptr->queue_limit[MSG_FRAGMENTER] = 4000;
}
-/**
- * link_find_link - locate link by name
- * @name: ptr to link name string
- * @node: ptr to area to be filled with ptr to associated node
- *
+/* tipc_link_find_owner - locate owner node of link by link's name
+ * @name: pointer to link name string
+ * @bearer_id: pointer to index in 'node->links' array where the link was found.
* Caller must hold 'tipc_net_lock' to ensure node and bearer are not deleted;
* this also prevents link deletion.
*
- * Returns pointer to link (or 0 if invalid link name).
+ * Returns pointer to node owning the link, or 0 if no matching link is found.
*/
-static struct tipc_link *link_find_link(const char *name,
- struct tipc_node **node)
+static struct tipc_node *tipc_link_find_owner(const char *link_name,
+ unsigned int *bearer_id)
{
struct tipc_link *l_ptr;
struct tipc_node *n_ptr;
+ struct tipc_node *tmp_n_ptr;
+ struct tipc_node *found_node = 0;
+
int i;
- list_for_each_entry(n_ptr, &tipc_node_list, list) {
+ *bearer_id = 0;
+ list_for_each_entry_safe(n_ptr, tmp_n_ptr, &tipc_node_list, list) {
+ tipc_node_lock(n_ptr);
for (i = 0; i < MAX_BEARERS; i++) {
l_ptr = n_ptr->links[i];
- if (l_ptr && !strcmp(l_ptr->name, name))
- goto found;
+ if (l_ptr && !strcmp(l_ptr->name, link_name)) {
+ *bearer_id = i;
+ found_node = n_ptr;
+ break;
+ }
}
+ tipc_node_unlock(n_ptr);
+ if (found_node)
+ break;
}
- l_ptr = NULL;
- n_ptr = NULL;
-found:
- *node = n_ptr;
- return l_ptr;
+ return found_node;
}
/**
@@ -2422,32 +2470,33 @@ static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd)
struct tipc_link *l_ptr;
struct tipc_bearer *b_ptr;
struct tipc_media *m_ptr;
+ int bearer_id;
int res = 0;
- l_ptr = link_find_link(name, &node);
- if (l_ptr) {
- /*
- * acquire node lock for tipc_link_send_proto_msg().
- * see "TIPC locking policy" in net.c.
- */
+ node = tipc_link_find_owner(name, &bearer_id);
+ if (node) {
tipc_node_lock(node);
- switch (cmd) {
- case TIPC_CMD_SET_LINK_TOL:
- link_set_supervision_props(l_ptr, new_value);
- tipc_link_send_proto_msg(l_ptr,
- STATE_MSG, 0, 0, new_value, 0, 0);
- break;
- case TIPC_CMD_SET_LINK_PRI:
- l_ptr->priority = new_value;
- tipc_link_send_proto_msg(l_ptr,
- STATE_MSG, 0, 0, 0, new_value, 0);
- break;
- case TIPC_CMD_SET_LINK_WINDOW:
- tipc_link_set_queue_limits(l_ptr, new_value);
- break;
- default:
- res = -EINVAL;
- break;
+ l_ptr = node->links[bearer_id];
+
+ if (l_ptr) {
+ switch (cmd) {
+ case TIPC_CMD_SET_LINK_TOL:
+ link_set_supervision_props(l_ptr, new_value);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0,
+ new_value, 0, 0);
+ break;
+ case TIPC_CMD_SET_LINK_PRI:
+ l_ptr->priority = new_value;
+ tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0,
+ 0, new_value, 0);
+ break;
+ case TIPC_CMD_SET_LINK_WINDOW:
+ tipc_link_set_queue_limits(l_ptr, new_value);
+ break;
+ default:
+ res = -EINVAL;
+ break;
+ }
}
tipc_node_unlock(node);
return res;
@@ -2542,6 +2591,7 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_
char *link_name;
struct tipc_link *l_ptr;
struct tipc_node *node;
+ unsigned int bearer_id;
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME))
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
@@ -2552,15 +2602,19 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_
return tipc_cfg_reply_error_string("link not found");
return tipc_cfg_reply_none();
}
-
read_lock_bh(&tipc_net_lock);
- l_ptr = link_find_link(link_name, &node);
- if (!l_ptr) {
+ node = tipc_link_find_owner(link_name, &bearer_id);
+ if (!node) {
read_unlock_bh(&tipc_net_lock);
return tipc_cfg_reply_error_string("link not found");
}
-
tipc_node_lock(node);
+ l_ptr = node->links[bearer_id];
+ if (!l_ptr) {
+ tipc_node_unlock(node);
+ read_unlock_bh(&tipc_net_lock);
+ return tipc_cfg_reply_error_string("link not found");
+ }
link_reset_statistics(l_ptr);
tipc_node_unlock(node);
read_unlock_bh(&tipc_net_lock);
@@ -2590,18 +2644,27 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size)
struct tipc_node *node;
char *status;
u32 profile_total = 0;
+ unsigned int bearer_id;
int ret;
if (!strcmp(name, tipc_bclink_name))
return tipc_bclink_stats(buf, buf_size);
read_lock_bh(&tipc_net_lock);
- l = link_find_link(name, &node);
- if (!l) {
+ node = tipc_link_find_owner(name, &bearer_id);
+ if (!node) {
read_unlock_bh(&tipc_net_lock);
return 0;
}
tipc_node_lock(node);
+
+ l = node->links[bearer_id];
+ if (!l) {
+ tipc_node_unlock(node);
+ read_unlock_bh(&tipc_net_lock);
+ return 0;
+ }
+
s = &l->stats;
if (tipc_link_is_active(l))
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 3b6aa65b608c..8c0b49b5b2ee 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -1,7 +1,7 @@
/*
* net/tipc/link.h: Include file for TIPC link code
*
- * Copyright (c) 1995-2006, Ericsson AB
+ * Copyright (c) 1995-2006, 2013, Ericsson AB
* Copyright (c) 2004-2005, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -40,27 +40,28 @@
#include "msg.h"
#include "node.h"
-/*
- * Link reassembly status codes
+/* Link reassembly status codes
*/
#define LINK_REASM_ERROR -1
#define LINK_REASM_COMPLETE 1
-/*
- * Out-of-range value for link sequence numbers
+/* Out-of-range value for link sequence numbers
*/
#define INVALID_LINK_SEQ 0x10000
-/*
- * Link states
+/* Link working states
*/
#define WORKING_WORKING 560810u
#define WORKING_UNKNOWN 560811u
#define RESET_UNKNOWN 560812u
#define RESET_RESET 560813u
-/*
- * Starting value for maximum packet size negotiation on unicast links
+/* Link endpoint execution states
+ */
+#define LINK_STARTED 0x0001
+#define LINK_STOPPED 0x0002
+
+/* Starting value for maximum packet size negotiation on unicast links
* (unless bearer MTU is less)
*/
#define MAX_PKT_DEFAULT 1500
@@ -102,8 +103,7 @@ struct tipc_stats {
* @media_addr: media address to use when sending messages over link
* @timer: link timer
* @owner: pointer to peer node
- * @link_list: adjacent links in bearer's list of links
- * @started: indicates if link has been started
+ * @flags: execution state flags for link endpoint instance
* @checkpoint: reference point for triggering link continuity checking
* @peer_session: link session # being used by peer end of link
* @peer_bearer_id: bearer id used by link's peer endpoint
@@ -149,10 +149,9 @@ struct tipc_link {
struct tipc_media_addr media_addr;
struct timer_list timer;
struct tipc_node *owner;
- struct list_head link_list;
/* Management and link supervision data */
- int started;
+ unsigned int flags;
u32 checkpoint;
u32 peer_session;
u32 peer_bearer_id;
@@ -215,10 +214,9 @@ struct tipc_port;
struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
struct tipc_bearer *b_ptr,
const struct tipc_media_addr *media_addr);
-void tipc_link_delete(struct tipc_link *l_ptr);
+void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down);
void tipc_link_failover_send_queue(struct tipc_link *l_ptr);
-void tipc_link_dup_send_queue(struct tipc_link *l_ptr,
- struct tipc_link *dest);
+void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct tipc_link *dest);
void tipc_link_reset_fragments(struct tipc_link *l_ptr);
int tipc_link_is_up(struct tipc_link *l_ptr);
int tipc_link_is_active(struct tipc_link *l_ptr);
@@ -231,23 +229,24 @@ struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area,
struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area,
int req_tlv_space);
void tipc_link_reset(struct tipc_link *l_ptr);
-int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector);
-void tipc_link_send_names(struct list_head *message_list, u32 dest);
+void tipc_link_reset_list(unsigned int bearer_id);
+int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector);
+void tipc_link_names_xmit(struct list_head *message_list, u32 dest);
+int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf);
int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf);
u32 tipc_link_get_max_pkt(u32 dest, u32 selector);
-int tipc_link_send_sections_fast(struct tipc_port *sender,
- struct iovec const *msg_sect,
- unsigned int len, u32 destnode);
-void tipc_link_recv_bundle(struct sk_buff *buf);
-int tipc_link_recv_fragment(struct sk_buff **reasm_head,
- struct sk_buff **reasm_tail,
- struct sk_buff **fbuf);
-void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, int prob,
- u32 gap, u32 tolerance, u32 priority,
- u32 acked_mtu);
+int tipc_link_iovec_xmit_fast(struct tipc_port *sender,
+ struct iovec const *msg_sect,
+ unsigned int len, u32 destnode);
+void tipc_link_bundle_rcv(struct sk_buff *buf);
+int tipc_link_frag_rcv(struct sk_buff **reasm_head,
+ struct sk_buff **reasm_tail,
+ struct sk_buff **fbuf);
+void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob,
+ u32 gap, u32 tolerance, u32 priority, u32 acked_mtu);
void tipc_link_push_queue(struct tipc_link *l_ptr);
u32 tipc_link_defer_pkt(struct sk_buff **head, struct sk_buff **tail,
- struct sk_buff *buf);
+ struct sk_buff *buf);
void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all);
void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window);
void tipc_link_retransmit(struct tipc_link *l_ptr,
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index e0d08055754e..893c49a3d98a 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -138,7 +138,7 @@ static void named_cluster_distribute(struct sk_buff *buf)
if (!buf_copy)
break;
msg_set_destnode(buf_msg(buf_copy), n_ptr->addr);
- tipc_link_send(buf_copy, n_ptr->addr, n_ptr->addr);
+ tipc_link_xmit(buf_copy, n_ptr->addr, n_ptr->addr);
}
}
@@ -262,7 +262,7 @@ void tipc_named_node_up(unsigned long nodearg)
named_distribute(&message_list, node, &publ_zone, max_item_buf);
read_unlock_bh(&tipc_nametbl_lock);
- tipc_link_send_names(&message_list, node);
+ tipc_link_names_xmit(&message_list, node);
}
/**
@@ -293,9 +293,9 @@ static void named_purge_publ(struct publication *publ)
}
/**
- * tipc_named_recv - process name table update message sent by another node
+ * tipc_named_rcv - process name table update message sent by another node
*/
-void tipc_named_recv(struct sk_buff *buf)
+void tipc_named_rcv(struct sk_buff *buf)
{
struct publication *publ;
struct tipc_msg *msg = buf_msg(buf);
diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
index 1e41bdd4f255..9b312ccfd43e 100644
--- a/net/tipc/name_distr.h
+++ b/net/tipc/name_distr.h
@@ -42,7 +42,7 @@
void tipc_named_publish(struct publication *publ);
void tipc_named_withdraw(struct publication *publ);
void tipc_named_node_up(unsigned long node);
-void tipc_named_recv(struct sk_buff *buf);
+void tipc_named_rcv(struct sk_buff *buf);
void tipc_named_reinit(void);
#endif
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 92a1533af4e0..042e8e3cabc0 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -941,20 +941,48 @@ int tipc_nametbl_init(void)
return 0;
}
-void tipc_nametbl_stop(void)
+/**
+ * tipc_purge_publications - remove all publications for a given type
+ *
+ * tipc_nametbl_lock must be held when calling this function
+ */
+static void tipc_purge_publications(struct name_seq *seq)
{
- u32 i;
+ struct publication *publ, *safe;
+ struct sub_seq *sseq;
+ struct name_info *info;
- if (!table.types)
+ if (!seq->sseqs) {
+ nameseq_delete_empty(seq);
return;
+ }
+ sseq = seq->sseqs;
+ info = sseq->info;
+ list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) {
+ tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node,
+ publ->ref, publ->key);
+ }
+}
+
+void tipc_nametbl_stop(void)
+{
+ u32 i;
+ struct name_seq *seq;
+ struct hlist_head *seq_head;
+ struct hlist_node *safe;
- /* Verify name table is empty, then release it */
+ /* Verify name table is empty and purge any lingering
+ * publications, then release the name table
+ */
write_lock_bh(&tipc_nametbl_lock);
for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
if (hlist_empty(&table.types[i]))
continue;
- pr_err("nametbl_stop(): orphaned hash chain detected\n");
- break;
+ seq_head = &table.types[i];
+ hlist_for_each_entry_safe(seq, safe, seq_head, ns_list) {
+ tipc_purge_publications(seq);
+ }
+ continue;
}
kfree(table.types);
table.types = NULL;
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 7d305ecc09c2..31b606e3916c 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -146,19 +146,19 @@ void tipc_net_route_msg(struct sk_buff *buf)
if (tipc_in_scope(dnode, tipc_own_addr)) {
if (msg_isdata(msg)) {
if (msg_mcast(msg))
- tipc_port_recv_mcast(buf, NULL);
+ tipc_port_mcast_rcv(buf, NULL);
else if (msg_destport(msg))
- tipc_port_recv_msg(buf);
+ tipc_port_rcv(buf);
else
net_route_named_msg(buf);
return;
}
switch (msg_user(msg)) {
case NAME_DISTRIBUTOR:
- tipc_named_recv(buf);
+ tipc_named_rcv(buf);
break;
case CONN_MANAGER:
- tipc_port_recv_proto_msg(buf);
+ tipc_port_proto_rcv(buf);
break;
default:
kfree_skb(buf);
@@ -168,7 +168,7 @@ void tipc_net_route_msg(struct sk_buff *buf)
/* Handle message for another node */
skb_trim(buf, msg_size(msg));
- tipc_link_send(buf, dnode, msg_link_selector(msg));
+ tipc_link_xmit(buf, dnode, msg_link_selector(msg));
}
void tipc_net_start(u32 addr)
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 9f72a6376362..3aaf73de9e2d 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -83,8 +83,6 @@ static struct genl_ops tipc_genl_ops[] = {
},
};
-static int tipc_genl_family_registered;
-
int tipc_netlink_start(void)
{
int res;
@@ -94,16 +92,10 @@ int tipc_netlink_start(void)
pr_err("Failed to register netlink interface\n");
return res;
}
-
- tipc_genl_family_registered = 1;
return 0;
}
void tipc_netlink_stop(void)
{
- if (!tipc_genl_family_registered)
- return;
-
genl_unregister_family(&tipc_genl_family);
- tipc_genl_family_registered = 0;
}
diff --git a/net/tipc/node.c b/net/tipc/node.c
index efe4d41bf11b..0b0f6c7da965 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -162,7 +162,7 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
pr_info("New link <%s> becomes standby\n", l_ptr->name);
return;
}
- tipc_link_dup_send_queue(active[0], l_ptr);
+ tipc_link_dup_queue_xmit(active[0], l_ptr);
if (l_ptr->priority == active[0]->priority) {
active[0] = l_ptr;
return;
@@ -249,9 +249,15 @@ void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
{
- n_ptr->links[l_ptr->b_ptr->identity] = NULL;
- atomic_dec(&tipc_num_links);
- n_ptr->link_cnt--;
+ int i;
+
+ for (i = 0; i < MAX_BEARERS; i++) {
+ if (l_ptr != n_ptr->links[i])
+ continue;
+ n_ptr->links[i] = NULL;
+ atomic_dec(&tipc_num_links);
+ n_ptr->link_cnt--;
+ }
}
static void node_established_contact(struct tipc_node *n_ptr)
diff --git a/net/tipc/port.c b/net/tipc/port.c
index b742b2654525..5c14c7801ee6 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -1,7 +1,7 @@
/*
* net/tipc/port.c: TIPC port code
*
- * Copyright (c) 1992-2007, Ericsson AB
+ * Copyright (c) 1992-2007, 2014, Ericsson AB
* Copyright (c) 2004-2008, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -38,6 +38,7 @@
#include "config.h"
#include "port.h"
#include "name_table.h"
+#include "socket.h"
/* Connection management: */
#define PROBING_INTERVAL 3600000 /* [ms] => 1 h */
@@ -54,17 +55,6 @@ static struct sk_buff *port_build_self_abort_msg(struct tipc_port *, u32 err);
static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *, u32 err);
static void port_timeout(unsigned long ref);
-
-static u32 port_peernode(struct tipc_port *p_ptr)
-{
- return msg_destnode(&p_ptr->phdr);
-}
-
-static u32 port_peerport(struct tipc_port *p_ptr)
-{
- return msg_destport(&p_ptr->phdr);
-}
-
/**
* tipc_port_peer_msg - verify message was sent by connected port's peer
*
@@ -76,33 +66,32 @@ int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg)
u32 peernode;
u32 orignode;
- if (msg_origport(msg) != port_peerport(p_ptr))
+ if (msg_origport(msg) != tipc_port_peerport(p_ptr))
return 0;
orignode = msg_orignode(msg);
- peernode = port_peernode(p_ptr);
+ peernode = tipc_port_peernode(p_ptr);
return (orignode == peernode) ||
(!orignode && (peernode == tipc_own_addr)) ||
(!peernode && (orignode == tipc_own_addr));
}
/**
- * tipc_multicast - send a multicast message to local and remote destinations
+ * tipc_port_mcast_xmit - send a multicast message to local and remote
+ * destinations
*/
-int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
- struct iovec const *msg_sect, unsigned int len)
+int tipc_port_mcast_xmit(struct tipc_port *oport,
+ struct tipc_name_seq const *seq,
+ struct iovec const *msg_sect,
+ unsigned int len)
{
struct tipc_msg *hdr;
struct sk_buff *buf;
struct sk_buff *ibuf = NULL;
struct tipc_port_list dports = {0, NULL, };
- struct tipc_port *oport = tipc_port_deref(ref);
int ext_targets;
int res;
- if (unlikely(!oport))
- return -EINVAL;
-
/* Create multicast message */
hdr = &oport->phdr;
msg_set_type(hdr, TIPC_MCAST_MSG);
@@ -131,7 +120,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
return -ENOMEM;
}
}
- res = tipc_bclink_send_msg(buf);
+ res = tipc_bclink_xmit(buf);
if ((res < 0) && (dports.count != 0))
kfree_skb(ibuf);
} else {
@@ -140,7 +129,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
if (res >= 0) {
if (ibuf)
- tipc_port_recv_mcast(ibuf, &dports);
+ tipc_port_mcast_rcv(ibuf, &dports);
} else {
tipc_port_list_free(&dports);
}
@@ -148,11 +137,11 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
}
/**
- * tipc_port_recv_mcast - deliver multicast message to all destination ports
+ * tipc_port_mcast_rcv - deliver multicast message to all destination ports
*
* If there is no port list, perform a lookup to create one
*/
-void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp)
+void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp)
{
struct tipc_msg *msg;
struct tipc_port_list dports = {0, NULL, };
@@ -176,7 +165,7 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp)
msg_set_destnode(msg, tipc_own_addr);
if (dp->count == 1) {
msg_set_destport(msg, dp->ports[0]);
- tipc_port_recv_msg(buf);
+ tipc_port_rcv(buf);
tipc_port_list_free(dp);
return;
}
@@ -191,7 +180,7 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp)
if ((index == 0) && (cnt != 0))
item = item->next;
msg_set_destport(buf_msg(b), item->ports[index]);
- tipc_port_recv_msg(b);
+ tipc_port_rcv(b);
}
}
exit:
@@ -199,40 +188,32 @@ exit:
tipc_port_list_free(dp);
}
-/**
- * tipc_createport - create a generic TIPC port
+
+void tipc_port_wakeup(struct tipc_port *port)
+{
+ tipc_sock_wakeup(tipc_port_to_sock(port));
+}
+
+/* tipc_port_init - intiate TIPC port and lock it
*
- * Returns pointer to (locked) TIPC port, or NULL if unable to create it
+ * Returns obtained reference if initialization is successful, zero otherwise
*/
-struct tipc_port *tipc_createport(struct sock *sk,
- u32 (*dispatcher)(struct tipc_port *,
- struct sk_buff *),
- void (*wakeup)(struct tipc_port *),
- const u32 importance)
+u32 tipc_port_init(struct tipc_port *p_ptr,
+ const unsigned int importance)
{
- struct tipc_port *p_ptr;
struct tipc_msg *msg;
u32 ref;
- p_ptr = kzalloc(sizeof(*p_ptr), GFP_ATOMIC);
- if (!p_ptr) {
- pr_warn("Port creation failed, no memory\n");
- return NULL;
- }
ref = tipc_ref_acquire(p_ptr, &p_ptr->lock);
if (!ref) {
- pr_warn("Port creation failed, ref. table exhausted\n");
- kfree(p_ptr);
- return NULL;
+ pr_warn("Port registration failed, ref. table exhausted\n");
+ return 0;
}
- p_ptr->sk = sk;
p_ptr->max_pkt = MAX_PKT_DEFAULT;
p_ptr->ref = ref;
INIT_LIST_HEAD(&p_ptr->wait_list);
INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list);
- p_ptr->dispatcher = dispatcher;
- p_ptr->wakeup = wakeup;
k_init_timer(&p_ptr->timer, (Handler)port_timeout, ref);
INIT_LIST_HEAD(&p_ptr->publications);
INIT_LIST_HEAD(&p_ptr->port_list);
@@ -248,10 +229,10 @@ struct tipc_port *tipc_createport(struct sock *sk,
msg_set_origport(msg, ref);
list_add_tail(&p_ptr->port_list, &ports);
spin_unlock_bh(&tipc_port_list_lock);
- return p_ptr;
+ return ref;
}
-int tipc_deleteport(struct tipc_port *p_ptr)
+void tipc_port_destroy(struct tipc_port *p_ptr)
{
struct sk_buff *buf = NULL;
@@ -272,67 +253,7 @@ int tipc_deleteport(struct tipc_port *p_ptr)
list_del(&p_ptr->wait_list);
spin_unlock_bh(&tipc_port_list_lock);
k_term_timer(&p_ptr->timer);
- kfree(p_ptr);
tipc_net_route_msg(buf);
- return 0;
-}
-
-static int port_unreliable(struct tipc_port *p_ptr)
-{
- return msg_src_droppable(&p_ptr->phdr);
-}
-
-int tipc_portunreliable(u32 ref, unsigned int *isunreliable)
-{
- struct tipc_port *p_ptr;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- *isunreliable = port_unreliable(p_ptr);
- tipc_port_unlock(p_ptr);
- return 0;
-}
-
-int tipc_set_portunreliable(u32 ref, unsigned int isunreliable)
-{
- struct tipc_port *p_ptr;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- msg_set_src_droppable(&p_ptr->phdr, (isunreliable != 0));
- tipc_port_unlock(p_ptr);
- return 0;
-}
-
-static int port_unreturnable(struct tipc_port *p_ptr)
-{
- return msg_dest_droppable(&p_ptr->phdr);
-}
-
-int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable)
-{
- struct tipc_port *p_ptr;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- *isunrejectable = port_unreturnable(p_ptr);
- tipc_port_unlock(p_ptr);
- return 0;
-}
-
-int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable)
-{
- struct tipc_port *p_ptr;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- msg_set_dest_droppable(&p_ptr->phdr, (isunrejectable != 0));
- tipc_port_unlock(p_ptr);
- return 0;
}
/*
@@ -350,8 +271,8 @@ static struct sk_buff *port_build_proto_msg(struct tipc_port *p_ptr,
if (buf) {
msg = buf_msg(buf);
tipc_msg_init(msg, CONN_MANAGER, type, INT_H_SIZE,
- port_peernode(p_ptr));
- msg_set_destport(msg, port_peerport(p_ptr));
+ tipc_port_peernode(p_ptr));
+ msg_set_destport(msg, tipc_port_peerport(p_ptr));
msg_set_origport(msg, p_ptr->ref);
msg_set_msgcnt(msg, ack);
}
@@ -422,17 +343,17 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
/* send returned message & dispose of rejected message */
src_node = msg_prevnode(msg);
if (in_own_node(src_node))
- tipc_port_recv_msg(rbuf);
+ tipc_port_rcv(rbuf);
else
- tipc_link_send(rbuf, src_node, msg_link_selector(rmsg));
+ tipc_link_xmit(rbuf, src_node, msg_link_selector(rmsg));
exit:
kfree_skb(buf);
return data_sz;
}
-int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr,
- struct iovec const *msg_sect, unsigned int len,
- int err)
+int tipc_port_iovec_reject(struct tipc_port *p_ptr, struct tipc_msg *hdr,
+ struct iovec const *msg_sect, unsigned int len,
+ int err)
{
struct sk_buff *buf;
int res;
@@ -519,7 +440,7 @@ static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *p_ptr, u32 er
return buf;
}
-void tipc_port_recv_proto_msg(struct sk_buff *buf)
+void tipc_port_proto_rcv(struct sk_buff *buf)
{
struct tipc_msg *msg = buf_msg(buf);
struct tipc_port *p_ptr;
@@ -547,13 +468,12 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf)
/* Process protocol message sent by peer */
switch (msg_type(msg)) {
case CONN_ACK:
- wakeable = tipc_port_congested(p_ptr) && p_ptr->congested &&
- p_ptr->wakeup;
+ wakeable = tipc_port_congested(p_ptr) && p_ptr->congested;
p_ptr->acked += msg_msgcnt(msg);
if (!tipc_port_congested(p_ptr)) {
p_ptr->congested = 0;
if (wakeable)
- p_ptr->wakeup(p_ptr);
+ tipc_port_wakeup(p_ptr);
}
break;
case CONN_PROBE:
@@ -584,8 +504,8 @@ static int port_print(struct tipc_port *p_ptr, char *buf, int len, int full_id)
ret = tipc_snprintf(buf, len, "%-10u:", p_ptr->ref);
if (p_ptr->connected) {
- u32 dport = port_peerport(p_ptr);
- u32 destnode = port_peernode(p_ptr);
+ u32 dport = tipc_port_peerport(p_ptr);
+ u32 destnode = tipc_port_peernode(p_ptr);
ret += tipc_snprintf(buf + ret, len - ret,
" connected to <%u.%u.%u:%u>",
@@ -673,34 +593,6 @@ void tipc_acknowledge(u32 ref, u32 ack)
tipc_net_route_msg(buf);
}
-int tipc_portimportance(u32 ref, unsigned int *importance)
-{
- struct tipc_port *p_ptr;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- *importance = (unsigned int)msg_importance(&p_ptr->phdr);
- tipc_port_unlock(p_ptr);
- return 0;
-}
-
-int tipc_set_portimportance(u32 ref, unsigned int imp)
-{
- struct tipc_port *p_ptr;
-
- if (imp > TIPC_CRITICAL_IMPORTANCE)
- return -EINVAL;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- msg_set_importance(&p_ptr->phdr, (u32)imp);
- tipc_port_unlock(p_ptr);
- return 0;
-}
-
-
int tipc_publish(struct tipc_port *p_ptr, unsigned int scope,
struct tipc_name_seq const *seq)
{
@@ -760,7 +652,7 @@ int tipc_withdraw(struct tipc_port *p_ptr, unsigned int scope,
return res;
}
-int tipc_connect(u32 ref, struct tipc_portid const *peer)
+int tipc_port_connect(u32 ref, struct tipc_portid const *peer)
{
struct tipc_port *p_ptr;
int res;
@@ -768,17 +660,17 @@ int tipc_connect(u32 ref, struct tipc_portid const *peer)
p_ptr = tipc_port_lock(ref);
if (!p_ptr)
return -EINVAL;
- res = __tipc_connect(ref, p_ptr, peer);
+ res = __tipc_port_connect(ref, p_ptr, peer);
tipc_port_unlock(p_ptr);
return res;
}
/*
- * __tipc_connect - connect to a remote peer
+ * __tipc_port_connect - connect to a remote peer
*
* Port must be locked.
*/
-int __tipc_connect(u32 ref, struct tipc_port *p_ptr,
+int __tipc_port_connect(u32 ref, struct tipc_port *p_ptr,
struct tipc_portid const *peer)
{
struct tipc_msg *msg;
@@ -815,7 +707,7 @@ exit:
*
* Port must be locked.
*/
-int __tipc_disconnect(struct tipc_port *tp_ptr)
+int __tipc_port_disconnect(struct tipc_port *tp_ptr)
{
if (tp_ptr->connected) {
tp_ptr->connected = 0;
@@ -828,10 +720,10 @@ int __tipc_disconnect(struct tipc_port *tp_ptr)
}
/*
- * tipc_disconnect(): Disconnect port form peer.
+ * tipc_port_disconnect(): Disconnect port form peer.
* This is a node local operation.
*/
-int tipc_disconnect(u32 ref)
+int tipc_port_disconnect(u32 ref)
{
struct tipc_port *p_ptr;
int res;
@@ -839,15 +731,15 @@ int tipc_disconnect(u32 ref)
p_ptr = tipc_port_lock(ref);
if (!p_ptr)
return -EINVAL;
- res = __tipc_disconnect(p_ptr);
+ res = __tipc_port_disconnect(p_ptr);
tipc_port_unlock(p_ptr);
return res;
}
/*
- * tipc_shutdown(): Send a SHUTDOWN msg to peer and disconnect
+ * tipc_port_shutdown(): Send a SHUTDOWN msg to peer and disconnect
*/
-int tipc_shutdown(u32 ref)
+int tipc_port_shutdown(u32 ref)
{
struct tipc_port *p_ptr;
struct sk_buff *buf = NULL;
@@ -859,13 +751,13 @@ int tipc_shutdown(u32 ref)
buf = port_build_peer_abort_msg(p_ptr, TIPC_CONN_SHUTDOWN);
tipc_port_unlock(p_ptr);
tipc_net_route_msg(buf);
- return tipc_disconnect(ref);
+ return tipc_port_disconnect(ref);
}
/**
- * tipc_port_recv_msg - receive message from lower layer and deliver to port user
+ * tipc_port_rcv - receive message from lower layer and deliver to port user
*/
-int tipc_port_recv_msg(struct sk_buff *buf)
+int tipc_port_rcv(struct sk_buff *buf)
{
struct tipc_port *p_ptr;
struct tipc_msg *msg = buf_msg(buf);
@@ -882,7 +774,7 @@ int tipc_port_recv_msg(struct sk_buff *buf)
/* validate destination & pass to port, otherwise reject message */
p_ptr = tipc_port_lock(destport);
if (likely(p_ptr)) {
- err = p_ptr->dispatcher(p_ptr, buf);
+ err = tipc_sk_rcv(&tipc_port_to_sock(p_ptr)->sk, buf);
tipc_port_unlock(p_ptr);
if (likely(!err))
return dsz;
@@ -894,43 +786,43 @@ int tipc_port_recv_msg(struct sk_buff *buf)
}
/*
- * tipc_port_recv_sections(): Concatenate and deliver sectioned
- * message for this node.
+ * tipc_port_iovec_rcv: Concatenate and deliver sectioned
+ * message for this node.
*/
-static int tipc_port_recv_sections(struct tipc_port *sender,
- struct iovec const *msg_sect,
- unsigned int len)
+static int tipc_port_iovec_rcv(struct tipc_port *sender,
+ struct iovec const *msg_sect,
+ unsigned int len)
{
struct sk_buff *buf;
int res;
res = tipc_msg_build(&sender->phdr, msg_sect, len, MAX_MSG_SIZE, &buf);
if (likely(buf))
- tipc_port_recv_msg(buf);
+ tipc_port_rcv(buf);
return res;
}
/**
* tipc_send - send message sections on connection
*/
-int tipc_send(u32 ref, struct iovec const *msg_sect, unsigned int len)
+int tipc_send(struct tipc_port *p_ptr,
+ struct iovec const *msg_sect,
+ unsigned int len)
{
- struct tipc_port *p_ptr;
u32 destnode;
int res;
- p_ptr = tipc_port_deref(ref);
- if (!p_ptr || !p_ptr->connected)
+ if (!p_ptr->connected)
return -EINVAL;
p_ptr->congested = 1;
if (!tipc_port_congested(p_ptr)) {
- destnode = port_peernode(p_ptr);
+ destnode = tipc_port_peernode(p_ptr);
if (likely(!in_own_node(destnode)))
- res = tipc_link_send_sections_fast(p_ptr, msg_sect,
- len, destnode);
+ res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len,
+ destnode);
else
- res = tipc_port_recv_sections(p_ptr, msg_sect, len);
+ res = tipc_port_iovec_rcv(p_ptr, msg_sect, len);
if (likely(res != -ELINKCONG)) {
p_ptr->congested = 0;
@@ -939,7 +831,7 @@ int tipc_send(u32 ref, struct iovec const *msg_sect, unsigned int len)
return res;
}
}
- if (port_unreliable(p_ptr)) {
+ if (tipc_port_unreliable(p_ptr)) {
p_ptr->congested = 0;
return len;
}
@@ -949,17 +841,18 @@ int tipc_send(u32 ref, struct iovec const *msg_sect, unsigned int len)
/**
* tipc_send2name - send message sections to port name
*/
-int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain,
- struct iovec const *msg_sect, unsigned int len)
+int tipc_send2name(struct tipc_port *p_ptr,
+ struct tipc_name const *name,
+ unsigned int domain,
+ struct iovec const *msg_sect,
+ unsigned int len)
{
- struct tipc_port *p_ptr;
struct tipc_msg *msg;
u32 destnode = domain;
u32 destport;
int res;
- p_ptr = tipc_port_deref(ref);
- if (!p_ptr || p_ptr->connected)
+ if (p_ptr->connected)
return -EINVAL;
msg = &p_ptr->phdr;
@@ -974,39 +867,39 @@ int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain,
if (likely(destport || destnode)) {
if (likely(in_own_node(destnode)))
- res = tipc_port_recv_sections(p_ptr, msg_sect, len);
+ res = tipc_port_iovec_rcv(p_ptr, msg_sect, len);
else if (tipc_own_addr)
- res = tipc_link_send_sections_fast(p_ptr, msg_sect,
- len, destnode);
+ res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len,
+ destnode);
else
- res = tipc_port_reject_sections(p_ptr, msg, msg_sect,
- len, TIPC_ERR_NO_NODE);
+ res = tipc_port_iovec_reject(p_ptr, msg, msg_sect,
+ len, TIPC_ERR_NO_NODE);
if (likely(res != -ELINKCONG)) {
if (res > 0)
p_ptr->sent++;
return res;
}
- if (port_unreliable(p_ptr)) {
+ if (tipc_port_unreliable(p_ptr))
return len;
- }
+
return -ELINKCONG;
}
- return tipc_port_reject_sections(p_ptr, msg, msg_sect, len,
- TIPC_ERR_NO_NAME);
+ return tipc_port_iovec_reject(p_ptr, msg, msg_sect, len,
+ TIPC_ERR_NO_NAME);
}
/**
* tipc_send2port - send message sections to port identity
*/
-int tipc_send2port(u32 ref, struct tipc_portid const *dest,
- struct iovec const *msg_sect, unsigned int len)
+int tipc_send2port(struct tipc_port *p_ptr,
+ struct tipc_portid const *dest,
+ struct iovec const *msg_sect,
+ unsigned int len)
{
- struct tipc_port *p_ptr;
struct tipc_msg *msg;
int res;
- p_ptr = tipc_port_deref(ref);
- if (!p_ptr || p_ptr->connected)
+ if (p_ptr->connected)
return -EINVAL;
msg = &p_ptr->phdr;
@@ -1017,20 +910,20 @@ int tipc_send2port(u32 ref, struct tipc_portid const *dest,
msg_set_hdr_sz(msg, BASIC_H_SIZE);
if (in_own_node(dest->node))
- res = tipc_port_recv_sections(p_ptr, msg_sect, len);
+ res = tipc_port_iovec_rcv(p_ptr, msg_sect, len);
else if (tipc_own_addr)
- res = tipc_link_send_sections_fast(p_ptr, msg_sect, len,
- dest->node);
+ res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len,
+ dest->node);
else
- res = tipc_port_reject_sections(p_ptr, msg, msg_sect, len,
+ res = tipc_port_iovec_reject(p_ptr, msg, msg_sect, len,
TIPC_ERR_NO_NODE);
if (likely(res != -ELINKCONG)) {
if (res > 0)
p_ptr->sent++;
return res;
}
- if (port_unreliable(p_ptr)) {
+ if (tipc_port_unreliable(p_ptr))
return len;
- }
+
return -ELINKCONG;
}
diff --git a/net/tipc/port.h b/net/tipc/port.h
index 34f12bd4074e..a00397393bd1 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -1,7 +1,7 @@
/*
* net/tipc/port.h: Include file for TIPC port code
*
- * Copyright (c) 1994-2007, Ericsson AB
+ * Copyright (c) 1994-2007, 2014, Ericsson AB
* Copyright (c) 2004-2007, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -48,7 +48,6 @@
/**
* struct tipc_port - TIPC port structure
- * @sk: pointer to socket handle
* @lock: pointer to spinlock for controlling access to port
* @connected: non-zero if port is currently connected to a peer port
* @conn_type: TIPC type used when connection was established
@@ -60,8 +59,6 @@
* @ref: unique reference to port in TIPC object registry
* @phdr: preformatted message header used when sending messages
* @port_list: adjacent ports in TIPC's global list of ports
- * @dispatcher: ptr to routine which handles received messages
- * @wakeup: ptr to routine to call when port is no longer congested
* @wait_list: adjacent ports in list of ports waiting on link congestion
* @waiting_pkts:
* @sent: # of non-empty messages sent by port
@@ -74,7 +71,6 @@
* @subscription: "node down" subscription used to terminate failed connections
*/
struct tipc_port {
- struct sock *sk;
spinlock_t *lock;
int connected;
u32 conn_type;
@@ -86,8 +82,6 @@ struct tipc_port {
u32 ref;
struct tipc_msg phdr;
struct list_head port_list;
- u32 (*dispatcher)(struct tipc_port *, struct sk_buff *);
- void (*wakeup)(struct tipc_port *);
struct list_head wait_list;
u32 waiting_pkts;
u32 sent;
@@ -106,68 +100,71 @@ struct tipc_port_list;
/*
* TIPC port manipulation routines
*/
-struct tipc_port *tipc_createport(struct sock *sk,
- u32 (*dispatcher)(struct tipc_port *,
- struct sk_buff *),
- void (*wakeup)(struct tipc_port *),
- const u32 importance);
+u32 tipc_port_init(struct tipc_port *p_ptr,
+ const unsigned int importance);
int tipc_reject_msg(struct sk_buff *buf, u32 err);
void tipc_acknowledge(u32 port_ref, u32 ack);
-int tipc_deleteport(struct tipc_port *p_ptr);
-
-int tipc_portimportance(u32 portref, unsigned int *importance);
-int tipc_set_portimportance(u32 portref, unsigned int importance);
-
-int tipc_portunreliable(u32 portref, unsigned int *isunreliable);
-int tipc_set_portunreliable(u32 portref, unsigned int isunreliable);
-
-int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable);
-int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable);
+void tipc_port_destroy(struct tipc_port *p_ptr);
int tipc_publish(struct tipc_port *p_ptr, unsigned int scope,
struct tipc_name_seq const *name_seq);
+
int tipc_withdraw(struct tipc_port *p_ptr, unsigned int scope,
struct tipc_name_seq const *name_seq);
-int tipc_connect(u32 portref, struct tipc_portid const *port);
+int tipc_port_connect(u32 portref, struct tipc_portid const *port);
-int tipc_disconnect(u32 portref);
+int tipc_port_disconnect(u32 portref);
-int tipc_shutdown(u32 ref);
+int tipc_port_shutdown(u32 ref);
+void tipc_port_wakeup(struct tipc_port *port);
/*
* The following routines require that the port be locked on entry
*/
-int __tipc_disconnect(struct tipc_port *tp_ptr);
-int __tipc_connect(u32 ref, struct tipc_port *p_ptr,
+int __tipc_port_disconnect(struct tipc_port *tp_ptr);
+int __tipc_port_connect(u32 ref, struct tipc_port *p_ptr,
struct tipc_portid const *peer);
int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg);
/*
* TIPC messaging routines
*/
-int tipc_port_recv_msg(struct sk_buff *buf);
-int tipc_send(u32 portref, struct iovec const *msg_sect, unsigned int len);
-
-int tipc_send2name(u32 portref, struct tipc_name const *name, u32 domain,
- struct iovec const *msg_sect, unsigned int len);
+int tipc_port_rcv(struct sk_buff *buf);
+
+int tipc_send(struct tipc_port *port,
+ struct iovec const *msg_sect,
+ unsigned int len);
+
+int tipc_send2name(struct tipc_port *port,
+ struct tipc_name const *name,
+ u32 domain,
+ struct iovec const *msg_sect,
+ unsigned int len);
+
+int tipc_send2port(struct tipc_port *port,
+ struct tipc_portid const *dest,
+ struct iovec const *msg_sect,
+ unsigned int len);
+
+int tipc_port_mcast_xmit(struct tipc_port *port,
+ struct tipc_name_seq const *seq,
+ struct iovec const *msg,
+ unsigned int len);
+
+int tipc_port_iovec_reject(struct tipc_port *p_ptr,
+ struct tipc_msg *hdr,
+ struct iovec const *msg_sect,
+ unsigned int len,
+ int err);
-int tipc_send2port(u32 portref, struct tipc_portid const *dest,
- struct iovec const *msg_sect, unsigned int len);
-
-int tipc_multicast(u32 portref, struct tipc_name_seq const *seq,
- struct iovec const *msg, unsigned int len);
-
-int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr,
- struct iovec const *msg_sect, unsigned int len,
- int err);
struct sk_buff *tipc_port_get_ports(void);
-void tipc_port_recv_proto_msg(struct sk_buff *buf);
-void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp);
+void tipc_port_proto_rcv(struct sk_buff *buf);
+void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp);
void tipc_port_reinit(void);
/**
@@ -188,14 +185,53 @@ static inline void tipc_port_unlock(struct tipc_port *p_ptr)
spin_unlock_bh(p_ptr->lock);
}
-static inline struct tipc_port *tipc_port_deref(u32 ref)
+static inline int tipc_port_congested(struct tipc_port *p_ptr)
{
- return (struct tipc_port *)tipc_ref_deref(ref);
+ return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2);
}
-static inline int tipc_port_congested(struct tipc_port *p_ptr)
+
+static inline u32 tipc_port_peernode(struct tipc_port *p_ptr)
{
- return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2);
+ return msg_destnode(&p_ptr->phdr);
+}
+
+static inline u32 tipc_port_peerport(struct tipc_port *p_ptr)
+{
+ return msg_destport(&p_ptr->phdr);
+}
+
+static inline bool tipc_port_unreliable(struct tipc_port *port)
+{
+ return msg_src_droppable(&port->phdr) != 0;
+}
+
+static inline void tipc_port_set_unreliable(struct tipc_port *port,
+ bool unreliable)
+{
+ msg_set_src_droppable(&port->phdr, unreliable ? 1 : 0);
+}
+
+static inline bool tipc_port_unreturnable(struct tipc_port *port)
+{
+ return msg_dest_droppable(&port->phdr) != 0;
+}
+
+static inline void tipc_port_set_unreturnable(struct tipc_port *port,
+ bool unreturnable)
+{
+ msg_set_dest_droppable(&port->phdr, unreturnable ? 1 : 0);
+}
+
+
+static inline int tipc_port_importance(struct tipc_port *port)
+{
+ return msg_importance(&port->phdr);
+}
+
+static inline void tipc_port_set_importance(struct tipc_port *port, int imp)
+{
+ msg_set_importance(&port->phdr, (u32)imp);
}
#endif
diff --git a/net/tipc/ref.c b/net/tipc/ref.c
index 2a2a938dc22c..3d4ecd754eee 100644
--- a/net/tipc/ref.c
+++ b/net/tipc/ref.c
@@ -89,7 +89,7 @@ struct ref_table {
static struct ref_table tipc_ref_table;
-static DEFINE_RWLOCK(ref_table_lock);
+static DEFINE_SPINLOCK(ref_table_lock);
/**
* tipc_ref_table_init - create reference table for objects
@@ -126,9 +126,6 @@ int tipc_ref_table_init(u32 requested_size, u32 start)
*/
void tipc_ref_table_stop(void)
{
- if (!tipc_ref_table.entries)
- return;
-
vfree(tipc_ref_table.entries);
tipc_ref_table.entries = NULL;
}
@@ -162,7 +159,7 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock)
}
/* take a free entry, if available; otherwise initialize a new entry */
- write_lock_bh(&ref_table_lock);
+ spin_lock_bh(&ref_table_lock);
if (tipc_ref_table.first_free) {
index = tipc_ref_table.first_free;
entry = &(tipc_ref_table.entries[index]);
@@ -178,7 +175,7 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock)
} else {
ref = 0;
}
- write_unlock_bh(&ref_table_lock);
+ spin_unlock_bh(&ref_table_lock);
/*
* Grab the lock so no one else can modify this entry
@@ -219,7 +216,7 @@ void tipc_ref_discard(u32 ref)
index = ref & index_mask;
entry = &(tipc_ref_table.entries[index]);
- write_lock_bh(&ref_table_lock);
+ spin_lock_bh(&ref_table_lock);
if (!entry->object) {
pr_err("Attempt to discard ref. to non-existent obj\n");
@@ -245,7 +242,7 @@ void tipc_ref_discard(u32 ref)
tipc_ref_table.last_free = index;
exit:
- write_unlock_bh(&ref_table_lock);
+ spin_unlock_bh(&ref_table_lock);
}
/**
@@ -267,20 +264,3 @@ void *tipc_ref_lock(u32 ref)
}
return NULL;
}
-
-
-/**
- * tipc_ref_deref - return pointer referenced object (without locking it)
- */
-void *tipc_ref_deref(u32 ref)
-{
- if (likely(tipc_ref_table.entries)) {
- struct reference *entry;
-
- entry = &tipc_ref_table.entries[ref &
- tipc_ref_table.index_mask];
- if (likely(entry->ref == ref))
- return entry->object;
- }
- return NULL;
-}
diff --git a/net/tipc/ref.h b/net/tipc/ref.h
index 5bc8e7ab84de..d01aa1df63b8 100644
--- a/net/tipc/ref.h
+++ b/net/tipc/ref.h
@@ -44,6 +44,5 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock);
void tipc_ref_discard(u32 ref);
void *tipc_ref_lock(u32 ref);
-void *tipc_ref_deref(u32 ref);
#endif
diff --git a/net/tipc/server.c b/net/tipc/server.c
index b635ca347a87..646a930eefbf 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -87,7 +87,6 @@ static void tipc_clean_outqueues(struct tipc_conn *con);
static void tipc_conn_kref_release(struct kref *kref)
{
struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
- struct tipc_server *s = con->server;
if (con->sock) {
tipc_sock_release_local(con->sock);
@@ -95,10 +94,6 @@ static void tipc_conn_kref_release(struct kref *kref)
}
tipc_clean_outqueues(con);
-
- if (con->conid)
- s->tipc_conn_shutdown(con->conid, con->usr_data);
-
kfree(con);
}
@@ -181,6 +176,9 @@ static void tipc_close_conn(struct tipc_conn *con)
struct tipc_server *s = con->server;
if (test_and_clear_bit(CF_CONNECTED, &con->flags)) {
+ if (con->conid)
+ s->tipc_conn_shutdown(con->conid, con->usr_data);
+
spin_lock_bh(&s->idr_lock);
idr_remove(&s->conn_idr, con->conid);
s->idr_in_use--;
@@ -429,10 +427,12 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid,
list_add_tail(&e->list, &con->outqueue);
spin_unlock_bh(&con->outqueue_lock);
- if (test_bit(CF_CONNECTED, &con->flags))
+ if (test_bit(CF_CONNECTED, &con->flags)) {
if (!queue_work(s->send_wq, &con->swork))
conn_put(con);
-
+ } else {
+ conn_put(con);
+ }
return 0;
}
@@ -573,7 +573,6 @@ int tipc_server_start(struct tipc_server *s)
kmem_cache_destroy(s->rcvbuf_cache);
return ret;
}
- s->enabled = 1;
return ret;
}
@@ -583,10 +582,6 @@ void tipc_server_stop(struct tipc_server *s)
int total = 0;
int id;
- if (!s->enabled)
- return;
-
- s->enabled = 0;
spin_lock_bh(&s->idr_lock);
for (id = 0; total < s->idr_in_use; id++) {
con = idr_find(&s->conn_idr, id);
diff --git a/net/tipc/server.h b/net/tipc/server.h
index 98b23f20bc0f..be817b0b547e 100644
--- a/net/tipc/server.h
+++ b/net/tipc/server.h
@@ -56,7 +56,6 @@
* @name: server name
* @imp: message importance
* @type: socket type
- * @enabled: identify whether server is launched or not
*/
struct tipc_server {
struct idr conn_idr;
@@ -74,7 +73,6 @@ struct tipc_server {
const char name[TIPC_SERVER_NAME_LEN];
int imp;
int type;
- int enabled;
};
int tipc_conn_sendmsg(struct tipc_server *s, int conid,
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index aab4948f0aff..29b7f26a12cf 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1,7 +1,7 @@
/*
* net/tipc/socket.c: TIPC socket API
*
- * Copyright (c) 2001-2007, 2012 Ericsson AB
+ * Copyright (c) 2001-2007, 2012-2014, Ericsson AB
* Copyright (c) 2004-2008, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -38,30 +38,17 @@
#include "port.h"
#include <linux/export.h>
-#include <net/sock.h>
#define SS_LISTENING -1 /* socket is listening */
#define SS_READY -2 /* socket is connectionless */
#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
-struct tipc_sock {
- struct sock sk;
- struct tipc_port *p;
- struct tipc_portid peer_name;
- unsigned int conn_timeout;
-};
-
-#define tipc_sk(sk) ((struct tipc_sock *)(sk))
-#define tipc_sk_port(sk) (tipc_sk(sk)->p)
-
static int backlog_rcv(struct sock *sk, struct sk_buff *skb);
-static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf);
-static void wakeupdispatch(struct tipc_port *tport);
static void tipc_data_ready(struct sock *sk, int len);
static void tipc_write_space(struct sock *sk);
-static int release(struct socket *sock);
-static int accept(struct socket *sock, struct socket *new_sock, int flags);
+static int tipc_release(struct socket *sock);
+static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
@@ -70,8 +57,6 @@ static const struct proto_ops msg_ops;
static struct proto tipc_proto;
static struct proto tipc_proto_kern;
-static int sockets_enabled;
-
/*
* Revised TIPC socket locking policy:
*
@@ -117,6 +102,8 @@ static int sockets_enabled;
* - port reference
*/
+#include "socket.h"
+
/**
* advance_rx_queue - discard first buffer in socket receive queue
*
@@ -152,13 +139,15 @@ static void reject_rx_queue(struct sock *sk)
*
* Returns 0 on success, errno otherwise
*/
-static int tipc_sk_create(struct net *net, struct socket *sock, int protocol,
- int kern)
+static int tipc_sk_create(struct net *net, struct socket *sock,
+ int protocol, int kern)
{
const struct proto_ops *ops;
socket_state state;
struct sock *sk;
- struct tipc_port *tp_ptr;
+ struct tipc_sock *tsk;
+ struct tipc_port *port;
+ u32 ref;
/* Validate arguments */
if (unlikely(protocol != 0))
@@ -191,10 +180,12 @@ static int tipc_sk_create(struct net *net, struct socket *sock, int protocol,
if (sk == NULL)
return -ENOMEM;
- /* Allocate TIPC port for socket to use */
- tp_ptr = tipc_createport(sk, &dispatch, &wakeupdispatch,
- TIPC_LOW_IMPORTANCE);
- if (unlikely(!tp_ptr)) {
+ tsk = tipc_sk(sk);
+ port = &tsk->port;
+
+ ref = tipc_port_init(port, TIPC_LOW_IMPORTANCE);
+ if (!ref) {
+ pr_warn("Socket registration failed, ref. table exhausted\n");
sk_free(sk);
return -ENOMEM;
}
@@ -208,17 +199,14 @@ static int tipc_sk_create(struct net *net, struct socket *sock, int protocol,
sk->sk_rcvbuf = sysctl_tipc_rmem[1];
sk->sk_data_ready = tipc_data_ready;
sk->sk_write_space = tipc_write_space;
- tipc_sk(sk)->p = tp_ptr;
tipc_sk(sk)->conn_timeout = CONN_TIMEOUT_DEFAULT;
-
- spin_unlock_bh(tp_ptr->lock);
+ tipc_port_unlock(port);
if (sock->state == SS_READY) {
- tipc_set_portunreturnable(tp_ptr->ref, 1);
+ tipc_port_set_unreturnable(port, true);
if (sock->type == SOCK_DGRAM)
- tipc_set_portunreliable(tp_ptr->ref, 1);
+ tipc_port_set_unreliable(port, true);
}
-
return 0;
}
@@ -256,7 +244,7 @@ int tipc_sock_create_local(int type, struct socket **res)
*/
void tipc_sock_release_local(struct socket *sock)
{
- release(sock);
+ tipc_release(sock);
sock->ops = NULL;
sock_release(sock);
}
@@ -282,7 +270,7 @@ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock,
if (ret < 0)
return ret;
- ret = accept(sock, *newsock, flags);
+ ret = tipc_accept(sock, *newsock, flags);
if (ret < 0) {
sock_release(*newsock);
return ret;
@@ -292,7 +280,7 @@ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock,
}
/**
- * release - destroy a TIPC socket
+ * tipc_release - destroy a TIPC socket
* @sock: socket to destroy
*
* This routine cleans up any messages that are still queued on the socket.
@@ -307,10 +295,11 @@ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock,
*
* Returns 0 on success, errno otherwise
*/
-static int release(struct socket *sock)
+static int tipc_release(struct socket *sock)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport;
+ struct tipc_sock *tsk;
+ struct tipc_port *port;
struct sk_buff *buf;
int res;
@@ -321,7 +310,8 @@ static int release(struct socket *sock)
if (sk == NULL)
return 0;
- tport = tipc_sk_port(sk);
+ tsk = tipc_sk(sk);
+ port = &tsk->port;
lock_sock(sk);
/*
@@ -338,17 +328,16 @@ static int release(struct socket *sock)
if ((sock->state == SS_CONNECTING) ||
(sock->state == SS_CONNECTED)) {
sock->state = SS_DISCONNECTING;
- tipc_disconnect(tport->ref);
+ tipc_port_disconnect(port->ref);
}
tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
}
}
- /*
- * Delete TIPC port; this ensures no more messages are queued
- * (also disconnects an active connection & sends a 'FIN-' to peer)
+ /* Destroy TIPC port; also disconnects an active connection and
+ * sends a 'FIN-' to peer.
*/
- res = tipc_deleteport(tport);
+ tipc_port_destroy(port);
/* Discard any remaining (connection-based) messages in receive queue */
__skb_queue_purge(&sk->sk_receive_queue);
@@ -364,7 +353,7 @@ static int release(struct socket *sock)
}
/**
- * bind - associate or disassocate TIPC name(s) with a socket
+ * tipc_bind - associate or disassocate TIPC name(s) with a socket
* @sock: socket structure
* @uaddr: socket address describing name(s) and desired operation
* @uaddr_len: size of socket address data structure
@@ -378,16 +367,17 @@ static int release(struct socket *sock)
* NOTE: This routine doesn't need to take the socket lock since it doesn't
* access any non-constant socket information.
*/
-static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len)
+static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
+ int uaddr_len)
{
struct sock *sk = sock->sk;
struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
- struct tipc_port *tport = tipc_sk_port(sock->sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
int res = -EINVAL;
lock_sock(sk);
if (unlikely(!uaddr_len)) {
- res = tipc_withdraw(tport, 0, NULL);
+ res = tipc_withdraw(&tsk->port, 0, NULL);
goto exit;
}
@@ -415,15 +405,15 @@ static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len)
}
res = (addr->scope > 0) ?
- tipc_publish(tport, addr->scope, &addr->addr.nameseq) :
- tipc_withdraw(tport, -addr->scope, &addr->addr.nameseq);
+ tipc_publish(&tsk->port, addr->scope, &addr->addr.nameseq) :
+ tipc_withdraw(&tsk->port, -addr->scope, &addr->addr.nameseq);
exit:
release_sock(sk);
return res;
}
/**
- * get_name - get port ID of socket or peer socket
+ * tipc_getname - get port ID of socket or peer socket
* @sock: socket structure
* @uaddr: area for returned socket address
* @uaddr_len: area for returned length of socket address
@@ -435,21 +425,21 @@ exit:
* accesses socket information that is unchanging (or which changes in
* a completely predictable manner).
*/
-static int get_name(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
+ int *uaddr_len, int peer)
{
struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
- struct tipc_sock *tsock = tipc_sk(sock->sk);
+ struct tipc_sock *tsk = tipc_sk(sock->sk);
memset(addr, 0, sizeof(*addr));
if (peer) {
if ((sock->state != SS_CONNECTED) &&
((peer != 2) || (sock->state != SS_DISCONNECTING)))
return -ENOTCONN;
- addr->addr.id.ref = tsock->peer_name.ref;
- addr->addr.id.node = tsock->peer_name.node;
+ addr->addr.id.ref = tipc_port_peerport(&tsk->port);
+ addr->addr.id.node = tipc_port_peernode(&tsk->port);
} else {
- addr->addr.id.ref = tsock->p->ref;
+ addr->addr.id.ref = tsk->port.ref;
addr->addr.id.node = tipc_own_addr;
}
@@ -463,7 +453,7 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
}
/**
- * poll - read and possibly block on pollmask
+ * tipc_poll - read and possibly block on pollmask
* @file: file structure associated with the socket
* @sock: socket for which to calculate the poll bits
* @wait: ???
@@ -502,22 +492,23 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
* imply that the operation will succeed, merely that it should be performed
* and will not block.
*/
-static unsigned int poll(struct file *file, struct socket *sock,
- poll_table *wait)
+static unsigned int tipc_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
{
struct sock *sk = sock->sk;
+ struct tipc_sock *tsk = tipc_sk(sk);
u32 mask = 0;
sock_poll_wait(file, sk_sleep(sk), wait);
switch ((int)sock->state) {
case SS_UNCONNECTED:
- if (!tipc_sk_port(sk)->congested)
+ if (!tsk->port.congested)
mask |= POLLOUT;
break;
case SS_READY:
case SS_CONNECTED:
- if (!tipc_sk_port(sk)->congested)
+ if (!tsk->port.congested)
mask |= POLLOUT;
/* fall thru' */
case SS_CONNECTING:
@@ -567,7 +558,7 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
DEFINE_WAIT(wait);
int done;
@@ -583,14 +574,15 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
return sock_intr_errno(*timeo_p);
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- done = sk_wait_event(sk, timeo_p, !tport->congested);
+ done = sk_wait_event(sk, timeo_p, !tsk->port.congested);
finish_wait(sk_sleep(sk), &wait);
} while (!done);
return 0;
}
+
/**
- * send_msg - send message in connectionless manner
+ * tipc_sendmsg - send message in connectionless manner
* @iocb: if NULL, indicates that socket lock is already held
* @sock: socket structure
* @m: message to send
@@ -603,11 +595,12 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
*
* Returns the number of bytes sent on success, or errno otherwise
*/
-static int send_msg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t total_len)
+static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t total_len)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_port *port = &tsk->port;
DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
int needs_conn;
long timeo;
@@ -634,13 +627,13 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
res = -EISCONN;
goto exit;
}
- if (tport->published) {
+ if (tsk->port.published) {
res = -EOPNOTSUPP;
goto exit;
}
if (dest->addrtype == TIPC_ADDR_NAME) {
- tport->conn_type = dest->addr.name.name.type;
- tport->conn_instance = dest->addr.name.name.instance;
+ tsk->port.conn_type = dest->addr.name.name.type;
+ tsk->port.conn_instance = dest->addr.name.name.instance;
}
/* Abort any pending connection attempts (very unlikely) */
@@ -653,13 +646,13 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
res = dest_name_check(dest, m);
if (res)
break;
- res = tipc_send2name(tport->ref,
+ res = tipc_send2name(port,
&dest->addr.name.name,
dest->addr.name.domain,
m->msg_iov,
total_len);
} else if (dest->addrtype == TIPC_ADDR_ID) {
- res = tipc_send2port(tport->ref,
+ res = tipc_send2port(port,
&dest->addr.id,
m->msg_iov,
total_len);
@@ -671,10 +664,10 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
res = dest_name_check(dest, m);
if (res)
break;
- res = tipc_multicast(tport->ref,
- &dest->addr.nameseq,
- m->msg_iov,
- total_len);
+ res = tipc_port_mcast_xmit(port,
+ &dest->addr.nameseq,
+ m->msg_iov,
+ total_len);
}
if (likely(res != -ELINKCONG)) {
if (needs_conn && (res >= 0))
@@ -695,7 +688,8 @@ exit:
static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_port *port = &tsk->port;
DEFINE_WAIT(wait);
int done;
@@ -714,14 +708,14 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
done = sk_wait_event(sk, timeo_p,
- (!tport->congested || !tport->connected));
+ (!port->congested || !port->connected));
finish_wait(sk_sleep(sk), &wait);
} while (!done);
return 0;
}
/**
- * send_packet - send a connection-oriented message
+ * tipc_send_packet - send a connection-oriented message
* @iocb: if NULL, indicates that socket lock is already held
* @sock: socket structure
* @m: message to send
@@ -731,18 +725,18 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
*
* Returns the number of bytes sent on success, or errno otherwise
*/
-static int send_packet(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t total_len)
+static int tipc_send_packet(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t total_len)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
int res = -EINVAL;
long timeo;
/* Handle implied connection establishment */
if (unlikely(dest))
- return send_msg(iocb, sock, m, total_len);
+ return tipc_sendmsg(iocb, sock, m, total_len);
if (total_len > TIPC_MAX_USER_MSG_SIZE)
return -EMSGSIZE;
@@ -760,7 +754,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
do {
- res = tipc_send(tport->ref, m->msg_iov, total_len);
+ res = tipc_send(&tsk->port, m->msg_iov, total_len);
if (likely(res != -ELINKCONG))
break;
res = tipc_wait_for_sndpkt(sock, &timeo);
@@ -774,7 +768,7 @@ exit:
}
/**
- * send_stream - send stream-oriented data
+ * tipc_send_stream - send stream-oriented data
* @iocb: (unused)
* @sock: socket structure
* @m: data to send
@@ -785,11 +779,11 @@ exit:
* Returns the number of bytes sent on success (or partial success),
* or errno if no data sent
*/
-static int send_stream(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t total_len)
+static int tipc_send_stream(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t total_len)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
struct msghdr my_msg;
struct iovec my_iov;
struct iovec *curr_iov;
@@ -806,7 +800,7 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
/* Handle special cases where there is no connection */
if (unlikely(sock->state != SS_CONNECTED)) {
if (sock->state == SS_UNCONNECTED)
- res = send_packet(NULL, sock, m, total_len);
+ res = tipc_send_packet(NULL, sock, m, total_len);
else
res = sock->state == SS_DISCONNECTING ? -EPIPE : -ENOTCONN;
goto exit;
@@ -837,21 +831,22 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
my_msg.msg_name = NULL;
bytes_sent = 0;
- hdr_size = msg_hdr_sz(&tport->phdr);
+ hdr_size = msg_hdr_sz(&tsk->port.phdr);
while (curr_iovlen--) {
curr_start = curr_iov->iov_base;
curr_left = curr_iov->iov_len;
while (curr_left) {
- bytes_to_send = tport->max_pkt - hdr_size;
+ bytes_to_send = tsk->port.max_pkt - hdr_size;
if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE)
bytes_to_send = TIPC_MAX_USER_MSG_SIZE;
if (curr_left < bytes_to_send)
bytes_to_send = curr_left;
my_iov.iov_base = curr_start;
my_iov.iov_len = bytes_to_send;
- res = send_packet(NULL, sock, &my_msg, bytes_to_send);
+ res = tipc_send_packet(NULL, sock, &my_msg,
+ bytes_to_send);
if (res < 0) {
if (bytes_sent)
res = bytes_sent;
@@ -872,27 +867,25 @@ exit:
/**
* auto_connect - complete connection setup to a remote port
- * @sock: socket structure
+ * @tsk: tipc socket structure
* @msg: peer's response message
*
* Returns 0 on success, errno otherwise
*/
-static int auto_connect(struct socket *sock, struct tipc_msg *msg)
+static int auto_connect(struct tipc_sock *tsk, struct tipc_msg *msg)
{
- struct tipc_sock *tsock = tipc_sk(sock->sk);
- struct tipc_port *p_ptr;
+ struct tipc_port *port = &tsk->port;
+ struct socket *sock = tsk->sk.sk_socket;
+ struct tipc_portid peer;
- tsock->peer_name.ref = msg_origport(msg);
- tsock->peer_name.node = msg_orignode(msg);
- p_ptr = tipc_port_deref(tsock->p->ref);
- if (!p_ptr)
- return -EINVAL;
+ peer.ref = msg_origport(msg);
+ peer.node = msg_orignode(msg);
- __tipc_connect(tsock->p->ref, p_ptr, &tsock->peer_name);
+ __tipc_port_connect(port->ref, port, &peer);
if (msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE)
return -EINVAL;
- msg_set_importance(&p_ptr->phdr, (u32)msg_importance(msg));
+ msg_set_importance(&port->phdr, (u32)msg_importance(msg));
sock->state = SS_CONNECTED;
return 0;
}
@@ -999,7 +992,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo)
for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- if (skb_queue_empty(&sk->sk_receive_queue)) {
+ if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
if (sock->state == SS_DISCONNECTING) {
err = -ENOTCONN;
break;
@@ -1023,7 +1016,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo)
}
/**
- * recv_msg - receive packet-oriented message
+ * tipc_recvmsg - receive packet-oriented message
* @iocb: (unused)
* @m: descriptor for message info
* @buf_len: total size of user buffer area
@@ -1034,11 +1027,12 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo)
*
* Returns size of returned message data, errno otherwise
*/
-static int recv_msg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t buf_len, int flags)
+static int tipc_recvmsg(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t buf_len, int flags)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_port *port = &tsk->port;
struct sk_buff *buf;
struct tipc_msg *msg;
long timeo;
@@ -1081,7 +1075,7 @@ restart:
set_orig_addr(m, msg);
/* Capture ancillary data (optional) */
- res = anc_data_recv(m, msg, tport);
+ res = anc_data_recv(m, msg, port);
if (res)
goto exit;
@@ -1107,8 +1101,8 @@ restart:
/* Consume received message (optional) */
if (likely(!(flags & MSG_PEEK))) {
if ((sock->state != SS_READY) &&
- (++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
- tipc_acknowledge(tport->ref, tport->conn_unacked);
+ (++port->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
+ tipc_acknowledge(port->ref, port->conn_unacked);
advance_rx_queue(sk);
}
exit:
@@ -1117,7 +1111,7 @@ exit:
}
/**
- * recv_stream - receive stream-oriented data
+ * tipc_recv_stream - receive stream-oriented data
* @iocb: (unused)
* @m: descriptor for message info
* @buf_len: total size of user buffer area
@@ -1128,11 +1122,12 @@ exit:
*
* Returns size of returned message data, errno otherwise
*/
-static int recv_stream(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t buf_len, int flags)
+static int tipc_recv_stream(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t buf_len, int flags)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_port *port = &tsk->port;
struct sk_buff *buf;
struct tipc_msg *msg;
long timeo;
@@ -1177,7 +1172,7 @@ restart:
/* Optionally capture sender's address & ancillary data of first msg */
if (sz_copied == 0) {
set_orig_addr(m, msg);
- res = anc_data_recv(m, msg, tport);
+ res = anc_data_recv(m, msg, port);
if (res)
goto exit;
}
@@ -1215,8 +1210,8 @@ restart:
/* Consume received message (optional) */
if (likely(!(flags & MSG_PEEK))) {
- if (unlikely(++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
- tipc_acknowledge(tport->ref, tport->conn_unacked);
+ if (unlikely(++port->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
+ tipc_acknowledge(port->ref, port->conn_unacked);
advance_rx_queue(sk);
}
@@ -1268,17 +1263,19 @@ static void tipc_data_ready(struct sock *sk, int len)
/**
* filter_connect - Handle all incoming messages for a connection-based socket
- * @tsock: TIPC socket
+ * @tsk: TIPC socket
* @msg: message
*
* Returns TIPC error status code and socket error status code
* once it encounters some errors
*/
-static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf)
+static u32 filter_connect(struct tipc_sock *tsk, struct sk_buff **buf)
{
- struct socket *sock = tsock->sk.sk_socket;
+ struct sock *sk = &tsk->sk;
+ struct tipc_port *port = &tsk->port;
+ struct socket *sock = sk->sk_socket;
struct tipc_msg *msg = buf_msg(*buf);
- struct sock *sk = &tsock->sk;
+
u32 retval = TIPC_ERR_NO_PORT;
int res;
@@ -1288,10 +1285,10 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf)
switch ((int)sock->state) {
case SS_CONNECTED:
/* Accept only connection-based messages sent by peer */
- if (msg_connected(msg) && tipc_port_peer_msg(tsock->p, msg)) {
+ if (msg_connected(msg) && tipc_port_peer_msg(port, msg)) {
if (unlikely(msg_errcode(msg))) {
sock->state = SS_DISCONNECTING;
- __tipc_disconnect(tsock->p);
+ __tipc_port_disconnect(port);
}
retval = TIPC_OK;
}
@@ -1308,7 +1305,7 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf)
if (unlikely(!msg_connected(msg)))
break;
- res = auto_connect(sock, msg);
+ res = auto_connect(tsk, msg);
if (res) {
sock->state = SS_DISCONNECTING;
sk->sk_err = -res;
@@ -1387,6 +1384,7 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf)
static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
{
struct socket *sock = sk->sk_socket;
+ struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_msg *msg = buf_msg(buf);
unsigned int limit = rcvbuf_limit(sk, buf);
u32 res = TIPC_OK;
@@ -1399,7 +1397,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
if (msg_connected(msg))
return TIPC_ERR_NO_PORT;
} else {
- res = filter_connect(tipc_sk(sk), &buf);
+ res = filter_connect(tsk, &buf);
if (res != TIPC_OK || buf == NULL)
return res;
}
@@ -1437,17 +1435,16 @@ static int backlog_rcv(struct sock *sk, struct sk_buff *buf)
}
/**
- * dispatch - handle incoming message
- * @tport: TIPC port that received message
+ * tipc_sk_rcv - handle incoming message
+ * @sk: socket receiving message
* @buf: message
*
* Called with port lock already taken.
*
* Returns TIPC error status code (TIPC_OK if message is not to be rejected)
*/
-static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
+u32 tipc_sk_rcv(struct sock *sk, struct sk_buff *buf)
{
- struct sock *sk = tport->sk;
u32 res;
/*
@@ -1470,19 +1467,6 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
return res;
}
-/**
- * wakeupdispatch - wake up port after congestion
- * @tport: port to wakeup
- *
- * Called with port lock already taken.
- */
-static void wakeupdispatch(struct tipc_port *tport)
-{
- struct sock *sk = tport->sk;
-
- sk->sk_write_space(sk);
-}
-
static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
{
struct sock *sk = sock->sk;
@@ -1506,7 +1490,7 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
}
/**
- * connect - establish a connection to another TIPC port
+ * tipc_connect - establish a connection to another TIPC port
* @sock: socket structure
* @dest: socket address for destination port
* @destlen: size of socket address data structure
@@ -1514,8 +1498,8 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
*
* Returns 0 on success, errno otherwise
*/
-static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
- int flags)
+static int tipc_connect(struct socket *sock, struct sockaddr *dest,
+ int destlen, int flags)
{
struct sock *sk = sock->sk;
struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
@@ -1556,7 +1540,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
if (!timeout)
m.msg_flags = MSG_DONTWAIT;
- res = send_msg(NULL, sock, &m, 0);
+ res = tipc_sendmsg(NULL, sock, &m, 0);
if ((res < 0) && (res != -EWOULDBLOCK))
goto exit;
@@ -1587,13 +1571,13 @@ exit:
}
/**
- * listen - allow socket to listen for incoming connections
+ * tipc_listen - allow socket to listen for incoming connections
* @sock: socket structure
* @len: (unused)
*
* Returns 0 on success, errno otherwise
*/
-static int listen(struct socket *sock, int len)
+static int tipc_listen(struct socket *sock, int len)
{
struct sock *sk = sock->sk;
int res;
@@ -1625,7 +1609,7 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
for (;;) {
prepare_to_wait_exclusive(sk_sleep(sk), &wait,
TASK_INTERRUPTIBLE);
- if (skb_queue_empty(&sk->sk_receive_queue)) {
+ if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
release_sock(sk);
timeo = schedule_timeout(timeo);
lock_sock(sk);
@@ -1648,20 +1632,20 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
}
/**
- * accept - wait for connection request
+ * tipc_accept - wait for connection request
* @sock: listening socket
* @newsock: new socket that is to be connected
* @flags: file-related flags associated with socket
*
* Returns 0 on success, errno otherwise
*/
-static int accept(struct socket *sock, struct socket *new_sock, int flags)
+static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
{
struct sock *new_sk, *sk = sock->sk;
struct sk_buff *buf;
- struct tipc_sock *new_tsock;
- struct tipc_port *new_tport;
+ struct tipc_port *new_port;
struct tipc_msg *msg;
+ struct tipc_portid peer;
u32 new_ref;
long timeo;
int res;
@@ -1672,7 +1656,6 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
res = -EINVAL;
goto exit;
}
-
timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
res = tipc_wait_for_accept(sock, timeo);
if (res)
@@ -1685,9 +1668,8 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
goto exit;
new_sk = new_sock->sk;
- new_tsock = tipc_sk(new_sk);
- new_tport = new_tsock->p;
- new_ref = new_tport->ref;
+ new_port = &tipc_sk(new_sk)->port;
+ new_ref = new_port->ref;
msg = buf_msg(buf);
/* we lock on new_sk; but lockdep sees the lock on sk */
@@ -1700,15 +1682,15 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
reject_rx_queue(new_sk);
/* Connect new socket to it's peer */
- new_tsock->peer_name.ref = msg_origport(msg);
- new_tsock->peer_name.node = msg_orignode(msg);
- tipc_connect(new_ref, &new_tsock->peer_name);
+ peer.ref = msg_origport(msg);
+ peer.node = msg_orignode(msg);
+ tipc_port_connect(new_ref, &peer);
new_sock->state = SS_CONNECTED;
- tipc_set_portimportance(new_ref, msg_importance(msg));
+ tipc_port_set_importance(new_port, msg_importance(msg));
if (msg_named(msg)) {
- new_tport->conn_type = msg_nametype(msg);
- new_tport->conn_instance = msg_nameinst(msg);
+ new_port->conn_type = msg_nametype(msg);
+ new_port->conn_instance = msg_nameinst(msg);
}
/*
@@ -1719,21 +1701,20 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
struct msghdr m = {NULL,};
advance_rx_queue(sk);
- send_packet(NULL, new_sock, &m, 0);
+ tipc_send_packet(NULL, new_sock, &m, 0);
} else {
__skb_dequeue(&sk->sk_receive_queue);
__skb_queue_head(&new_sk->sk_receive_queue, buf);
skb_set_owner_r(buf, new_sk);
}
release_sock(new_sk);
-
exit:
release_sock(sk);
return res;
}
/**
- * shutdown - shutdown socket connection
+ * tipc_shutdown - shutdown socket connection
* @sock: socket structure
* @how: direction to close (must be SHUT_RDWR)
*
@@ -1741,10 +1722,11 @@ exit:
*
* Returns 0 on success, errno otherwise
*/
-static int shutdown(struct socket *sock, int how)
+static int tipc_shutdown(struct socket *sock, int how)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_port *port = &tsk->port;
struct sk_buff *buf;
int res;
@@ -1765,10 +1747,10 @@ restart:
kfree_skb(buf);
goto restart;
}
- tipc_disconnect(tport->ref);
+ tipc_port_disconnect(port->ref);
tipc_reject_msg(buf, TIPC_CONN_SHUTDOWN);
} else {
- tipc_shutdown(tport->ref);
+ tipc_port_shutdown(port->ref);
}
sock->state = SS_DISCONNECTING;
@@ -1794,7 +1776,7 @@ restart:
}
/**
- * setsockopt - set socket option
+ * tipc_setsockopt - set socket option
* @sock: socket structure
* @lvl: option level
* @opt: option identifier
@@ -1806,11 +1788,12 @@ restart:
*
* Returns 0 on success, errno otherwise
*/
-static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
- unsigned int ol)
+static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
+ char __user *ov, unsigned int ol)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_port *port = &tsk->port;
u32 value;
int res;
@@ -1828,16 +1811,16 @@ static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
switch (opt) {
case TIPC_IMPORTANCE:
- res = tipc_set_portimportance(tport->ref, value);
+ tipc_port_set_importance(port, value);
break;
case TIPC_SRC_DROPPABLE:
if (sock->type != SOCK_STREAM)
- res = tipc_set_portunreliable(tport->ref, value);
+ tipc_port_set_unreliable(port, value);
else
res = -ENOPROTOOPT;
break;
case TIPC_DEST_DROPPABLE:
- res = tipc_set_portunreturnable(tport->ref, value);
+ tipc_port_set_unreturnable(port, value);
break;
case TIPC_CONN_TIMEOUT:
tipc_sk(sk)->conn_timeout = value;
@@ -1853,7 +1836,7 @@ static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
}
/**
- * getsockopt - get socket option
+ * tipc_getsockopt - get socket option
* @sock: socket structure
* @lvl: option level
* @opt: option identifier
@@ -1865,11 +1848,12 @@ static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
*
* Returns 0 on success, errno otherwise
*/
-static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
- int __user *ol)
+static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
+ char __user *ov, int __user *ol)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_port *port = &tsk->port;
int len;
u32 value;
int res;
@@ -1886,13 +1870,13 @@ static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
switch (opt) {
case TIPC_IMPORTANCE:
- res = tipc_portimportance(tport->ref, &value);
+ value = tipc_port_importance(port);
break;
case TIPC_SRC_DROPPABLE:
- res = tipc_portunreliable(tport->ref, &value);
+ value = tipc_port_unreliable(port);
break;
case TIPC_DEST_DROPPABLE:
- res = tipc_portunreturnable(tport->ref, &value);
+ value = tipc_port_unreturnable(port);
break;
case TIPC_CONN_TIMEOUT:
value = tipc_sk(sk)->conn_timeout;
@@ -1927,20 +1911,20 @@ static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
static const struct proto_ops msg_ops = {
.owner = THIS_MODULE,
.family = AF_TIPC,
- .release = release,
- .bind = bind,
- .connect = connect,
+ .release = tipc_release,
+ .bind = tipc_bind,
+ .connect = tipc_connect,
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
- .getname = get_name,
- .poll = poll,
+ .getname = tipc_getname,
+ .poll = tipc_poll,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
- .shutdown = shutdown,
- .setsockopt = setsockopt,
- .getsockopt = getsockopt,
- .sendmsg = send_msg,
- .recvmsg = recv_msg,
+ .shutdown = tipc_shutdown,
+ .setsockopt = tipc_setsockopt,
+ .getsockopt = tipc_getsockopt,
+ .sendmsg = tipc_sendmsg,
+ .recvmsg = tipc_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage
};
@@ -1948,20 +1932,20 @@ static const struct proto_ops msg_ops = {
static const struct proto_ops packet_ops = {
.owner = THIS_MODULE,
.family = AF_TIPC,
- .release = release,
- .bind = bind,
- .connect = connect,
+ .release = tipc_release,
+ .bind = tipc_bind,
+ .connect = tipc_connect,
.socketpair = sock_no_socketpair,
- .accept = accept,
- .getname = get_name,
- .poll = poll,
+ .accept = tipc_accept,
+ .getname = tipc_getname,
+ .poll = tipc_poll,
.ioctl = sock_no_ioctl,
- .listen = listen,
- .shutdown = shutdown,
- .setsockopt = setsockopt,
- .getsockopt = getsockopt,
- .sendmsg = send_packet,
- .recvmsg = recv_msg,
+ .listen = tipc_listen,
+ .shutdown = tipc_shutdown,
+ .setsockopt = tipc_setsockopt,
+ .getsockopt = tipc_getsockopt,
+ .sendmsg = tipc_send_packet,
+ .recvmsg = tipc_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage
};
@@ -1969,20 +1953,20 @@ static const struct proto_ops packet_ops = {
static const struct proto_ops stream_ops = {
.owner = THIS_MODULE,
.family = AF_TIPC,
- .release = release,
- .bind = bind,
- .connect = connect,
+ .release = tipc_release,
+ .bind = tipc_bind,
+ .connect = tipc_connect,
.socketpair = sock_no_socketpair,
- .accept = accept,
- .getname = get_name,
- .poll = poll,
+ .accept = tipc_accept,
+ .getname = tipc_getname,
+ .poll = tipc_poll,
.ioctl = sock_no_ioctl,
- .listen = listen,
- .shutdown = shutdown,
- .setsockopt = setsockopt,
- .getsockopt = getsockopt,
- .sendmsg = send_stream,
- .recvmsg = recv_stream,
+ .listen = tipc_listen,
+ .shutdown = tipc_shutdown,
+ .setsockopt = tipc_setsockopt,
+ .getsockopt = tipc_getsockopt,
+ .sendmsg = tipc_send_stream,
+ .recvmsg = tipc_recv_stream,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage
};
@@ -2027,8 +2011,6 @@ int tipc_socket_init(void)
proto_unregister(&tipc_proto);
goto out;
}
-
- sockets_enabled = 1;
out:
return res;
}
@@ -2038,10 +2020,6 @@ int tipc_socket_init(void)
*/
void tipc_socket_stop(void)
{
- if (!sockets_enabled)
- return;
-
- sockets_enabled = 0;
sock_unregister(tipc_family_ops.family);
proto_unregister(&tipc_proto);
}
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
new file mode 100644
index 000000000000..74e5c7f195a6
--- /dev/null
+++ b/net/tipc/socket.h
@@ -0,0 +1,72 @@
+/* net/tipc/socket.h: Include file for TIPC socket code
+ *
+ * Copyright (c) 2014, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_SOCK_H
+#define _TIPC_SOCK_H
+
+#include "port.h"
+#include <net/sock.h>
+
+/**
+ * struct tipc_sock - TIPC socket structure
+ * @sk: socket - interacts with 'port' and with user via the socket API
+ * @port: port - interacts with 'sk' and with the rest of the TIPC stack
+ * @peer_name: the peer of the connection, if any
+ * @conn_timeout: the time we can wait for an unresponded setup request
+ */
+
+struct tipc_sock {
+ struct sock sk;
+ struct tipc_port port;
+ unsigned int conn_timeout;
+};
+
+static inline struct tipc_sock *tipc_sk(const struct sock *sk)
+{
+ return container_of(sk, struct tipc_sock, sk);
+}
+
+static inline struct tipc_sock *tipc_port_to_sock(const struct tipc_port *port)
+{
+ return container_of(port, struct tipc_sock, port);
+}
+
+static inline void tipc_sock_wakeup(struct tipc_sock *tsk)
+{
+ tsk->sk.sk_write_space(&tsk->sk);
+}
+
+u32 tipc_sk_rcv(struct sock *sk, struct sk_buff *buf);
+
+#endif
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 7cb0bd5b1176..11c9ae00837d 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -96,20 +96,16 @@ static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower,
{
struct tipc_subscriber *subscriber = sub->subscriber;
struct kvec msg_sect;
- int ret;
msg_sect.iov_base = (void *)&sub->evt;
msg_sect.iov_len = sizeof(struct tipc_event);
-
sub->evt.event = htohl(event, sub->swap);
sub->evt.found_lower = htohl(found_lower, sub->swap);
sub->evt.found_upper = htohl(found_upper, sub->swap);
sub->evt.port.ref = htohl(port_ref, sub->swap);
sub->evt.port.node = htohl(node, sub->swap);
- ret = tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL,
- msg_sect.iov_base, msg_sect.iov_len);
- if (ret < 0)
- pr_err("Sending subscription event failed, no memory\n");
+ tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL, msg_sect.iov_base,
+ msg_sect.iov_len);
}
/**
@@ -153,14 +149,6 @@ static void subscr_timeout(struct tipc_subscription *sub)
/* The spin lock per subscriber is used to protect its members */
spin_lock_bh(&subscriber->lock);
- /* Validate if the connection related to the subscriber is
- * closed (in case subscriber is terminating)
- */
- if (subscriber->conid == 0) {
- spin_unlock_bh(&subscriber->lock);
- return;
- }
-
/* Validate timeout (in case subscription is being cancelled) */
if (sub->timeout == TIPC_WAIT_FOREVER) {
spin_unlock_bh(&subscriber->lock);
@@ -215,9 +203,6 @@ static void subscr_release(struct tipc_subscriber *subscriber)
spin_lock_bh(&subscriber->lock);
- /* Invalidate subscriber reference */
- subscriber->conid = 0;
-
/* Destroy any existing subscriptions for subscriber */
list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list,
subscription_list) {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 29fc8bee9702..ce6ec6c2f4de 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -163,9 +163,8 @@ static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
static inline unsigned int unix_hash_fold(__wsum n)
{
- unsigned int hash = (__force unsigned int)n;
+ unsigned int hash = (__force unsigned int)csum_fold(n);
- hash ^= hash>>16;
hash ^= hash>>8;
return hash&(UNIX_HASH_SIZE-1);
}
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 8659d5cee2a6..9c9501a35fb5 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -768,6 +768,4 @@ cfg80211_get_chan_state(struct wireless_dev *wdev,
case NUM_NL80211_IFTYPES:
WARN_ON(1);
}
-
- return;
}
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 276cf938f764..086cddd03ba6 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -788,8 +788,6 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev,
default:
break;
}
-
- wdev->beacon_interval = 0;
}
static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 39fc1d70da69..e5872ff2c27c 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -11,6 +11,7 @@
#include <net/ip.h>
#include <net/dsfield.h>
#include <linux/if_vlan.h>
+#include <linux/mpls.h>
#include "core.h"
#include "rdev-ops.h"
@@ -717,6 +718,21 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
case htons(ETH_P_IPV6):
dscp = ipv6_get_dsfield(ipv6_hdr(skb)) & 0xfc;
break;
+ case htons(ETH_P_MPLS_UC):
+ case htons(ETH_P_MPLS_MC): {
+ struct mpls_label mpls_tmp, *mpls;
+
+ mpls = skb_header_pointer(skb, sizeof(struct ethhdr),
+ sizeof(*mpls), &mpls_tmp);
+ if (!mpls)
+ return 0;
+
+ return (ntohl(mpls->entry) & MPLS_LS_TC_MASK)
+ >> MPLS_LS_TC_SHIFT;
+ }
+ case htons(ETH_P_80221):
+ /* 802.21 is always network control traffic */
+ return 7;
default:
return 0;
}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 6c7ac016ce3a..85d1d4764612 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -16,6 +16,81 @@
static struct kmem_cache *secpath_cachep __read_mostly;
+static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
+static struct xfrm_input_afinfo __rcu *xfrm_input_afinfo[NPROTO];
+
+int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo)
+{
+ int err = 0;
+
+ if (unlikely(afinfo == NULL))
+ return -EINVAL;
+ if (unlikely(afinfo->family >= NPROTO))
+ return -EAFNOSUPPORT;
+ spin_lock_bh(&xfrm_input_afinfo_lock);
+ if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL))
+ err = -ENOBUFS;
+ else
+ rcu_assign_pointer(xfrm_input_afinfo[afinfo->family], afinfo);
+ spin_unlock_bh(&xfrm_input_afinfo_lock);
+ return err;
+}
+EXPORT_SYMBOL(xfrm_input_register_afinfo);
+
+int xfrm_input_unregister_afinfo(struct xfrm_input_afinfo *afinfo)
+{
+ int err = 0;
+
+ if (unlikely(afinfo == NULL))
+ return -EINVAL;
+ if (unlikely(afinfo->family >= NPROTO))
+ return -EAFNOSUPPORT;
+ spin_lock_bh(&xfrm_input_afinfo_lock);
+ if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) {
+ if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo))
+ err = -EINVAL;
+ else
+ RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->family], NULL);
+ }
+ spin_unlock_bh(&xfrm_input_afinfo_lock);
+ synchronize_rcu();
+ return err;
+}
+EXPORT_SYMBOL(xfrm_input_unregister_afinfo);
+
+static struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family)
+{
+ struct xfrm_input_afinfo *afinfo;
+
+ if (unlikely(family >= NPROTO))
+ return NULL;
+ rcu_read_lock();
+ afinfo = rcu_dereference(xfrm_input_afinfo[family]);
+ if (unlikely(!afinfo))
+ rcu_read_unlock();
+ return afinfo;
+}
+
+static void xfrm_input_put_afinfo(struct xfrm_input_afinfo *afinfo)
+{
+ rcu_read_unlock();
+}
+
+static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol,
+ int err)
+{
+ int ret;
+ struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family);
+
+ if (!afinfo)
+ return -EAFNOSUPPORT;
+
+ ret = afinfo->callback(skb, protocol, err);
+ xfrm_input_put_afinfo(afinfo);
+
+ return ret;
+}
+
void __secpath_destroy(struct sec_path *sp)
{
int i;
@@ -108,7 +183,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
int err;
__be32 seq;
__be32 seq_hi;
- struct xfrm_state *x;
+ struct xfrm_state *x = NULL;
xfrm_address_t *daddr;
struct xfrm_mode *inner_mode;
unsigned int family;
@@ -120,9 +195,14 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
async = 1;
x = xfrm_input_state(skb);
seq = XFRM_SKB_CB(skb)->seq.input.low;
+ family = x->outer_mode->afinfo->family;
goto resume;
}
+ daddr = (xfrm_address_t *)(skb_network_header(skb) +
+ XFRM_SPI_SKB_CB(skb)->daddroff);
+ family = XFRM_SPI_SKB_CB(skb)->family;
+
/* Allocate new secpath or COW existing one. */
if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
struct sec_path *sp;
@@ -137,10 +217,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
skb->sp = sp;
}
- daddr = (xfrm_address_t *)(skb_network_header(skb) +
- XFRM_SPI_SKB_CB(skb)->daddroff);
- family = XFRM_SPI_SKB_CB(skb)->family;
-
seq = 0;
if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
@@ -162,6 +238,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
skb->sp->xvec[skb->sp->len++] = x;
+ if (xfrm_tunnel_check(skb, x, family)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
+ goto drop;
+ }
+
spin_lock(&x->lock);
if (unlikely(x->km.state == XFRM_STATE_ACQ)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
@@ -201,7 +282,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
if (nexthdr == -EINPROGRESS)
return 0;
-
resume:
spin_lock(&x->lock);
if (nexthdr <= 0) {
@@ -263,6 +343,10 @@ resume:
}
} while (!err);
+ err = xfrm_rcv_cb(skb, family, x->type->proto, 0);
+ if (err)
+ goto drop;
+
nf_reset(skb);
if (decaps) {
@@ -276,6 +360,7 @@ resume:
drop_unlock:
spin_unlock(&x->lock);
drop:
+ xfrm_rcv_cb(skb, family, x && x->type ? x->type->proto : nexthdr, -1);
kfree_skb(skb);
return 0;
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 4b98b25793c5..f02f511b7107 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -39,8 +39,6 @@
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
#define XFRM_MAX_QUEUE_LEN 100
-static struct dst_entry *xfrm_policy_sk_bundles;
-
static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
__read_mostly;
@@ -661,7 +659,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
hlist_add_head(&policy->bydst, chain);
xfrm_pol_hold(policy);
net->xfrm.policy_count[dir]++;
- atomic_inc(&flow_cache_genid);
+ atomic_inc(&net->xfrm.flow_cache_genid);
/* After previous checking, family can either be AF_INET or AF_INET6 */
if (policy->family == AF_INET)
@@ -1158,7 +1156,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
if (hlist_unhashed(&pol->bydst))
return NULL;
- hlist_del(&pol->bydst);
+ hlist_del_init(&pol->bydst);
hlist_del(&pol->byidx);
list_del(&pol->walk.all);
net->xfrm.policy_count[dir]--;
@@ -2109,13 +2107,6 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
goto no_transform;
}
- dst_hold(&xdst->u.dst);
-
- spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
- xdst->u.dst.next = xfrm_policy_sk_bundles;
- xfrm_policy_sk_bundles = &xdst->u.dst;
- spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
-
route = xdst->route;
}
}
@@ -2549,33 +2540,15 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
return dst;
}
-static void __xfrm_garbage_collect(struct net *net)
-{
- struct dst_entry *head, *next;
-
- spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
- head = xfrm_policy_sk_bundles;
- xfrm_policy_sk_bundles = NULL;
- spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
-
- while (head) {
- next = head->next;
- dst_free(head);
- head = next;
- }
-}
-
void xfrm_garbage_collect(struct net *net)
{
- flow_cache_flush();
- __xfrm_garbage_collect(net);
+ flow_cache_flush(net);
}
EXPORT_SYMBOL(xfrm_garbage_collect);
static void xfrm_garbage_collect_deferred(struct net *net)
{
- flow_cache_flush_deferred();
- __xfrm_garbage_collect(net);
+ flow_cache_flush_deferred(net);
}
static void xfrm_init_pmtu(struct dst_entry *dst)
@@ -2940,15 +2913,19 @@ static int __net_init xfrm_net_init(struct net *net)
rv = xfrm_sysctl_init(net);
if (rv < 0)
goto out_sysctl;
+ rv = flow_cache_init(net);
+ if (rv < 0)
+ goto out;
/* Initialize the per-net locks here */
spin_lock_init(&net->xfrm.xfrm_state_lock);
rwlock_init(&net->xfrm.xfrm_policy_lock);
- spin_lock_init(&net->xfrm.xfrm_policy_sk_bundle_lock);
mutex_init(&net->xfrm.xfrm_cfg_mutex);
return 0;
+out:
+ xfrm_sysctl_fini(net);
out_sysctl:
xfrm_policy_fini(net);
out_policy:
@@ -2961,6 +2938,7 @@ out_statistics:
static void __net_exit xfrm_net_exit(struct net *net)
{
+ flow_cache_fini(net);
xfrm_sysctl_fini(net);
xfrm_policy_fini(net);
xfrm_state_fini(net);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index a26b7aa79475..8e9c781a6bba 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -161,6 +161,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock);
int __xfrm_state_delete(struct xfrm_state *x);
int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
+bool km_is_alive(const struct km_event *c);
void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
static DEFINE_SPINLOCK(xfrm_type_lock);
@@ -788,6 +789,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
struct xfrm_state *best = NULL;
u32 mark = pol->mark.v & pol->mark.m;
unsigned short encap_family = tmpl->encap_family;
+ struct km_event c;
to_put = NULL;
@@ -832,6 +834,17 @@ found:
error = -EEXIST;
goto out;
}
+
+ c.net = net;
+ /* If the KMs have no listeners (yet...), avoid allocating an SA
+ * for each and every packet - garbage collection might not
+ * handle the flood.
+ */
+ if (!km_is_alive(&c)) {
+ error = -ESRCH;
+ goto out;
+ }
+
x = xfrm_state_alloc(net);
if (x == NULL) {
error = -ENOMEM;
@@ -1135,10 +1148,9 @@ out:
EXPORT_SYMBOL(xfrm_state_add);
#ifdef CONFIG_XFRM_MIGRATE
-static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
+static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig)
{
struct net *net = xs_net(orig);
- int err = -ENOMEM;
struct xfrm_state *x = xfrm_state_alloc(net);
if (!x)
goto out;
@@ -1159,6 +1171,11 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
}
x->props.aalgo = orig->props.aalgo;
+ if (orig->aead) {
+ x->aead = xfrm_algo_aead_clone(orig->aead);
+ if (!x->aead)
+ goto error;
+ }
if (orig->ealg) {
x->ealg = xfrm_algo_clone(orig->ealg);
if (!x->ealg)
@@ -1187,20 +1204,21 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
}
if (orig->replay_esn) {
- err = xfrm_replay_clone(x, orig);
- if (err)
+ if (xfrm_replay_clone(x, orig))
goto error;
}
memcpy(&x->mark, &orig->mark, sizeof(x->mark));
- err = xfrm_init_state(x);
- if (err)
+ if (xfrm_init_state(x) < 0)
goto error;
x->props.flags = orig->props.flags;
x->props.extra_flags = orig->props.extra_flags;
+ x->tfcpad = orig->tfcpad;
+ x->replay_maxdiff = orig->replay_maxdiff;
+ x->replay_maxage = orig->replay_maxage;
x->curlft.add_time = orig->curlft.add_time;
x->km.state = orig->km.state;
x->km.seq = orig->km.seq;
@@ -1210,16 +1228,15 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
error:
xfrm_state_put(x);
out:
- if (errp)
- *errp = err;
return NULL;
}
-/* net->xfrm.xfrm_state_lock is held */
struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net)
{
unsigned int h;
- struct xfrm_state *x;
+ struct xfrm_state *x = NULL;
+
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
if (m->reqid) {
h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr,
@@ -1236,7 +1253,7 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n
m->old_family))
continue;
xfrm_state_hold(x);
- return x;
+ break;
}
} else {
h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr,
@@ -1251,11 +1268,13 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n
m->old_family))
continue;
xfrm_state_hold(x);
- return x;
+ break;
}
}
- return NULL;
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+
+ return x;
}
EXPORT_SYMBOL(xfrm_migrate_state_find);
@@ -1263,9 +1282,8 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
struct xfrm_migrate *m)
{
struct xfrm_state *xc;
- int err;
- xc = xfrm_state_clone(x, &err);
+ xc = xfrm_state_clone(x);
if (!xc)
return NULL;
@@ -1278,7 +1296,7 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
state is to be updated as it is a part of triplet */
xfrm_state_insert(xc);
} else {
- if ((err = xfrm_state_add(xc)) < 0)
+ if (xfrm_state_add(xc) < 0)
goto error;
}
@@ -1451,7 +1469,7 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
{
int err = 0;
struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
- struct net *net = xs_net(*dst);
+ struct net *net = xs_net(*src);
if (!afinfo)
return -EAFNOSUPPORT;
@@ -1590,6 +1608,23 @@ unlock:
}
EXPORT_SYMBOL(xfrm_alloc_spi);
+static bool __xfrm_state_filter_match(struct xfrm_state *x,
+ struct xfrm_address_filter *filter)
+{
+ if (filter) {
+ if ((filter->family == AF_INET ||
+ filter->family == AF_INET6) &&
+ x->props.family != filter->family)
+ return false;
+
+ return addr_match(&x->props.saddr, &filter->saddr,
+ filter->splen) &&
+ addr_match(&x->id.daddr, &filter->daddr,
+ filter->dplen);
+ }
+ return true;
+}
+
int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
int (*func)(struct xfrm_state *, int, void*),
void *data)
@@ -1612,6 +1647,8 @@ int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
state = container_of(x, struct xfrm_state, km);
if (!xfrm_id_proto_match(state->id.proto, walk->proto))
continue;
+ if (!__xfrm_state_filter_match(state, walk->filter))
+ continue;
err = func(state, walk->seq, data);
if (err) {
list_move_tail(&walk->all, &x->all);
@@ -1630,17 +1667,21 @@ out:
}
EXPORT_SYMBOL(xfrm_state_walk);
-void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto)
+void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
+ struct xfrm_address_filter *filter)
{
INIT_LIST_HEAD(&walk->all);
walk->proto = proto;
walk->state = XFRM_STATE_DEAD;
walk->seq = 0;
+ walk->filter = filter;
}
EXPORT_SYMBOL(xfrm_state_walk_init);
void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net)
{
+ kfree(walk->filter);
+
if (list_empty(&walk->all))
return;
@@ -1793,6 +1834,24 @@ int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address
}
EXPORT_SYMBOL(km_report);
+bool km_is_alive(const struct km_event *c)
+{
+ struct xfrm_mgr *km;
+ bool is_alive = false;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(km, &xfrm_km_list, list) {
+ if (km->is_alive && km->is_alive(c)) {
+ is_alive = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return is_alive;
+}
+EXPORT_SYMBOL(km_is_alive);
+
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
int err;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 1ae3ec7c18b0..cdd9e9c7ff0e 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -32,11 +32,6 @@
#include <linux/in6.h>
#endif
-static inline int aead_len(struct xfrm_algo_aead *alg)
-{
- return sizeof(*alg) + ((alg->alg_key_len + 7) / 8);
-}
-
static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type)
{
struct nlattr *rt = attrs[type];
@@ -142,7 +137,8 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
if (!rt)
return 0;
- if (p->id.proto != IPPROTO_ESP)
+ /* As only ESP and AH support ESN feature. */
+ if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH))
return -EINVAL;
if (p->replay_window != 0)
@@ -886,6 +882,7 @@ static int xfrm_dump_sa_done(struct netlink_callback *cb)
return 0;
}
+static const struct nla_policy xfrma_policy[XFRMA_MAX+1];
static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
@@ -901,8 +898,31 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
info.nlmsg_flags = NLM_F_MULTI;
if (!cb->args[0]) {
+ struct nlattr *attrs[XFRMA_MAX+1];
+ struct xfrm_address_filter *filter = NULL;
+ u8 proto = 0;
+ int err;
+
cb->args[0] = 1;
- xfrm_state_walk_init(walk, 0);
+
+ err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX,
+ xfrma_policy);
+ if (err < 0)
+ return err;
+
+ if (attrs[XFRMA_ADDRESS_FILTER]) {
+ filter = kmalloc(sizeof(*filter), GFP_KERNEL);
+ if (filter == NULL)
+ return -ENOMEM;
+
+ memcpy(filter, nla_data(attrs[XFRMA_ADDRESS_FILTER]),
+ sizeof(*filter));
+ }
+
+ if (attrs[XFRMA_PROTO])
+ proto = nla_get_u8(attrs[XFRMA_PROTO]);
+
+ xfrm_state_walk_init(walk, proto, filter);
}
(void) xfrm_state_walk(net, walk, dump_one_state, &info);
@@ -2308,6 +2328,8 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_TFCPAD] = { .type = NLA_U32 },
[XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) },
[XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 },
+ [XFRMA_PROTO] = { .type = NLA_U8 },
+ [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) },
};
static const struct xfrm_link {
@@ -2981,6 +3003,11 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC);
}
+static bool xfrm_is_alive(const struct km_event *c)
+{
+ return (bool)xfrm_acquire_is_on(c->net);
+}
+
static struct xfrm_mgr netlink_mgr = {
.id = "netlink",
.notify = xfrm_send_state_notify,
@@ -2990,6 +3017,7 @@ static struct xfrm_mgr netlink_mgr = {
.report = xfrm_send_report,
.migrate = xfrm_send_migrate,
.new_mapping = xfrm_send_mapping,
+ .is_alive = xfrm_is_alive,
};
static int __net_init xfrm_user_net_init(struct net *net)