summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_dev.c4
-rw-r--r--net/8021q/vlan_netlink.c10
-rw-r--r--net/9p/Kconfig1
-rw-r--r--net/9p/client.c51
-rw-r--r--net/9p/trans_virtio.c1
-rw-r--r--net/Kconfig19
-rw-r--r--net/appletalk/ddp.c19
-rw-r--r--net/appletalk/sysctl_net_atalk.c1
-rw-r--r--net/atm/clip.c4
-rw-r--r--net/atm/common.c2
-rw-r--r--net/atm/ioctl.c4
-rw-r--r--net/atm/svc.c8
-rw-r--r--net/ax25/Kconfig2
-rw-r--r--net/ax25/af_ax25.c12
-rw-r--r--net/ax25/ax25_dev.c53
-rw-r--r--net/ax25/sysctl_net_ax25.c5
-rw-r--r--net/batman-adv/main.c2
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/netlink.c1
-rw-r--r--net/batman-adv/originator.c29
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/batman-adv/trace.h4
-rw-r--r--net/bluetooth/6lowpan.c2
-rw-r--r--net/bluetooth/Makefile3
-rw-r--r--net/bluetooth/hci_conn.c166
-rw-r--r--net/bluetooth/hci_core.c307
-rw-r--r--net/bluetooth/hci_debugfs.c1
-rw-r--r--net/bluetooth/hci_event.c277
-rw-r--r--net/bluetooth/hci_request.c903
-rw-r--r--net/bluetooth/hci_request.h75
-rw-r--r--net/bluetooth/hci_sock.c5
-rw-r--r--net/bluetooth/hci_sync.c360
-rw-r--r--net/bluetooth/iso.c163
-rw-r--r--net/bluetooth/l2cap_core.c174
-rw-r--r--net/bluetooth/l2cap_sock.c107
-rw-r--r--net/bluetooth/mgmt.c135
-rw-r--r--net/bluetooth/msft.c3
-rw-r--r--net/bluetooth/msft.h4
-rw-r--r--net/bluetooth/rfcomm/sock.c6
-rw-r--r--net/bluetooth/rfcomm/tty.c23
-rw-r--r--net/bluetooth/sco.c14
-rw-r--r--net/bpf/bpf_dummy_struct_ops.c63
-rw-r--r--net/bpf/test_run.c58
-rw-r--r--net/bridge/br_device.c10
-rw-r--r--net/bridge/br_forward.c13
-rw-r--r--net/bridge/br_mst.c29
-rw-r--r--net/bridge/br_multicast.c4
-rw-r--r--net/bridge/br_netfilter_hooks.c34
-rw-r--r--net/bridge/br_netlink_tunnel.c4
-rw-r--r--net/bridge/br_vlan_tunnel.c9
-rw-r--r--net/bridge/netfilter/nf_conntrack_bridge.c6
-rw-r--r--net/caif/cfctrl.c8
-rw-r--r--net/caif/cfpkt_skbuff.c7
-rw-r--r--net/can/Kconfig11
-rw-r--r--net/can/isotp.c11
-rw-r--r--net/can/j1939/main.c6
-rw-r--r--net/can/j1939/transport.c21
-rw-r--r--net/ceph/crush/mapper.c7
-rw-r--r--net/ceph/mon_client.c14
-rw-r--r--net/core/Makefile3
-rw-r--r--net/core/bpf_sk_storage.c23
-rw-r--r--net/core/datagram.c86
-rw-r--r--net/core/dev.c680
-rw-r--r--net/core/dev.h46
-rw-r--r--net/core/dev_addr_lists_test.c14
-rw-r--r--net/core/dev_ioctl.c9
-rw-r--r--net/core/drop_monitor.c29
-rw-r--r--net/core/dst_cache.c13
-rw-r--r--net/core/fib_rules.c17
-rw-r--r--net/core/filter.c278
-rw-r--r--net/core/flow_dissector.c84
-rw-r--r--net/core/gen_estimator.c2
-rw-r--r--net/core/gro.c31
-rw-r--r--net/core/hotdata.c7
-rw-r--r--net/core/ieee8021q_helpers.c242
-rw-r--r--net/core/link_watch.c4
-rw-r--r--net/core/lwt_bpf.c9
-rw-r--r--net/core/neighbour.c99
-rw-r--r--net/core/net-procfs.c3
-rw-r--r--net/core/net-sysfs.c18
-rw-r--r--net/core/net_namespace.c27
-rw-r--r--net/core/net_test.c (renamed from net/core/gso_test.c)129
-rw-r--r--net/core/netdev-genl-gen.c1
-rw-r--r--net/core/netdev-genl.c93
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/page_pool.c418
-rw-r--r--net/core/rtnetlink.c239
-rw-r--r--net/core/scm.c12
-rw-r--r--net/core/skbuff.c228
-rw-r--r--net/core/skmsg.c3
-rw-r--r--net/core/sock.c57
-rw-r--r--net/core/sock_diag.c8
-rw-r--r--net/core/sock_map.c285
-rw-r--r--net/core/sysctl_net_core.c115
-rw-r--r--net/core/timestamping.c5
-rw-r--r--net/core/xdp.c8
-rw-r--r--net/dccp/ccids/ccid2.c1
-rw-r--r--net/dccp/ipv4.c19
-rw-r--r--net/dccp/ipv6.c23
-rw-r--r--net/dccp/minisocks.c12
-rw-r--r--net/dccp/output.c2
-rw-r--r--net/dccp/sysctl.c2
-rw-r--r--net/devlink/core.c6
-rw-r--r--net/devlink/dev.c14
-rw-r--r--net/devlink/dpipe.c2
-rw-r--r--net/devlink/param.c7
-rw-r--r--net/devlink/port.c53
-rw-r--r--net/dsa/Kconfig8
-rw-r--r--net/dsa/Makefile1
-rw-r--r--net/dsa/devlink.c3
-rw-r--r--net/dsa/dsa.c8
-rw-r--r--net/dsa/port.c243
-rw-r--r--net/dsa/tag_8021q.c84
-rw-r--r--net/dsa/tag_8021q.h7
-rw-r--r--net/dsa/tag_ocelot_8021q.c2
-rw-r--r--net/dsa/tag_sja1105.c72
-rw-r--r--net/dsa/tag_vsc73xx_8021q.c68
-rw-r--r--net/dsa/trace.h34
-rw-r--r--net/dsa/user.c206
-rw-r--r--net/dsa/user.h2
-rw-r--r--net/ethernet/eth.c4
-rw-r--r--net/ethtool/Makefile2
-rw-r--r--net/ethtool/cabletest.c4
-rw-r--r--net/ethtool/channels.c6
-rw-r--r--net/ethtool/cmis.h124
-rw-r--r--net/ethtool/cmis_cdb.c602
-rw-r--r--net/ethtool/cmis_fw_update.c403
-rw-r--r--net/ethtool/coalesce.c274
-rw-r--r--net/ethtool/common.c76
-rw-r--r--net/ethtool/common.h4
-rw-r--r--net/ethtool/eeprom.c6
-rw-r--r--net/ethtool/ioctl.c244
-rw-r--r--net/ethtool/linkstate.c41
-rw-r--r--net/ethtool/module.c394
-rw-r--r--net/ethtool/module_fw.h75
-rw-r--r--net/ethtool/netlink.c56
-rw-r--r--net/ethtool/netlink.h16
-rw-r--r--net/ethtool/pse-pd.c173
-rw-r--r--net/ethtool/rss.c8
-rw-r--r--net/ethtool/tsinfo.c58
-rw-r--r--net/ethtool/wol.c2
-rw-r--r--net/handshake/tlshd.c1
-rw-r--r--net/hsr/hsr_device.c124
-rw-r--r--net/hsr/hsr_device.h4
-rw-r--r--net/hsr/hsr_forward.c126
-rw-r--r--net/hsr/hsr_framereg.c64
-rw-r--r--net/hsr/hsr_framereg.h6
-rw-r--r--net/hsr/hsr_main.c2
-rw-r--r--net/hsr/hsr_main.h11
-rw-r--r--net/hsr/hsr_netlink.c31
-rw-r--r--net/hsr/hsr_slave.c1
-rw-r--r--net/ieee802154/6lowpan/reassembly.c10
-rw-r--r--net/ieee802154/trace.h2
-rw-r--r--net/ipv4/af_inet.c64
-rw-r--r--net/ipv4/arp.c204
-rw-r--r--net/ipv4/bpf_tcp_ca.c12
-rw-r--r--net/ipv4/cipso_ipv4.c84
-rw-r--r--net/ipv4/devinet.c42
-rw-r--r--net/ipv4/esp4.c26
-rw-r--r--net/ipv4/esp4_offload.c24
-rw-r--r--net/ipv4/fib_frontend.c7
-rw-r--r--net/ipv4/fib_semantics.c20
-rw-r--r--net/ipv4/fib_trie.c1
-rw-r--r--net/ipv4/fou_bpf.c2
-rw-r--r--net/ipv4/fou_core.c2
-rw-r--r--net/ipv4/gre_demux.c2
-rw-r--r--net/ipv4/icmp.c30
-rw-r--r--net/ipv4/igmp.c3
-rw-r--r--net/ipv4/inet_connection_sock.c82
-rw-r--r--net/ipv4/inet_diag.c2
-rw-r--r--net/ipv4/inet_fragment.c6
-rw-r--r--net/ipv4/inet_hashtables.c3
-rw-r--r--net/ipv4/inet_timewait_sock.c79
-rw-r--r--net/ipv4/ip_fragment.c6
-rw-r--r--net/ipv4/ip_gre.c146
-rw-r--r--net/ipv4/ip_input.c2
-rw-r--r--net/ipv4/ip_output.c22
-rw-r--r--net/ipv4/ip_tunnel.c129
-rw-r--r--net/ipv4/ip_tunnel_core.c82
-rw-r--r--net/ipv4/ip_vti.c41
-rw-r--r--net/ipv4/ipip.c33
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/metrics.c8
-rw-r--r--net/ipv4/netfilter/iptable_filter.c2
-rw-r--r--net/ipv4/netfilter/iptable_nat.c18
-rw-r--r--net/ipv4/netfilter/nf_tproxy_ipv4.c2
-rw-r--r--net/ipv4/nexthop.c7
-rw-r--r--net/ipv4/ping.c2
-rw-r--r--net/ipv4/proc.c3
-rw-r--r--net/ipv4/raw.c6
-rw-r--r--net/ipv4/route.c106
-rw-r--r--net/ipv4/syncookies.c5
-rw-r--r--net/ipv4/sysctl_net_ipv4.c117
-rw-r--r--net/ipv4/tcp.c196
-rw-r--r--net/ipv4/tcp_ao.c86
-rw-r--r--net/ipv4/tcp_bbr.c6
-rw-r--r--net/ipv4/tcp_cong.c20
-rw-r--r--net/ipv4/tcp_cubic.c4
-rw-r--r--net/ipv4/tcp_dctcp.c17
-rw-r--r--net/ipv4/tcp_fastopen.c7
-rw-r--r--net/ipv4/tcp_input.c229
-rw-r--r--net/ipv4/tcp_ipv4.c123
-rw-r--r--net/ipv4/tcp_metrics.c8
-rw-r--r--net/ipv4/tcp_minisocks.c67
-rw-r--r--net/ipv4/tcp_offload.c241
-rw-r--r--net/ipv4/tcp_output.c172
-rw-r--r--net/ipv4/tcp_sigpool.c17
-rw-r--r--net/ipv4/tcp_timer.c42
-rw-r--r--net/ipv4/udp.c78
-rw-r--r--net/ipv4/udp_offload.c54
-rw-r--r--net/ipv4/udp_tunnel_core.c5
-rw-r--r--net/ipv4/xfrm4_input.c19
-rw-r--r--net/ipv4/xfrm4_policy.c5
-rw-r--r--net/ipv6/addrconf.c40
-rw-r--r--net/ipv6/addrlabel.c18
-rw-r--r--net/ipv6/af_inet6.c3
-rw-r--r--net/ipv6/anycast.c5
-rw-r--r--net/ipv6/esp6.c18
-rw-r--r--net/ipv6/esp6_offload.c7
-rw-r--r--net/ipv6/fib6_rules.c6
-rw-r--r--net/ipv6/icmp.c9
-rw-r--r--net/ipv6/ila/ila_lwt.c11
-rw-r--r--net/ipv6/inet6_hashtables.c4
-rw-r--r--net/ipv6/ioam6_iptunnel.c8
-rw-r--r--net/ipv6/ip6_fib.c60
-rw-r--r--net/ipv6/ip6_gre.c110
-rw-r--r--net/ipv6/ip6_offload.c18
-rw-r--r--net/ipv6/ip6_output.c34
-rw-r--r--net/ipv6/ip6_tunnel.c18
-rw-r--r--net/ipv6/ip6_vti.c14
-rw-r--r--net/ipv6/ip6mr.c2
-rw-r--r--net/ipv6/ipv6_sockglue.c3
-rw-r--r--net/ipv6/ndisc.c40
-rw-r--r--net/ipv6/netfilter.c7
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c2
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c14
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c9
-rw-r--r--net/ipv6/ping.c2
-rw-r--r--net/ipv6/proc.c2
-rw-r--r--net/ipv6/raw.c14
-rw-r--r--net/ipv6/reassembly.c8
-rw-r--r--net/ipv6/route.c105
-rw-r--r--net/ipv6/rpl_iptunnel.c14
-rw-r--r--net/ipv6/seg6.c34
-rw-r--r--net/ipv6/seg6_hmac.c42
-rw-r--r--net/ipv6/seg6_iptunnel.c25
-rw-r--r--net/ipv6/seg6_local.c30
-rw-r--r--net/ipv6/sit.c38
-rw-r--r--net/ipv6/syncookies.c4
-rw-r--r--net/ipv6/sysctl_net_ipv6.c18
-rw-r--r--net/ipv6/tcp_ipv6.c93
-rw-r--r--net/ipv6/tcpv6_offload.c123
-rw-r--r--net/ipv6/udp.c48
-rw-r--r--net/ipv6/xfrm6_input.c26
-rw-r--r--net/ipv6/xfrm6_policy.c20
-rw-r--r--net/iucv/af_iucv.c10
-rw-r--r--net/iucv/iucv.c66
-rw-r--r--net/l2tp/l2tp_core.c517
-rw-r--r--net/l2tp/l2tp_core.h43
-rw-r--r--net/l2tp/l2tp_debugfs.c13
-rw-r--r--net/l2tp/l2tp_ip.c4
-rw-r--r--net/l2tp/l2tp_ip6.c4
-rw-r--r--net/l2tp/l2tp_netlink.c6
-rw-r--r--net/l2tp/l2tp_ppp.c6
-rw-r--r--net/llc/af_llc.c7
-rw-r--r--net/llc/llc_c_st.c500
-rw-r--r--net/llc/llc_conn.c20
-rw-r--r--net/llc/llc_s_st.c26
-rw-r--r--net/llc/llc_sap.c12
-rw-r--r--net/llc/sysctl_net_llc.c8
-rw-r--r--net/mac80211/agg-tx.c4
-rw-r--r--net/mac80211/cfg.c338
-rw-r--r--net/mac80211/chan.c434
-rw-r--r--net/mac80211/debugfs.c2
-rw-r--r--net/mac80211/driver-ops.c23
-rw-r--r--net/mac80211/driver-ops.h14
-rw-r--r--net/mac80211/drop.h3
-rw-r--r--net/mac80211/he.c10
-rw-r--r--net/mac80211/ht.c4
-rw-r--r--net/mac80211/ibss.c11
-rw-r--r--net/mac80211/ieee80211_i.h95
-rw-r--r--net/mac80211/iface.c100
-rw-r--r--net/mac80211/link.c48
-rw-r--r--net/mac80211/main.c69
-rw-r--r--net/mac80211/mesh.c3
-rw-r--r--net/mac80211/mesh_pathtbl.c13
-rw-r--r--net/mac80211/mlme.c1174
-rw-r--r--net/mac80211/offchannel.c47
-rw-r--r--net/mac80211/parse.c102
-rw-r--r--net/mac80211/pm.c4
-rw-r--r--net/mac80211/rx.c14
-rw-r--r--net/mac80211/scan.c47
-rw-r--r--net/mac80211/spectmgmt.c41
-rw-r--r--net/mac80211/sta_info.c4
-rw-r--r--net/mac80211/sta_info.h10
-rw-r--r--net/mac80211/status.c22
-rw-r--r--net/mac80211/tests/Makefile2
-rw-r--r--net/mac80211/tests/tpe.c284
-rw-r--r--net/mac80211/trace.h17
-rw-r--r--net/mac80211/tx.c17
-rw-r--r--net/mac80211/util.c216
-rw-r--r--net/mac80211/vht.c73
-rw-r--r--net/mac80211/wpa.c12
-rw-r--r--net/mac802154/main.c14
-rw-r--r--net/mac802154/tx.c8
-rw-r--r--net/mpls/af_mpls.c82
-rw-r--r--net/mpls/mpls_iptunnel.c4
-rw-r--r--net/mptcp/ctrl.c71
-rw-r--r--net/mptcp/diag.c2
-rw-r--r--net/mptcp/mib.c2
-rw-r--r--net/mptcp/mib.h4
-rw-r--r--net/mptcp/options.c6
-rw-r--r--net/mptcp/pm.c12
-rw-r--r--net/mptcp/pm_netlink.c115
-rw-r--r--net/mptcp/pm_userspace.c19
-rw-r--r--net/mptcp/protocol.c66
-rw-r--r--net/mptcp/protocol.h58
-rw-r--r--net/mptcp/sched.c22
-rw-r--r--net/mptcp/sockopt.c88
-rw-r--r--net/mptcp/subflow.c119
-rw-r--r--net/ncsi/internal.h2
-rw-r--r--net/ncsi/ncsi-manage.c73
-rw-r--r--net/ncsi/ncsi-rsp.c4
-rw-r--r--net/netfilter/Makefile7
-rw-r--r--net/netfilter/core.c13
-rw-r--r--net/netfilter/ipset/ip_set_core.c92
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c33
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c65
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c36
-rw-r--r--net/netfilter/nf_conncount.c8
-rw-r--r--net/netfilter/nf_conntrack_bpf.c68
-rw-r--r--net/netfilter/nf_conntrack_core.c6
-rw-r--r--net/netfilter/nf_conntrack_netlink.c3
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_icmpv6.c4
-rw-r--r--net/netfilter/nf_conntrack_standalone.c25
-rw-r--r--net/netfilter/nf_flow_table_bpf.c121
-rw-r--r--net/netfilter/nf_flow_table_core.c8
-rw-r--r--net/netfilter/nf_flow_table_inet.c2
-rw-r--r--net/netfilter/nf_flow_table_ip.c8
-rw-r--r--net/netfilter/nf_flow_table_offload.c4
-rw-r--r--net/netfilter/nf_flow_table_xdp.c147
-rw-r--r--net/netfilter/nf_hooks_lwtunnel.c72
-rw-r--r--net/netfilter/nf_internals.h6
-rw-r--r--net/netfilter/nf_log.c7
-rw-r--r--net/netfilter/nf_tables_api.c743
-rw-r--r--net/netfilter/nf_tables_offload.c40
-rw-r--r--net/netfilter/nf_tables_trace.c2
-rw-r--r--net/netfilter/nfnetlink.c8
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c3
-rw-r--r--net/netfilter/nfnetlink_queue.c39
-rw-r--r--net/netfilter/nft_chain_filter.c6
-rw-r--r--net/netfilter/nft_connlimit.c4
-rw-r--r--net/netfilter/nft_counter.c4
-rw-r--r--net/netfilter/nft_dynset.c2
-rw-r--r--net/netfilter/nft_fib.c8
-rw-r--r--net/netfilter/nft_hash.c3
-rw-r--r--net/netfilter/nft_immediate.c2
-rw-r--r--net/netfilter/nft_last.c4
-rw-r--r--net/netfilter/nft_limit.c14
-rw-r--r--net/netfilter/nft_lookup.c3
-rw-r--r--net/netfilter/nft_meta.c3
-rw-r--r--net/netfilter/nft_payload.c99
-rw-r--r--net/netfilter/nft_quota.c4
-rw-r--r--net/netfilter/nft_rt.c4
-rw-r--r--net/netfilter/nft_set_pipapo.c262
-rw-r--r--net/netfilter/nft_set_pipapo.h23
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.c22
-rw-r--r--net/netfilter/nft_tunnel.c44
-rw-r--r--net/netfilter/xt_recent.c8
-rw-r--r--net/netlabel/netlabel_kapi.c31
-rw-r--r--net/netlink/af_netlink.c157
-rw-r--r--net/netlink/genetlink.c2
-rw-r--r--net/netlink/genetlink.h11
-rw-r--r--net/netrom/af_netrom.c6
-rw-r--r--net/netrom/nr_route.c19
-rw-r--r--net/netrom/nr_timer.c3
-rw-r--r--net/netrom/sysctl_net_netrom.c1
-rw-r--r--net/nfc/llcp_sock.c4
-rw-r--r--net/nfc/nci/core.c17
-rw-r--r--net/nfc/netlink.c6
-rw-r--r--net/openvswitch/Kconfig1
-rw-r--r--net/openvswitch/actions.c72
-rw-r--r--net/openvswitch/conntrack.c54
-rw-r--r--net/openvswitch/datapath.c1
-rw-r--r--net/openvswitch/datapath.h3
-rw-r--r--net/openvswitch/flow.c3
-rw-r--r--net/openvswitch/flow_netlink.c93
-rw-r--r--net/openvswitch/meter.h1
-rw-r--r--net/openvswitch/openvswitch_trace.h8
-rw-r--r--net/openvswitch/vport-internal_dev.c10
-rw-r--r--net/openvswitch/vport-netdev.c7
-rw-r--r--net/openvswitch/vport.c1
-rw-r--r--net/packet/af_packet.c132
-rw-r--r--net/phonet/pep.c12
-rw-r--r--net/phonet/pn_netlink.c19
-rw-r--r--net/phonet/socket.c7
-rw-r--r--net/phonet/sysctl.c3
-rw-r--r--net/psample/psample.c47
-rw-r--r--net/qrtr/mhi.c46
-rw-r--r--net/qrtr/ns.c44
-rw-r--r--net/rds/ib_sysctl.c1
-rw-r--r--net/rds/sysctl.c1
-rw-r--r--net/rds/tcp.c9
-rw-r--r--net/rds/tcp_listen.c6
-rw-r--r--net/rds/tcp_recv.c4
-rw-r--r--net/rfkill/core.c8
-rw-r--r--net/rfkill/rfkill-gpio.c6
-rw-r--r--net/rose/af_rose.c6
-rw-r--r--net/rose/sysctl_net_rose.c1
-rw-r--r--net/rxrpc/af_rxrpc.c2
-rw-r--r--net/rxrpc/ar-internal.h2
-rw-r--r--net/rxrpc/call_object.c7
-rw-r--r--net/rxrpc/input.c49
-rw-r--r--net/rxrpc/sysctl.c1
-rw-r--r--net/sched/act_api.c5
-rw-r--r--net/sched/act_bpf.c4
-rw-r--r--net/sched/act_ct.c57
-rw-r--r--net/sched/act_sample.c12
-rw-r--r--net/sched/act_skbmod.c2
-rw-r--r--net/sched/act_tunnel_key.c36
-rw-r--r--net/sched/cls_api.c41
-rw-r--r--net/sched/cls_bpf.c4
-rw-r--r--net/sched/cls_flower.c266
-rw-r--r--net/sched/sch_api.c3
-rw-r--r--net/sched/sch_cake.c112
-rw-r--r--net/sched/sch_cbs.c20
-rw-r--r--net/sched/sch_choke.c21
-rw-r--r--net/sched/sch_codel.c29
-rw-r--r--net/sched/sch_etf.c10
-rw-r--r--net/sched/sch_ets.c25
-rw-r--r--net/sched/sch_fifo.c13
-rw-r--r--net/sched/sch_fq.c108
-rw-r--r--net/sched/sch_fq_codel.c57
-rw-r--r--net/sched/sch_fq_pie.c61
-rw-r--r--net/sched/sch_generic.c17
-rw-r--r--net/sched/sch_hfsc.c9
-rw-r--r--net/sched/sch_hhf.c35
-rw-r--r--net/sched/sch_htb.c22
-rw-r--r--net/sched/sch_ingress.c12
-rw-r--r--net/sched/sch_mqprio.c6
-rw-r--r--net/sched/sch_multiq.c2
-rw-r--r--net/sched/sch_pie.c39
-rw-r--r--net/sched/sch_sfq.c13
-rw-r--r--net/sched/sch_skbprio.c8
-rw-r--r--net/sched/sch_taprio.c36
-rw-r--r--net/sched/sch_teql.c4
-rw-r--r--net/sctp/input.c19
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/protocol.c4
-rw-r--r--net/sctp/sm_statefuns.c1
-rw-r--r--net/sctp/socket.c31
-rw-r--r--net/sctp/sysctl.c40
-rw-r--r--net/smc/Kconfig13
-rw-r--r--net/smc/Makefile3
-rw-r--r--net/smc/af_smc.c213
-rw-r--r--net/smc/smc.h38
-rw-r--r--net/smc/smc_cdc.c36
-rw-r--r--net/smc/smc_clc.c6
-rw-r--r--net/smc/smc_clc.h26
-rw-r--r--net/smc/smc_core.c68
-rw-r--r--net/smc/smc_core.h1
-rw-r--r--net/smc/smc_ib.c19
-rw-r--r--net/smc/smc_inet.c159
-rw-r--r--net/smc/smc_inet.h22
-rw-r--r--net/smc/smc_ism.c88
-rw-r--r--net/smc/smc_ism.h10
-rw-r--r--net/smc/smc_loopback.c427
-rw-r--r--net/smc/smc_loopback.h61
-rw-r--r--net/smc/smc_rx.c4
-rw-r--r--net/smc/smc_stats.h2
-rw-r--r--net/smc/smc_sysctl.c8
-rw-r--r--net/smc/smc_tracepoint.h4
-rw-r--r--net/socket.c65
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c4
-rw-r--r--net/sunrpc/auth_gss/auth_gss_internal.h6
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_keys.c2
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c12
-rw-r--r--net/sunrpc/clnt.c17
-rw-r--r--net/sunrpc/sched.c4
-rw-r--r--net/sunrpc/svc.c118
-rw-r--r--net/sunrpc/svc_xprt.c169
-rw-r--r--net/sunrpc/sysctl.c5
-rw-r--r--net/sunrpc/xprtrdma/Makefile2
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c9
-rw-r--r--net/sunrpc/xprtrdma/ib_client.c181
-rw-r--r--net/sunrpc/xprtrdma/module.c18
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c3
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c3
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c83
-rw-r--r--net/sunrpc/xprtrdma/transport.c1
-rw-r--r--net/sunrpc/xprtrdma/verbs.c96
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h5
-rw-r--r--net/sunrpc/xprtsock.c8
-rw-r--r--net/switchdev/switchdev.c99
-rw-r--r--net/sysctl_net.c14
-rw-r--r--net/tipc/core.h1
-rw-r--r--net/tipc/link.c27
-rw-r--r--net/tipc/node.c1
-rw-r--r--net/tipc/socket.c20
-rw-r--r--net/tipc/sysctl.c1
-rw-r--r--net/tipc/trace.h16
-rw-r--r--net/tipc/udp_media.c7
-rw-r--r--net/tls/Kconfig1
-rw-r--r--net/tls/tls_device.c12
-rw-r--r--net/tls/tls_device_fallback.c1
-rw-r--r--net/tls/tls_main.c19
-rw-r--r--net/tls/tls_strp.c1
-rw-r--r--net/tls/tls_sw.c1
-rw-r--r--net/unix/af_unix.c500
-rw-r--r--net/unix/diag.c59
-rw-r--r--net/unix/garbage.c614
-rw-r--r--net/unix/sysctl_net_unix.c3
-rw-r--r--net/unix/unix_bpf.c3
-rw-r--r--net/vmw_vsock/af_vsock.c56
-rw-r--r--net/vmw_vsock/virtio_transport.c17
-rw-r--r--net/vmw_vsock/vsock_bpf.c4
-rw-r--r--net/wireless/Makefile2
-rw-r--r--net/wireless/chan.c120
-rw-r--r--net/wireless/core.c17
-rw-r--r--net/wireless/core.h7
-rw-r--r--net/wireless/ibss.c5
-rw-r--r--net/wireless/mesh.c5
-rw-r--r--net/wireless/nl80211.c375
-rw-r--r--net/wireless/nl80211.h4
-rw-r--r--net/wireless/pmsr.c18
-rw-r--r--net/wireless/rdev-ops.h74
-rw-r--r--net/wireless/reg.c20
-rw-r--r--net/wireless/reg.h13
-rw-r--r--net/wireless/scan.c230
-rw-r--r--net/wireless/sme.c6
-rw-r--r--net/wireless/sysfs.c4
-rw-r--r--net/wireless/tests/chan.c22
-rw-r--r--net/wireless/trace.h235
-rw-r--r--net/wireless/util.c83
-rw-r--r--net/x25/af_x25.c4
-rw-r--r--net/x25/sysctl_net_x25.c1
-rw-r--r--net/xdp/xdp_umem.c9
-rw-r--r--net/xdp/xsk.c30
-rw-r--r--net/xdp/xsk_buff_pool.c29
-rw-r--r--net/xfrm/Makefile3
-rw-r--r--net/xfrm/xfrm_compat.c9
-rw-r--r--net/xfrm/xfrm_device.c12
-rw-r--r--net/xfrm/xfrm_input.c20
-rw-r--r--net/xfrm/xfrm_interface_core.c2
-rw-r--r--net/xfrm/xfrm_nat_keepalive.c292
-rw-r--r--net/xfrm/xfrm_policy.c40
-rw-r--r--net/xfrm/xfrm_proc.c2
-rw-r--r--net/xfrm/xfrm_replay.c3
-rw-r--r--net/xfrm/xfrm_state.c90
-rw-r--r--net/xfrm/xfrm_sysctl.c7
-rw-r--r--net/xfrm/xfrm_user.c178
556 files changed, 19666 insertions, 9669 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 39876eff51d2..217be32426b5 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -149,7 +149,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
if (max_mtu < new_mtu)
return -ERANGE;
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
@@ -677,7 +677,7 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev,
}
static int vlan_ethtool_get_ts_info(struct net_device *dev,
- struct ethtool_ts_info *info)
+ struct kernel_ethtool_ts_info *info)
{
const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
return ethtool_get_ts_info_by_layer(vlan->real_dev, info);
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index a3b68243fd4b..cf5219df7903 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -117,17 +117,15 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[],
return err;
}
if (data[IFLA_VLAN_INGRESS_QOS]) {
- nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) {
- if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING)
- continue;
+ nla_for_each_nested_type(attr, IFLA_VLAN_QOS_MAPPING,
+ data[IFLA_VLAN_INGRESS_QOS], rem) {
m = nla_data(attr);
vlan_dev_set_ingress_priority(dev, m->to, m->from);
}
}
if (data[IFLA_VLAN_EGRESS_QOS]) {
- nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) {
- if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING)
- continue;
+ nla_for_each_nested_type(attr, IFLA_VLAN_QOS_MAPPING,
+ data[IFLA_VLAN_EGRESS_QOS], rem) {
m = nla_data(attr);
err = vlan_dev_set_egress_priority(dev, m->from, m->to);
if (err)
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index 00ebce9e5a65..bcdab9c23b40 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -5,6 +5,7 @@
menuconfig NET_9P
tristate "Plan 9 Resource Sharing Support (9P2000)"
+ select NETFS_SUPPORT
help
If you say Y here, you will get experimental support for
Plan 9 resource sharing via the 9P2000 protocol.
diff --git a/net/9p/client.c b/net/9p/client.c
index f7e90b4769bb..5cd94721d974 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -18,6 +18,7 @@
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
#include <linux/uio.h>
+#include <linux/netfs.h>
#include <net/9p/9p.h>
#include <linux/parser.h>
#include <linux/seq_file.h>
@@ -235,6 +236,8 @@ static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc,
if (!fc->sdata)
return -ENOMEM;
fc->capacity = alloc_msize;
+ fc->id = 0;
+ fc->tag = P9_NOTAG;
return 0;
}
@@ -1661,6 +1664,54 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
}
EXPORT_SYMBOL(p9_client_write);
+void
+p9_client_write_subreq(struct netfs_io_subrequest *subreq)
+{
+ struct netfs_io_request *wreq = subreq->rreq;
+ struct p9_fid *fid = wreq->netfs_priv;
+ struct p9_client *clnt = fid->clnt;
+ struct p9_req_t *req;
+ unsigned long long start = subreq->start + subreq->transferred;
+ int written, len = subreq->len - subreq->transferred;
+ int err;
+
+ p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu len %d\n",
+ fid->fid, start, len);
+
+ /* Don't bother zerocopy for small IO (< 1024) */
+ if (clnt->trans_mod->zc_request && len > 1024) {
+ req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, &subreq->io_iter,
+ 0, wreq->len, P9_ZC_HDR_SZ, "dqd",
+ fid->fid, start, len);
+ } else {
+ req = p9_client_rpc(clnt, P9_TWRITE, "dqV", fid->fid,
+ start, len, &subreq->io_iter);
+ }
+ if (IS_ERR(req)) {
+ netfs_write_subrequest_terminated(subreq, PTR_ERR(req), false);
+ return;
+ }
+
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &written);
+ if (err) {
+ trace_9p_protocol_dump(clnt, &req->rc);
+ p9_req_put(clnt, req);
+ netfs_write_subrequest_terminated(subreq, err, false);
+ return;
+ }
+
+ if (written > len) {
+ pr_err("bogus RWRITE count (%d > %u)\n", written, len);
+ written = len;
+ }
+
+ p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", len);
+
+ p9_req_put(clnt, req);
+ netfs_write_subrequest_terminated(subreq, written, false);
+}
+EXPORT_SYMBOL(p9_client_write_subreq);
+
struct p9_wstat *p9_client_stat(struct p9_fid *fid)
{
int err;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index e305071eb7b8..0b8086f58ad5 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -781,7 +781,6 @@ static struct virtio_driver p9_virtio_drv = {
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
.driver.name = KBUILD_MODNAME,
- .driver.owner = THIS_MODULE,
.id_table = id_table,
.probe = p9_virtio_probe,
.remove = p9_virtio_remove,
diff --git a/net/Kconfig b/net/Kconfig
index 3e57ccf0da27..d27d0deac0bf 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -60,6 +60,9 @@ config NET_XGRESS
config NET_REDIRECT
bool
+config SKB_DECRYPTED
+ bool
+
config SKB_EXTENSIONS
bool
@@ -287,15 +290,21 @@ config MAX_SKB_FRAGS
If unsure, say 17.
config RPS
- bool
+ bool "Receive packet steering"
depends on SMP && SYSFS
default y
+ help
+ Software receive side packet steering (RPS) distributes the
+ load of received packet processing across multiple CPUs.
config RFS_ACCEL
- bool
+ bool "Hardware acceleration of RFS"
depends on RPS
select CPU_RMAP
default y
+ help
+ Allowing drivers for multiqueue hardware with flow filter tables to
+ accelerate RFS.
config SOCK_RX_QUEUE_MAPPING
bool
@@ -348,7 +357,7 @@ config BPF_STREAM_PARSER
BPF_MAP_TYPE_SOCKMAP.
config NET_FLOW_LIMIT
- bool
+ bool "Net flow limit"
depends on RPS
default y
help
@@ -449,6 +458,9 @@ config GRO_CELLS
config SOCK_VALIDATE_XMIT
bool
+config NET_IEEE8021Q_HELPERS
+ bool
+
config NET_SELFTESTS
def_tristate PHYLIB
depends on PHYLIB && INET
@@ -496,6 +508,7 @@ config FAILOVER
config ETHTOOL_NETLINK
bool "Netlink interface for ethtool"
+ select DIMLIB
default y
help
An alternative userspace interface for ethtool based on generic
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 198f5ba2feae..b068651984fe 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -88,6 +88,7 @@ static inline void atalk_remove_socket(struct sock *sk)
static struct sock *atalk_search_socket(struct sockaddr_at *to,
struct atalk_iface *atif)
{
+ struct sock *def_socket = NULL;
struct sock *s;
read_lock_bh(&atalk_sockets_lock);
@@ -98,8 +99,20 @@ static struct sock *atalk_search_socket(struct sockaddr_at *to,
continue;
if (to->sat_addr.s_net == ATADDR_ANYNET &&
- to->sat_addr.s_node == ATADDR_BCAST)
- goto found;
+ to->sat_addr.s_node == ATADDR_BCAST) {
+ if (atif->address.s_node == at->src_node &&
+ atif->address.s_net == at->src_net) {
+ /* This socket's address matches the address of the interface
+ * that received the packet -- use it
+ */
+ goto found;
+ }
+
+ /* Continue searching for a socket matching the interface address,
+ * but use this socket by default if no other one is found
+ */
+ def_socket = s;
+ }
if (to->sat_addr.s_net == at->src_net &&
(to->sat_addr.s_node == at->src_node ||
@@ -116,7 +129,7 @@ static struct sock *atalk_search_socket(struct sockaddr_at *to,
goto found;
}
}
- s = NULL;
+ s = def_socket;
found:
read_unlock_bh(&atalk_sockets_lock);
return s;
diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c
index d945b7c0176d..7aebfe903242 100644
--- a/net/appletalk/sysctl_net_atalk.c
+++ b/net/appletalk/sysctl_net_atalk.c
@@ -40,7 +40,6 @@ static struct ctl_table atalk_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { },
};
static struct ctl_table_header *atalk_table_header;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 294cb9efe3d3..42b910cb4e8e 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -345,7 +345,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
dev->stats.tx_dropped++;
return NETDEV_TX_OK;
}
- rt = (struct rtable *) dst;
+ rt = dst_rtable(dst);
if (rt->rt_gw_family == AF_INET)
daddr = &rt->rt_gw4;
else
@@ -463,7 +463,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
unlink_clip_vcc(clip_vcc);
return 0;
}
- rt = ip_route_output(&init_net, ip, 0, 1, 0);
+ rt = ip_route_output(&init_net, ip, 0, 0, 0, RT_SCOPE_LINK);
if (IS_ERR(rt))
return PTR_ERR(rt);
neigh = __neigh_lookup(&arp_tbl, &ip, rt->dst.dev, 1);
diff --git a/net/atm/common.c b/net/atm/common.c
index 2a1ec014e901..9b75699992ff 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -116,7 +116,7 @@ static void vcc_write_space(struct sock *sk)
if (skwq_has_sleeper(wq))
wake_up_interruptible(&wq->wait);
- sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
rcu_read_unlock();
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index f81f8d56f5c0..0f7a39aeccc8 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -68,7 +68,7 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd,
goto done;
}
error = put_user(sk->sk_sndbuf - sk_wmem_alloc_get(sk),
- (int __user *)argp) ? -EFAULT : 0;
+ (int __user *)argp);
goto done;
case SIOCINQ:
{
@@ -83,7 +83,7 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd,
skb = skb_peek(&sk->sk_receive_queue);
amount = skb ? skb->len : 0;
spin_unlock_irq(&sk->sk_receive_queue.lock);
- error = put_user(amount, (int __user *)argp) ? -EFAULT : 0;
+ error = put_user(amount, (int __user *)argp);
goto done;
}
case ATM_SETSC:
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 36a814f1fbd1..f8137ae693b0 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -324,8 +324,8 @@ out:
return error;
}
-static int svc_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int svc_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sock *sk = sock->sk;
struct sk_buff *skb;
@@ -336,7 +336,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags,
lock_sock(sk);
- error = svc_create(sock_net(sk), newsock, 0, kern);
+ error = svc_create(sock_net(sk), newsock, 0, arg->kern);
if (error)
goto out;
@@ -355,7 +355,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags,
error = -sk->sk_err;
break;
}
- if (flags & O_NONBLOCK) {
+ if (arg->flags & O_NONBLOCK) {
error = -EAGAIN;
break;
}
diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig
index fdb666607f10..e23a3dc14b93 100644
--- a/net/ax25/Kconfig
+++ b/net/ax25/Kconfig
@@ -4,7 +4,7 @@
#
menuconfig HAMRADIO
- depends on NET && !S390
+ depends on NET
bool "Amateur Radio support"
help
If you want to connect your Linux box to an amateur radio, answer Y
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 9169efb2f43a..d6f9fae06a9d 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1373,13 +1373,15 @@ out_release:
return err;
}
-static int ax25_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int ax25_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sk_buff *skb;
struct sock *newsk;
+ ax25_dev *ax25_dev;
DEFINE_WAIT(wait);
struct sock *sk;
+ ax25_cb *ax25;
int err = 0;
if (sock->state != SS_UNCONNECTED)
@@ -1409,7 +1411,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags,
if (skb)
break;
- if (flags & O_NONBLOCK) {
+ if (arg->flags & O_NONBLOCK) {
err = -EWOULDBLOCK;
break;
}
@@ -1434,6 +1436,10 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags,
kfree_skb(skb);
sk_acceptq_removed(sk);
newsock->state = SS_CONNECTED;
+ ax25 = sk_to_ax25(newsk);
+ ax25_dev = ax25->ax25_dev;
+ netdev_hold(ax25_dev->dev, &ax25->dev_tracker, GFP_ATOMIC);
+ ax25_dev_hold(ax25_dev);
out:
release_sock(sk);
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index 282ec581c072..9efd6690b344 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -22,11 +22,12 @@
#include <net/sock.h>
#include <linux/uaccess.h>
#include <linux/fcntl.h>
+#include <linux/list.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/init.h>
-ax25_dev *ax25_dev_list;
+static LIST_HEAD(ax25_dev_list);
DEFINE_SPINLOCK(ax25_dev_lock);
ax25_dev *ax25_addr_ax25dev(ax25_address *addr)
@@ -34,10 +35,11 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr)
ax25_dev *ax25_dev, *res = NULL;
spin_lock_bh(&ax25_dev_lock);
- for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next)
+ list_for_each_entry(ax25_dev, &ax25_dev_list, list)
if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) {
res = ax25_dev;
ax25_dev_hold(ax25_dev);
+ break;
}
spin_unlock_bh(&ax25_dev_lock);
@@ -59,7 +61,6 @@ void ax25_dev_device_up(struct net_device *dev)
}
refcount_set(&ax25_dev->refcount, 1);
- dev->ax25_ptr = ax25_dev;
ax25_dev->dev = dev;
netdev_hold(dev, &ax25_dev->dev_tracker, GFP_KERNEL);
ax25_dev->forward = NULL;
@@ -78,17 +79,19 @@ void ax25_dev_device_up(struct net_device *dev)
ax25_dev->values[AX25_VALUES_N2] = AX25_DEF_N2;
ax25_dev->values[AX25_VALUES_PACLEN] = AX25_DEF_PACLEN;
ax25_dev->values[AX25_VALUES_PROTOCOL] = AX25_DEF_PROTOCOL;
+
+#ifdef CONFIG_AX25_DAMA_SLAVE
ax25_dev->values[AX25_VALUES_DS_TIMEOUT]= AX25_DEF_DS_TIMEOUT;
+#endif
#if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER)
ax25_ds_setup_timer(ax25_dev);
#endif
spin_lock_bh(&ax25_dev_lock);
- ax25_dev->next = ax25_dev_list;
- ax25_dev_list = ax25_dev;
+ list_add(&ax25_dev->list, &ax25_dev_list);
+ dev->ax25_ptr = ax25_dev;
spin_unlock_bh(&ax25_dev_lock);
- ax25_dev_hold(ax25_dev);
ax25_register_dev_sysctl(ax25_dev);
}
@@ -111,32 +114,19 @@ void ax25_dev_device_down(struct net_device *dev)
/*
* Remove any packet forwarding that points to this device.
*/
- for (s = ax25_dev_list; s != NULL; s = s->next)
+ list_for_each_entry(s, &ax25_dev_list, list)
if (s->forward == dev)
s->forward = NULL;
- if ((s = ax25_dev_list) == ax25_dev) {
- ax25_dev_list = s->next;
- goto unlock_put;
- }
-
- while (s != NULL && s->next != NULL) {
- if (s->next == ax25_dev) {
- s->next = ax25_dev->next;
- goto unlock_put;
+ list_for_each_entry(s, &ax25_dev_list, list) {
+ if (s == ax25_dev) {
+ list_del(&s->list);
+ break;
}
-
- s = s->next;
}
- spin_unlock_bh(&ax25_dev_lock);
- dev->ax25_ptr = NULL;
- ax25_dev_put(ax25_dev);
- return;
-unlock_put:
- spin_unlock_bh(&ax25_dev_lock);
- ax25_dev_put(ax25_dev);
dev->ax25_ptr = NULL;
+ spin_unlock_bh(&ax25_dev_lock);
netdev_put(dev, &ax25_dev->dev_tracker);
ax25_dev_put(ax25_dev);
}
@@ -200,16 +190,13 @@ struct net_device *ax25_fwd_dev(struct net_device *dev)
*/
void __exit ax25_dev_free(void)
{
- ax25_dev *s, *ax25_dev;
+ ax25_dev *s, *n;
spin_lock_bh(&ax25_dev_lock);
- ax25_dev = ax25_dev_list;
- while (ax25_dev != NULL) {
- s = ax25_dev;
- netdev_put(ax25_dev->dev, &ax25_dev->dev_tracker);
- ax25_dev = ax25_dev->next;
- kfree(s);
+ list_for_each_entry_safe(s, n, &ax25_dev_list, list) {
+ netdev_put(s->dev, &s->dev_tracker);
+ list_del(&s->list);
+ ax25_dev_put(s);
}
- ax25_dev_list = NULL;
spin_unlock_bh(&ax25_dev_lock);
}
diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c
index db66e11e7fe8..68753aa30334 100644
--- a/net/ax25/sysctl_net_ax25.c
+++ b/net/ax25/sysctl_net_ax25.c
@@ -141,8 +141,6 @@ static const struct ctl_table ax25_param_table[] = {
.extra2 = &max_ds_timeout
},
#endif
-
- { } /* that's all, folks! */
};
int ax25_register_dev_sysctl(ax25_dev *ax25_dev)
@@ -155,6 +153,7 @@ int ax25_register_dev_sysctl(ax25_dev *ax25_dev)
if (!table)
return -ENOMEM;
+ BUILD_BUG_ON(ARRAY_SIZE(ax25_param_table) != AX25_MAX_VALUES);
for (k = 0; k < AX25_MAX_VALUES; k++)
table[k].data = &ax25_dev->values[k];
@@ -171,7 +170,7 @@ int ax25_register_dev_sysctl(ax25_dev *ax25_dev)
void ax25_unregister_dev_sysctl(ax25_dev *ax25_dev)
{
struct ctl_table_header *header = ax25_dev->sysheader;
- struct ctl_table *table;
+ const struct ctl_table *table;
if (header) {
ax25_dev->sysheader = NULL;
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 75119f1ffccc..8e0f44c71696 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -14,7 +14,6 @@
#include <linux/crc32c.h>
#include <linux/device.h>
#include <linux/errno.h>
-#include <linux/genetlink.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
@@ -38,6 +37,7 @@
#include <linux/string.h>
#include <linux/workqueue.h>
#include <net/dsfield.h>
+#include <net/genetlink.h>
#include <net/rtnetlink.h>
#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 8ca854a75a32..3d4c36ae2e1a 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2024.1"
+#define BATADV_SOURCE_VERSION "2024.2"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 0954757f0b8b..9362cd9d6f3d 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -15,7 +15,6 @@
#include <linux/cache.h>
#include <linux/err.h>
#include <linux/errno.h>
-#include <linux/genetlink.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 71c143d4b6d0..8f6dd2c6ee41 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -12,6 +12,7 @@
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
+#include <linux/if_vlan.h>
#include <linux/jiffies.h>
#include <linux/kref.h>
#include <linux/list.h>
@@ -132,6 +133,29 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
}
/**
+ * batadv_vlan_id_valid() - check if vlan id is in valid batman-adv encoding
+ * @vid: the VLAN identifier
+ *
+ * Return: true when either no vlan is set or if VLAN is in correct range,
+ * false otherwise
+ */
+static bool batadv_vlan_id_valid(unsigned short vid)
+{
+ unsigned short non_vlan = vid & ~(BATADV_VLAN_HAS_TAG | VLAN_VID_MASK);
+
+ if (vid == 0)
+ return true;
+
+ if (!(vid & BATADV_VLAN_HAS_TAG))
+ return false;
+
+ if (non_vlan)
+ return false;
+
+ return true;
+}
+
+/**
* batadv_orig_node_vlan_new() - search and possibly create an orig_node_vlan
* object
* @orig_node: the originator serving the VLAN
@@ -149,6 +173,9 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
{
struct batadv_orig_node_vlan *vlan;
+ if (!batadv_vlan_id_valid(vid))
+ return NULL;
+
spin_lock_bh(&orig_node->vlan_list_lock);
/* first look if an object for this vid already exists */
@@ -1266,6 +1293,8 @@ void batadv_purge_orig_ref(struct batadv_priv *bat_priv)
/* for all origins... */
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
+ if (hlist_empty(head))
+ continue;
list_lock = &hash->list_locks[i];
spin_lock_bh(list_lock);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 89c51b3cf430..30ecbc2ef1fd 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -159,7 +159,7 @@ static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
if (new_mtu < ETH_MIN_MTU || new_mtu > batadv_hardif_min_mtu(dev))
return -EINVAL;
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
bat_priv->mtu_set_by_user = new_mtu;
return 0;
diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h
index 5dd52bc5cabb..6b816cf1a953 100644
--- a/net/batman-adv/trace.h
+++ b/net/batman-adv/trace.h
@@ -40,8 +40,8 @@ TRACE_EVENT(batadv_dbg,
),
TP_fast_assign(
- __assign_str(device, bat_priv->soft_iface->name);
- __assign_str(driver, KBUILD_MODNAME);
+ __assign_str(device);
+ __assign_str(driver);
__assign_vstr(msg, vaf->fmt, vaf->va);
),
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 27520a8a486f..50cfec8ccac4 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -133,7 +133,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev,
struct in6_addr *daddr,
struct sk_buff *skb)
{
- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+ struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
int count = atomic_read(&dev->peer_count);
const struct in6_addr *nexthop;
struct lowpan_peer *peer;
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 628d448d78be..5a3835b7dfcd 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -14,8 +14,7 @@ bluetooth_6lowpan-y := 6lowpan.o
bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \
hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o lib.o \
- ecdh_helper.o hci_request.o mgmt_util.o mgmt_config.o hci_codec.o \
- eir.o hci_sync.o
+ ecdh_helper.o mgmt_util.o mgmt_config.o hci_codec.o eir.o hci_sync.o
bluetooth-$(CONFIG_DEV_COREDUMP) += coredump.o
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 05346250f719..8e48ccd2af30 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -34,7 +34,6 @@
#include <net/bluetooth/iso.h>
#include <net/bluetooth/mgmt.h>
-#include "hci_request.h"
#include "smp.h"
#include "eir.h"
@@ -241,13 +240,13 @@ static int configure_datapath_sync(struct hci_dev *hdev, struct bt_codec *codec)
__u8 vnd_len, *vnd_data = NULL;
struct hci_op_configure_data_path *cmd = NULL;
+ /* Do not take below 2 checks as error since the 1st means user do not
+ * want to use HFP offload mode and the 2nd means the vendor controller
+ * do not need to send below HCI command for offload mode.
+ */
if (!codec->data_path || !hdev->get_codec_config_data)
return 0;
- /* Do not take me as error */
- if (!hdev->get_codec_config_data)
- return 0;
-
err = hdev->get_codec_config_data(hdev, ESCO_LINK, codec, &vnd_len,
&vnd_data);
if (err < 0)
@@ -664,11 +663,6 @@ static void le_conn_timeout(struct work_struct *work)
hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM);
}
-struct iso_cig_params {
- struct hci_cp_le_set_cig_params cp;
- struct hci_cis_params cis[0x1f];
-};
-
struct iso_list_data {
union {
u8 cig;
@@ -904,16 +898,42 @@ static int hci_conn_hash_alloc_unset(struct hci_dev *hdev)
U16_MAX, GFP_ATOMIC);
}
-struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
- u8 role, u16 handle)
+static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
+ u8 role, u16 handle)
{
struct hci_conn *conn;
+ switch (type) {
+ case ACL_LINK:
+ if (!hdev->acl_mtu)
+ return ERR_PTR(-ECONNREFUSED);
+ break;
+ case ISO_LINK:
+ if (hdev->iso_mtu)
+ /* Dedicated ISO Buffer exists */
+ break;
+ fallthrough;
+ case LE_LINK:
+ if (hdev->le_mtu && hdev->le_mtu < HCI_MIN_LE_MTU)
+ return ERR_PTR(-ECONNREFUSED);
+ if (!hdev->le_mtu && hdev->acl_mtu < HCI_MIN_LE_MTU)
+ return ERR_PTR(-ECONNREFUSED);
+ break;
+ case SCO_LINK:
+ case ESCO_LINK:
+ if (!hdev->sco_pkts)
+ /* Controller does not support SCO or eSCO over HCI */
+ return ERR_PTR(-ECONNREFUSED);
+ break;
+ default:
+ return ERR_PTR(-ECONNREFUSED);
+ }
+
bt_dev_dbg(hdev, "dst %pMR handle 0x%4.4x", dst, handle);
conn = kzalloc(sizeof(*conn), GFP_KERNEL);
if (!conn)
- return NULL;
+ return ERR_PTR(-ENOMEM);
bacpy(&conn->dst, dst);
bacpy(&conn->src, &hdev->bdaddr);
@@ -944,10 +964,12 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
switch (type) {
case ACL_LINK:
conn->pkt_type = hdev->pkt_type & ACL_PTYPE_MASK;
+ conn->mtu = hdev->acl_mtu;
break;
case LE_LINK:
/* conn->src should reflect the local identity address */
hci_copy_identity_address(hdev, &conn->src, &conn->src_type);
+ conn->mtu = hdev->le_mtu ? hdev->le_mtu : hdev->acl_mtu;
break;
case ISO_LINK:
/* conn->src should reflect the local identity address */
@@ -959,6 +981,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
else if (conn->role == HCI_ROLE_MASTER)
conn->cleanup = cis_cleanup;
+ conn->mtu = hdev->iso_mtu ? hdev->iso_mtu :
+ hdev->le_mtu ? hdev->le_mtu : hdev->acl_mtu;
break;
case SCO_LINK:
if (lmp_esco_capable(hdev))
@@ -966,9 +990,12 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
(hdev->esco_type & EDR_ESCO_MASK);
else
conn->pkt_type = hdev->pkt_type & SCO_PTYPE_MASK;
+
+ conn->mtu = hdev->sco_mtu;
break;
case ESCO_LINK:
conn->pkt_type = hdev->esco_type & ~EDR_ESCO_MASK;
+ conn->mtu = hdev->sco_mtu;
break;
}
@@ -1011,9 +1038,18 @@ struct hci_conn *hci_conn_add_unset(struct hci_dev *hdev, int type,
handle = hci_conn_hash_alloc_unset(hdev);
if (unlikely(handle < 0))
- return NULL;
+ return ERR_PTR(-ECONNREFUSED);
+
+ return __hci_conn_add(hdev, type, dst, role, handle);
+}
+
+struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
+ u8 role, u16 handle)
+{
+ if (handle > HCI_CONN_HANDLE_MAX)
+ return ERR_PTR(-EINVAL);
- return hci_conn_add(hdev, type, dst, role, handle);
+ return __hci_conn_add(hdev, type, dst, role, handle);
}
static void hci_conn_cleanup_child(struct hci_conn *conn, u8 reason)
@@ -1140,8 +1176,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type)
list_for_each_entry(d, &hci_dev_list, list) {
if (!test_bit(HCI_UP, &d->flags) ||
- hci_dev_test_flag(d, HCI_USER_CHANNEL) ||
- d->dev_type != HCI_PRIMARY)
+ hci_dev_test_flag(d, HCI_USER_CHANNEL))
continue;
/* Simple routing:
@@ -1317,8 +1352,8 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
bacpy(&conn->dst, dst);
} else {
conn = hci_conn_add_unset(hdev, LE_LINK, dst, role);
- if (!conn)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(conn))
+ return conn;
hci_conn_hold(conn);
conn->pending_sec_level = sec_level;
}
@@ -1494,8 +1529,8 @@ static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst,
return ERR_PTR(-EADDRINUSE);
conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_MASTER);
- if (!conn)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(conn))
+ return conn;
conn->state = BT_CONNECT;
@@ -1538,8 +1573,8 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
BT_DBG("requesting refresh of dst_addr");
conn = hci_conn_add_unset(hdev, LE_LINK, dst, HCI_ROLE_MASTER);
- if (!conn)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(conn))
+ return conn;
if (hci_explicit_conn_params_set(hdev, dst, dst_type) < 0) {
hci_conn_del(conn);
@@ -1586,8 +1621,8 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst);
if (!acl) {
acl = hci_conn_add_unset(hdev, ACL_LINK, dst, HCI_ROLE_MASTER);
- if (!acl)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(acl))
+ return acl;
}
hci_conn_hold(acl);
@@ -1655,9 +1690,9 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
sco = hci_conn_hash_lookup_ba(hdev, type, dst);
if (!sco) {
sco = hci_conn_add_unset(hdev, type, dst, HCI_ROLE_MASTER);
- if (!sco) {
+ if (IS_ERR(sco)) {
hci_conn_drop(acl);
- return ERR_PTR(-ENOMEM);
+ return sco;
}
}
@@ -1722,34 +1757,33 @@ static int hci_le_create_big(struct hci_conn *conn, struct bt_iso_qos *qos)
static int set_cig_params_sync(struct hci_dev *hdev, void *data)
{
+ DEFINE_FLEX(struct hci_cp_le_set_cig_params, pdu, cis, num_cis, 0x1f);
u8 cig_id = PTR_UINT(data);
struct hci_conn *conn;
struct bt_iso_qos *qos;
- struct iso_cig_params pdu;
+ u8 aux_num_cis = 0;
u8 cis_id;
conn = hci_conn_hash_lookup_cig(hdev, cig_id);
if (!conn)
return 0;
- memset(&pdu, 0, sizeof(pdu));
-
qos = &conn->iso_qos;
- pdu.cp.cig_id = cig_id;
- hci_cpu_to_le24(qos->ucast.out.interval, pdu.cp.c_interval);
- hci_cpu_to_le24(qos->ucast.in.interval, pdu.cp.p_interval);
- pdu.cp.sca = qos->ucast.sca;
- pdu.cp.packing = qos->ucast.packing;
- pdu.cp.framing = qos->ucast.framing;
- pdu.cp.c_latency = cpu_to_le16(qos->ucast.out.latency);
- pdu.cp.p_latency = cpu_to_le16(qos->ucast.in.latency);
+ pdu->cig_id = cig_id;
+ hci_cpu_to_le24(qos->ucast.out.interval, pdu->c_interval);
+ hci_cpu_to_le24(qos->ucast.in.interval, pdu->p_interval);
+ pdu->sca = qos->ucast.sca;
+ pdu->packing = qos->ucast.packing;
+ pdu->framing = qos->ucast.framing;
+ pdu->c_latency = cpu_to_le16(qos->ucast.out.latency);
+ pdu->p_latency = cpu_to_le16(qos->ucast.in.latency);
/* Reprogram all CIS(s) with the same CIG, valid range are:
* num_cis: 0x00 to 0x1F
* cis_id: 0x00 to 0xEF
*/
for (cis_id = 0x00; cis_id < 0xf0 &&
- pdu.cp.num_cis < ARRAY_SIZE(pdu.cis); cis_id++) {
+ aux_num_cis < pdu->num_cis; cis_id++) {
struct hci_cis_params *cis;
conn = hci_conn_hash_lookup_cis(hdev, NULL, 0, cig_id, cis_id);
@@ -1758,7 +1792,7 @@ static int set_cig_params_sync(struct hci_dev *hdev, void *data)
qos = &conn->iso_qos;
- cis = &pdu.cis[pdu.cp.num_cis++];
+ cis = &pdu->cis[aux_num_cis++];
cis->cis_id = cis_id;
cis->c_sdu = cpu_to_le16(conn->iso_qos.ucast.out.sdu);
cis->p_sdu = cpu_to_le16(conn->iso_qos.ucast.in.sdu);
@@ -1769,14 +1803,14 @@ static int set_cig_params_sync(struct hci_dev *hdev, void *data)
cis->c_rtn = qos->ucast.out.rtn;
cis->p_rtn = qos->ucast.in.rtn;
}
+ pdu->num_cis = aux_num_cis;
- if (!pdu.cp.num_cis)
+ if (!pdu->num_cis)
return 0;
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_CIG_PARAMS,
- sizeof(pdu.cp) +
- pdu.cp.num_cis * sizeof(pdu.cis[0]), &pdu,
- HCI_CMD_TIMEOUT);
+ struct_size(pdu, cis, pdu->num_cis),
+ pdu, HCI_CMD_TIMEOUT);
}
static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos)
@@ -1847,8 +1881,8 @@ struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst,
qos->ucast.cis);
if (!cis) {
cis = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_MASTER);
- if (!cis)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(cis))
+ return cis;
cis->cleanup = cis_cleanup;
cis->dst_type = dst_type;
cis->iso_qos.ucast.cig = BT_ISO_QOS_CIG_UNSET;
@@ -1983,14 +2017,8 @@ static void hci_iso_qos_setup(struct hci_dev *hdev, struct hci_conn *conn,
struct bt_iso_io_qos *qos, __u8 phy)
{
/* Only set MTU if PHY is enabled */
- if (!qos->sdu && qos->phy) {
- if (hdev->iso_mtu > 0)
- qos->sdu = hdev->iso_mtu;
- else if (hdev->le_mtu > 0)
- qos->sdu = hdev->le_mtu;
- else
- qos->sdu = hdev->acl_mtu;
- }
+ if (!qos->sdu && qos->phy)
+ qos->sdu = conn->mtu;
/* Use the same PHY as ACL if set to any */
if (qos->phy == BT_ISO_PHY_ANY)
@@ -2071,8 +2099,8 @@ struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst,
return ERR_PTR(-EBUSY);
conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_SLAVE);
- if (!conn)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(conn))
+ return conn;
conn->iso_qos = *qos;
conn->state = BT_LISTEN;
@@ -2109,13 +2137,10 @@ int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
struct bt_iso_qos *qos,
__u16 sync_handle, __u8 num_bis, __u8 bis[])
{
- struct _packed {
- struct hci_cp_le_big_create_sync cp;
- __u8 bis[0x11];
- } pdu;
+ DEFINE_FLEX(struct hci_cp_le_big_create_sync, pdu, bis, num_bis, 0x11);
int err;
- if (num_bis < 0x01 || num_bis > sizeof(pdu.bis))
+ if (num_bis < 0x01 || num_bis > pdu->num_bis)
return -EINVAL;
err = qos_set_big(hdev, qos);
@@ -2125,18 +2150,17 @@ int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
if (hcon)
hcon->iso_qos.bcast.big = qos->bcast.big;
- memset(&pdu, 0, sizeof(pdu));
- pdu.cp.handle = qos->bcast.big;
- pdu.cp.sync_handle = cpu_to_le16(sync_handle);
- pdu.cp.encryption = qos->bcast.encryption;
- memcpy(pdu.cp.bcode, qos->bcast.bcode, sizeof(pdu.cp.bcode));
- pdu.cp.mse = qos->bcast.mse;
- pdu.cp.timeout = cpu_to_le16(qos->bcast.timeout);
- pdu.cp.num_bis = num_bis;
- memcpy(pdu.bis, bis, num_bis);
+ pdu->handle = qos->bcast.big;
+ pdu->sync_handle = cpu_to_le16(sync_handle);
+ pdu->encryption = qos->bcast.encryption;
+ memcpy(pdu->bcode, qos->bcast.bcode, sizeof(pdu->bcode));
+ pdu->mse = qos->bcast.mse;
+ pdu->timeout = cpu_to_le16(qos->bcast.timeout);
+ pdu->num_bis = num_bis;
+ memcpy(pdu->bis, bis, num_bis);
return hci_send_cmd(hdev, HCI_OP_LE_BIG_CREATE_SYNC,
- sizeof(pdu.cp) + num_bis, &pdu);
+ struct_size(pdu, bis, num_bis), pdu);
}
static void create_big_complete(struct hci_dev *hdev, void *data, int err)
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index a7028d38c1f5..06da8ac13dca 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -40,7 +40,6 @@
#include <net/bluetooth/l2cap.h>
#include <net/bluetooth/mgmt.h>
-#include "hci_request.h"
#include "hci_debugfs.h"
#include "smp.h"
#include "leds.h"
@@ -63,50 +62,6 @@ DEFINE_MUTEX(hci_cb_list_lock);
/* HCI ID Numbering */
static DEFINE_IDA(hci_index_ida);
-static int hci_scan_req(struct hci_request *req, unsigned long opt)
-{
- __u8 scan = opt;
-
- BT_DBG("%s %x", req->hdev->name, scan);
-
- /* Inquiry and Page scans */
- hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
- return 0;
-}
-
-static int hci_auth_req(struct hci_request *req, unsigned long opt)
-{
- __u8 auth = opt;
-
- BT_DBG("%s %x", req->hdev->name, auth);
-
- /* Authentication */
- hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth);
- return 0;
-}
-
-static int hci_encrypt_req(struct hci_request *req, unsigned long opt)
-{
- __u8 encrypt = opt;
-
- BT_DBG("%s %x", req->hdev->name, encrypt);
-
- /* Encryption */
- hci_req_add(req, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt);
- return 0;
-}
-
-static int hci_linkpol_req(struct hci_request *req, unsigned long opt)
-{
- __le16 policy = cpu_to_le16(opt);
-
- BT_DBG("%s %x", req->hdev->name, policy);
-
- /* Default link policy */
- hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy);
- return 0;
-}
-
/* Get HCI device by index.
* Device is held on return. */
struct hci_dev *hci_dev_get(int index)
@@ -149,8 +104,6 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state)
{
int old_state = hdev->discovery.state;
- BT_DBG("%s state %u -> %u", hdev->name, hdev->discovery.state, state);
-
if (old_state == state)
return;
@@ -173,6 +126,8 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state)
case DISCOVERY_STOPPING:
break;
}
+
+ bt_dev_dbg(hdev, "state %u -> %u", old_state, state);
}
void hci_inquiry_cache_flush(struct hci_dev *hdev)
@@ -349,33 +304,12 @@ static int inquiry_cache_dump(struct hci_dev *hdev, int num, __u8 *buf)
return copied;
}
-static int hci_inq_req(struct hci_request *req, unsigned long opt)
-{
- struct hci_inquiry_req *ir = (struct hci_inquiry_req *) opt;
- struct hci_dev *hdev = req->hdev;
- struct hci_cp_inquiry cp;
-
- BT_DBG("%s", hdev->name);
-
- if (test_bit(HCI_INQUIRY, &hdev->flags))
- return 0;
-
- /* Start Inquiry */
- memcpy(&cp.lap, &ir->lap, 3);
- cp.length = ir->length;
- cp.num_rsp = ir->num_rsp;
- hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp);
-
- return 0;
-}
-
int hci_inquiry(void __user *arg)
{
__u8 __user *ptr = arg;
struct hci_inquiry_req ir;
struct hci_dev *hdev;
int err = 0, do_inquiry = 0, max_rsp;
- long timeo;
__u8 *buf;
if (copy_from_user(&ir, ptr, sizeof(ir)))
@@ -395,11 +329,6 @@ int hci_inquiry(void __user *arg)
goto done;
}
- if (hdev->dev_type != HCI_PRIMARY) {
- err = -EOPNOTSUPP;
- goto done;
- }
-
if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
err = -EOPNOTSUPP;
goto done;
@@ -419,11 +348,11 @@ int hci_inquiry(void __user *arg)
}
hci_dev_unlock(hdev);
- timeo = ir.length * msecs_to_jiffies(2000);
-
if (do_inquiry) {
- err = hci_req_sync(hdev, hci_inq_req, (unsigned long) &ir,
- timeo, NULL);
+ hci_req_sync_lock(hdev);
+ err = hci_inquiry_sync(hdev, ir.length, ir.num_rsp);
+ hci_req_sync_unlock(hdev);
+
if (err < 0)
goto done;
@@ -733,6 +662,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
{
struct hci_dev *hdev;
struct hci_dev_req dr;
+ __le16 policy;
int err = 0;
if (copy_from_user(&dr, arg, sizeof(dr)))
@@ -752,11 +682,6 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
goto done;
}
- if (hdev->dev_type != HCI_PRIMARY) {
- err = -EOPNOTSUPP;
- goto done;
- }
-
if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
err = -EOPNOTSUPP;
goto done;
@@ -764,8 +689,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
switch (cmd) {
case HCISETAUTH:
- err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt,
- HCI_INIT_TIMEOUT, NULL);
+ err = hci_cmd_sync_status(hdev, HCI_OP_WRITE_AUTH_ENABLE,
+ 1, &dr.dev_opt, HCI_CMD_TIMEOUT);
break;
case HCISETENCRYPT:
@@ -776,19 +701,21 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
if (!test_bit(HCI_AUTH, &hdev->flags)) {
/* Auth must be enabled first */
- err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt,
- HCI_INIT_TIMEOUT, NULL);
+ err = hci_cmd_sync_status(hdev,
+ HCI_OP_WRITE_AUTH_ENABLE,
+ 1, &dr.dev_opt,
+ HCI_CMD_TIMEOUT);
if (err)
break;
}
- err = hci_req_sync(hdev, hci_encrypt_req, dr.dev_opt,
- HCI_INIT_TIMEOUT, NULL);
+ err = hci_cmd_sync_status(hdev, HCI_OP_WRITE_ENCRYPT_MODE,
+ 1, &dr.dev_opt, HCI_CMD_TIMEOUT);
break;
case HCISETSCAN:
- err = hci_req_sync(hdev, hci_scan_req, dr.dev_opt,
- HCI_INIT_TIMEOUT, NULL);
+ err = hci_cmd_sync_status(hdev, HCI_OP_WRITE_SCAN_ENABLE,
+ 1, &dr.dev_opt, HCI_CMD_TIMEOUT);
/* Ensure that the connectable and discoverable states
* get correctly modified as this was a non-mgmt change.
@@ -798,8 +725,10 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
break;
case HCISETLINKPOL:
- err = hci_req_sync(hdev, hci_linkpol_req, dr.dev_opt,
- HCI_INIT_TIMEOUT, NULL);
+ policy = cpu_to_le16(dr.dev_opt);
+
+ err = hci_cmd_sync_status(hdev, HCI_OP_WRITE_DEF_LINK_POLICY,
+ 2, &policy, HCI_CMD_TIMEOUT);
break;
case HCISETLINKMODE:
@@ -840,7 +769,7 @@ int hci_get_dev_list(void __user *arg)
struct hci_dev *hdev;
struct hci_dev_list_req *dl;
struct hci_dev_req *dr;
- int n = 0, size, err;
+ int n = 0, err;
__u16 dev_num;
if (get_user(dev_num, (__u16 __user *) arg))
@@ -849,12 +778,11 @@ int hci_get_dev_list(void __user *arg)
if (!dev_num || dev_num > (PAGE_SIZE * 2) / sizeof(*dr))
return -EINVAL;
- size = sizeof(*dl) + dev_num * sizeof(*dr);
-
- dl = kzalloc(size, GFP_KERNEL);
+ dl = kzalloc(struct_size(dl, dev_req, dev_num), GFP_KERNEL);
if (!dl)
return -ENOMEM;
+ dl->dev_num = dev_num;
dr = dl->dev_req;
read_lock(&hci_dev_list_lock);
@@ -868,8 +796,8 @@ int hci_get_dev_list(void __user *arg)
if (hci_dev_test_flag(hdev, HCI_AUTO_OFF))
flags &= ~BIT(HCI_UP);
- (dr + n)->dev_id = hdev->id;
- (dr + n)->dev_opt = flags;
+ dr[n].dev_id = hdev->id;
+ dr[n].dev_opt = flags;
if (++n >= dev_num)
break;
@@ -877,9 +805,7 @@ int hci_get_dev_list(void __user *arg)
read_unlock(&hci_dev_list_lock);
dl->dev_num = n;
- size = sizeof(*dl) + n * sizeof(*dr);
-
- err = copy_to_user(arg, dl, size);
+ err = copy_to_user(arg, dl, struct_size(dl, dev_req, n));
kfree(dl);
return err ? -EFAULT : 0;
@@ -910,7 +836,7 @@ int hci_get_dev_info(void __user *arg)
strscpy(di.name, hdev->name, sizeof(di.name));
di.bdaddr = hdev->bdaddr;
- di.type = (hdev->bus & 0x0f) | ((hdev->dev_type & 0x03) << 4);
+ di.type = (hdev->bus & 0x0f);
di.flags = flags;
di.pkt_type = hdev->pkt_type;
if (lmp_bredr_capable(hdev)) {
@@ -1026,8 +952,7 @@ static void hci_power_on(struct work_struct *work)
*/
if (hci_dev_test_flag(hdev, HCI_RFKILLED) ||
hci_dev_test_flag(hdev, HCI_UNCONFIGURED) ||
- (hdev->dev_type == HCI_PRIMARY &&
- !bacmp(&hdev->bdaddr, BDADDR_ANY) &&
+ (!bacmp(&hdev->bdaddr, BDADDR_ANY) &&
!bacmp(&hdev->static_addr, BDADDR_ANY))) {
hci_dev_clear_flag(hdev, HCI_AUTO_OFF);
hci_dev_do_close(hdev);
@@ -1769,6 +1694,15 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance,
adv->pending = true;
adv->instance = instance;
+
+ /* If controller support only one set and the instance is set to
+ * 1 then there is no option other than using handle 0x00.
+ */
+ if (hdev->le_num_of_adv_sets == 1 && instance == 1)
+ adv->handle = 0x00;
+ else
+ adv->handle = instance;
+
list_add(&adv->list, &hdev->adv_instances);
hdev->adv_instance_cnt++;
}
@@ -2523,16 +2457,16 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
hdev->le_adv_channel_map = 0x07;
hdev->le_adv_min_interval = 0x0800;
hdev->le_adv_max_interval = 0x0800;
- hdev->le_scan_interval = 0x0060;
- hdev->le_scan_window = 0x0030;
- hdev->le_scan_int_suspend = 0x0400;
- hdev->le_scan_window_suspend = 0x0012;
+ hdev->le_scan_interval = DISCOV_LE_SCAN_INT_FAST;
+ hdev->le_scan_window = DISCOV_LE_SCAN_WIN_FAST;
+ hdev->le_scan_int_suspend = DISCOV_LE_SCAN_INT_SLOW1;
+ hdev->le_scan_window_suspend = DISCOV_LE_SCAN_WIN_SLOW1;
hdev->le_scan_int_discovery = DISCOV_LE_SCAN_INT;
hdev->le_scan_window_discovery = DISCOV_LE_SCAN_WIN;
- hdev->le_scan_int_adv_monitor = 0x0060;
- hdev->le_scan_window_adv_monitor = 0x0030;
- hdev->le_scan_int_connect = 0x0060;
- hdev->le_scan_window_connect = 0x0060;
+ hdev->le_scan_int_adv_monitor = DISCOV_LE_SCAN_INT_FAST;
+ hdev->le_scan_window_adv_monitor = DISCOV_LE_SCAN_WIN_FAST;
+ hdev->le_scan_int_connect = DISCOV_LE_SCAN_INT_CONN;
+ hdev->le_scan_window_connect = DISCOV_LE_SCAN_WIN_CONN;
hdev->le_conn_min_interval = 0x0018;
hdev->le_conn_max_interval = 0x0028;
hdev->le_conn_latency = 0x0000;
@@ -2549,7 +2483,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
hdev->le_rx_def_phys = HCI_LE_SET_PHY_1M;
hdev->le_num_of_adv_sets = HCI_MAX_ADV_INSTANCES;
hdev->def_multi_adv_rotation_duration = HCI_DEFAULT_ADV_DURATION;
- hdev->def_le_autoconnect_timeout = HCI_LE_AUTOCONN_TIMEOUT;
+ hdev->def_le_autoconnect_timeout = HCI_LE_CONN_TIMEOUT;
hdev->min_le_tx_power = HCI_TX_POWER_INVALID;
hdev->max_le_tx_power = HCI_TX_POWER_INVALID;
@@ -2610,7 +2544,6 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
INIT_DELAYED_WORK(&hdev->ncmd_timer, hci_ncmd_timeout);
hci_devcd_setup(hdev);
- hci_request_setup(hdev);
hci_init_sysfs(hdev);
discovery_init(hdev);
@@ -2635,21 +2568,7 @@ int hci_register_dev(struct hci_dev *hdev)
if (!hdev->open || !hdev->close || !hdev->send)
return -EINVAL;
- /* Do not allow HCI_AMP devices to register at index 0,
- * so the index can be used as the AMP controller ID.
- */
- switch (hdev->dev_type) {
- case HCI_PRIMARY:
- id = ida_alloc_max(&hci_index_ida, HCI_MAX_ID - 1, GFP_KERNEL);
- break;
- case HCI_AMP:
- id = ida_alloc_range(&hci_index_ida, 1, HCI_MAX_ID - 1,
- GFP_KERNEL);
- break;
- default:
- return -EINVAL;
- }
-
+ id = ida_alloc_max(&hci_index_ida, HCI_MAX_ID - 1, GFP_KERNEL);
if (id < 0)
return id;
@@ -2701,12 +2620,10 @@ int hci_register_dev(struct hci_dev *hdev)
hci_dev_set_flag(hdev, HCI_SETUP);
hci_dev_set_flag(hdev, HCI_AUTO_OFF);
- if (hdev->dev_type == HCI_PRIMARY) {
- /* Assume BR/EDR support until proven otherwise (such as
- * through reading supported features during init.
- */
- hci_dev_set_flag(hdev, HCI_BREDR_ENABLED);
- }
+ /* Assume BR/EDR support until proven otherwise (such as
+ * through reading supported features during init.
+ */
+ hci_dev_set_flag(hdev, HCI_BREDR_ENABLED);
write_lock(&hci_dev_list_lock);
list_add(&hdev->list, &hci_dev_list);
@@ -2762,14 +2679,16 @@ void hci_unregister_dev(struct hci_dev *hdev)
list_del(&hdev->list);
write_unlock(&hci_dev_list_lock);
+ cancel_work_sync(&hdev->rx_work);
+ cancel_work_sync(&hdev->cmd_work);
+ cancel_work_sync(&hdev->tx_work);
cancel_work_sync(&hdev->power_on);
+ cancel_work_sync(&hdev->error_reset);
hci_cmd_sync_clear(hdev);
hci_unregister_suspend_notifier(hdev);
- msft_unregister(hdev);
-
hci_dev_do_close(hdev);
if (!test_bit(HCI_INIT, &hdev->flags) &&
@@ -2823,6 +2742,7 @@ void hci_release_dev(struct hci_dev *hdev)
hci_discovery_filter_clear(hdev);
hci_blocked_keys_clear(hdev);
hci_codec_list_clear(&hdev->local_codecs);
+ msft_release(hdev);
hci_dev_unlock(hdev);
ida_destroy(&hdev->unset_handle_ida);
@@ -2956,15 +2876,31 @@ int hci_reset_dev(struct hci_dev *hdev)
}
EXPORT_SYMBOL(hci_reset_dev);
+static u8 hci_dev_classify_pkt_type(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ if (hdev->classify_pkt_type)
+ return hdev->classify_pkt_type(hdev, skb);
+
+ return hci_skb_pkt_type(skb);
+}
+
/* Receive frame from HCI drivers */
int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb)
{
+ u8 dev_pkt_type;
+
if (!hdev || (!test_bit(HCI_UP, &hdev->flags)
&& !test_bit(HCI_INIT, &hdev->flags))) {
kfree_skb(skb);
return -ENXIO;
}
+ /* Check if the driver agree with packet type classification */
+ dev_pkt_type = hci_dev_classify_pkt_type(hdev, skb);
+ if (hci_skb_pkt_type(skb) != dev_pkt_type) {
+ hci_skb_pkt_type(skb) = dev_pkt_type;
+ }
+
switch (hci_skb_pkt_type(skb)) {
case HCI_EVENT_PKT:
break;
@@ -3109,7 +3045,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen);
- skb = hci_prepare_cmd(hdev, opcode, plen, param);
+ skb = hci_cmd_sync_alloc(hdev, opcode, plen, param, NULL);
if (!skb) {
bt_dev_err(hdev, "no memory for command");
return -ENOMEM;
@@ -3144,7 +3080,7 @@ int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen,
return -EINVAL;
}
- skb = hci_prepare_cmd(hdev, opcode, plen, param);
+ skb = hci_cmd_sync_alloc(hdev, opcode, plen, param, NULL);
if (!skb) {
bt_dev_err(hdev, "no memory for command (opcode 0x%4.4x)",
opcode);
@@ -3243,17 +3179,7 @@ static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue,
hci_skb_pkt_type(skb) = HCI_ACLDATA_PKT;
- switch (hdev->dev_type) {
- case HCI_PRIMARY:
- hci_add_acl_hdr(skb, conn->handle, flags);
- break;
- case HCI_AMP:
- hci_add_acl_hdr(skb, chan->handle, flags);
- break;
- default:
- bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type);
- return;
- }
+ hci_add_acl_hdr(skb, conn->handle, flags);
list = skb_shinfo(skb)->frag_list;
if (!list) {
@@ -3413,9 +3339,6 @@ static inline void hci_quote_sent(struct hci_conn *conn, int num, int *quote)
case ACL_LINK:
cnt = hdev->acl_cnt;
break;
- case AMP_LINK:
- cnt = hdev->block_cnt;
- break;
case SCO_LINK:
case ESCO_LINK:
cnt = hdev->sco_cnt;
@@ -3613,12 +3536,6 @@ static void hci_prio_recalculate(struct hci_dev *hdev, __u8 type)
}
-static inline int __get_blocks(struct hci_dev *hdev, struct sk_buff *skb)
-{
- /* Calculate count of blocks used by this packet */
- return DIV_ROUND_UP(skb->len - HCI_ACL_HDR_SIZE, hdev->block_len);
-}
-
static void __check_timeout(struct hci_dev *hdev, unsigned int cnt, u8 type)
{
unsigned long last_tx;
@@ -3732,81 +3649,15 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev)
hci_prio_recalculate(hdev, ACL_LINK);
}
-static void hci_sched_acl_blk(struct hci_dev *hdev)
-{
- unsigned int cnt = hdev->block_cnt;
- struct hci_chan *chan;
- struct sk_buff *skb;
- int quote;
- u8 type;
-
- BT_DBG("%s", hdev->name);
-
- if (hdev->dev_type == HCI_AMP)
- type = AMP_LINK;
- else
- type = ACL_LINK;
-
- __check_timeout(hdev, cnt, type);
-
- while (hdev->block_cnt > 0 &&
- (chan = hci_chan_sent(hdev, type, &quote))) {
- u32 priority = (skb_peek(&chan->data_q))->priority;
- while (quote > 0 && (skb = skb_peek(&chan->data_q))) {
- int blocks;
-
- BT_DBG("chan %p skb %p len %d priority %u", chan, skb,
- skb->len, skb->priority);
-
- /* Stop if priority has changed */
- if (skb->priority < priority)
- break;
-
- skb = skb_dequeue(&chan->data_q);
-
- blocks = __get_blocks(hdev, skb);
- if (blocks > hdev->block_cnt)
- return;
-
- hci_conn_enter_active_mode(chan->conn,
- bt_cb(skb)->force_active);
-
- hci_send_frame(hdev, skb);
- hdev->acl_last_tx = jiffies;
-
- hdev->block_cnt -= blocks;
- quote -= blocks;
-
- chan->sent += blocks;
- chan->conn->sent += blocks;
- }
- }
-
- if (cnt != hdev->block_cnt)
- hci_prio_recalculate(hdev, type);
-}
-
static void hci_sched_acl(struct hci_dev *hdev)
{
BT_DBG("%s", hdev->name);
/* No ACL link over BR/EDR controller */
- if (!hci_conn_num(hdev, ACL_LINK) && hdev->dev_type == HCI_PRIMARY)
- return;
-
- /* No AMP link over AMP controller */
- if (!hci_conn_num(hdev, AMP_LINK) && hdev->dev_type == HCI_AMP)
+ if (!hci_conn_num(hdev, ACL_LINK))
return;
- switch (hdev->flow_ctl_mode) {
- case HCI_FLOW_CTL_MODE_PACKET_BASED:
- hci_sched_acl_pkt(hdev);
- break;
-
- case HCI_FLOW_CTL_MODE_BLOCK_BASED:
- hci_sched_acl_blk(hdev);
- break;
- }
+ hci_sched_acl_pkt(hdev);
}
static void hci_sched_le(struct hci_dev *hdev)
@@ -4214,7 +4065,7 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb)
return;
}
- if (hci_req_status_pend(hdev) &&
+ if (hdev->req_status == HCI_REQ_PEND &&
!hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) {
kfree_skb(hdev->req_skb);
hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL);
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index ce3ff2fa72e5..f625074d1f00 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -28,7 +28,6 @@
#include <net/bluetooth/hci_core.h>
#include "smp.h"
-#include "hci_request.h"
#include "hci_debugfs.h"
#define DEFINE_QUIRK_ATTRIBUTE(__name, __quirk) \
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 4a27e4a17a67..d0c118c47f6c 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1,7 +1,7 @@
/*
BlueZ - Bluetooth protocol stack for Linux
Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
- Copyright 2023 NXP
+ Copyright 2023-2024 NXP
Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
@@ -33,7 +33,6 @@
#include <net/bluetooth/hci_core.h>
#include <net/bluetooth/mgmt.h>
-#include "hci_request.h"
#include "hci_debugfs.h"
#include "hci_codec.h"
#include "smp.h"
@@ -913,21 +912,6 @@ static u8 hci_cc_read_local_ext_features(struct hci_dev *hdev, void *data,
return rp->status;
}
-static u8 hci_cc_read_flow_control_mode(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_rp_read_flow_control_mode *rp = data;
-
- bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
-
- if (rp->status)
- return rp->status;
-
- hdev->flow_ctl_mode = rp->mode;
-
- return rp->status;
-}
-
static u8 hci_cc_read_buffer_size(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -954,6 +938,9 @@ static u8 hci_cc_read_buffer_size(struct hci_dev *hdev, void *data,
BT_DBG("%s acl mtu %d:%d sco mtu %d:%d", hdev->name, hdev->acl_mtu,
hdev->acl_pkts, hdev->sco_mtu, hdev->sco_pkts);
+ if (!hdev->acl_mtu || !hdev->acl_pkts)
+ return HCI_ERROR_INVALID_PARAMETERS;
+
return rp->status;
}
@@ -1068,28 +1055,6 @@ static u8 hci_cc_write_page_scan_type(struct hci_dev *hdev, void *data,
return rp->status;
}
-static u8 hci_cc_read_data_block_size(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_rp_read_data_block_size *rp = data;
-
- bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
-
- if (rp->status)
- return rp->status;
-
- hdev->block_mtu = __le16_to_cpu(rp->max_acl_len);
- hdev->block_len = __le16_to_cpu(rp->block_len);
- hdev->num_blocks = __le16_to_cpu(rp->num_blocks);
-
- hdev->block_cnt = hdev->num_blocks;
-
- BT_DBG("%s blk mtu %d cnt %d len %d", hdev->name, hdev->block_mtu,
- hdev->block_cnt, hdev->block_len);
-
- return rp->status;
-}
-
static u8 hci_cc_read_clock(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -1124,30 +1089,6 @@ unlock:
return rp->status;
}
-static u8 hci_cc_read_local_amp_info(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_rp_read_local_amp_info *rp = data;
-
- bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
-
- if (rp->status)
- return rp->status;
-
- hdev->amp_status = rp->amp_status;
- hdev->amp_total_bw = __le32_to_cpu(rp->total_bw);
- hdev->amp_max_bw = __le32_to_cpu(rp->max_bw);
- hdev->amp_min_latency = __le32_to_cpu(rp->min_latency);
- hdev->amp_max_pdu = __le32_to_cpu(rp->max_pdu);
- hdev->amp_type = rp->amp_type;
- hdev->amp_pal_cap = __le16_to_cpu(rp->pal_cap);
- hdev->amp_assoc_size = __le16_to_cpu(rp->max_assoc_size);
- hdev->amp_be_flush_to = __le32_to_cpu(rp->be_flush_to);
- hdev->amp_max_flush_to = __le32_to_cpu(rp->max_flush_to);
-
- return rp->status;
-}
-
static u8 hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -1263,6 +1204,9 @@ static u8 hci_cc_le_read_buffer_size(struct hci_dev *hdev, void *data,
BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts);
+ if (hdev->le_mtu && hdev->le_mtu < HCI_MIN_LE_MTU)
+ return HCI_ERROR_INVALID_PARAMETERS;
+
return rp->status;
}
@@ -1777,10 +1721,10 @@ static void le_set_scan_enable_complete(struct hci_dev *hdev, u8 enable)
switch (enable) {
case LE_SCAN_ENABLE:
hci_dev_set_flag(hdev, HCI_LE_SCAN);
- if (hdev->le_scan_type == LE_SCAN_ACTIVE)
+ if (hdev->le_scan_type == LE_SCAN_ACTIVE) {
clear_pending_adv_report(hdev);
- if (hci_dev_test_flag(hdev, HCI_MESH))
hci_discovery_set_state(hdev, DISCOVERY_FINDING);
+ }
break;
case LE_SCAN_DISABLE:
@@ -2342,8 +2286,8 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
if (!conn) {
conn = hci_conn_add_unset(hdev, ACL_LINK, &cp->bdaddr,
HCI_ROLE_MASTER);
- if (!conn)
- bt_dev_err(hdev, "no memory for new connection");
+ if (IS_ERR(conn))
+ bt_dev_err(hdev, "connection err: %ld", PTR_ERR(conn));
}
}
@@ -3154,8 +3098,8 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
BDADDR_BREDR)) {
conn = hci_conn_add_unset(hdev, ev->link_type,
&ev->bdaddr, HCI_ROLE_SLAVE);
- if (!conn) {
- bt_dev_err(hdev, "no memory for new conn");
+ if (IS_ERR(conn)) {
+ bt_dev_err(hdev, "connection err: %ld", PTR_ERR(conn));
goto unlock;
}
} else {
@@ -3343,8 +3287,8 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data,
if (!conn) {
conn = hci_conn_add_unset(hdev, ev->link_type, &ev->bdaddr,
HCI_ROLE_SLAVE);
- if (!conn) {
- bt_dev_err(hdev, "no memory for new connection");
+ if (IS_ERR(conn)) {
+ bt_dev_err(hdev, "connection err: %ld", PTR_ERR(conn));
goto unlock;
}
}
@@ -3821,6 +3765,9 @@ static u8 hci_cc_le_read_buffer_size_v2(struct hci_dev *hdev, void *data,
BT_DBG("%s acl mtu %d:%d iso mtu %d:%d", hdev->name, hdev->acl_mtu,
hdev->acl_pkts, hdev->iso_mtu, hdev->iso_pkts);
+ if (hdev->le_mtu && hdev->le_mtu < HCI_MIN_LE_MTU)
+ return HCI_ERROR_INVALID_PARAMETERS;
+
return rp->status;
}
@@ -4112,12 +4059,6 @@ static const struct hci_cc {
HCI_CC(HCI_OP_READ_PAGE_SCAN_TYPE, hci_cc_read_page_scan_type,
sizeof(struct hci_rp_read_page_scan_type)),
HCI_CC_STATUS(HCI_OP_WRITE_PAGE_SCAN_TYPE, hci_cc_write_page_scan_type),
- HCI_CC(HCI_OP_READ_DATA_BLOCK_SIZE, hci_cc_read_data_block_size,
- sizeof(struct hci_rp_read_data_block_size)),
- HCI_CC(HCI_OP_READ_FLOW_CONTROL_MODE, hci_cc_read_flow_control_mode,
- sizeof(struct hci_rp_read_flow_control_mode)),
- HCI_CC(HCI_OP_READ_LOCAL_AMP_INFO, hci_cc_read_local_amp_info,
- sizeof(struct hci_rp_read_local_amp_info)),
HCI_CC(HCI_OP_READ_CLOCK, hci_cc_read_clock,
sizeof(struct hci_rp_read_clock)),
HCI_CC(HCI_OP_READ_ENC_KEY_SIZE, hci_cc_read_enc_key_size,
@@ -4308,7 +4249,7 @@ static void hci_cs_le_create_cis(struct hci_dev *hdev, u8 status)
hci_dev_lock(hdev);
/* Remove connection if command failed */
- for (i = 0; cp->num_cis; cp->num_cis--, i++) {
+ for (i = 0; i < cp->num_cis; i++) {
struct hci_conn *conn;
u16 handle;
@@ -4324,6 +4265,7 @@ static void hci_cs_le_create_cis(struct hci_dev *hdev, u8 status)
hci_conn_del(conn);
}
}
+ cp->num_cis = 0;
if (pending)
hci_le_create_cis_pending(hdev);
@@ -4452,11 +4394,6 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data,
flex_array_size(ev, handles, ev->num)))
return;
- if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_PACKET_BASED) {
- bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode);
- return;
- }
-
bt_dev_dbg(hdev, "num %d", ev->num);
for (i = 0; i < ev->num; i++) {
@@ -4524,78 +4461,6 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data,
queue_work(hdev->workqueue, &hdev->tx_work);
}
-static struct hci_conn *__hci_conn_lookup_handle(struct hci_dev *hdev,
- __u16 handle)
-{
- struct hci_chan *chan;
-
- switch (hdev->dev_type) {
- case HCI_PRIMARY:
- return hci_conn_hash_lookup_handle(hdev, handle);
- case HCI_AMP:
- chan = hci_chan_lookup_handle(hdev, handle);
- if (chan)
- return chan->conn;
- break;
- default:
- bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type);
- break;
- }
-
- return NULL;
-}
-
-static void hci_num_comp_blocks_evt(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_ev_num_comp_blocks *ev = data;
- int i;
-
- if (!hci_ev_skb_pull(hdev, skb, HCI_EV_NUM_COMP_BLOCKS,
- flex_array_size(ev, handles, ev->num_hndl)))
- return;
-
- if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_BLOCK_BASED) {
- bt_dev_err(hdev, "wrong event for mode %d",
- hdev->flow_ctl_mode);
- return;
- }
-
- bt_dev_dbg(hdev, "num_blocks %d num_hndl %d", ev->num_blocks,
- ev->num_hndl);
-
- for (i = 0; i < ev->num_hndl; i++) {
- struct hci_comp_blocks_info *info = &ev->handles[i];
- struct hci_conn *conn = NULL;
- __u16 handle, block_count;
-
- handle = __le16_to_cpu(info->handle);
- block_count = __le16_to_cpu(info->blocks);
-
- conn = __hci_conn_lookup_handle(hdev, handle);
- if (!conn)
- continue;
-
- conn->sent -= block_count;
-
- switch (conn->type) {
- case ACL_LINK:
- case AMP_LINK:
- hdev->block_cnt += block_count;
- if (hdev->block_cnt > hdev->num_blocks)
- hdev->block_cnt = hdev->num_blocks;
- break;
-
- default:
- bt_dev_err(hdev, "unknown type %d conn %p",
- conn->type, conn);
- break;
- }
- }
-
- queue_work(hdev->workqueue, &hdev->tx_work);
-}
-
static void hci_mode_change_evt(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -5768,8 +5633,8 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
goto unlock;
conn = hci_conn_add_unset(hdev, LE_LINK, bdaddr, role);
- if (!conn) {
- bt_dev_err(hdev, "no memory for new connection");
+ if (IS_ERR(conn)) {
+ bt_dev_err(hdev, "connection err: %ld", PTR_ERR(conn));
goto unlock;
}
@@ -6446,6 +6311,13 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data,
evt_type = __le16_to_cpu(info->type) & LE_EXT_ADV_EVT_TYPE_MASK;
legacy_evt_type = ext_evt_type_to_legacy(hdev, evt_type);
+
+ if (test_bit(HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY,
+ &hdev->quirks)) {
+ info->primary_phy &= 0x1f;
+ info->secondary_phy &= 0x1f;
+ }
+
if (legacy_evt_type != LE_ADV_INVALID) {
process_adv_report(hdev, legacy_evt_type, &info->bdaddr,
info->bdaddr_type, NULL, 0,
@@ -6493,14 +6365,16 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data,
if (!(flags & HCI_PROTO_DEFER))
goto unlock;
- if (ev->status) {
- /* Add connection to indicate the failed PA sync event */
- pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY,
- HCI_ROLE_SLAVE);
+ /* Add connection to indicate PA sync event */
+ pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY,
+ HCI_ROLE_SLAVE);
- if (!pa_sync)
- goto unlock;
+ if (IS_ERR(pa_sync))
+ goto unlock;
+ pa_sync->sync_handle = le16_to_cpu(ev->handle);
+
+ if (ev->status) {
set_bit(HCI_CONN_PA_SYNC_FAILED, &pa_sync->flags);
/* Notify iso layer */
@@ -6517,6 +6391,7 @@ static void hci_le_per_adv_report_evt(struct hci_dev *hdev, void *data,
struct hci_ev_le_per_adv_report *ev = data;
int mask = hdev->link_mode;
__u8 flags = 0;
+ struct hci_conn *pa_sync;
bt_dev_dbg(hdev, "sync_handle 0x%4.4x", le16_to_cpu(ev->sync_handle));
@@ -6524,8 +6399,28 @@ static void hci_le_per_adv_report_evt(struct hci_dev *hdev, void *data,
mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, ISO_LINK, &flags);
if (!(mask & HCI_LM_ACCEPT))
- hci_le_pa_term_sync(hdev, ev->sync_handle);
+ goto unlock;
+
+ if (!(flags & HCI_PROTO_DEFER))
+ goto unlock;
+
+ pa_sync = hci_conn_hash_lookup_pa_sync_handle
+ (hdev,
+ le16_to_cpu(ev->sync_handle));
+
+ if (!pa_sync)
+ goto unlock;
+
+ if (ev->data_status == LE_PA_DATA_COMPLETE &&
+ !test_and_set_bit(HCI_CONN_PA_SYNC, &pa_sync->flags)) {
+ /* Notify iso layer */
+ hci_connect_cfm(pa_sync, 0);
+
+ /* Notify MGMT layer */
+ mgmt_device_connected(hdev, pa_sync, NULL, 0);
+ }
+unlock:
hci_dev_unlock(hdev);
}
@@ -6772,6 +6667,7 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
struct bt_iso_qos *qos;
bool pending = false;
u16 handle = __le16_to_cpu(ev->handle);
+ u32 c_sdu_interval, p_sdu_interval;
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
@@ -6796,12 +6692,25 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
pending = test_and_clear_bit(HCI_CONN_CREATE_CIS, &conn->flags);
- /* Convert ISO Interval (1.25 ms slots) to SDU Interval (us) */
- qos->ucast.in.interval = le16_to_cpu(ev->interval) * 1250;
- qos->ucast.out.interval = qos->ucast.in.interval;
+ /* BLUETOOTH CORE SPECIFICATION Version 5.4 | Vol 6, Part G
+ * page 3075:
+ * Transport_Latency_C_To_P = CIG_Sync_Delay + (FT_C_To_P) ×
+ * ISO_Interval + SDU_Interval_C_To_P
+ * ...
+ * SDU_Interval = (CIG_Sync_Delay + (FT) x ISO_Interval) -
+ * Transport_Latency
+ */
+ c_sdu_interval = (get_unaligned_le24(ev->cig_sync_delay) +
+ (ev->c_ft * le16_to_cpu(ev->interval) * 1250)) -
+ get_unaligned_le24(ev->c_latency);
+ p_sdu_interval = (get_unaligned_le24(ev->cig_sync_delay) +
+ (ev->p_ft * le16_to_cpu(ev->interval) * 1250)) -
+ get_unaligned_le24(ev->p_latency);
switch (conn->role) {
case HCI_ROLE_SLAVE:
+ qos->ucast.in.interval = c_sdu_interval;
+ qos->ucast.out.interval = p_sdu_interval;
/* Convert Transport Latency (us) to Latency (msec) */
qos->ucast.in.latency =
DIV_ROUND_CLOSEST(get_unaligned_le24(ev->c_latency),
@@ -6815,6 +6724,8 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
qos->ucast.out.phy = ev->p_phy;
break;
case HCI_ROLE_MASTER:
+ qos->ucast.in.interval = p_sdu_interval;
+ qos->ucast.out.interval = c_sdu_interval;
/* Convert Transport Latency (us) to Latency (msec) */
qos->ucast.out.latency =
DIV_ROUND_CLOSEST(get_unaligned_le24(ev->c_latency),
@@ -6898,7 +6809,7 @@ static void hci_le_cis_req_evt(struct hci_dev *hdev, void *data,
if (!cis) {
cis = hci_conn_add(hdev, ISO_LINK, &acl->dst, HCI_ROLE_SLAVE,
cis_handle);
- if (!cis) {
+ if (IS_ERR(cis)) {
hci_le_reject_cis(hdev, ev->cis_handle);
goto unlock;
}
@@ -7005,9 +6916,13 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
bis = hci_conn_hash_lookup_handle(hdev, handle);
if (!bis) {
+ if (handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_dbg(hdev, "ignore too large handle %u", handle);
+ continue;
+ }
bis = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY,
HCI_ROLE_SLAVE, handle);
- if (!bis)
+ if (IS_ERR(bis))
continue;
}
@@ -7037,6 +6952,8 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
u16 handle = le16_to_cpu(ev->bis[i]);
bis = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!bis)
+ continue;
set_bit(HCI_CONN_BIG_SYNC_FAILED, &bis->flags);
hci_connect_cfm(bis, ev->status);
@@ -7058,10 +6975,8 @@ static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, ISO_LINK, &flags);
- if (!(mask & HCI_LM_ACCEPT)) {
- hci_le_pa_term_sync(hdev, ev->sync_handle);
+ if (!(mask & HCI_LM_ACCEPT))
goto unlock;
- }
if (!(flags & HCI_PROTO_DEFER))
goto unlock;
@@ -7070,24 +6985,13 @@ static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data,
(hdev,
le16_to_cpu(ev->sync_handle));
- if (pa_sync)
- goto unlock;
-
- /* Add connection to indicate the PA sync event */
- pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY,
- HCI_ROLE_SLAVE);
-
if (!pa_sync)
goto unlock;
- pa_sync->sync_handle = le16_to_cpu(ev->sync_handle);
- set_bit(HCI_CONN_PA_SYNC, &pa_sync->flags);
+ pa_sync->iso_qos.bcast.encryption = ev->encryption;
/* Notify iso layer */
- hci_connect_cfm(pa_sync, 0x00);
-
- /* Notify MGMT layer */
- mgmt_device_connected(hdev, pa_sync, NULL, 0);
+ hci_connect_cfm(pa_sync, 0);
unlock:
hci_dev_unlock(hdev);
@@ -7501,9 +7405,6 @@ static const struct hci_ev {
/* [0x3e = HCI_EV_LE_META] */
HCI_EV_REQ_VL(HCI_EV_LE_META, hci_le_meta_evt,
sizeof(struct hci_ev_le_meta), HCI_MAX_EVENT_SIZE),
- /* [0x48 = HCI_EV_NUM_COMP_BLOCKS] */
- HCI_EV(HCI_EV_NUM_COMP_BLOCKS, hci_num_comp_blocks_evt,
- sizeof(struct hci_ev_num_comp_blocks)),
/* [0xff = HCI_EV_VENDOR] */
HCI_EV_VL(HCI_EV_VENDOR, msft_vendor_evt, 0, HCI_MAX_EVENT_SIZE),
};
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
deleted file mode 100644
index efea25eb56ce..000000000000
--- a/net/bluetooth/hci_request.c
+++ /dev/null
@@ -1,903 +0,0 @@
-/*
- BlueZ - Bluetooth protocol stack for Linux
-
- Copyright (C) 2014 Intel Corporation
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License version 2 as
- published by the Free Software Foundation;
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
- IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
- CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
- ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
- COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
- SOFTWARE IS DISCLAIMED.
-*/
-
-#include <linux/sched/signal.h>
-
-#include <net/bluetooth/bluetooth.h>
-#include <net/bluetooth/hci_core.h>
-#include <net/bluetooth/mgmt.h>
-
-#include "smp.h"
-#include "hci_request.h"
-#include "msft.h"
-#include "eir.h"
-
-void hci_req_init(struct hci_request *req, struct hci_dev *hdev)
-{
- skb_queue_head_init(&req->cmd_q);
- req->hdev = hdev;
- req->err = 0;
-}
-
-void hci_req_purge(struct hci_request *req)
-{
- skb_queue_purge(&req->cmd_q);
-}
-
-bool hci_req_status_pend(struct hci_dev *hdev)
-{
- return hdev->req_status == HCI_REQ_PEND;
-}
-
-static int req_run(struct hci_request *req, hci_req_complete_t complete,
- hci_req_complete_skb_t complete_skb)
-{
- struct hci_dev *hdev = req->hdev;
- struct sk_buff *skb;
- unsigned long flags;
-
- bt_dev_dbg(hdev, "length %u", skb_queue_len(&req->cmd_q));
-
- /* If an error occurred during request building, remove all HCI
- * commands queued on the HCI request queue.
- */
- if (req->err) {
- skb_queue_purge(&req->cmd_q);
- return req->err;
- }
-
- /* Do not allow empty requests */
- if (skb_queue_empty(&req->cmd_q))
- return -ENODATA;
-
- skb = skb_peek_tail(&req->cmd_q);
- if (complete) {
- bt_cb(skb)->hci.req_complete = complete;
- } else if (complete_skb) {
- bt_cb(skb)->hci.req_complete_skb = complete_skb;
- bt_cb(skb)->hci.req_flags |= HCI_REQ_SKB;
- }
-
- spin_lock_irqsave(&hdev->cmd_q.lock, flags);
- skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q);
- spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
-
- queue_work(hdev->workqueue, &hdev->cmd_work);
-
- return 0;
-}
-
-int hci_req_run(struct hci_request *req, hci_req_complete_t complete)
-{
- return req_run(req, complete, NULL);
-}
-
-int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete)
-{
- return req_run(req, NULL, complete);
-}
-
-void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode,
- struct sk_buff *skb)
-{
- bt_dev_dbg(hdev, "result 0x%2.2x", result);
-
- if (hdev->req_status == HCI_REQ_PEND) {
- hdev->req_result = result;
- hdev->req_status = HCI_REQ_DONE;
- if (skb) {
- kfree_skb(hdev->req_skb);
- hdev->req_skb = skb_get(skb);
- }
- wake_up_interruptible(&hdev->req_wait_q);
- }
-}
-
-/* Execute request and wait for completion. */
-int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req,
- unsigned long opt),
- unsigned long opt, u32 timeout, u8 *hci_status)
-{
- struct hci_request req;
- int err = 0;
-
- bt_dev_dbg(hdev, "start");
-
- hci_req_init(&req, hdev);
-
- hdev->req_status = HCI_REQ_PEND;
-
- err = func(&req, opt);
- if (err) {
- if (hci_status)
- *hci_status = HCI_ERROR_UNSPECIFIED;
- return err;
- }
-
- err = hci_req_run_skb(&req, hci_req_sync_complete);
- if (err < 0) {
- hdev->req_status = 0;
-
- /* ENODATA means the HCI request command queue is empty.
- * This can happen when a request with conditionals doesn't
- * trigger any commands to be sent. This is normal behavior
- * and should not trigger an error return.
- */
- if (err == -ENODATA) {
- if (hci_status)
- *hci_status = 0;
- return 0;
- }
-
- if (hci_status)
- *hci_status = HCI_ERROR_UNSPECIFIED;
-
- return err;
- }
-
- err = wait_event_interruptible_timeout(hdev->req_wait_q,
- hdev->req_status != HCI_REQ_PEND, timeout);
-
- if (err == -ERESTARTSYS)
- return -EINTR;
-
- switch (hdev->req_status) {
- case HCI_REQ_DONE:
- err = -bt_to_errno(hdev->req_result);
- if (hci_status)
- *hci_status = hdev->req_result;
- break;
-
- case HCI_REQ_CANCELED:
- err = -hdev->req_result;
- if (hci_status)
- *hci_status = HCI_ERROR_UNSPECIFIED;
- break;
-
- default:
- err = -ETIMEDOUT;
- if (hci_status)
- *hci_status = HCI_ERROR_UNSPECIFIED;
- break;
- }
-
- kfree_skb(hdev->req_skb);
- hdev->req_skb = NULL;
- hdev->req_status = hdev->req_result = 0;
-
- bt_dev_dbg(hdev, "end: err %d", err);
-
- return err;
-}
-
-int hci_req_sync(struct hci_dev *hdev, int (*req)(struct hci_request *req,
- unsigned long opt),
- unsigned long opt, u32 timeout, u8 *hci_status)
-{
- int ret;
-
- /* Serialize all requests */
- hci_req_sync_lock(hdev);
- /* check the state after obtaing the lock to protect the HCI_UP
- * against any races from hci_dev_do_close when the controller
- * gets removed.
- */
- if (test_bit(HCI_UP, &hdev->flags))
- ret = __hci_req_sync(hdev, req, opt, timeout, hci_status);
- else
- ret = -ENETDOWN;
- hci_req_sync_unlock(hdev);
-
- return ret;
-}
-
-struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen,
- const void *param)
-{
- int len = HCI_COMMAND_HDR_SIZE + plen;
- struct hci_command_hdr *hdr;
- struct sk_buff *skb;
-
- skb = bt_skb_alloc(len, GFP_ATOMIC);
- if (!skb)
- return NULL;
-
- hdr = skb_put(skb, HCI_COMMAND_HDR_SIZE);
- hdr->opcode = cpu_to_le16(opcode);
- hdr->plen = plen;
-
- if (plen)
- skb_put_data(skb, param, plen);
-
- bt_dev_dbg(hdev, "skb len %d", skb->len);
-
- hci_skb_pkt_type(skb) = HCI_COMMAND_PKT;
- hci_skb_opcode(skb) = opcode;
-
- return skb;
-}
-
-/* Queue a command to an asynchronous HCI request */
-void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
- const void *param, u8 event)
-{
- struct hci_dev *hdev = req->hdev;
- struct sk_buff *skb;
-
- bt_dev_dbg(hdev, "opcode 0x%4.4x plen %d", opcode, plen);
-
- /* If an error occurred during request building, there is no point in
- * queueing the HCI command. We can simply return.
- */
- if (req->err)
- return;
-
- skb = hci_prepare_cmd(hdev, opcode, plen, param);
- if (!skb) {
- bt_dev_err(hdev, "no memory for command (opcode 0x%4.4x)",
- opcode);
- req->err = -ENOMEM;
- return;
- }
-
- if (skb_queue_empty(&req->cmd_q))
- bt_cb(skb)->hci.req_flags |= HCI_REQ_START;
-
- hci_skb_event(skb) = event;
-
- skb_queue_tail(&req->cmd_q, skb);
-}
-
-void hci_req_add(struct hci_request *req, u16 opcode, u32 plen,
- const void *param)
-{
- bt_dev_dbg(req->hdev, "HCI_REQ-0x%4.4x", opcode);
- hci_req_add_ev(req, opcode, plen, param, 0);
-}
-
-static void start_interleave_scan(struct hci_dev *hdev)
-{
- hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER;
- queue_delayed_work(hdev->req_workqueue,
- &hdev->interleave_scan, 0);
-}
-
-static bool is_interleave_scanning(struct hci_dev *hdev)
-{
- return hdev->interleave_scan_state != INTERLEAVE_SCAN_NONE;
-}
-
-static void cancel_interleave_scan(struct hci_dev *hdev)
-{
- bt_dev_dbg(hdev, "cancelling interleave scan");
-
- cancel_delayed_work_sync(&hdev->interleave_scan);
-
- hdev->interleave_scan_state = INTERLEAVE_SCAN_NONE;
-}
-
-/* Return true if interleave_scan wasn't started until exiting this function,
- * otherwise, return false
- */
-static bool __hci_update_interleaved_scan(struct hci_dev *hdev)
-{
- /* Do interleaved scan only if all of the following are true:
- * - There is at least one ADV monitor
- * - At least one pending LE connection or one device to be scanned for
- * - Monitor offloading is not supported
- * If so, we should alternate between allowlist scan and one without
- * any filters to save power.
- */
- bool use_interleaving = hci_is_adv_monitoring(hdev) &&
- !(list_empty(&hdev->pend_le_conns) &&
- list_empty(&hdev->pend_le_reports)) &&
- hci_get_adv_monitor_offload_ext(hdev) ==
- HCI_ADV_MONITOR_EXT_NONE;
- bool is_interleaving = is_interleave_scanning(hdev);
-
- if (use_interleaving && !is_interleaving) {
- start_interleave_scan(hdev);
- bt_dev_dbg(hdev, "starting interleave scan");
- return true;
- }
-
- if (!use_interleaving && is_interleaving)
- cancel_interleave_scan(hdev);
-
- return false;
-}
-
-void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn)
-{
- struct hci_dev *hdev = req->hdev;
-
- if (hdev->scanning_paused) {
- bt_dev_dbg(hdev, "Scanning is paused for suspend");
- return;
- }
-
- if (use_ext_scan(hdev)) {
- struct hci_cp_le_set_ext_scan_enable cp;
-
- memset(&cp, 0, sizeof(cp));
- cp.enable = LE_SCAN_DISABLE;
- hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE, sizeof(cp),
- &cp);
- } else {
- struct hci_cp_le_set_scan_enable cp;
-
- memset(&cp, 0, sizeof(cp));
- cp.enable = LE_SCAN_DISABLE;
- hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
- }
-
- /* Disable address resolution */
- if (hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION) && !rpa_le_conn) {
- __u8 enable = 0x00;
-
- hci_req_add(req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable);
- }
-}
-
-static void del_from_accept_list(struct hci_request *req, bdaddr_t *bdaddr,
- u8 bdaddr_type)
-{
- struct hci_cp_le_del_from_accept_list cp;
-
- cp.bdaddr_type = bdaddr_type;
- bacpy(&cp.bdaddr, bdaddr);
-
- bt_dev_dbg(req->hdev, "Remove %pMR (0x%x) from accept list", &cp.bdaddr,
- cp.bdaddr_type);
- hci_req_add(req, HCI_OP_LE_DEL_FROM_ACCEPT_LIST, sizeof(cp), &cp);
-
- if (use_ll_privacy(req->hdev)) {
- struct smp_irk *irk;
-
- irk = hci_find_irk_by_addr(req->hdev, bdaddr, bdaddr_type);
- if (irk) {
- struct hci_cp_le_del_from_resolv_list cp;
-
- cp.bdaddr_type = bdaddr_type;
- bacpy(&cp.bdaddr, bdaddr);
-
- hci_req_add(req, HCI_OP_LE_DEL_FROM_RESOLV_LIST,
- sizeof(cp), &cp);
- }
- }
-}
-
-/* Adds connection to accept list if needed. On error, returns -1. */
-static int add_to_accept_list(struct hci_request *req,
- struct hci_conn_params *params, u8 *num_entries,
- bool allow_rpa)
-{
- struct hci_cp_le_add_to_accept_list cp;
- struct hci_dev *hdev = req->hdev;
-
- /* Already in accept list */
- if (hci_bdaddr_list_lookup(&hdev->le_accept_list, &params->addr,
- params->addr_type))
- return 0;
-
- /* Select filter policy to accept all advertising */
- if (*num_entries >= hdev->le_accept_list_size)
- return -1;
-
- /* Accept list can not be used with RPAs */
- if (!allow_rpa &&
- !hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) &&
- hci_find_irk_by_addr(hdev, &params->addr, params->addr_type)) {
- return -1;
- }
-
- /* During suspend, only wakeable devices can be in accept list */
- if (hdev->suspended &&
- !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP))
- return 0;
-
- *num_entries += 1;
- cp.bdaddr_type = params->addr_type;
- bacpy(&cp.bdaddr, &params->addr);
-
- bt_dev_dbg(hdev, "Add %pMR (0x%x) to accept list", &cp.bdaddr,
- cp.bdaddr_type);
- hci_req_add(req, HCI_OP_LE_ADD_TO_ACCEPT_LIST, sizeof(cp), &cp);
-
- if (use_ll_privacy(hdev)) {
- struct smp_irk *irk;
-
- irk = hci_find_irk_by_addr(hdev, &params->addr,
- params->addr_type);
- if (irk) {
- struct hci_cp_le_add_to_resolv_list cp;
-
- cp.bdaddr_type = params->addr_type;
- bacpy(&cp.bdaddr, &params->addr);
- memcpy(cp.peer_irk, irk->val, 16);
-
- if (hci_dev_test_flag(hdev, HCI_PRIVACY))
- memcpy(cp.local_irk, hdev->irk, 16);
- else
- memset(cp.local_irk, 0, 16);
-
- hci_req_add(req, HCI_OP_LE_ADD_TO_RESOLV_LIST,
- sizeof(cp), &cp);
- }
- }
-
- return 0;
-}
-
-static u8 update_accept_list(struct hci_request *req)
-{
- struct hci_dev *hdev = req->hdev;
- struct hci_conn_params *params;
- struct bdaddr_list *b;
- u8 num_entries = 0;
- bool pend_conn, pend_report;
- /* We allow usage of accept list even with RPAs in suspend. In the worst
- * case, we won't be able to wake from devices that use the privacy1.2
- * features. Additionally, once we support privacy1.2 and IRK
- * offloading, we can update this to also check for those conditions.
- */
- bool allow_rpa = hdev->suspended;
-
- if (use_ll_privacy(hdev))
- allow_rpa = true;
-
- /* Go through the current accept list programmed into the
- * controller one by one and check if that address is still
- * in the list of pending connections or list of devices to
- * report. If not present in either list, then queue the
- * command to remove it from the controller.
- */
- list_for_each_entry(b, &hdev->le_accept_list, list) {
- pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns,
- &b->bdaddr,
- b->bdaddr_type);
- pend_report = hci_pend_le_action_lookup(&hdev->pend_le_reports,
- &b->bdaddr,
- b->bdaddr_type);
-
- /* If the device is not likely to connect or report,
- * remove it from the accept list.
- */
- if (!pend_conn && !pend_report) {
- del_from_accept_list(req, &b->bdaddr, b->bdaddr_type);
- continue;
- }
-
- /* Accept list can not be used with RPAs */
- if (!allow_rpa &&
- !hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) &&
- hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) {
- return 0x00;
- }
-
- num_entries++;
- }
-
- /* Since all no longer valid accept list entries have been
- * removed, walk through the list of pending connections
- * and ensure that any new device gets programmed into
- * the controller.
- *
- * If the list of the devices is larger than the list of
- * available accept list entries in the controller, then
- * just abort and return filer policy value to not use the
- * accept list.
- */
- list_for_each_entry(params, &hdev->pend_le_conns, action) {
- if (add_to_accept_list(req, params, &num_entries, allow_rpa))
- return 0x00;
- }
-
- /* After adding all new pending connections, walk through
- * the list of pending reports and also add these to the
- * accept list if there is still space. Abort if space runs out.
- */
- list_for_each_entry(params, &hdev->pend_le_reports, action) {
- if (add_to_accept_list(req, params, &num_entries, allow_rpa))
- return 0x00;
- }
-
- /* Use the allowlist unless the following conditions are all true:
- * - We are not currently suspending
- * - There are 1 or more ADV monitors registered and it's not offloaded
- * - Interleaved scanning is not currently using the allowlist
- */
- if (!idr_is_empty(&hdev->adv_monitors_idr) && !hdev->suspended &&
- hci_get_adv_monitor_offload_ext(hdev) == HCI_ADV_MONITOR_EXT_NONE &&
- hdev->interleave_scan_state != INTERLEAVE_SCAN_ALLOWLIST)
- return 0x00;
-
- /* Select filter policy to use accept list */
- return 0x01;
-}
-
-static bool scan_use_rpa(struct hci_dev *hdev)
-{
- return hci_dev_test_flag(hdev, HCI_PRIVACY);
-}
-
-static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval,
- u16 window, u8 own_addr_type, u8 filter_policy,
- bool filter_dup, bool addr_resolv)
-{
- struct hci_dev *hdev = req->hdev;
-
- if (hdev->scanning_paused) {
- bt_dev_dbg(hdev, "Scanning is paused for suspend");
- return;
- }
-
- if (use_ll_privacy(hdev) && addr_resolv) {
- u8 enable = 0x01;
-
- hci_req_add(req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable);
- }
-
- /* Use ext scanning if set ext scan param and ext scan enable is
- * supported
- */
- if (use_ext_scan(hdev)) {
- struct hci_cp_le_set_ext_scan_params *ext_param_cp;
- struct hci_cp_le_set_ext_scan_enable ext_enable_cp;
- struct hci_cp_le_scan_phy_params *phy_params;
- u8 data[sizeof(*ext_param_cp) + sizeof(*phy_params) * 2];
- u32 plen;
-
- ext_param_cp = (void *)data;
- phy_params = (void *)ext_param_cp->data;
-
- memset(ext_param_cp, 0, sizeof(*ext_param_cp));
- ext_param_cp->own_addr_type = own_addr_type;
- ext_param_cp->filter_policy = filter_policy;
-
- plen = sizeof(*ext_param_cp);
-
- if (scan_1m(hdev) || scan_2m(hdev)) {
- ext_param_cp->scanning_phys |= LE_SCAN_PHY_1M;
-
- memset(phy_params, 0, sizeof(*phy_params));
- phy_params->type = type;
- phy_params->interval = cpu_to_le16(interval);
- phy_params->window = cpu_to_le16(window);
-
- plen += sizeof(*phy_params);
- phy_params++;
- }
-
- if (scan_coded(hdev)) {
- ext_param_cp->scanning_phys |= LE_SCAN_PHY_CODED;
-
- memset(phy_params, 0, sizeof(*phy_params));
- phy_params->type = type;
- phy_params->interval = cpu_to_le16(interval);
- phy_params->window = cpu_to_le16(window);
-
- plen += sizeof(*phy_params);
- phy_params++;
- }
-
- hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_PARAMS,
- plen, ext_param_cp);
-
- memset(&ext_enable_cp, 0, sizeof(ext_enable_cp));
- ext_enable_cp.enable = LE_SCAN_ENABLE;
- ext_enable_cp.filter_dup = filter_dup;
-
- hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE,
- sizeof(ext_enable_cp), &ext_enable_cp);
- } else {
- struct hci_cp_le_set_scan_param param_cp;
- struct hci_cp_le_set_scan_enable enable_cp;
-
- memset(&param_cp, 0, sizeof(param_cp));
- param_cp.type = type;
- param_cp.interval = cpu_to_le16(interval);
- param_cp.window = cpu_to_le16(window);
- param_cp.own_address_type = own_addr_type;
- param_cp.filter_policy = filter_policy;
- hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp),
- &param_cp);
-
- memset(&enable_cp, 0, sizeof(enable_cp));
- enable_cp.enable = LE_SCAN_ENABLE;
- enable_cp.filter_dup = filter_dup;
- hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp),
- &enable_cp);
- }
-}
-
-static void set_random_addr(struct hci_request *req, bdaddr_t *rpa);
-static int hci_update_random_address(struct hci_request *req,
- bool require_privacy, bool use_rpa,
- u8 *own_addr_type)
-{
- struct hci_dev *hdev = req->hdev;
- int err;
-
- /* If privacy is enabled use a resolvable private address. If
- * current RPA has expired or there is something else than
- * the current RPA in use, then generate a new one.
- */
- if (use_rpa) {
- /* If Controller supports LL Privacy use own address type is
- * 0x03
- */
- if (use_ll_privacy(hdev))
- *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED;
- else
- *own_addr_type = ADDR_LE_DEV_RANDOM;
-
- if (rpa_valid(hdev))
- return 0;
-
- err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa);
- if (err < 0) {
- bt_dev_err(hdev, "failed to generate new RPA");
- return err;
- }
-
- set_random_addr(req, &hdev->rpa);
-
- return 0;
- }
-
- /* In case of required privacy without resolvable private address,
- * use an non-resolvable private address. This is useful for active
- * scanning and non-connectable advertising.
- */
- if (require_privacy) {
- bdaddr_t nrpa;
-
- while (true) {
- /* The non-resolvable private address is generated
- * from random six bytes with the two most significant
- * bits cleared.
- */
- get_random_bytes(&nrpa, 6);
- nrpa.b[5] &= 0x3f;
-
- /* The non-resolvable private address shall not be
- * equal to the public address.
- */
- if (bacmp(&hdev->bdaddr, &nrpa))
- break;
- }
-
- *own_addr_type = ADDR_LE_DEV_RANDOM;
- set_random_addr(req, &nrpa);
- return 0;
- }
-
- /* If forcing static address is in use or there is no public
- * address use the static address as random address (but skip
- * the HCI command if the current random address is already the
- * static one.
- *
- * In case BR/EDR has been disabled on a dual-mode controller
- * and a static address has been configured, then use that
- * address instead of the public BR/EDR address.
- */
- if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) ||
- !bacmp(&hdev->bdaddr, BDADDR_ANY) ||
- (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) &&
- bacmp(&hdev->static_addr, BDADDR_ANY))) {
- *own_addr_type = ADDR_LE_DEV_RANDOM;
- if (bacmp(&hdev->static_addr, &hdev->random_addr))
- hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6,
- &hdev->static_addr);
- return 0;
- }
-
- /* Neither privacy nor static address is being used so use a
- * public address.
- */
- *own_addr_type = ADDR_LE_DEV_PUBLIC;
-
- return 0;
-}
-
-/* Ensure to call hci_req_add_le_scan_disable() first to disable the
- * controller based address resolution to be able to reconfigure
- * resolving list.
- */
-void hci_req_add_le_passive_scan(struct hci_request *req)
-{
- struct hci_dev *hdev = req->hdev;
- u8 own_addr_type;
- u8 filter_policy;
- u16 window, interval;
- /* Default is to enable duplicates filter */
- u8 filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
- /* Background scanning should run with address resolution */
- bool addr_resolv = true;
-
- if (hdev->scanning_paused) {
- bt_dev_dbg(hdev, "Scanning is paused for suspend");
- return;
- }
-
- /* Set require_privacy to false since no SCAN_REQ are send
- * during passive scanning. Not using an non-resolvable address
- * here is important so that peer devices using direct
- * advertising with our address will be correctly reported
- * by the controller.
- */
- if (hci_update_random_address(req, false, scan_use_rpa(hdev),
- &own_addr_type))
- return;
-
- if (hdev->enable_advmon_interleave_scan &&
- __hci_update_interleaved_scan(hdev))
- return;
-
- bt_dev_dbg(hdev, "interleave state %d", hdev->interleave_scan_state);
- /* Adding or removing entries from the accept list must
- * happen before enabling scanning. The controller does
- * not allow accept list modification while scanning.
- */
- filter_policy = update_accept_list(req);
-
- /* When the controller is using random resolvable addresses and
- * with that having LE privacy enabled, then controllers with
- * Extended Scanner Filter Policies support can now enable support
- * for handling directed advertising.
- *
- * So instead of using filter polices 0x00 (no accept list)
- * and 0x01 (accept list enabled) use the new filter policies
- * 0x02 (no accept list) and 0x03 (accept list enabled).
- */
- if (hci_dev_test_flag(hdev, HCI_PRIVACY) &&
- (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY))
- filter_policy |= 0x02;
-
- if (hdev->suspended) {
- window = hdev->le_scan_window_suspend;
- interval = hdev->le_scan_int_suspend;
- } else if (hci_is_le_conn_scanning(hdev)) {
- window = hdev->le_scan_window_connect;
- interval = hdev->le_scan_int_connect;
- } else if (hci_is_adv_monitoring(hdev)) {
- window = hdev->le_scan_window_adv_monitor;
- interval = hdev->le_scan_int_adv_monitor;
-
- /* Disable duplicates filter when scanning for advertisement
- * monitor for the following reasons.
- *
- * For HW pattern filtering (ex. MSFT), Realtek and Qualcomm
- * controllers ignore RSSI_Sampling_Period when the duplicates
- * filter is enabled.
- *
- * For SW pattern filtering, when we're not doing interleaved
- * scanning, it is necessary to disable duplicates filter,
- * otherwise hosts can only receive one advertisement and it's
- * impossible to know if a peer is still in range.
- */
- filter_dup = LE_SCAN_FILTER_DUP_DISABLE;
- } else {
- window = hdev->le_scan_window;
- interval = hdev->le_scan_interval;
- }
-
- bt_dev_dbg(hdev, "LE passive scan with accept list = %d",
- filter_policy);
- hci_req_start_scan(req, LE_SCAN_PASSIVE, interval, window,
- own_addr_type, filter_policy, filter_dup,
- addr_resolv);
-}
-
-static int hci_req_add_le_interleaved_scan(struct hci_request *req,
- unsigned long opt)
-{
- struct hci_dev *hdev = req->hdev;
- int ret = 0;
-
- hci_dev_lock(hdev);
-
- if (hci_dev_test_flag(hdev, HCI_LE_SCAN))
- hci_req_add_le_scan_disable(req, false);
- hci_req_add_le_passive_scan(req);
-
- switch (hdev->interleave_scan_state) {
- case INTERLEAVE_SCAN_ALLOWLIST:
- bt_dev_dbg(hdev, "next state: allowlist");
- hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER;
- break;
- case INTERLEAVE_SCAN_NO_FILTER:
- bt_dev_dbg(hdev, "next state: no filter");
- hdev->interleave_scan_state = INTERLEAVE_SCAN_ALLOWLIST;
- break;
- case INTERLEAVE_SCAN_NONE:
- BT_ERR("unexpected error");
- ret = -1;
- }
-
- hci_dev_unlock(hdev);
-
- return ret;
-}
-
-static void interleave_scan_work(struct work_struct *work)
-{
- struct hci_dev *hdev = container_of(work, struct hci_dev,
- interleave_scan.work);
- u8 status;
- unsigned long timeout;
-
- if (hdev->interleave_scan_state == INTERLEAVE_SCAN_ALLOWLIST) {
- timeout = msecs_to_jiffies(hdev->advmon_allowlist_duration);
- } else if (hdev->interleave_scan_state == INTERLEAVE_SCAN_NO_FILTER) {
- timeout = msecs_to_jiffies(hdev->advmon_no_filter_duration);
- } else {
- bt_dev_err(hdev, "unexpected error");
- return;
- }
-
- hci_req_sync(hdev, hci_req_add_le_interleaved_scan, 0,
- HCI_CMD_TIMEOUT, &status);
-
- /* Don't continue interleaving if it was canceled */
- if (is_interleave_scanning(hdev))
- queue_delayed_work(hdev->req_workqueue,
- &hdev->interleave_scan, timeout);
-}
-
-static void set_random_addr(struct hci_request *req, bdaddr_t *rpa)
-{
- struct hci_dev *hdev = req->hdev;
-
- /* If we're advertising or initiating an LE connection we can't
- * go ahead and change the random address at this time. This is
- * because the eventual initiator address used for the
- * subsequently created connection will be undefined (some
- * controllers use the new address and others the one we had
- * when the operation started).
- *
- * In this kind of scenario skip the update and let the random
- * address be updated at the next cycle.
- */
- if (hci_dev_test_flag(hdev, HCI_LE_ADV) ||
- hci_lookup_le_connect(hdev)) {
- bt_dev_dbg(hdev, "Deferring random address update");
- hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
- return;
- }
-
- hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa);
-}
-
-void hci_request_setup(struct hci_dev *hdev)
-{
- INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work);
-}
-
-void hci_request_cancel_all(struct hci_dev *hdev)
-{
- hci_cmd_sync_cancel_sync(hdev, ENODEV);
-
- cancel_interleave_scan(hdev);
-}
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
deleted file mode 100644
index 0be75cf0efed..000000000000
--- a/net/bluetooth/hci_request.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- BlueZ - Bluetooth protocol stack for Linux
- Copyright (C) 2014 Intel Corporation
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License version 2 as
- published by the Free Software Foundation;
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
- IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
- CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
- ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
- COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
- SOFTWARE IS DISCLAIMED.
-*/
-
-#include <asm/unaligned.h>
-
-#define HCI_REQ_DONE 0
-#define HCI_REQ_PEND 1
-#define HCI_REQ_CANCELED 2
-
-#define hci_req_sync_lock(hdev) mutex_lock(&hdev->req_lock)
-#define hci_req_sync_unlock(hdev) mutex_unlock(&hdev->req_lock)
-
-#define HCI_REQ_DONE 0
-#define HCI_REQ_PEND 1
-#define HCI_REQ_CANCELED 2
-
-struct hci_request {
- struct hci_dev *hdev;
- struct sk_buff_head cmd_q;
-
- /* If something goes wrong when building the HCI request, the error
- * value is stored in this field.
- */
- int err;
-};
-
-void hci_req_init(struct hci_request *req, struct hci_dev *hdev);
-void hci_req_purge(struct hci_request *req);
-bool hci_req_status_pend(struct hci_dev *hdev);
-int hci_req_run(struct hci_request *req, hci_req_complete_t complete);
-int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete);
-void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode,
- struct sk_buff *skb);
-void hci_req_add(struct hci_request *req, u16 opcode, u32 plen,
- const void *param);
-void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
- const void *param, u8 event);
-void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status,
- hci_req_complete_t *req_complete,
- hci_req_complete_skb_t *req_complete_skb);
-
-int hci_req_sync(struct hci_dev *hdev, int (*req)(struct hci_request *req,
- unsigned long opt),
- unsigned long opt, u32 timeout, u8 *hci_status);
-int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req,
- unsigned long opt),
- unsigned long opt, u32 timeout, u8 *hci_status);
-
-struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen,
- const void *param);
-
-void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn);
-void hci_req_add_le_passive_scan(struct hci_request *req);
-
-void hci_request_setup(struct hci_dev *hdev);
-void hci_request_cancel_all(struct hci_dev *hdev);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 703b84bd48d5..69c2ba1e843e 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -485,7 +485,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
return NULL;
ni = skb_put(skb, HCI_MON_NEW_INDEX_SIZE);
- ni->type = hdev->dev_type;
+ ni->type = 0x00; /* Old hdev->dev_type */
ni->bus = hdev->bus;
bacpy(&ni->bdaddr, &hdev->bdaddr);
memcpy_and_pad(ni->name, sizeof(ni->name), hdev->name,
@@ -1007,9 +1007,6 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED))
return -EOPNOTSUPP;
- if (hdev->dev_type != HCI_PRIMARY)
- return -EOPNOTSUPP;
-
switch (cmd) {
case HCISETRAW:
if (!capable(CAP_NET_ADMIN))
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 4c707eb64e6f..e79cd40bd079 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -12,7 +12,6 @@
#include <net/bluetooth/hci_core.h>
#include <net/bluetooth/mgmt.h>
-#include "hci_request.h"
#include "hci_codec.h"
#include "hci_debugfs.h"
#include "smp.h"
@@ -49,9 +48,8 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode,
wake_up_interruptible(&hdev->req_wait_q);
}
-static struct sk_buff *hci_cmd_sync_alloc(struct hci_dev *hdev, u16 opcode,
- u32 plen, const void *param,
- struct sock *sk)
+struct sk_buff *hci_cmd_sync_alloc(struct hci_dev *hdev, u16 opcode, u32 plen,
+ const void *param, struct sock *sk)
{
int len = HCI_COMMAND_HDR_SIZE + plen;
struct hci_command_hdr *hdr;
@@ -147,6 +145,13 @@ static int hci_cmd_sync_run(struct hci_request *req)
return 0;
}
+static void hci_request_init(struct hci_request *req, struct hci_dev *hdev)
+{
+ skb_queue_head_init(&req->cmd_q);
+ req->hdev = hdev;
+ req->err = 0;
+}
+
/* This function requires the caller holds hdev->req_lock. */
struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
const void *param, u8 event, u32 timeout,
@@ -158,7 +163,7 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
bt_dev_dbg(hdev, "Opcode 0x%4.4x", opcode);
- hci_req_init(&req, hdev);
+ hci_request_init(&req, hdev);
hci_cmd_sync_add(&req, opcode, plen, param, event, sk);
@@ -280,6 +285,19 @@ int __hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen,
}
EXPORT_SYMBOL(__hci_cmd_sync_status);
+int hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen,
+ const void *param, u32 timeout)
+{
+ int err;
+
+ hci_req_sync_lock(hdev);
+ err = __hci_cmd_sync_status(hdev, opcode, plen, param, timeout);
+ hci_req_sync_unlock(hdev);
+
+ return err;
+}
+EXPORT_SYMBOL(hci_cmd_sync_status);
+
static void hci_cmd_sync_work(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_sync_work);
@@ -334,10 +352,9 @@ static int scan_disable_sync(struct hci_dev *hdev, void *data)
return hci_scan_disable_sync(hdev);
}
-static int hci_inquiry_sync(struct hci_dev *hdev, u8 length);
static int interleaved_inquiry_sync(struct hci_dev *hdev, void *data)
{
- return hci_inquiry_sync(hdev, DISCOV_INTERLEAVED_INQUIRY_LEN);
+ return hci_inquiry_sync(hdev, DISCOV_INTERLEAVED_INQUIRY_LEN, 0);
}
static void le_scan_disable(struct work_struct *work)
@@ -358,8 +375,6 @@ static void le_scan_disable(struct work_struct *work)
goto _return;
}
- hdev->discovery.scan_start = 0;
-
/* If we were running LE only scan, change discovery state. If
* we were running both LE and BR/EDR inquiry simultaneously,
* and BR/EDR inquiry is already finished, stop discovery,
@@ -557,6 +572,53 @@ unlock:
hci_dev_unlock(hdev);
}
+static bool is_interleave_scanning(struct hci_dev *hdev)
+{
+ return hdev->interleave_scan_state != INTERLEAVE_SCAN_NONE;
+}
+
+static int hci_passive_scan_sync(struct hci_dev *hdev);
+
+static void interleave_scan_work(struct work_struct *work)
+{
+ struct hci_dev *hdev = container_of(work, struct hci_dev,
+ interleave_scan.work);
+ unsigned long timeout;
+
+ if (hdev->interleave_scan_state == INTERLEAVE_SCAN_ALLOWLIST) {
+ timeout = msecs_to_jiffies(hdev->advmon_allowlist_duration);
+ } else if (hdev->interleave_scan_state == INTERLEAVE_SCAN_NO_FILTER) {
+ timeout = msecs_to_jiffies(hdev->advmon_no_filter_duration);
+ } else {
+ bt_dev_err(hdev, "unexpected error");
+ return;
+ }
+
+ hci_passive_scan_sync(hdev);
+
+ hci_dev_lock(hdev);
+
+ switch (hdev->interleave_scan_state) {
+ case INTERLEAVE_SCAN_ALLOWLIST:
+ bt_dev_dbg(hdev, "next state: allowlist");
+ hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER;
+ break;
+ case INTERLEAVE_SCAN_NO_FILTER:
+ bt_dev_dbg(hdev, "next state: no filter");
+ hdev->interleave_scan_state = INTERLEAVE_SCAN_ALLOWLIST;
+ break;
+ case INTERLEAVE_SCAN_NONE:
+ bt_dev_err(hdev, "unexpected error");
+ }
+
+ hci_dev_unlock(hdev);
+
+ /* Don't continue interleaving if it was canceled */
+ if (is_interleave_scanning(hdev))
+ queue_delayed_work(hdev->req_workqueue,
+ &hdev->interleave_scan, timeout);
+}
+
void hci_cmd_sync_init(struct hci_dev *hdev)
{
INIT_WORK(&hdev->cmd_sync_work, hci_cmd_sync_work);
@@ -568,6 +630,7 @@ void hci_cmd_sync_init(struct hci_dev *hdev)
INIT_WORK(&hdev->reenable_adv_work, reenable_adv);
INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable);
INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire);
+ INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work);
}
static void _hci_cmd_sync_cancel_entry(struct hci_dev *hdev,
@@ -1043,11 +1106,10 @@ static int hci_disable_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
struct hci_cp_ext_adv_set *set;
u8 data[sizeof(*cp) + sizeof(*set) * 1];
u8 size;
+ struct adv_info *adv = NULL;
/* If request specifies an instance that doesn't exist, fail */
if (instance > 0) {
- struct adv_info *adv;
-
adv = hci_find_adv_instance(hdev, instance);
if (!adv)
return -EINVAL;
@@ -1066,7 +1128,7 @@ static int hci_disable_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
cp->num_of_sets = !!instance;
cp->enable = 0x00;
- set->handle = instance;
+ set->handle = adv ? adv->handle : instance;
size = sizeof(*cp) + sizeof(*set) * cp->num_of_sets;
@@ -1195,7 +1257,7 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
cp.own_addr_type = own_addr_type;
cp.channel_map = hdev->le_adv_channel_map;
- cp.handle = instance;
+ cp.handle = adv ? adv->handle : instance;
if (flags & MGMT_ADV_FLAG_SEC_2M) {
cp.primary_phy = HCI_ADV_PHY_1M;
@@ -1235,31 +1297,27 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
static int hci_set_ext_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance)
{
- struct {
- struct hci_cp_le_set_ext_scan_rsp_data cp;
- u8 data[HCI_MAX_EXT_AD_LENGTH];
- } pdu;
+ DEFINE_FLEX(struct hci_cp_le_set_ext_scan_rsp_data, pdu, data, length,
+ HCI_MAX_EXT_AD_LENGTH);
u8 len;
struct adv_info *adv = NULL;
int err;
- memset(&pdu, 0, sizeof(pdu));
-
if (instance) {
adv = hci_find_adv_instance(hdev, instance);
if (!adv || !adv->scan_rsp_changed)
return 0;
}
- len = eir_create_scan_rsp(hdev, instance, pdu.data);
+ len = eir_create_scan_rsp(hdev, instance, pdu->data);
- pdu.cp.handle = instance;
- pdu.cp.length = len;
- pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
- pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
+ pdu->handle = adv ? adv->handle : instance;
+ pdu->length = len;
+ pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE;
+ pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG;
err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_RSP_DATA,
- sizeof(pdu.cp) + len, &pdu.cp,
+ struct_size(pdu, data, len), pdu,
HCI_CMD_TIMEOUT);
if (err)
return err;
@@ -1267,7 +1325,7 @@ static int hci_set_ext_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance)
if (adv) {
adv->scan_rsp_changed = false;
} else {
- memcpy(hdev->scan_rsp_data, pdu.data, len);
+ memcpy(hdev->scan_rsp_data, pdu->data, len);
hdev->scan_rsp_data_len = len;
}
@@ -1335,7 +1393,7 @@ int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance)
memset(set, 0, sizeof(*set));
- set->handle = instance;
+ set->handle = adv ? adv->handle : instance;
/* Set duration per instance since controller is responsible for
* scheduling it.
@@ -1411,29 +1469,25 @@ static int hci_set_per_adv_params_sync(struct hci_dev *hdev, u8 instance,
static int hci_set_per_adv_data_sync(struct hci_dev *hdev, u8 instance)
{
- struct {
- struct hci_cp_le_set_per_adv_data cp;
- u8 data[HCI_MAX_PER_AD_LENGTH];
- } pdu;
+ DEFINE_FLEX(struct hci_cp_le_set_per_adv_data, pdu, data, length,
+ HCI_MAX_PER_AD_LENGTH);
u8 len;
-
- memset(&pdu, 0, sizeof(pdu));
+ struct adv_info *adv = NULL;
if (instance) {
- struct adv_info *adv = hci_find_adv_instance(hdev, instance);
-
+ adv = hci_find_adv_instance(hdev, instance);
if (!adv || !adv->periodic)
return 0;
}
- len = eir_create_per_adv_data(hdev, instance, pdu.data);
+ len = eir_create_per_adv_data(hdev, instance, pdu->data);
- pdu.cp.length = len;
- pdu.cp.handle = instance;
- pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
+ pdu->length = len;
+ pdu->handle = adv ? adv->handle : instance;
+ pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE;
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_DATA,
- sizeof(pdu.cp) + len, &pdu,
+ struct_size(pdu, data, len), pdu,
HCI_CMD_TIMEOUT);
}
@@ -1727,31 +1781,27 @@ int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason)
static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance)
{
- struct {
- struct hci_cp_le_set_ext_adv_data cp;
- u8 data[HCI_MAX_EXT_AD_LENGTH];
- } pdu;
+ DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length,
+ HCI_MAX_EXT_AD_LENGTH);
u8 len;
struct adv_info *adv = NULL;
int err;
- memset(&pdu, 0, sizeof(pdu));
-
if (instance) {
adv = hci_find_adv_instance(hdev, instance);
if (!adv || !adv->adv_data_changed)
return 0;
}
- len = eir_create_adv_data(hdev, instance, pdu.data);
+ len = eir_create_adv_data(hdev, instance, pdu->data);
- pdu.cp.length = len;
- pdu.cp.handle = instance;
- pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
- pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
+ pdu->length = len;
+ pdu->handle = adv ? adv->handle : instance;
+ pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE;
+ pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG;
err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA,
- sizeof(pdu.cp) + len, &pdu.cp,
+ struct_size(pdu, data, len), pdu,
HCI_CMD_TIMEOUT);
if (err)
return err;
@@ -1760,7 +1810,7 @@ static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance)
if (adv) {
adv->adv_data_changed = false;
} else {
- memcpy(hdev->adv_data, pdu.data, len);
+ memcpy(hdev->adv_data, pdu->data, len);
hdev->adv_data_len = len;
}
@@ -2114,11 +2164,6 @@ static void hci_start_interleave_scan(struct hci_dev *hdev)
&hdev->interleave_scan, 0);
}
-static bool is_interleave_scanning(struct hci_dev *hdev)
-{
- return hdev->interleave_scan_state != INTERLEAVE_SCAN_NONE;
-}
-
static void cancel_interleave_scan(struct hci_dev *hdev)
{
bt_dev_dbg(hdev, "cancelling interleave scan");
@@ -2931,6 +2976,27 @@ static int hci_passive_scan_sync(struct hci_dev *hdev)
*/
filter_policy = hci_update_accept_list_sync(hdev);
+ /* If suspended and filter_policy set to 0x00 (no acceptlist) then
+ * passive scanning cannot be started since that would require the host
+ * to be woken up to process the reports.
+ */
+ if (hdev->suspended && !filter_policy) {
+ /* Check if accept list is empty then there is no need to scan
+ * while suspended.
+ */
+ if (list_empty(&hdev->le_accept_list))
+ return 0;
+
+ /* If there are devices is the accept_list that means some
+ * devices could not be programmed which in non-suspended case
+ * means filter_policy needs to be set to 0x00 so the host needs
+ * to filter, but since this is treating suspended case we
+ * can ignore device needing host to filter to allow devices in
+ * the acceptlist to be able to wakeup the system.
+ */
+ filter_policy = 0x01;
+ }
+
/* When the controller is using random resolvable addresses and
* with that having LE privacy enabled, then controllers with
* Extended Scanner Filter Policies support can now enable support
@@ -2953,6 +3019,20 @@ static int hci_passive_scan_sync(struct hci_dev *hdev)
} else if (hci_is_adv_monitoring(hdev)) {
window = hdev->le_scan_window_adv_monitor;
interval = hdev->le_scan_int_adv_monitor;
+
+ /* Disable duplicates filter when scanning for advertisement
+ * monitor for the following reasons.
+ *
+ * For HW pattern filtering (ex. MSFT), Realtek and Qualcomm
+ * controllers ignore RSSI_Sampling_Period when the duplicates
+ * filter is enabled.
+ *
+ * For SW pattern filtering, when we're not doing interleaved
+ * scanning, it is necessary to disable duplicates filter,
+ * otherwise hosts can only receive one advertisement and it's
+ * impossible to know if a peer is still in range.
+ */
+ filter_dups = LE_SCAN_FILTER_DUP_DISABLE;
} else {
window = hdev->le_scan_window;
interval = hdev->le_scan_interval;
@@ -3523,10 +3603,6 @@ static int hci_unconf_init_sync(struct hci_dev *hdev)
/* Read Local Supported Features. */
static int hci_read_local_features_sync(struct hci_dev *hdev)
{
- /* Not all AMP controllers support this command */
- if (hdev->dev_type == HCI_AMP && !(hdev->commands[14] & 0x20))
- return 0;
-
return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_FEATURES,
0, NULL, HCI_CMD_TIMEOUT);
}
@@ -3561,51 +3637,6 @@ static int hci_read_local_cmds_sync(struct hci_dev *hdev)
return 0;
}
-/* Read Local AMP Info */
-static int hci_read_local_amp_info_sync(struct hci_dev *hdev)
-{
- return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_AMP_INFO,
- 0, NULL, HCI_CMD_TIMEOUT);
-}
-
-/* Read Data Blk size */
-static int hci_read_data_block_size_sync(struct hci_dev *hdev)
-{
- return __hci_cmd_sync_status(hdev, HCI_OP_READ_DATA_BLOCK_SIZE,
- 0, NULL, HCI_CMD_TIMEOUT);
-}
-
-/* Read Flow Control Mode */
-static int hci_read_flow_control_mode_sync(struct hci_dev *hdev)
-{
- return __hci_cmd_sync_status(hdev, HCI_OP_READ_FLOW_CONTROL_MODE,
- 0, NULL, HCI_CMD_TIMEOUT);
-}
-
-/* Read Location Data */
-static int hci_read_location_data_sync(struct hci_dev *hdev)
-{
- return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCATION_DATA,
- 0, NULL, HCI_CMD_TIMEOUT);
-}
-
-/* AMP Controller init stage 1 command sequence */
-static const struct hci_init_stage amp_init1[] = {
- /* HCI_OP_READ_LOCAL_VERSION */
- HCI_INIT(hci_read_local_version_sync),
- /* HCI_OP_READ_LOCAL_COMMANDS */
- HCI_INIT(hci_read_local_cmds_sync),
- /* HCI_OP_READ_LOCAL_AMP_INFO */
- HCI_INIT(hci_read_local_amp_info_sync),
- /* HCI_OP_READ_DATA_BLOCK_SIZE */
- HCI_INIT(hci_read_data_block_size_sync),
- /* HCI_OP_READ_FLOW_CONTROL_MODE */
- HCI_INIT(hci_read_flow_control_mode_sync),
- /* HCI_OP_READ_LOCATION_DATA */
- HCI_INIT(hci_read_location_data_sync),
- {}
-};
-
static int hci_init1_sync(struct hci_dev *hdev)
{
int err;
@@ -3619,28 +3650,9 @@ static int hci_init1_sync(struct hci_dev *hdev)
return err;
}
- switch (hdev->dev_type) {
- case HCI_PRIMARY:
- hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED;
- return hci_init_stage_sync(hdev, br_init1);
- case HCI_AMP:
- hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED;
- return hci_init_stage_sync(hdev, amp_init1);
- default:
- bt_dev_err(hdev, "Unknown device type %d", hdev->dev_type);
- break;
- }
-
- return 0;
+ return hci_init_stage_sync(hdev, br_init1);
}
-/* AMP Controller init stage 2 command sequence */
-static const struct hci_init_stage amp_init2[] = {
- /* HCI_OP_READ_LOCAL_FEATURES */
- HCI_INIT(hci_read_local_features_sync),
- {}
-};
-
/* Read Buffer Size (ACL mtu, max pkt, etc.) */
static int hci_read_buffer_size_sync(struct hci_dev *hdev)
{
@@ -3898,9 +3910,6 @@ static int hci_init2_sync(struct hci_dev *hdev)
bt_dev_dbg(hdev, "");
- if (hdev->dev_type == HCI_AMP)
- return hci_init_stage_sync(hdev, amp_init2);
-
err = hci_init_stage_sync(hdev, hci_init2);
if (err)
return err;
@@ -4728,13 +4737,6 @@ static int hci_init_sync(struct hci_dev *hdev)
if (err < 0)
return err;
- /* HCI_PRIMARY covers both single-mode LE, BR/EDR and dual-mode
- * BR/EDR/LE type controllers. AMP controllers only need the
- * first two stages of init.
- */
- if (hdev->dev_type != HCI_PRIMARY)
- return 0;
-
err = hci_init3_sync(hdev);
if (err < 0)
return err;
@@ -4963,12 +4965,8 @@ int hci_dev_open_sync(struct hci_dev *hdev)
* In case of user channel usage, it is not important
* if a public address or static random address is
* available.
- *
- * This check is only valid for BR/EDR controllers
- * since AMP controllers do not have an address.
*/
if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
- hdev->dev_type == HCI_PRIMARY &&
!bacmp(&hdev->bdaddr, BDADDR_ANY) &&
!bacmp(&hdev->static_addr, BDADDR_ANY)) {
ret = -EADDRNOTAVAIL;
@@ -5003,8 +5001,7 @@ int hci_dev_open_sync(struct hci_dev *hdev)
!hci_dev_test_flag(hdev, HCI_CONFIG) &&
!hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
- hci_dev_test_flag(hdev, HCI_MGMT) &&
- hdev->dev_type == HCI_PRIMARY) {
+ hci_dev_test_flag(hdev, HCI_MGMT)) {
ret = hci_powered_update_sync(hdev);
mgmt_power_on(hdev, ret);
}
@@ -5100,7 +5097,9 @@ int hci_dev_close_sync(struct hci_dev *hdev)
cancel_delayed_work(&hdev->ncmd_timer);
cancel_delayed_work(&hdev->le_scan_disable);
- hci_request_cancel_all(hdev);
+ hci_cmd_sync_cancel_sync(hdev, ENODEV);
+
+ cancel_interleave_scan(hdev);
if (hdev->adv_instance_timeout) {
cancel_delayed_work_sync(&hdev->adv_instance_expire);
@@ -5149,8 +5148,7 @@ int hci_dev_close_sync(struct hci_dev *hdev)
auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF);
- if (!auto_off && hdev->dev_type == HCI_PRIMARY &&
- !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
+ if (!auto_off && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
hci_dev_test_flag(hdev, HCI_MGMT))
__mgmt_power_off(hdev);
@@ -5212,9 +5210,6 @@ int hci_dev_close_sync(struct hci_dev *hdev)
hdev->flags &= BIT(HCI_RAW);
hci_dev_clear_volatile_flags(hdev);
- /* Controller radio is available but is currently powered down */
- hdev->amp_status = AMP_STATUS_POWERED_DOWN;
-
memset(hdev->eir, 0, sizeof(hdev->eir));
memset(hdev->dev_class, 0, sizeof(hdev->dev_class));
bacpy(&hdev->random_addr, BDADDR_ANY);
@@ -5251,8 +5246,7 @@ static int hci_power_on_sync(struct hci_dev *hdev)
*/
if (hci_dev_test_flag(hdev, HCI_RFKILLED) ||
hci_dev_test_flag(hdev, HCI_UNCONFIGURED) ||
- (hdev->dev_type == HCI_PRIMARY &&
- !bacmp(&hdev->bdaddr, BDADDR_ANY) &&
+ (!bacmp(&hdev->bdaddr, BDADDR_ANY) &&
!bacmp(&hdev->static_addr, BDADDR_ANY))) {
hci_dev_clear_flag(hdev, HCI_AUTO_OFF);
hci_dev_close_sync(hdev);
@@ -5354,27 +5348,11 @@ int hci_stop_discovery_sync(struct hci_dev *hdev)
return 0;
}
-static int hci_disconnect_phy_link_sync(struct hci_dev *hdev, u16 handle,
- u8 reason)
-{
- struct hci_cp_disconn_phy_link cp;
-
- memset(&cp, 0, sizeof(cp));
- cp.phy_handle = HCI_PHY_HANDLE(handle);
- cp.reason = reason;
-
- return __hci_cmd_sync_status(hdev, HCI_OP_DISCONN_PHY_LINK,
- sizeof(cp), &cp, HCI_CMD_TIMEOUT);
-}
-
static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
u8 reason)
{
struct hci_cp_disconnect cp;
- if (conn->type == AMP_LINK)
- return hci_disconnect_phy_link_sync(hdev, conn->handle, reason);
-
if (test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) {
/* This is a BIS connection, hci_conn_del will
* do the necessary cleanup.
@@ -5768,7 +5746,7 @@ int hci_update_connectable_sync(struct hci_dev *hdev)
return hci_update_passive_scan_sync(hdev);
}
-static int hci_inquiry_sync(struct hci_dev *hdev, u8 length)
+int hci_inquiry_sync(struct hci_dev *hdev, u8 length, u8 num_rsp)
{
const u8 giac[3] = { 0x33, 0x8b, 0x9e };
const u8 liac[3] = { 0x00, 0x8b, 0x9e };
@@ -5791,6 +5769,7 @@ static int hci_inquiry_sync(struct hci_dev *hdev, u8 length)
memcpy(&cp.lap, giac, sizeof(cp.lap));
cp.length = length;
+ cp.num_rsp = num_rsp;
return __hci_cmd_sync_status(hdev, HCI_OP_INQUIRY,
sizeof(cp), &cp, HCI_CMD_TIMEOUT);
@@ -5877,7 +5856,7 @@ static int hci_start_interleaved_discovery_sync(struct hci_dev *hdev)
if (err)
return err;
- return hci_inquiry_sync(hdev, DISCOV_BREDR_INQUIRY_LEN);
+ return hci_inquiry_sync(hdev, DISCOV_BREDR_INQUIRY_LEN, 0);
}
int hci_start_discovery_sync(struct hci_dev *hdev)
@@ -5889,7 +5868,7 @@ int hci_start_discovery_sync(struct hci_dev *hdev)
switch (hdev->discovery.type) {
case DISCOV_TYPE_BREDR:
- return hci_inquiry_sync(hdev, DISCOV_BREDR_INQUIRY_LEN);
+ return hci_inquiry_sync(hdev, DISCOV_BREDR_INQUIRY_LEN, 0);
case DISCOV_TYPE_INTERLEAVED:
/* When running simultaneous discovery, the LE scanning time
* should occupy the whole discovery time sine BR/EDR inquiry
@@ -5959,7 +5938,6 @@ static int hci_pause_discovery_sync(struct hci_dev *hdev)
return err;
hdev->discovery_paused = true;
- hdev->discovery_old_state = old_state;
hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
return 0;
@@ -6493,10 +6471,8 @@ done:
int hci_le_create_cis_sync(struct hci_dev *hdev)
{
- struct {
- struct hci_cp_le_create_cis cp;
- struct hci_cis cis[0x1f];
- } cmd;
+ DEFINE_FLEX(struct hci_cp_le_create_cis, cmd, cis, num_cis, 0x1f);
+ size_t aux_num_cis = 0;
struct hci_conn *conn;
u8 cig = BT_ISO_QOS_CIG_UNSET;
@@ -6523,8 +6499,6 @@ int hci_le_create_cis_sync(struct hci_dev *hdev)
* remains pending.
*/
- memset(&cmd, 0, sizeof(cmd));
-
hci_dev_lock(hdev);
rcu_read_lock();
@@ -6561,7 +6535,7 @@ int hci_le_create_cis_sync(struct hci_dev *hdev)
goto done;
list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
- struct hci_cis *cis = &cmd.cis[cmd.cp.num_cis];
+ struct hci_cis *cis = &cmd->cis[aux_num_cis];
if (hci_conn_check_create_cis(conn) ||
conn->iso_qos.ucast.cig != cig)
@@ -6570,25 +6544,25 @@ int hci_le_create_cis_sync(struct hci_dev *hdev)
set_bit(HCI_CONN_CREATE_CIS, &conn->flags);
cis->acl_handle = cpu_to_le16(conn->parent->handle);
cis->cis_handle = cpu_to_le16(conn->handle);
- cmd.cp.num_cis++;
+ aux_num_cis++;
- if (cmd.cp.num_cis >= ARRAY_SIZE(cmd.cis))
+ if (aux_num_cis >= cmd->num_cis)
break;
}
+ cmd->num_cis = aux_num_cis;
done:
rcu_read_unlock();
hci_dev_unlock(hdev);
- if (!cmd.cp.num_cis)
+ if (!aux_num_cis)
return 0;
/* Wait for HCI_LE_CIS_Established */
return __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CREATE_CIS,
- sizeof(cmd.cp) + sizeof(cmd.cis[0]) *
- cmd.cp.num_cis, &cmd,
- HCI_EVT_LE_CIS_ESTABLISHED,
+ struct_size(cmd, cis, cmd->num_cis),
+ cmd, HCI_EVT_LE_CIS_ESTABLISHED,
conn->conn_timeout, NULL);
}
@@ -6832,3 +6806,21 @@ int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn)
return -ENOENT;
}
+
+int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn,
+ struct hci_conn_params *params)
+{
+ struct hci_cp_le_conn_update cp;
+
+ memset(&cp, 0, sizeof(cp));
+ cp.handle = cpu_to_le16(conn->handle);
+ cp.conn_interval_min = cpu_to_le16(params->conn_min_interval);
+ cp.conn_interval_max = cpu_to_le16(params->conn_max_interval);
+ cp.conn_latency = cpu_to_le16(params->conn_latency);
+ cp.supervision_timeout = cpu_to_le16(params->supervision_timeout);
+ cp.min_ce_len = cpu_to_le16(0x0000);
+ cp.max_ce_len = cpu_to_le16(0x0000);
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_CONN_UPDATE,
+ sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+}
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index ef0cc80b4c0c..d5e00d0dd1a0 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -54,7 +54,6 @@ static void iso_sock_kill(struct sock *sk);
enum {
BT_SK_BIG_SYNC,
BT_SK_PA_SYNC,
- BT_SK_PA_SYNC_TERM,
};
struct iso_pinfo {
@@ -81,12 +80,14 @@ static bool check_ucast_qos(struct bt_iso_qos *qos);
static bool check_bcast_qos(struct bt_iso_qos *qos);
static bool iso_match_sid(struct sock *sk, void *data);
static bool iso_match_sync_handle(struct sock *sk, void *data);
+static bool iso_match_sync_handle_pa_report(struct sock *sk, void *data);
static void iso_sock_disconn(struct sock *sk);
typedef bool (*iso_sock_match_t)(struct sock *sk, void *data);
-static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst,
- iso_sock_match_t match, void *data);
+static struct sock *iso_get_sock(bdaddr_t *src, bdaddr_t *dst,
+ enum bt_sock_state state,
+ iso_sock_match_t match, void *data);
/* ---- ISO timers ---- */
#define ISO_CONN_TIMEOUT (HZ * 40)
@@ -196,21 +197,10 @@ static void iso_chan_del(struct sock *sk, int err)
sock_set_flag(sk, SOCK_ZAPPED);
}
-static bool iso_match_conn_sync_handle(struct sock *sk, void *data)
-{
- struct hci_conn *hcon = data;
-
- if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags))
- return false;
-
- return hcon->sync_handle == iso_pi(sk)->sync_handle;
-}
-
static void iso_conn_del(struct hci_conn *hcon, int err)
{
struct iso_conn *conn = hcon->iso_data;
struct sock *sk;
- struct sock *parent;
if (!conn)
return;
@@ -226,25 +216,6 @@ static void iso_conn_del(struct hci_conn *hcon, int err)
if (sk) {
lock_sock(sk);
-
- /* While a PA sync hcon is in the process of closing,
- * mark parent socket with a flag, so that any residual
- * BIGInfo adv reports that arrive before PA sync is
- * terminated are not processed anymore.
- */
- if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) {
- parent = iso_get_sock_listen(&hcon->src,
- &hcon->dst,
- iso_match_conn_sync_handle,
- hcon);
-
- if (parent) {
- set_bit(BT_SK_PA_SYNC_TERM,
- &iso_pi(parent)->flags);
- sock_put(parent);
- }
- }
-
iso_sock_clear_timer(sk);
iso_chan_del(sk, err);
release_sock(sk);
@@ -581,22 +552,23 @@ static struct sock *__iso_get_sock_listen_by_sid(bdaddr_t *ba, bdaddr_t *bc,
return NULL;
}
-/* Find socket listening:
+/* Find socket in given state:
* source bdaddr (Unicast)
* destination bdaddr (Broadcast only)
* match func - pass NULL to ignore
* match func data - pass -1 to ignore
* Returns closest match.
*/
-static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst,
- iso_sock_match_t match, void *data)
+static struct sock *iso_get_sock(bdaddr_t *src, bdaddr_t *dst,
+ enum bt_sock_state state,
+ iso_sock_match_t match, void *data)
{
struct sock *sk = NULL, *sk1 = NULL;
read_lock(&iso_sk_list.lock);
sk_for_each(sk, &iso_sk_list.head) {
- if (sk->sk_state != BT_LISTEN)
+ if (sk->sk_state != state)
continue;
/* Match Broadcast destination */
@@ -857,6 +829,7 @@ static struct sock *iso_sock_alloc(struct net *net, struct socket *sock,
iso_pi(sk)->src_type = BDADDR_LE_PUBLIC;
iso_pi(sk)->qos = default_qos;
+ iso_pi(sk)->sync_handle = -1;
bt_sock_link(&iso_sk_list, sk);
return sk;
@@ -904,7 +877,6 @@ static int iso_sock_bind_bc(struct socket *sock, struct sockaddr *addr,
return -EINVAL;
iso_pi(sk)->dst_type = sa->iso_bc->bc_bdaddr_type;
- iso_pi(sk)->sync_handle = -1;
if (sa->iso_bc->bc_sid > 0x0f)
return -EINVAL;
@@ -981,7 +953,8 @@ static int iso_sock_bind(struct socket *sock, struct sockaddr *addr,
/* Allow the user to bind a PA sync socket to a number
* of BISes to sync to.
*/
- if (sk->sk_state == BT_CONNECT2 &&
+ if ((sk->sk_state == BT_CONNECT2 ||
+ sk->sk_state == BT_CONNECTED) &&
test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) {
err = iso_sock_bind_pa_sk(sk, sa, addr_len);
goto done;
@@ -1186,7 +1159,7 @@ done:
}
static int iso_sock_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *ch;
@@ -1195,7 +1168,7 @@ static int iso_sock_accept(struct socket *sock, struct socket *newsock,
lock_sock(sk);
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
BT_DBG("sk %p timeo %ld", sk, timeo);
@@ -1285,7 +1258,7 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
return -ENOTCONN;
}
- mtu = iso_pi(sk)->conn->hcon->hdev->iso_mtu;
+ mtu = iso_pi(sk)->conn->hcon->mtu;
release_sock(sk);
@@ -1383,8 +1356,7 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
lock_sock(sk);
switch (sk->sk_state) {
case BT_CONNECT2:
- if (pi->conn->hcon &&
- test_bit(HCI_CONN_PA_SYNC, &pi->conn->hcon->flags)) {
+ if (test_bit(BT_SK_PA_SYNC, &pi->flags)) {
iso_conn_big_sync(sk);
sk->sk_state = BT_LISTEN;
} else {
@@ -1393,6 +1365,16 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
}
release_sock(sk);
return 0;
+ case BT_CONNECTED:
+ if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) {
+ iso_conn_big_sync(sk);
+ sk->sk_state = BT_LISTEN;
+ release_sock(sk);
+ return 0;
+ }
+
+ release_sock(sk);
+ break;
case BT_CONNECT:
release_sock(sk);
return iso_connect_cis(sk);
@@ -1538,7 +1520,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
case BT_ISO_QOS:
if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND &&
- sk->sk_state != BT_CONNECT2) {
+ sk->sk_state != BT_CONNECT2 &&
+ (!test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags) ||
+ sk->sk_state != BT_CONNECTED)) {
err = -EINVAL;
break;
}
@@ -1736,11 +1720,6 @@ static void iso_sock_ready(struct sock *sk)
release_sock(sk);
}
-struct iso_list_data {
- struct hci_conn *hcon;
- int count;
-};
-
static bool iso_match_big(struct sock *sk, void *data)
{
struct hci_evt_le_big_sync_estabilished *ev = data;
@@ -1759,7 +1738,7 @@ static void iso_conn_ready(struct iso_conn *conn)
struct sock *sk = conn->sk;
struct hci_ev_le_big_sync_estabilished *ev = NULL;
struct hci_ev_le_pa_sync_established *ev2 = NULL;
- struct hci_evt_le_big_info_adv_report *ev3 = NULL;
+ struct hci_ev_le_per_adv_report *ev3 = NULL;
struct hci_conn *hcon;
BT_DBG("conn %p", conn);
@@ -1777,32 +1756,37 @@ static void iso_conn_ready(struct iso_conn *conn)
HCI_EVT_LE_BIG_SYNC_ESTABILISHED);
/* Get reference to PA sync parent socket, if it exists */
- parent = iso_get_sock_listen(&hcon->src,
- &hcon->dst,
- iso_match_pa_sync_flag, NULL);
+ parent = iso_get_sock(&hcon->src, &hcon->dst,
+ BT_LISTEN,
+ iso_match_pa_sync_flag,
+ NULL);
if (!parent && ev)
- parent = iso_get_sock_listen(&hcon->src,
- &hcon->dst,
- iso_match_big, ev);
+ parent = iso_get_sock(&hcon->src,
+ &hcon->dst,
+ BT_LISTEN,
+ iso_match_big, ev);
} else if (test_bit(HCI_CONN_PA_SYNC_FAILED, &hcon->flags)) {
ev2 = hci_recv_event_data(hcon->hdev,
HCI_EV_LE_PA_SYNC_ESTABLISHED);
if (ev2)
- parent = iso_get_sock_listen(&hcon->src,
- &hcon->dst,
- iso_match_sid, ev2);
+ parent = iso_get_sock(&hcon->src,
+ &hcon->dst,
+ BT_LISTEN,
+ iso_match_sid, ev2);
} else if (test_bit(HCI_CONN_PA_SYNC, &hcon->flags)) {
ev3 = hci_recv_event_data(hcon->hdev,
- HCI_EVT_LE_BIG_INFO_ADV_REPORT);
+ HCI_EV_LE_PER_ADV_REPORT);
if (ev3)
- parent = iso_get_sock_listen(&hcon->src,
- &hcon->dst,
- iso_match_sync_handle, ev3);
+ parent = iso_get_sock(&hcon->src,
+ &hcon->dst,
+ BT_LISTEN,
+ iso_match_sync_handle_pa_report,
+ ev3);
}
if (!parent)
- parent = iso_get_sock_listen(&hcon->src,
- BDADDR_ANY, NULL, NULL);
+ parent = iso_get_sock(&hcon->src, BDADDR_ANY,
+ BT_LISTEN, NULL, NULL);
if (!parent)
return;
@@ -1839,7 +1823,6 @@ static void iso_conn_ready(struct iso_conn *conn)
if (ev3) {
iso_pi(sk)->qos = iso_pi(parent)->qos;
- iso_pi(sk)->qos.bcast.encryption = ev3->encryption;
hcon->iso_qos = iso_pi(sk)->qos;
iso_pi(sk)->bc_num_bis = iso_pi(parent)->bc_num_bis;
memcpy(iso_pi(sk)->bc_bis, iso_pi(parent)->bc_bis, ISO_MAX_NUM_BIS);
@@ -1923,8 +1906,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
*/
ev1 = hci_recv_event_data(hdev, HCI_EV_LE_PA_SYNC_ESTABLISHED);
if (ev1) {
- sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, iso_match_sid,
- ev1);
+ sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_LISTEN,
+ iso_match_sid, ev1);
if (sk && !ev1->status)
iso_pi(sk)->sync_handle = le16_to_cpu(ev1->handle);
@@ -1933,26 +1916,29 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
ev2 = hci_recv_event_data(hdev, HCI_EVT_LE_BIG_INFO_ADV_REPORT);
if (ev2) {
- /* Try to get PA sync listening socket, if it exists */
- sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
- iso_match_pa_sync_flag, NULL);
-
- if (!sk) {
- sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
- iso_match_sync_handle, ev2);
-
- /* If PA Sync is in process of terminating,
- * do not handle any more BIGInfo adv reports.
- */
-
- if (sk && test_bit(BT_SK_PA_SYNC_TERM,
- &iso_pi(sk)->flags))
- return 0;
+ /* Check if BIGInfo report has already been handled */
+ sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_CONNECTED,
+ iso_match_sync_handle, ev2);
+ if (sk) {
+ sock_put(sk);
+ sk = NULL;
+ goto done;
}
+ /* Try to get PA sync socket, if it exists */
+ sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_CONNECT2,
+ iso_match_sync_handle, ev2);
+ if (!sk)
+ sk = iso_get_sock(&hdev->bdaddr, bdaddr,
+ BT_LISTEN,
+ iso_match_sync_handle,
+ ev2);
+
if (sk) {
int err;
+ iso_pi(sk)->qos.bcast.encryption = ev2->encryption;
+
if (ev2->num_bis < iso_pi(sk)->bc_num_bis)
iso_pi(sk)->bc_num_bis = ev2->num_bis;
@@ -1971,6 +1957,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
}
}
}
+
+ goto done;
}
ev3 = hci_recv_event_data(hdev, HCI_EV_LE_PER_ADV_REPORT);
@@ -1979,8 +1967,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
u8 *base;
struct hci_conn *hcon;
- sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
- iso_match_sync_handle_pa_report, ev3);
+ sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_LISTEN,
+ iso_match_sync_handle_pa_report, ev3);
if (!sk)
goto done;
@@ -2029,7 +2017,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
hcon->le_per_adv_data_len = 0;
}
} else {
- sk = iso_get_sock_listen(&hdev->bdaddr, BDADDR_ANY, NULL, NULL);
+ sk = iso_get_sock(&hdev->bdaddr, BDADDR_ANY,
+ BT_LISTEN, NULL, NULL);
}
done:
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 84fc70862d78..9988ba382b68 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -415,6 +415,9 @@ static void l2cap_chan_timeout(struct work_struct *work)
BT_DBG("chan %p state %s", chan, state_to_string(chan->state));
+ if (!conn)
+ return;
+
mutex_lock(&conn->chan_lock);
/* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling
* this work. No need to call l2cap_chan_hold(chan) here again.
@@ -454,6 +457,9 @@ struct l2cap_chan *l2cap_chan_create(void)
/* Set default lock nesting level */
atomic_set(&chan->nesting, L2CAP_NESTING_NORMAL);
+ /* Available receive buffer space is initially unknown */
+ chan->rx_avail = -1;
+
write_lock(&chan_list_lock);
list_add(&chan->global_l, &chan_list);
write_unlock(&chan_list_lock);
@@ -535,6 +541,28 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan)
}
EXPORT_SYMBOL_GPL(l2cap_chan_set_defaults);
+static __u16 l2cap_le_rx_credits(struct l2cap_chan *chan)
+{
+ size_t sdu_len = chan->sdu ? chan->sdu->len : 0;
+
+ if (chan->mps == 0)
+ return 0;
+
+ /* If we don't know the available space in the receiver buffer, give
+ * enough credits for a full packet.
+ */
+ if (chan->rx_avail == -1)
+ return (chan->imtu / chan->mps) + 1;
+
+ /* If we know how much space is available in the receive buffer, give
+ * out as many credits as would fill the buffer.
+ */
+ if (chan->rx_avail <= sdu_len)
+ return 0;
+
+ return DIV_ROUND_UP(chan->rx_avail - sdu_len, chan->mps);
+}
+
static void l2cap_le_flowctl_init(struct l2cap_chan *chan, u16 tx_credits)
{
chan->sdu = NULL;
@@ -543,8 +571,7 @@ static void l2cap_le_flowctl_init(struct l2cap_chan *chan, u16 tx_credits)
chan->tx_credits = tx_credits;
/* Derive MPS from connection MTU to stop HCI fragmentation */
chan->mps = min_t(u16, chan->imtu, chan->conn->mtu - L2CAP_HDR_SIZE);
- /* Give enough credits for a full packet */
- chan->rx_credits = (chan->imtu / chan->mps) + 1;
+ chan->rx_credits = l2cap_le_rx_credits(chan);
skb_queue_head_init(&chan->tx_q);
}
@@ -556,7 +583,7 @@ static void l2cap_ecred_init(struct l2cap_chan *chan, u16 tx_credits)
/* L2CAP implementations shall support a minimum MPS of 64 octets */
if (chan->mps < L2CAP_ECRED_MIN_MPS) {
chan->mps = L2CAP_ECRED_MIN_MPS;
- chan->rx_credits = (chan->imtu / chan->mps) + 1;
+ chan->rx_credits = l2cap_le_rx_credits(chan);
}
}
@@ -1257,7 +1284,7 @@ static void l2cap_le_connect(struct l2cap_chan *chan)
struct l2cap_ecred_conn_data {
struct {
- struct l2cap_ecred_conn_req req;
+ struct l2cap_ecred_conn_req_hdr req;
__le16 scid[5];
} __packed pdu;
struct l2cap_chan *chan;
@@ -3737,7 +3764,7 @@ static void l2cap_ecred_list_defer(struct l2cap_chan *chan, void *data)
struct l2cap_ecred_rsp_data {
struct {
- struct l2cap_ecred_conn_rsp rsp;
+ struct l2cap_ecred_conn_rsp_hdr rsp;
__le16 scid[L2CAP_ECRED_MAX_CID];
} __packed pdu;
int count;
@@ -3746,6 +3773,8 @@ struct l2cap_ecred_rsp_data {
static void l2cap_ecred_rsp_defer(struct l2cap_chan *chan, void *data)
{
struct l2cap_ecred_rsp_data *rsp = data;
+ struct l2cap_ecred_conn_rsp *rsp_flex =
+ container_of(&rsp->pdu.rsp, struct l2cap_ecred_conn_rsp, hdr);
if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags))
return;
@@ -3755,7 +3784,7 @@ static void l2cap_ecred_rsp_defer(struct l2cap_chan *chan, void *data)
/* Include all channels pending with the same ident */
if (!rsp->pdu.rsp.result)
- rsp->pdu.rsp.dcid[rsp->count++] = cpu_to_le16(chan->scid);
+ rsp_flex->dcid[rsp->count++] = cpu_to_le16(chan->scid);
else
l2cap_chan_del(chan, ECONNRESET);
}
@@ -3902,13 +3931,12 @@ static inline int l2cap_command_rej(struct l2cap_conn *conn,
return 0;
}
-static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
- struct l2cap_cmd_hdr *cmd,
- u8 *data, u8 rsp_code, u8 amp_id)
+static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd,
+ u8 *data, u8 rsp_code)
{
struct l2cap_conn_req *req = (struct l2cap_conn_req *) data;
struct l2cap_conn_rsp rsp;
- struct l2cap_chan *chan = NULL, *pchan;
+ struct l2cap_chan *chan = NULL, *pchan = NULL;
int result, status = L2CAP_CS_NO_INFO;
u16 dcid = 0, scid = __le16_to_cpu(req->scid);
@@ -3921,7 +3949,7 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
&conn->hcon->dst, ACL_LINK);
if (!pchan) {
result = L2CAP_CR_BAD_PSM;
- goto sendresp;
+ goto response;
}
mutex_lock(&conn->chan_lock);
@@ -3983,17 +4011,8 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
status = L2CAP_CS_AUTHOR_PEND;
chan->ops->defer(chan);
} else {
- /* Force pending result for AMP controllers.
- * The connection will succeed after the
- * physical link is up.
- */
- if (amp_id == AMP_ID_BREDR) {
- l2cap_state_change(chan, BT_CONFIG);
- result = L2CAP_CR_SUCCESS;
- } else {
- l2cap_state_change(chan, BT_CONNECT2);
- result = L2CAP_CR_PEND;
- }
+ l2cap_state_change(chan, BT_CONFIG);
+ result = L2CAP_CR_SUCCESS;
status = L2CAP_CS_NO_INFO;
}
} else {
@@ -4008,17 +4027,15 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
}
response:
- l2cap_chan_unlock(pchan);
- mutex_unlock(&conn->chan_lock);
- l2cap_chan_put(pchan);
-
-sendresp:
rsp.scid = cpu_to_le16(scid);
rsp.dcid = cpu_to_le16(dcid);
rsp.result = cpu_to_le16(result);
rsp.status = cpu_to_le16(status);
l2cap_send_cmd(conn, cmd->ident, rsp_code, sizeof(rsp), &rsp);
+ if (!pchan)
+ return;
+
if (result == L2CAP_CR_PEND && status == L2CAP_CS_NO_INFO) {
struct l2cap_info_req info;
info.type = cpu_to_le16(L2CAP_IT_FEAT_MASK);
@@ -4041,7 +4058,9 @@ sendresp:
chan->num_conf_req++;
}
- return chan;
+ l2cap_chan_unlock(pchan);
+ mutex_unlock(&conn->chan_lock);
+ l2cap_chan_put(pchan);
}
static int l2cap_connect_req(struct l2cap_conn *conn,
@@ -4058,7 +4077,7 @@ static int l2cap_connect_req(struct l2cap_conn *conn,
mgmt_device_connected(hdev, hcon, NULL, 0);
hci_dev_unlock(hdev);
- l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP, 0);
+ l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP);
return 0;
}
@@ -4628,13 +4647,7 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,
memset(&rsp, 0, sizeof(rsp));
- if (max > hcon->le_conn_max_interval) {
- BT_DBG("requested connection interval exceeds current bounds.");
- err = -EINVAL;
- } else {
- err = hci_check_conn_params(min, max, latency, to_multiplier);
- }
-
+ err = hci_check_conn_params(min, max, latency, to_multiplier);
if (err)
rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED);
else
@@ -4994,10 +5007,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
u8 *data)
{
struct l2cap_ecred_conn_req *req = (void *) data;
- struct {
- struct l2cap_ecred_conn_rsp rsp;
- __le16 dcid[L2CAP_ECRED_MAX_CID];
- } __packed pdu;
+ DEFINE_RAW_FLEX(struct l2cap_ecred_conn_rsp, pdu, dcid, L2CAP_ECRED_MAX_CID);
struct l2cap_chan *chan, *pchan;
u16 mtu, mps;
__le16 psm;
@@ -5016,7 +5026,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
cmd_len -= sizeof(*req);
num_scid = cmd_len / sizeof(u16);
- if (num_scid > ARRAY_SIZE(pdu.dcid)) {
+ if (num_scid > L2CAP_ECRED_MAX_CID) {
result = L2CAP_CR_LE_INVALID_PARAMS;
goto response;
}
@@ -5045,7 +5055,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps);
- memset(&pdu, 0, sizeof(pdu));
+ memset(pdu, 0, sizeof(*pdu));
/* Check if we have socket listening on psm */
pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src,
@@ -5071,8 +5081,8 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
BT_DBG("scid[%d] 0x%4.4x", i, scid);
- pdu.dcid[i] = 0x0000;
- len += sizeof(*pdu.dcid);
+ pdu->dcid[i] = 0x0000;
+ len += sizeof(*pdu->dcid);
/* Check for valid dynamic CID range */
if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) {
@@ -5106,13 +5116,13 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
l2cap_ecred_init(chan, __le16_to_cpu(req->credits));
/* Init response */
- if (!pdu.rsp.credits) {
- pdu.rsp.mtu = cpu_to_le16(chan->imtu);
- pdu.rsp.mps = cpu_to_le16(chan->mps);
- pdu.rsp.credits = cpu_to_le16(chan->rx_credits);
+ if (!pdu->credits) {
+ pdu->mtu = cpu_to_le16(chan->imtu);
+ pdu->mps = cpu_to_le16(chan->mps);
+ pdu->credits = cpu_to_le16(chan->rx_credits);
}
- pdu.dcid[i] = cpu_to_le16(chan->scid);
+ pdu->dcid[i] = cpu_to_le16(chan->scid);
__set_chan_timer(chan, chan->ops->get_sndtimeo(chan));
@@ -5134,13 +5144,13 @@ unlock:
l2cap_chan_put(pchan);
response:
- pdu.rsp.result = cpu_to_le16(result);
+ pdu->result = cpu_to_le16(result);
if (defer)
return 0;
l2cap_send_cmd(conn, cmd->ident, L2CAP_ECRED_CONN_RSP,
- sizeof(pdu.rsp) + len, &pdu);
+ sizeof(*pdu) + len, pdu);
return 0;
}
@@ -6239,7 +6249,7 @@ static int l2cap_finish_move(struct l2cap_chan *chan)
BT_DBG("chan %p", chan);
chan->rx_state = L2CAP_RX_STATE_RECV;
- chan->conn->mtu = chan->conn->hcon->hdev->acl_mtu;
+ chan->conn->mtu = chan->conn->hcon->mtu;
return l2cap_resegment(chan);
}
@@ -6306,7 +6316,7 @@ static int l2cap_rx_state_wait_f(struct l2cap_chan *chan,
*/
chan->next_tx_seq = control->reqseq;
chan->unacked_frames = 0;
- chan->conn->mtu = chan->conn->hcon->hdev->acl_mtu;
+ chan->conn->mtu = chan->conn->hcon->mtu;
err = l2cap_resegment(chan);
@@ -6511,9 +6521,7 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan)
{
struct l2cap_conn *conn = chan->conn;
struct l2cap_le_credits pkt;
- u16 return_credits;
-
- return_credits = (chan->imtu / chan->mps) + 1;
+ u16 return_credits = l2cap_le_rx_credits(chan);
if (chan->rx_credits >= return_credits)
return;
@@ -6532,6 +6540,19 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan)
l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CREDITS, sizeof(pkt), &pkt);
}
+void l2cap_chan_rx_avail(struct l2cap_chan *chan, ssize_t rx_avail)
+{
+ if (chan->rx_avail == rx_avail)
+ return;
+
+ BT_DBG("chan %p has %zd bytes avail for rx", chan, rx_avail);
+
+ chan->rx_avail = rx_avail;
+
+ if (chan->state == BT_CONNECTED)
+ l2cap_chan_le_send_credits(chan);
+}
+
static int l2cap_ecred_recv(struct l2cap_chan *chan, struct sk_buff *skb)
{
int err;
@@ -6541,6 +6562,12 @@ static int l2cap_ecred_recv(struct l2cap_chan *chan, struct sk_buff *skb)
/* Wait recv to confirm reception before updating the credits */
err = chan->ops->recv(chan, skb);
+ if (err < 0 && chan->rx_avail != -1) {
+ BT_ERR("Queueing received LE L2CAP data failed");
+ l2cap_send_disconn_req(chan, ECONNRESET);
+ return err;
+ }
+
/* Update credits whenever an SDU is received */
l2cap_chan_le_send_credits(chan);
@@ -6563,7 +6590,8 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb)
}
chan->rx_credits--;
- BT_DBG("rx_credits %u -> %u", chan->rx_credits + 1, chan->rx_credits);
+ BT_DBG("chan %p: rx_credits %u -> %u",
+ chan, chan->rx_credits + 1, chan->rx_credits);
/* Update if remote had run out of credits, this should only happens
* if the remote is not using the entire MPS.
@@ -6733,6 +6761,8 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
BT_DBG("chan %p, len %d", chan, skb->len);
+ l2cap_chan_lock(chan);
+
if (chan->state != BT_BOUND && chan->state != BT_CONNECTED)
goto drop;
@@ -6744,11 +6774,13 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
bt_cb(skb)->l2cap.psm = psm;
if (!chan->ops->recv(chan, skb)) {
+ l2cap_chan_unlock(chan);
l2cap_chan_put(chan);
return;
}
drop:
+ l2cap_chan_unlock(chan);
l2cap_chan_put(chan);
free_skb:
kfree_skb(skb);
@@ -6846,18 +6878,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan);
- switch (hcon->type) {
- case LE_LINK:
- if (hcon->hdev->le_mtu) {
- conn->mtu = hcon->hdev->le_mtu;
- break;
- }
- fallthrough;
- default:
- conn->mtu = hcon->hdev->acl_mtu;
- break;
- }
-
+ conn->mtu = hcon->mtu;
conn->feat_mask = 0;
conn->local_fixed_chan = L2CAP_FC_SIG_BREDR | L2CAP_FC_CONNLESS;
@@ -7111,14 +7132,11 @@ EXPORT_SYMBOL_GPL(l2cap_chan_connect);
static void l2cap_ecred_reconfigure(struct l2cap_chan *chan)
{
struct l2cap_conn *conn = chan->conn;
- struct {
- struct l2cap_ecred_reconf_req req;
- __le16 scid;
- } pdu;
+ DEFINE_RAW_FLEX(struct l2cap_ecred_reconf_req, pdu, scid, 1);
- pdu.req.mtu = cpu_to_le16(chan->imtu);
- pdu.req.mps = cpu_to_le16(chan->mps);
- pdu.scid = cpu_to_le16(chan->scid);
+ pdu->mtu = cpu_to_le16(chan->imtu);
+ pdu->mps = cpu_to_le16(chan->mps);
+ pdu->scid[0] = cpu_to_le16(chan->scid);
chan->ident = l2cap_get_ident(conn);
@@ -7462,10 +7480,6 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
struct l2cap_conn *conn = hcon->l2cap_data;
int len;
- /* For AMP controller do not create l2cap conn */
- if (!conn && hcon->hdev->dev_type != HCI_PRIMARY)
- goto drop;
-
if (!conn)
conn = l2cap_conn_add(hcon);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 5cc83f906c12..ba437c6f6ee5 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -327,7 +327,7 @@ done:
}
static int l2cap_sock_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *nsk;
@@ -336,7 +336,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock,
lock_sock_nested(sk, L2CAP_NESTING_PARENT);
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
BT_DBG("sk %p timeo %ld", sk, timeo);
@@ -1131,6 +1131,34 @@ static int l2cap_sock_sendmsg(struct socket *sock, struct msghdr *msg,
return err;
}
+static void l2cap_publish_rx_avail(struct l2cap_chan *chan)
+{
+ struct sock *sk = chan->data;
+ ssize_t avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc);
+ int expected_skbs, skb_overhead;
+
+ if (avail <= 0) {
+ l2cap_chan_rx_avail(chan, 0);
+ return;
+ }
+
+ if (!chan->mps) {
+ l2cap_chan_rx_avail(chan, -1);
+ return;
+ }
+
+ /* Correct available memory by estimated sk_buff overhead.
+ * This is significant due to small transfer sizes. However, accept
+ * at least one full packet if receive space is non-zero.
+ */
+ expected_skbs = DIV_ROUND_UP(avail, chan->mps);
+ skb_overhead = expected_skbs * sizeof(struct sk_buff);
+ if (skb_overhead < avail)
+ l2cap_chan_rx_avail(chan, avail - skb_overhead);
+ else
+ l2cap_chan_rx_avail(chan, -1);
+}
+
static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
@@ -1167,28 +1195,33 @@ static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg,
else
err = bt_sock_recvmsg(sock, msg, len, flags);
- if (pi->chan->mode != L2CAP_MODE_ERTM)
+ if (pi->chan->mode != L2CAP_MODE_ERTM &&
+ pi->chan->mode != L2CAP_MODE_LE_FLOWCTL &&
+ pi->chan->mode != L2CAP_MODE_EXT_FLOWCTL)
return err;
- /* Attempt to put pending rx data in the socket buffer */
-
lock_sock(sk);
- if (!test_bit(CONN_LOCAL_BUSY, &pi->chan->conn_state))
- goto done;
+ l2cap_publish_rx_avail(pi->chan);
- if (pi->rx_busy_skb) {
- if (!__sock_queue_rcv_skb(sk, pi->rx_busy_skb))
- pi->rx_busy_skb = NULL;
- else
+ /* Attempt to put pending rx data in the socket buffer */
+ while (!list_empty(&pi->rx_busy)) {
+ struct l2cap_rx_busy *rx_busy =
+ list_first_entry(&pi->rx_busy,
+ struct l2cap_rx_busy,
+ list);
+ if (__sock_queue_rcv_skb(sk, rx_busy->skb) < 0)
goto done;
+ list_del(&rx_busy->list);
+ kfree(rx_busy);
}
/* Restore data flow when half of the receive buffer is
* available. This avoids resending large numbers of
* frames.
*/
- if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf >> 1)
+ if (test_bit(CONN_LOCAL_BUSY, &pi->chan->conn_state) &&
+ atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf >> 1)
l2cap_chan_busy(pi->chan, 0);
done:
@@ -1206,6 +1239,10 @@ static void l2cap_sock_kill(struct sock *sk)
BT_DBG("sk %p state %s", sk, state_to_string(sk->sk_state));
+ /* Sock is dead, so set chan data to NULL, avoid other task use invalid
+ * sock pointer.
+ */
+ l2cap_pi(sk)->chan->data = NULL;
/* Kill poor orphan */
l2cap_chan_put(l2cap_pi(sk)->chan);
@@ -1448,18 +1485,25 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan)
static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
{
- struct sock *sk = chan->data;
+ struct sock *sk;
+ struct l2cap_pinfo *pi;
int err;
- lock_sock(sk);
+ sk = chan->data;
+ if (!sk)
+ return -ENXIO;
- if (l2cap_pi(sk)->rx_busy_skb) {
+ pi = l2cap_pi(sk);
+ lock_sock(sk);
+ if (chan->mode == L2CAP_MODE_ERTM && !list_empty(&pi->rx_busy)) {
err = -ENOMEM;
goto done;
}
if (chan->mode != L2CAP_MODE_ERTM &&
- chan->mode != L2CAP_MODE_STREAMING) {
+ chan->mode != L2CAP_MODE_STREAMING &&
+ chan->mode != L2CAP_MODE_LE_FLOWCTL &&
+ chan->mode != L2CAP_MODE_EXT_FLOWCTL) {
/* Even if no filter is attached, we could potentially
* get errors from security modules, etc.
*/
@@ -1470,7 +1514,9 @@ static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
err = __sock_queue_rcv_skb(sk, skb);
- /* For ERTM, handle one skb that doesn't fit into the recv
+ l2cap_publish_rx_avail(chan);
+
+ /* For ERTM and LE, handle a skb that doesn't fit into the recv
* buffer. This is important to do because the data frames
* have already been acked, so the skb cannot be discarded.
*
@@ -1479,8 +1525,18 @@ static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
* acked and reassembled until there is buffer space
* available.
*/
- if (err < 0 && chan->mode == L2CAP_MODE_ERTM) {
- l2cap_pi(sk)->rx_busy_skb = skb;
+ if (err < 0 &&
+ (chan->mode == L2CAP_MODE_ERTM ||
+ chan->mode == L2CAP_MODE_LE_FLOWCTL ||
+ chan->mode == L2CAP_MODE_EXT_FLOWCTL)) {
+ struct l2cap_rx_busy *rx_busy =
+ kmalloc(sizeof(*rx_busy), GFP_KERNEL);
+ if (!rx_busy) {
+ err = -ENOMEM;
+ goto done;
+ }
+ rx_busy->skb = skb;
+ list_add_tail(&rx_busy->list, &pi->rx_busy);
l2cap_chan_busy(chan, 1);
err = 0;
}
@@ -1706,6 +1762,8 @@ static const struct l2cap_ops l2cap_chan_ops = {
static void l2cap_sock_destruct(struct sock *sk)
{
+ struct l2cap_rx_busy *rx_busy, *next;
+
BT_DBG("sk %p", sk);
if (l2cap_pi(sk)->chan) {
@@ -1713,9 +1771,10 @@ static void l2cap_sock_destruct(struct sock *sk)
l2cap_chan_put(l2cap_pi(sk)->chan);
}
- if (l2cap_pi(sk)->rx_busy_skb) {
- kfree_skb(l2cap_pi(sk)->rx_busy_skb);
- l2cap_pi(sk)->rx_busy_skb = NULL;
+ list_for_each_entry_safe(rx_busy, next, &l2cap_pi(sk)->rx_busy, list) {
+ kfree_skb(rx_busy->skb);
+ list_del(&rx_busy->list);
+ kfree(rx_busy);
}
skb_queue_purge(&sk->sk_receive_queue);
@@ -1799,6 +1858,8 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
chan->data = sk;
chan->ops = &l2cap_chan_ops;
+
+ l2cap_publish_rx_avail(chan);
}
static struct proto l2cap_proto = {
@@ -1820,6 +1881,8 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
sk->sk_destruct = l2cap_sock_destruct;
sk->sk_sndtimeo = L2CAP_CONN_TIMEOUT;
+ INIT_LIST_HEAD(&l2cap_pi(sk)->rx_busy);
+
chan = l2cap_chan_create();
if (!chan) {
sk_free(sk);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 965f621ef865..40d4887c7f79 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -33,7 +33,6 @@
#include <net/bluetooth/l2cap.h>
#include <net/bluetooth/mgmt.h>
-#include "hci_request.h"
#include "smp.h"
#include "mgmt_util.h"
#include "mgmt_config.h"
@@ -42,7 +41,7 @@
#include "aosp.h"
#define MGMT_VERSION 1
-#define MGMT_REVISION 22
+#define MGMT_REVISION 23
static const u16 mgmt_commands[] = {
MGMT_OP_READ_INDEX_LIST,
@@ -443,8 +442,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
count = 0;
list_for_each_entry(d, &hci_dev_list, list) {
- if (d->dev_type == HCI_PRIMARY &&
- !hci_dev_test_flag(d, HCI_UNCONFIGURED))
+ if (!hci_dev_test_flag(d, HCI_UNCONFIGURED))
count++;
}
@@ -468,8 +466,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
continue;
- if (d->dev_type == HCI_PRIMARY &&
- !hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
+ if (!hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
rp->index[count++] = cpu_to_le16(d->id);
bt_dev_dbg(hdev, "Added hci%u", d->id);
}
@@ -503,8 +500,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev,
count = 0;
list_for_each_entry(d, &hci_dev_list, list) {
- if (d->dev_type == HCI_PRIMARY &&
- hci_dev_test_flag(d, HCI_UNCONFIGURED))
+ if (hci_dev_test_flag(d, HCI_UNCONFIGURED))
count++;
}
@@ -528,8 +524,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev,
if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
continue;
- if (d->dev_type == HCI_PRIMARY &&
- hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
+ if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
rp->index[count++] = cpu_to_le16(d->id);
bt_dev_dbg(hdev, "Added hci%u", d->id);
}
@@ -561,10 +556,8 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev,
read_lock(&hci_dev_list_lock);
count = 0;
- list_for_each_entry(d, &hci_dev_list, list) {
- if (d->dev_type == HCI_PRIMARY || d->dev_type == HCI_AMP)
- count++;
- }
+ list_for_each_entry(d, &hci_dev_list, list)
+ count++;
rp = kmalloc(struct_size(rp, entry, count), GFP_ATOMIC);
if (!rp) {
@@ -585,16 +578,10 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev,
if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
continue;
- if (d->dev_type == HCI_PRIMARY) {
- if (hci_dev_test_flag(d, HCI_UNCONFIGURED))
- rp->entry[count].type = 0x01;
- else
- rp->entry[count].type = 0x00;
- } else if (d->dev_type == HCI_AMP) {
- rp->entry[count].type = 0x02;
- } else {
- continue;
- }
+ if (hci_dev_test_flag(d, HCI_UNCONFIGURED))
+ rp->entry[count].type = 0x01;
+ else
+ rp->entry[count].type = 0x00;
rp->entry[count].bus = d->bus;
rp->entry[count++].index = cpu_to_le16(d->id);
@@ -7825,6 +7812,18 @@ unlock:
return err;
}
+static int conn_update_sync(struct hci_dev *hdev, void *data)
+{
+ struct hci_conn_params *params = data;
+ struct hci_conn *conn;
+
+ conn = hci_conn_hash_lookup_le(hdev, &params->addr, params->addr_type);
+ if (!conn)
+ return -ECANCELED;
+
+ return hci_le_conn_update_sync(hdev, conn, params);
+}
+
static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
u16 len)
{
@@ -7858,12 +7857,14 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
- hci_conn_params_clear_disabled(hdev);
+ if (param_count > 1)
+ hci_conn_params_clear_disabled(hdev);
for (i = 0; i < param_count; i++) {
struct mgmt_conn_param *param = &cp->params[i];
struct hci_conn_params *hci_param;
u16 min, max, latency, timeout;
+ bool update = false;
u8 addr_type;
bt_dev_dbg(hdev, "Adding %pMR (type %u)", &param->addr.bdaddr,
@@ -7891,6 +7892,19 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
continue;
}
+ /* Detect when the loading is for an existing parameter then
+ * attempt to trigger the connection update procedure.
+ */
+ if (!i && param_count == 1) {
+ hci_param = hci_conn_params_lookup(hdev,
+ &param->addr.bdaddr,
+ addr_type);
+ if (hci_param)
+ update = true;
+ else
+ hci_conn_params_clear_disabled(hdev);
+ }
+
hci_param = hci_conn_params_add(hdev, &param->addr.bdaddr,
addr_type);
if (!hci_param) {
@@ -7902,6 +7916,25 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
hci_param->conn_max_interval = max;
hci_param->conn_latency = latency;
hci_param->supervision_timeout = timeout;
+
+ /* Check if we need to trigger a connection update */
+ if (update) {
+ struct hci_conn *conn;
+
+ /* Lookup for existing connection as central and check
+ * if parameters match and if they don't then trigger
+ * a connection update.
+ */
+ conn = hci_conn_hash_lookup_le(hdev, &hci_param->addr,
+ addr_type);
+ if (conn && conn->role == HCI_ROLE_MASTER &&
+ (conn->le_conn_min_interval != min ||
+ conn->le_conn_max_interval != max ||
+ conn->le_conn_latency != latency ||
+ conn->le_supv_timeout != timeout))
+ hci_cmd_sync_queue(hdev, conn_update_sync,
+ hci_param, NULL);
+ }
}
hci_dev_unlock(hdev);
@@ -9331,23 +9364,14 @@ void mgmt_index_added(struct hci_dev *hdev)
if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
return;
- switch (hdev->dev_type) {
- case HCI_PRIMARY:
- if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
- mgmt_index_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev,
- NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS);
- ev.type = 0x01;
- } else {
- mgmt_index_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0,
- HCI_MGMT_INDEX_EVENTS);
- ev.type = 0x00;
- }
- break;
- case HCI_AMP:
- ev.type = 0x02;
- break;
- default:
- return;
+ if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
+ mgmt_index_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev, NULL, 0,
+ HCI_MGMT_UNCONF_INDEX_EVENTS);
+ ev.type = 0x01;
+ } else {
+ mgmt_index_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0,
+ HCI_MGMT_INDEX_EVENTS);
+ ev.type = 0x00;
}
ev.bus = hdev->bus;
@@ -9364,25 +9388,16 @@ void mgmt_index_removed(struct hci_dev *hdev)
if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
return;
- switch (hdev->dev_type) {
- case HCI_PRIMARY:
- mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status);
+ mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status);
- if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
- mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev,
- NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS);
- ev.type = 0x01;
- } else {
- mgmt_index_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0,
- HCI_MGMT_INDEX_EVENTS);
- ev.type = 0x00;
- }
- break;
- case HCI_AMP:
- ev.type = 0x02;
- break;
- default:
- return;
+ if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
+ mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0,
+ HCI_MGMT_UNCONF_INDEX_EVENTS);
+ ev.type = 0x01;
+ } else {
+ mgmt_index_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0,
+ HCI_MGMT_INDEX_EVENTS);
+ ev.type = 0x00;
}
ev.bus = hdev->bus;
diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index 9612c5d1b13f..5a8ccc491b14 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -7,7 +7,6 @@
#include <net/bluetooth/hci_core.h>
#include <net/bluetooth/mgmt.h>
-#include "hci_request.h"
#include "mgmt_util.h"
#include "msft.h"
@@ -769,7 +768,7 @@ void msft_register(struct hci_dev *hdev)
mutex_init(&msft->filter_lock);
}
-void msft_unregister(struct hci_dev *hdev)
+void msft_release(struct hci_dev *hdev)
{
struct msft_data *msft = hdev->msft_data;
diff --git a/net/bluetooth/msft.h b/net/bluetooth/msft.h
index 2a63205b377b..fe538e9c91c0 100644
--- a/net/bluetooth/msft.h
+++ b/net/bluetooth/msft.h
@@ -14,7 +14,7 @@
bool msft_monitor_supported(struct hci_dev *hdev);
void msft_register(struct hci_dev *hdev);
-void msft_unregister(struct hci_dev *hdev);
+void msft_release(struct hci_dev *hdev);
void msft_do_open(struct hci_dev *hdev);
void msft_do_close(struct hci_dev *hdev);
void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb);
@@ -35,7 +35,7 @@ static inline bool msft_monitor_supported(struct hci_dev *hdev)
}
static inline void msft_register(struct hci_dev *hdev) {}
-static inline void msft_unregister(struct hci_dev *hdev) {}
+static inline void msft_release(struct hci_dev *hdev) {}
static inline void msft_do_open(struct hci_dev *hdev) {}
static inline void msft_do_close(struct hci_dev *hdev) {}
static inline void msft_vendor_evt(struct hci_dev *hdev, void *data,
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 29aa07e9db9d..37d63d768afb 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -468,8 +468,8 @@ done:
return err;
}
-static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *nsk;
@@ -483,7 +483,7 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f
goto done;
}
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
BT_DBG("sk %p timeo %ld", sk, timeo);
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 69c75c041fe1..af80d599c337 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -504,7 +504,7 @@ static int rfcomm_get_dev_list(void __user *arg)
struct rfcomm_dev *dev;
struct rfcomm_dev_list_req *dl;
struct rfcomm_dev_info *di;
- int n = 0, size, err;
+ int n = 0, err;
u16 dev_num;
BT_DBG("");
@@ -515,12 +515,11 @@ static int rfcomm_get_dev_list(void __user *arg)
if (!dev_num || dev_num > (PAGE_SIZE * 4) / sizeof(*di))
return -EINVAL;
- size = sizeof(*dl) + dev_num * sizeof(*di);
-
- dl = kzalloc(size, GFP_KERNEL);
+ dl = kzalloc(struct_size(dl, dev_info, dev_num), GFP_KERNEL);
if (!dl)
return -ENOMEM;
+ dl->dev_num = dev_num;
di = dl->dev_info;
mutex_lock(&rfcomm_dev_lock);
@@ -528,12 +527,12 @@ static int rfcomm_get_dev_list(void __user *arg)
list_for_each_entry(dev, &rfcomm_dev_list, list) {
if (!tty_port_get(&dev->port))
continue;
- (di + n)->id = dev->id;
- (di + n)->flags = dev->flags;
- (di + n)->state = dev->dlc->state;
- (di + n)->channel = dev->channel;
- bacpy(&(di + n)->src, &dev->src);
- bacpy(&(di + n)->dst, &dev->dst);
+ di[n].id = dev->id;
+ di[n].flags = dev->flags;
+ di[n].state = dev->dlc->state;
+ di[n].channel = dev->channel;
+ bacpy(&di[n].src, &dev->src);
+ bacpy(&di[n].dst, &dev->dst);
tty_port_put(&dev->port);
if (++n >= dev_num)
break;
@@ -542,9 +541,7 @@ static int rfcomm_get_dev_list(void __user *arg)
mutex_unlock(&rfcomm_dev_lock);
dl->dev_num = n;
- size = sizeof(*dl) + n * sizeof(*di);
-
- err = copy_to_user(arg, dl, size);
+ err = copy_to_user(arg, dl, struct_size(dl, dev_info, n));
kfree(dl);
return err ? -EFAULT : 0;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 5d03c5440b06..a5ac160c592e 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -83,6 +83,10 @@ static void sco_sock_timeout(struct work_struct *work)
struct sock *sk;
sco_conn_lock(conn);
+ if (!conn->hcon) {
+ sco_conn_unlock(conn);
+ return;
+ }
sk = conn->sk;
if (sk)
sock_hold(sk);
@@ -122,7 +126,6 @@ static void sco_sock_clear_timer(struct sock *sk)
/* ---- SCO connections ---- */
static struct sco_conn *sco_conn_add(struct hci_conn *hcon)
{
- struct hci_dev *hdev = hcon->hdev;
struct sco_conn *conn = hcon->sco_data;
if (conn) {
@@ -140,9 +143,10 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon)
hcon->sco_data = conn;
conn->hcon = hcon;
+ conn->mtu = hcon->mtu;
- if (hdev->sco_mtu > 0)
- conn->mtu = hdev->sco_mtu;
+ if (hcon->mtu > 0)
+ conn->mtu = hcon->mtu;
else
conn->mtu = 60;
@@ -643,7 +647,7 @@ done:
}
static int sco_sock_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *ch;
@@ -652,7 +656,7 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock,
lock_sock(sk);
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
BT_DBG("sk %p timeo %ld", sk, timeo);
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index de33dc1b0daa..3ea52b05adfb 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -79,6 +79,51 @@ static int dummy_ops_call_op(void *image, struct bpf_dummy_ops_test_args *args)
args->args[3], args->args[4]);
}
+static const struct bpf_ctx_arg_aux *find_ctx_arg_info(struct bpf_prog_aux *aux, int offset)
+{
+ int i;
+
+ for (i = 0; i < aux->ctx_arg_info_size; i++)
+ if (aux->ctx_arg_info[i].offset == offset)
+ return &aux->ctx_arg_info[i];
+
+ return NULL;
+}
+
+/* There is only one check at the moment:
+ * - zero should not be passed for pointer parameters not marked as nullable.
+ */
+static int check_test_run_args(struct bpf_prog *prog, struct bpf_dummy_ops_test_args *args)
+{
+ const struct btf_type *func_proto = prog->aux->attach_func_proto;
+
+ for (u32 arg_no = 0; arg_no < btf_type_vlen(func_proto) ; ++arg_no) {
+ const struct btf_param *param = &btf_params(func_proto)[arg_no];
+ const struct bpf_ctx_arg_aux *info;
+ const struct btf_type *t;
+ int offset;
+
+ if (args->args[arg_no] != 0)
+ continue;
+
+ /* Program is validated already, so there is no need
+ * to check if t is NULL.
+ */
+ t = btf_type_skip_modifiers(bpf_dummy_ops_btf, param->type, NULL);
+ if (!btf_type_is_ptr(t))
+ continue;
+
+ offset = btf_ctx_arg_offset(bpf_dummy_ops_btf, func_proto, arg_no);
+ info = find_ctx_arg_info(prog->aux, offset);
+ if (info && (info->reg_type & PTR_MAYBE_NULL))
+ continue;
+
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
extern const struct bpf_link_ops bpf_struct_ops_link_lops;
int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
@@ -87,7 +132,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
const struct bpf_struct_ops *st_ops = &bpf_bpf_dummy_ops;
const struct btf_type *func_proto;
struct bpf_dummy_ops_test_args *args;
- struct bpf_tramp_links *tlinks;
+ struct bpf_tramp_links *tlinks = NULL;
struct bpf_tramp_link *link = NULL;
void *image = NULL;
unsigned int op_idx;
@@ -109,6 +154,10 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
if (IS_ERR(args))
return PTR_ERR(args);
+ err = check_test_run_args(prog, args);
+ if (err)
+ goto out;
+
tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
if (!tlinks) {
err = -ENOMEM;
@@ -133,7 +182,9 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
if (err < 0)
goto out;
- arch_protect_bpf_trampoline(image, PAGE_SIZE);
+ err = arch_protect_bpf_trampoline(image, PAGE_SIZE);
+ if (err)
+ goto out;
prog_ret = dummy_ops_call_op(image, args);
err = dummy_ops_copy_args(args);
@@ -221,16 +272,16 @@ static int bpf_dummy_init_member(const struct btf_type *t,
return -EOPNOTSUPP;
}
-static int bpf_dummy_reg(void *kdata)
+static int bpf_dummy_reg(void *kdata, struct bpf_link *link)
{
return -EOPNOTSUPP;
}
-static void bpf_dummy_unreg(void *kdata)
+static void bpf_dummy_unreg(void *kdata, struct bpf_link *link)
{
}
-static int bpf_dummy_test_1(struct bpf_dummy_ops_state *cb)
+static int bpf_dummy_ops__test_1(struct bpf_dummy_ops_state *cb__nullable)
{
return 0;
}
@@ -247,7 +298,7 @@ static int bpf_dummy_test_sleepable(struct bpf_dummy_ops_state *cb)
}
static struct bpf_dummy_ops __bpf_bpf_dummy_ops = {
- .test_1 = bpf_dummy_test_1,
+ .test_1 = bpf_dummy_ops__test_1,
.test_2 = bpf_dummy_test_2,
.test_sleepable = bpf_dummy_test_sleepable,
};
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 61efeadaff8d..6d7a442ceb89 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -127,9 +127,10 @@ struct xdp_test_data {
#define TEST_XDP_FRAME_SIZE (PAGE_SIZE - sizeof(struct xdp_page_head))
#define TEST_XDP_MAX_BATCH 256
-static void xdp_test_run_init_page(struct page *page, void *arg)
+static void xdp_test_run_init_page(netmem_ref netmem, void *arg)
{
- struct xdp_page_head *head = phys_to_virt(page_to_phys(page));
+ struct xdp_page_head *head =
+ phys_to_virt(page_to_phys(netmem_to_page(netmem)));
struct xdp_buff *new_ctx, *orig_ctx;
u32 headroom = XDP_PACKET_HEADROOM;
struct xdp_test_data *xdp = arg;
@@ -283,9 +284,10 @@ static int xdp_recv_frames(struct xdp_frame **frames, int nframes,
static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog,
u32 repeat)
{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
int err = 0, act, ret, i, nframes = 0, batch_sz;
struct xdp_frame **frames = xdp->frames;
+ struct bpf_redirect_info *ri;
struct xdp_page_head *head;
struct xdp_frame *frm;
bool redirect = false;
@@ -295,6 +297,8 @@ static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog,
batch_sz = min_t(u32, repeat, xdp->batch_size);
local_bh_disable();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
+ ri = bpf_net_ctx_get_ri();
xdp_set_return_frame_no_direct();
for (i = 0; i < batch_sz; i++) {
@@ -359,6 +363,7 @@ out:
}
xdp_clear_return_frame_no_direct();
+ bpf_net_ctx_clear(bpf_net_ctx);
local_bh_enable();
return err;
}
@@ -394,6 +399,7 @@ static int bpf_test_run_xdp_live(struct bpf_prog *prog, struct xdp_buff *ctx,
static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
u32 *retval, u32 *time, bool xdp)
{
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
struct bpf_prog_array_item item = {.prog = prog};
struct bpf_run_ctx *old_ctx;
struct bpf_cg_run_ctx run_ctx;
@@ -419,10 +425,14 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
do {
run_ctx.prog_item = &item;
local_bh_disable();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
+
if (xdp)
*retval = bpf_prog_run_xdp(prog, ctx);
else
*retval = bpf_prog_run(prog, ctx);
+
+ bpf_net_ctx_clear(bpf_net_ctx);
local_bh_enable();
} while (bpf_test_timer_continue(&t, 1, repeat, &ret, time));
bpf_reset_run_ctx(old_ctx);
@@ -575,6 +585,13 @@ __bpf_kfunc int bpf_modify_return_test2(int a, int *b, short c, int d,
return a + *b + c + d + (long)e + f + g;
}
+__bpf_kfunc int bpf_modify_return_test_tp(int nonce)
+{
+ trace_bpf_trigger_tp(nonce);
+
+ return nonce;
+}
+
int noinline bpf_fentry_shadow_test(int a)
{
return a + 1;
@@ -622,6 +639,7 @@ __bpf_kfunc_end_defs();
BTF_KFUNCS_START(bpf_test_modify_return_ids)
BTF_ID_FLAGS(func, bpf_modify_return_test)
BTF_ID_FLAGS(func, bpf_modify_return_test2)
+BTF_ID_FLAGS(func, bpf_modify_return_test_tp)
BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE)
BTF_KFUNCS_END(bpf_test_modify_return_ids)
@@ -719,10 +737,16 @@ static void
__bpf_prog_test_run_raw_tp(void *data)
{
struct bpf_raw_tp_test_run_info *info = data;
+ struct bpf_trace_run_ctx run_ctx = {};
+ struct bpf_run_ctx *old_run_ctx;
+
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
rcu_read_lock();
info->retval = bpf_prog_run(info->prog, info->ctx);
rcu_read_unlock();
+
+ bpf_reset_run_ctx(old_run_ctx);
}
int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
@@ -969,7 +993,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
void *data;
int ret;
- if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size)
+ if ((kattr->test.flags & ~BPF_F_TEST_SKB_CHECKSUM_COMPLETE) ||
+ kattr->test.cpu || kattr->test.batch_size)
return -EINVAL;
data = bpf_test_init(kattr, kattr->test.data_size_in,
@@ -1017,6 +1042,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
__skb_put(skb, size);
+
if (ctx && ctx->ifindex > 1) {
dev = dev_get_by_index(net, ctx->ifindex);
if (!dev) {
@@ -1052,9 +1078,19 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
__skb_push(skb, hh_len);
if (is_direct_pkt_access)
bpf_compute_data_pointers(skb);
+
ret = convert___skb_to_skb(skb, ctx);
if (ret)
goto out;
+
+ if (kattr->test.flags & BPF_F_TEST_SKB_CHECKSUM_COMPLETE) {
+ const int off = skb_network_offset(skb);
+ int len = skb->len - off;
+
+ skb->csum = skb_checksum(skb, off, len, 0);
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ }
+
ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false);
if (ret)
goto out;
@@ -1069,6 +1105,20 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
}
memset(__skb_push(skb, hh_len), 0, hh_len);
}
+
+ if (kattr->test.flags & BPF_F_TEST_SKB_CHECKSUM_COMPLETE) {
+ const int off = skb_network_offset(skb);
+ int len = skb->len - off;
+ __wsum csum;
+
+ csum = skb_checksum(skb, off, len, 0);
+
+ if (csum_fold(skb->csum) != csum_fold(csum)) {
+ ret = -EBADMSG;
+ goto out;
+ }
+ }
+
convert_skb_to___skb(skb, ctx);
size = skb->len;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index c366ccc8b3db..fb1115857e49 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -27,6 +27,7 @@ EXPORT_SYMBOL_GPL(nf_br_ops);
/* net device transmit always called with BH disabled */
netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
+ enum skb_drop_reason reason = pskb_may_pull_reason(skb, ETH_HLEN);
struct net_bridge_mcast_port *pmctx_null = NULL;
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_mcast *brmctx = &br->multicast_ctx;
@@ -38,6 +39,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
const unsigned char *dest;
u16 vid = 0;
+ if (unlikely(reason != SKB_NOT_DROPPED_YET)) {
+ kfree_skb_reason(skb, reason);
+ return NETDEV_TX_OK;
+ }
+
memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
br_tc_skb_miss_set(skb, false);
@@ -197,7 +203,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
{
struct net_bridge *br = netdev_priv(dev);
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
/* this flag will be cleared if the MTU was automatically adjusted */
br_opt_toggle(br, BROPT_MTU_SET_BY_USER, true);
@@ -389,7 +395,7 @@ static int br_fill_forward_path(struct net_device_path_ctx *ctx,
br_vlan_fill_forward_path_pvid(br, ctx, path);
f = br_fdb_find_rcu(br, ctx->daddr, path->bridge.vlan_id);
- if (!f || !f->dst)
+ if (!f)
return -1;
dst = READ_ONCE(f->dst);
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index d7c35f55bd69..e19b583ff2c6 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -25,8 +25,8 @@ static inline int should_deliver(const struct net_bridge_port *p,
vg = nbp_vlan_group_rcu(p);
return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) &&
- p->state == BR_STATE_FORWARDING && br_allowed_egress(vg, skb) &&
- nbp_switchdev_allowed_egress(p, skb) &&
+ (br_mst_is_enabled(p->br) || p->state == BR_STATE_FORWARDING) &&
+ br_allowed_egress(vg, skb) && nbp_switchdev_allowed_egress(p, skb) &&
!br_skb_isolated(p, skb);
}
@@ -258,6 +258,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb,
{
struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
const unsigned char *src = eth_hdr(skb)->h_source;
+ struct sk_buff *nskb;
if (!should_deliver(p, skb))
return;
@@ -266,12 +267,16 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb,
if (skb->dev == p->dev && ether_addr_equal(src, addr))
return;
- skb = pskb_copy(skb, GFP_ATOMIC);
- if (!skb) {
+ __skb_push(skb, ETH_HLEN);
+ nskb = pskb_copy(skb, GFP_ATOMIC);
+ __skb_pull(skb, ETH_HLEN);
+ if (!nskb) {
DEV_STATS_INC(dev, tx_dropped);
return;
}
+ skb = nskb;
+ __skb_pull(skb, ETH_HLEN);
if (!is_broadcast_ether_addr(addr))
memcpy(eth_hdr(skb)->h_dest, addr, ETH_ALEN);
diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c
index ee680adcee17..1820f09ff59c 100644
--- a/net/bridge/br_mst.c
+++ b/net/bridge/br_mst.c
@@ -73,12 +73,11 @@ int br_mst_get_state(const struct net_device *dev, u16 msti, u8 *state)
}
EXPORT_SYMBOL_GPL(br_mst_get_state);
-static void br_mst_vlan_set_state(struct net_bridge_port *p, struct net_bridge_vlan *v,
+static void br_mst_vlan_set_state(struct net_bridge_vlan_group *vg,
+ struct net_bridge_vlan *v,
u8 state)
{
- struct net_bridge_vlan_group *vg = nbp_vlan_group(p);
-
- if (v->state == state)
+ if (br_vlan_get_state(v) == state)
return;
br_vlan_set_state(v, state);
@@ -100,11 +99,12 @@ int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state,
};
struct net_bridge_vlan_group *vg;
struct net_bridge_vlan *v;
- int err;
+ int err = 0;
- vg = nbp_vlan_group(p);
+ rcu_read_lock();
+ vg = nbp_vlan_group_rcu(p);
if (!vg)
- return 0;
+ goto out;
/* MSTI 0 (CST) state changes are notified via the regular
* SWITCHDEV_ATTR_ID_PORT_STP_STATE.
@@ -112,17 +112,20 @@ int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state,
if (msti) {
err = switchdev_port_attr_set(p->dev, &attr, extack);
if (err && err != -EOPNOTSUPP)
- return err;
+ goto out;
}
- list_for_each_entry(v, &vg->vlan_list, vlist) {
+ err = 0;
+ list_for_each_entry_rcu(v, &vg->vlan_list, vlist) {
if (v->brvlan->msti != msti)
continue;
- br_mst_vlan_set_state(p, v, state);
+ br_mst_vlan_set_state(vg, v, state);
}
- return 0;
+out:
+ rcu_read_unlock();
+ return err;
}
static void br_mst_vlan_sync_state(struct net_bridge_vlan *pv, u16 msti)
@@ -136,13 +139,13 @@ static void br_mst_vlan_sync_state(struct net_bridge_vlan *pv, u16 msti)
* it.
*/
if (v != pv && v->brvlan->msti == msti) {
- br_mst_vlan_set_state(pv->port, pv, v->state);
+ br_mst_vlan_set_state(vg, pv, v->state);
return;
}
}
/* Otherwise, start out in a new MSTI with all ports disabled. */
- return br_mst_vlan_set_state(pv->port, pv, BR_STATE_DISABLED);
+ return br_mst_vlan_set_state(vg, pv, BR_STATE_DISABLED);
}
int br_mst_vlan_set_msti(struct net_bridge_vlan *mv, u16 msti)
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 9a1cb5079a7a..b2ae0d2434d2 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -2045,16 +2045,14 @@ void br_multicast_del_port(struct net_bridge_port *port)
{
struct net_bridge *br = port->br;
struct net_bridge_port_group *pg;
- HLIST_HEAD(deleted_head);
struct hlist_node *n;
/* Take care of the remaining groups, only perm ones should be left */
spin_lock_bh(&br->multicast_lock);
hlist_for_each_entry_safe(pg, n, &port->mglist, mglist)
br_multicast_find_del_pg(br, pg);
- hlist_move_list(&br->mcast_gc_list, &deleted_head);
spin_unlock_bh(&br->multicast_lock);
- br_multicast_gc(&deleted_head);
+ flush_work(&br->mcast_gc_work);
br_multicast_port_ctx_deinit(&port->multicast_ctx);
free_percpu(port->mcast_stats);
}
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 22e35623c148..8f9c19d992ac 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -137,6 +137,7 @@ static inline bool is_pppoe_ipv6(const struct sk_buff *skb,
#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN)
struct brnf_frag_data {
+ local_lock_t bh_lock;
char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH];
u8 encap_size;
u8 size;
@@ -144,7 +145,9 @@ struct brnf_frag_data {
__be16 vlan_proto;
};
-static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage);
+static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
static void nf_bridge_info_free(struct sk_buff *skb)
{
@@ -399,7 +402,8 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
goto free_skb;
rt = ip_route_output(net, iph->daddr, 0,
- RT_TOS(iph->tos), 0);
+ RT_TOS(iph->tos), 0,
+ RT_SCOPE_UNIVERSE);
if (!IS_ERR(rt)) {
/* - Bridged-and-DNAT'ed traffic doesn't
* require ip_forwarding. */
@@ -618,8 +622,12 @@ static unsigned int br_nf_local_in(void *priv,
if (likely(nf_ct_is_confirmed(ct)))
return NF_ACCEPT;
+ if (WARN_ON_ONCE(refcount_read(&nfct->use) != 1)) {
+ nf_reset_ct(skb);
+ return NF_ACCEPT;
+ }
+
WARN_ON_ONCE(skb_shared(skb));
- WARN_ON_ONCE(refcount_read(&nfct->use) != 1);
/* We can't call nf_confirm here, it would create a dependency
* on nf_conntrack module.
@@ -849,6 +857,7 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
unsigned int mtu, mtu_reserved;
+ int ret;
mtu_reserved = nf_bridge_mtu_reduction(skb);
mtu = skb->dev->mtu;
@@ -881,6 +890,7 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
+ local_lock_nested_bh(&brnf_frag_data_storage.bh_lock);
data = this_cpu_ptr(&brnf_frag_data_storage);
if (skb_vlan_tag_present(skb)) {
@@ -896,7 +906,9 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
data->size);
- return br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit);
+ ret = br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit);
+ local_unlock_nested_bh(&brnf_frag_data_storage.bh_lock);
+ return ret;
}
if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) &&
skb->protocol == htons(ETH_P_IPV6)) {
@@ -908,6 +920,7 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
+ local_lock_nested_bh(&brnf_frag_data_storage.bh_lock);
data = this_cpu_ptr(&brnf_frag_data_storage);
data->encap_size = nf_bridge_encap_header_len(skb);
data->size = ETH_HLEN + data->encap_size;
@@ -915,8 +928,12 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
data->size);
- if (v6ops)
- return v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit);
+ if (v6ops) {
+ ret = v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit);
+ local_unlock_nested_bh(&brnf_frag_data_storage.bh_lock);
+ return ret;
+ }
+ local_unlock_nested_bh(&brnf_frag_data_storage.bh_lock);
kfree_skb(skb);
return -EMSGSIZE;
@@ -1176,7 +1193,7 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
#ifdef CONFIG_SYSCTL
static
-int brnf_sysctl_call_tables(struct ctl_table *ctl, int write,
+int brnf_sysctl_call_tables(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
@@ -1225,7 +1242,6 @@ static struct ctl_table brnf_table[] = {
.mode = 0644,
.proc_handler = brnf_sysctl_call_tables,
},
- { }
};
static inline void br_netfilter_sysctl_default(struct brnf_net *brnf)
@@ -1274,7 +1290,7 @@ static int br_netfilter_sysctl_init_net(struct net *net)
static void br_netfilter_sysctl_exit_net(struct net *net,
struct brnf_net *brnet)
{
- struct ctl_table *table = brnet->ctl_hdr->ctl_table_arg;
+ const struct ctl_table *table = brnet->ctl_hdr->ctl_table_arg;
unregister_net_sysctl_table(brnet->ctl_hdr);
if (!net_eq(net, &init_net))
diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c
index 17abf092f7ca..71a12da30004 100644
--- a/net/bridge/br_netlink_tunnel.c
+++ b/net/bridge/br_netlink_tunnel.c
@@ -315,8 +315,8 @@ int br_process_vlan_tunnel_info(const struct net_bridge *br,
if (curr_change)
*changed = curr_change;
- __vlan_tunnel_handle_range(p, &v_start, &v_end, v,
- curr_change);
+ __vlan_tunnel_handle_range(p, &v_start, &v_end, v,
+ curr_change);
}
if (v_start && v_end)
br_vlan_notify(br, p, v_start->vid, v_end->vid,
diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c
index 81833ca7a2c7..a966a6ec8263 100644
--- a/net/bridge/br_vlan_tunnel.c
+++ b/net/bridge/br_vlan_tunnel.c
@@ -65,13 +65,14 @@ static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
{
struct metadata_dst *metadata = rtnl_dereference(vlan->tinfo.tunnel_dst);
__be64 key = key32_to_tunnel_id(cpu_to_be32(tun_id));
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
int err;
if (metadata)
return -EEXIST;
- metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY,
- key, 0);
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, flags, key, 0);
if (!metadata)
return -EINVAL;
@@ -185,6 +186,7 @@ void br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
struct net_bridge_vlan *vlan)
{
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
struct metadata_dst *tunnel_dst;
__be64 tunnel_id;
int err;
@@ -202,7 +204,8 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
return err;
if (BR_INPUT_SKB_CB(skb)->backup_nhid) {
- tunnel_dst = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY,
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ tunnel_dst = __ip_tun_set_dst(0, 0, 0, 0, 0, flags,
tunnel_id, 0);
if (!tunnel_dst)
return -ENOMEM;
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index c3c51b9a6826..816bb0fde718 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -32,7 +32,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
struct sk_buff *))
{
int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
- bool mono_delivery_time = skb->mono_delivery_time;
+ u8 tstamp_type = skb->tstamp_type;
unsigned int hlen, ll_rs, mtu;
ktime_t tstamp = skb->tstamp;
struct ip_frag_state state;
@@ -82,7 +82,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
if (iter.frag)
ip_fraglist_prepare(skb, &iter);
- skb_set_delivery_time(skb, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb, tstamp, tstamp_type);
err = output(net, sk, data, skb);
if (err || !iter.frag)
break;
@@ -113,7 +113,7 @@ slow_path:
goto blackhole;
}
- skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb2, tstamp, tstamp_type);
err = output(net, sk, data, skb2);
if (err)
goto blackhole;
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 8480684f2762..20139fa1be1f 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -201,14 +201,14 @@ int cfctrl_linkup_request(struct cflayer *layer,
struct cflayer *user_layer)
{
struct cfctrl *cfctrl = container_obj(layer);
+ struct cflayer *dn = cfctrl->serv.layer.dn;
+ char utility_name[UTILITY_NAME_LENGTH];
+ struct cfctrl_request_info *req;
+ struct cfpkt *pkt;
u32 tmp32;
u16 tmp16;
u8 tmp8;
- struct cfctrl_request_info *req;
int ret;
- char utility_name[16];
- struct cfpkt *pkt;
- struct cflayer *dn = cfctrl->serv.layer.dn;
if (!dn) {
pr_debug("not able to send linkup request\n");
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 7796414d47e5..2ae8cfa3df88 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -21,13 +21,6 @@ do { \
pr_warn(errmsg); \
} while (0)
-struct cfpktq {
- struct sk_buff_head head;
- atomic_t count;
- /* Lock protects count updates */
- spinlock_t lock;
-};
-
/*
* net/caif/ is generic and does not
* understand SKB, so we do this typecast
diff --git a/net/can/Kconfig b/net/can/Kconfig
index cb56be8e3862..af64a6f76458 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -56,18 +56,17 @@ config CAN_GW
source "net/can/j1939/Kconfig"
config CAN_ISOTP
- tristate "ISO 15765-2:2016 CAN transport protocol"
+ tristate "ISO 15765-2 CAN transport protocol"
help
CAN Transport Protocols offer support for segmented Point-to-Point
communication between CAN nodes via two defined CAN Identifiers.
+ This protocol driver implements segmented data transfers for CAN CC
+ (aka Classical CAN, CAN 2.0B) and CAN FD frame types which were
+ introduced with ISO 15765-2:2016.
As CAN frames can only transport a small amount of data bytes
- (max. 8 bytes for 'classic' CAN and max. 64 bytes for CAN FD) this
+ (max. 8 bytes for CAN CC and max. 64 bytes for CAN FD) this
segmentation is needed to transport longer Protocol Data Units (PDU)
as needed e.g. for vehicle diagnosis (UDS, ISO 14229) or IP-over-CAN
traffic.
- This protocol driver implements data transfers according to
- ISO 15765-2:2016 for 'classic' CAN and CAN FD frame types.
- If you want to perform automotive vehicle diagnostic services (UDS),
- say 'y'.
endif
diff --git a/net/can/isotp.c b/net/can/isotp.c
index 25bac0fafc83..16046931542a 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -72,7 +72,7 @@
#include <net/sock.h>
#include <net/net_namespace.h>
-MODULE_DESCRIPTION("PF_CAN isotp 15765-2:2016 protocol");
+MODULE_DESCRIPTION("PF_CAN ISO 15765-2 transport protocol");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Oliver Hartkopp <socketcan@hartkopp.net>");
MODULE_ALIAS("can-proto-6");
@@ -83,10 +83,11 @@ MODULE_ALIAS("can-proto-6");
(CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \
(CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG))
-/* ISO 15765-2:2016 supports more than 4095 byte per ISO PDU as the FF_DL can
- * take full 32 bit values (4 Gbyte). We would need some good concept to handle
- * this between user space and kernel space. For now set the static buffer to
- * something about 8 kbyte to be able to test this new functionality.
+/* Since ISO 15765-2:2016 the CAN isotp protocol supports more than 4095
+ * byte per ISO PDU as the FF_DL can take full 32 bit values (4 Gbyte).
+ * We would need some good concept to handle this between user space and
+ * kernel space. For now set the static buffer to something about 8 kbyte
+ * to be able to test this new functionality.
*/
#define DEFAULT_MAX_PDU_SIZE 8300
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index a6fb89fa6278..7e8a20f2fc42 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -30,10 +30,6 @@ MODULE_ALIAS("can-proto-" __stringify(CAN_J1939));
/* CAN_HDR: #bytes before can_frame data part */
#define J1939_CAN_HDR (offsetof(struct can_frame, data))
-/* CAN_FTR: #bytes beyond data part */
-#define J1939_CAN_FTR (sizeof(struct can_frame) - J1939_CAN_HDR - \
- sizeof(((struct can_frame *)0)->data))
-
/* lowest layer */
static void j1939_can_recv(struct sk_buff *iskb, void *data)
{
@@ -342,7 +338,7 @@ int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb)
memset(cf, 0, J1939_CAN_HDR);
/* make it a full can frame again */
- skb_put(skb, J1939_CAN_FTR + (8 - dlc));
+ skb_put_zero(skb, 8 - dlc);
canid = CAN_EFF_FLAG |
(skcb->priority << 26) |
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index fe3df23a2595..4be73de5033c 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1593,8 +1593,8 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv,
struct j1939_sk_buff_cb skcb = *j1939_skb_to_cb(skb);
struct j1939_session *session;
const u8 *dat;
+ int len, ret;
pgn_t pgn;
- int len;
netdev_dbg(priv->ndev, "%s\n", __func__);
@@ -1653,7 +1653,22 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv,
session->tskey = priv->rx_tskey++;
j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_RTS);
- WARN_ON_ONCE(j1939_session_activate(session));
+ ret = j1939_session_activate(session);
+ if (ret) {
+ /* Entering this scope indicates an issue with the J1939 bus.
+ * Possible scenarios include:
+ * - A time lapse occurred, and a new session was initiated
+ * due to another packet being sent correctly. This could
+ * have been caused by too long interrupt, debugger, or being
+ * out-scheduled by another task.
+ * - The bus is receiving numerous erroneous packets, either
+ * from a malfunctioning device or during a test scenario.
+ */
+ netdev_alert(priv->ndev, "%s: 0x%p: concurrent session with same addr (%02x %02x) is already active.\n",
+ __func__, session, skcb.addr.sa, skcb.addr.da);
+ j1939_session_put(session);
+ return NULL;
+ }
return session;
}
@@ -1681,6 +1696,8 @@ static int j1939_xtp_rx_rts_session_active(struct j1939_session *session,
j1939_session_timers_cancel(session);
j1939_session_cancel(session, J1939_XTP_ABORT_BUSY);
+ if (session->transmission)
+ j1939_session_deactivate_activate_next(session);
return -EBUSY;
}
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 1daf95e17d67..3a5bd1cd1e99 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -429,7 +429,10 @@ static int is_out(const struct crush_map *map,
/**
* crush_choose_firstn - choose numrep distinct items of given type
* @map: the crush_map
+ * @work: working space initialized by crush_init_workspace()
* @bucket: the bucket we are choose an item from
+ * @weight: weight vector (for map leaves)
+ * @weight_max: size of weight vector
* @x: crush input value
* @numrep: the number of items to choose
* @type: the type of item to choose
@@ -445,6 +448,7 @@ static int is_out(const struct crush_map *map,
* @vary_r: pass r to recursive calls
* @out2: second output vector for leaf items (if @recurse_to_leaf)
* @parent_r: r value passed from the parent
+ * @choose_args: weights and ids for each known bucket
*/
static int crush_choose_firstn(const struct crush_map *map,
struct crush_work *work,
@@ -636,9 +640,8 @@ reject:
}
-/**
+/*
* crush_choose_indep: alternative breadth-first positionally stable mapping
- *
*/
static void crush_choose_indep(const struct crush_map *map,
struct crush_work *work,
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index f263f7e91a21..ab66b599ac47 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1085,13 +1085,19 @@ static void delayed_work(struct work_struct *work)
struct ceph_mon_client *monc =
container_of(work, struct ceph_mon_client, delayed_work.work);
- dout("monc delayed_work\n");
mutex_lock(&monc->mutex);
+ dout("%s mon%d\n", __func__, monc->cur_mon);
+ if (monc->cur_mon < 0) {
+ goto out;
+ }
+
if (monc->hunting) {
dout("%s continuing hunt\n", __func__);
reopen_session(monc);
} else {
int is_auth = ceph_auth_is_authenticated(monc->auth);
+
+ dout("%s is_authed %d\n", __func__, is_auth);
if (ceph_con_keepalive_expired(&monc->con,
CEPH_MONC_PING_TIMEOUT)) {
dout("monc keepalive timeout\n");
@@ -1116,6 +1122,8 @@ static void delayed_work(struct work_struct *work)
}
}
__schedule_delayed(monc);
+
+out:
mutex_unlock(&monc->mutex);
}
@@ -1232,13 +1240,15 @@ EXPORT_SYMBOL(ceph_monc_init);
void ceph_monc_stop(struct ceph_mon_client *monc)
{
dout("stop\n");
- cancel_delayed_work_sync(&monc->delayed_work);
mutex_lock(&monc->mutex);
__close_session(monc);
+ monc->hunting = false;
monc->cur_mon = -1;
mutex_unlock(&monc->mutex);
+ cancel_delayed_work_sync(&monc->delayed_work);
+
/*
* flush msgr queue before we destroy ourselves to ensure that:
* - any work that references our embedded con is finished.
diff --git a/net/core/Makefile b/net/core/Makefile
index 6e6548011fae..62be9aef2528 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_NETPOLL) += netpoll.o
obj-$(CONFIG_FIB_RULES) += fib_rules.o
obj-$(CONFIG_TRACEPOINTS) += net-traces.o
obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
+obj-$(CONFIG_NET_IEEE8021Q_HELPERS) += ieee8021q_helpers.o
obj-$(CONFIG_NET_SELFTESTS) += selftests.o
obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
@@ -41,4 +42,4 @@ obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
obj-$(CONFIG_OF) += of_net.o
-obj-$(CONFIG_NET_TEST) += gso_test.o
+obj-$(CONFIG_NET_TEST) += net_test.o
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 6c4d90b24d46..bc01b3aa6b0f 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -496,27 +496,22 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
if (!bpf_capable())
return ERR_PTR(-EPERM);
- nla_for_each_nested(nla, nla_stgs, rem) {
- if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) {
- if (nla_len(nla) != sizeof(u32))
- return ERR_PTR(-EINVAL);
- nr_maps++;
- }
+ nla_for_each_nested_type(nla, SK_DIAG_BPF_STORAGE_REQ_MAP_FD,
+ nla_stgs, rem) {
+ if (nla_len(nla) != sizeof(u32))
+ return ERR_PTR(-EINVAL);
+ nr_maps++;
}
diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
if (!diag)
return ERR_PTR(-ENOMEM);
- nla_for_each_nested(nla, nla_stgs, rem) {
- struct bpf_map *map;
- int map_fd;
-
- if (nla_type(nla) != SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
- continue;
+ nla_for_each_nested_type(nla, SK_DIAG_BPF_STORAGE_REQ_MAP_FD,
+ nla_stgs, rem) {
+ int map_fd = nla_get_u32(nla);
+ struct bpf_map *map = bpf_map_get(map_fd);
- map_fd = nla_get_u32(nla);
- map = bpf_map_get(map_fd);
if (IS_ERR(map)) {
err = PTR_ERR(map);
goto err_free;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index a8b625abe242..a40f733b37d7 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -324,25 +324,6 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(skb_free_datagram);
-void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
-{
- bool slow;
-
- if (!skb_unref(skb)) {
- sk_peek_offset_bwd(sk, len);
- return;
- }
-
- slow = lock_sock_fast(sk);
- sk_peek_offset_bwd(sk, len);
- skb_orphan(skb);
- unlock_sock_fast(sk, slow);
-
- /* skb is now orphaned, can be freed outside of locked section */
- __kfree_skb(skb);
-}
-EXPORT_SYMBOL(__skb_free_datagram_locked);
-
int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
struct sk_buff *skb, unsigned int flags,
void (*destructor)(struct sock *sk,
@@ -435,15 +416,23 @@ static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
- struct page *page = skb_frag_page(frag);
- u8 *vaddr = kmap(page);
+ u32 p_off, p_len, copied;
+ struct page *p;
+ u8 *vaddr;
if (copy > len)
copy = len;
- n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
- vaddr + skb_frag_off(frag) + offset - start,
- copy, data, to);
- kunmap(page);
+
+ n = 0;
+ skb_frag_foreach_page(frag,
+ skb_frag_off(frag) + offset - start,
+ copy, p, p_off, p_len, copied) {
+ vaddr = kmap_local_page(p);
+ n += INDIRECT_CALL_1(cb, simple_copy_to_iter,
+ vaddr + p_off, p_len, data, to);
+ kunmap_local(vaddr);
+ }
+
offset += n;
if (n != copy)
goto short_copy;
@@ -629,16 +618,10 @@ fault:
}
EXPORT_SYMBOL(skb_copy_datagram_from_iter);
-int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
- struct sk_buff *skb, struct iov_iter *from,
- size_t length)
+int zerocopy_fill_skb_from_iter(struct sk_buff *skb,
+ struct iov_iter *from, size_t length)
{
- int frag;
-
- if (msg && msg->msg_ubuf && msg->sg_from_iter)
- return msg->sg_from_iter(sk, skb, from, length);
-
- frag = skb_shinfo(skb)->nr_frags;
+ int frag = skb_shinfo(skb)->nr_frags;
while (length && iov_iter_count(from)) {
struct page *head, *last_head = NULL;
@@ -646,7 +629,6 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
int refs, order, n = 0;
size_t start;
ssize_t copied;
- unsigned long truesize;
if (frag == MAX_SKB_FRAGS)
return -EMSGSIZE;
@@ -658,17 +640,9 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
length -= copied;
- truesize = PAGE_ALIGN(copied + start);
skb->data_len += copied;
skb->len += copied;
- skb->truesize += truesize;
- if (sk && sk->sk_type == SOCK_STREAM) {
- sk_wmem_queued_add(sk, truesize);
- if (!skb_zcopy_pure(skb))
- sk_mem_charge(sk, truesize);
- } else {
- refcount_add(truesize, &skb->sk->sk_wmem_alloc);
- }
+ skb->truesize += PAGE_ALIGN(copied + start);
head = compound_head(pages[n]);
order = compound_order(head);
@@ -711,6 +685,30 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
}
return 0;
}
+
+int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
+ struct sk_buff *skb, struct iov_iter *from,
+ size_t length)
+{
+ unsigned long orig_size = skb->truesize;
+ unsigned long truesize;
+ int ret;
+
+ if (msg && msg->msg_ubuf && msg->sg_from_iter)
+ ret = msg->sg_from_iter(skb, from, length);
+ else
+ ret = zerocopy_fill_skb_from_iter(skb, from, length);
+
+ truesize = skb->truesize - orig_size;
+ if (sk && sk->sk_type == SOCK_STREAM) {
+ sk_wmem_queued_add(sk, truesize);
+ if (!skb_zcopy_pure(skb))
+ sk_mem_charge(sk, truesize);
+ } else {
+ refcount_add(truesize, &skb->sk->sk_wmem_alloc);
+ }
+ return ret;
+}
EXPORT_SYMBOL(__zerocopy_sg_from_iter);
/**
diff --git a/net/core/dev.c b/net/core/dev.c
index 331848eca7d3..f66e61407883 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -77,7 +77,9 @@
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/sched/isolation.h>
#include <linux/sched/mm.h>
+#include <linux/smpboot.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/string.h>
@@ -197,37 +199,62 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}
-static inline void rps_lock_irqsave(struct softnet_data *sd,
- unsigned long *flags)
+#ifndef CONFIG_PREEMPT_RT
+
+static DEFINE_STATIC_KEY_FALSE(use_backlog_threads_key);
+
+static int __init setup_backlog_napi_threads(char *arg)
+{
+ static_branch_enable(&use_backlog_threads_key);
+ return 0;
+}
+early_param("thread_backlog_napi", setup_backlog_napi_threads);
+
+static bool use_backlog_threads(void)
+{
+ return static_branch_unlikely(&use_backlog_threads_key);
+}
+
+#else
+
+static bool use_backlog_threads(void)
+{
+ return true;
+}
+
+#endif
+
+static inline void backlog_lock_irq_save(struct softnet_data *sd,
+ unsigned long *flags)
{
- if (IS_ENABLED(CONFIG_RPS))
+ if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads())
spin_lock_irqsave(&sd->input_pkt_queue.lock, *flags);
- else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ else
local_irq_save(*flags);
}
-static inline void rps_lock_irq_disable(struct softnet_data *sd)
+static inline void backlog_lock_irq_disable(struct softnet_data *sd)
{
- if (IS_ENABLED(CONFIG_RPS))
+ if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads())
spin_lock_irq(&sd->input_pkt_queue.lock);
- else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ else
local_irq_disable();
}
-static inline void rps_unlock_irq_restore(struct softnet_data *sd,
- unsigned long *flags)
+static inline void backlog_unlock_irq_restore(struct softnet_data *sd,
+ unsigned long *flags)
{
- if (IS_ENABLED(CONFIG_RPS))
+ if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads())
spin_unlock_irqrestore(&sd->input_pkt_queue.lock, *flags);
- else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ else
local_irq_restore(*flags);
}
-static inline void rps_unlock_irq_enable(struct softnet_data *sd)
+static inline void backlog_unlock_irq_enable(struct softnet_data *sd)
{
- if (IS_ENABLED(CONFIG_RPS))
+ if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads())
spin_unlock_irq(&sd->input_pkt_queue.lock);
- else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ else
local_irq_enable();
}
@@ -422,7 +449,9 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
* queue in the local softnet handler.
*/
-DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
+DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data) = {
+ .process_queue_bh_lock = INIT_LOCAL_LOCK(process_queue_bh_lock),
+};
EXPORT_PER_CPU_SYMBOL(softnet_data);
/* Page_pool has a lockless array/stack to alloc/recycle pages.
@@ -912,6 +941,18 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id)
}
EXPORT_SYMBOL(dev_get_by_napi_id);
+static DEFINE_SEQLOCK(netdev_rename_lock);
+
+void netdev_copy_name(struct net_device *dev, char *name)
+{
+ unsigned int seq;
+
+ do {
+ seq = read_seqbegin(&netdev_rename_lock);
+ strscpy(name, dev->name, IFNAMSIZ);
+ } while (read_seqretry(&netdev_rename_lock, seq));
+}
+
/**
* netdev_get_name - get a netdevice name, knowing its ifindex.
* @net: network namespace
@@ -923,7 +964,6 @@ int netdev_get_name(struct net *net, char *name, int ifindex)
struct net_device *dev;
int ret;
- down_read(&devnet_rename_sem);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, ifindex);
@@ -932,12 +972,11 @@ int netdev_get_name(struct net *net, char *name, int ifindex)
goto out;
}
- strcpy(name, dev->name);
+ netdev_copy_name(dev, name);
ret = 0;
out:
rcu_read_unlock();
- up_read(&devnet_rename_sem);
return ret;
}
@@ -1189,7 +1228,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
memcpy(oldname, dev->name, IFNAMSIZ);
+ write_seqlock_bh(&netdev_rename_lock);
err = dev_get_valid_name(net, dev, newname);
+ write_sequnlock_bh(&netdev_rename_lock);
+
if (err < 0) {
up_write(&devnet_rename_sem);
return err;
@@ -1229,7 +1271,9 @@ rollback:
if (err >= 0) {
err = ret;
down_write(&devnet_rename_sem);
+ write_seqlock_bh(&netdev_rename_lock);
memcpy(dev->name, oldname, IFNAMSIZ);
+ write_sequnlock_bh(&netdev_rename_lock);
memcpy(oldname, newname, IFNAMSIZ);
WRITE_ONCE(dev->name_assign_type, old_assign_type);
old_assign_type = NET_NAME_RENAMED;
@@ -2057,6 +2101,11 @@ void net_dec_egress_queue(void)
EXPORT_SYMBOL_GPL(net_dec_egress_queue);
#endif
+#ifdef CONFIG_NET_CLS_ACT
+DEFINE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key);
+EXPORT_SYMBOL(tcf_bypass_check_needed_key);
+#endif
+
DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
EXPORT_SYMBOL(netstamp_needed_key);
#ifdef CONFIG_JUMP_LABEL
@@ -2113,7 +2162,7 @@ EXPORT_SYMBOL(net_disable_timestamp);
static inline void net_timestamp_set(struct sk_buff *skb)
{
skb->tstamp = 0;
- skb->mono_delivery_time = 0;
+ skb->tstamp_type = SKB_CLOCK_REALTIME;
if (static_branch_unlikely(&netstamp_needed_key))
skb->tstamp = ktime_get_real();
}
@@ -3893,6 +3942,7 @@ netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb)
return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm));
}
+#ifndef CONFIG_PREEMPT_RT
static bool netdev_xmit_txqueue_skipped(void)
{
return __this_cpu_read(softnet_data.xmit.skip_txqueue);
@@ -3903,6 +3953,19 @@ void netdev_xmit_skip_txqueue(bool skip)
__this_cpu_write(softnet_data.xmit.skip_txqueue, skip);
}
EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
+
+#else
+static bool netdev_xmit_txqueue_skipped(void)
+{
+ return current->net_xmit.skip_txqueue;
+}
+
+void netdev_xmit_skip_txqueue(bool skip)
+{
+ current->net_xmit.skip_txqueue = skip;
+}
+EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
+#endif
#endif /* CONFIG_NET_EGRESS */
#ifdef CONFIG_NET_XGRESS
@@ -3917,6 +3980,11 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb,
if (!miniq)
return ret;
+ if (static_branch_unlikely(&tcf_bypass_check_needed_key)) {
+ if (tcf_block_bypass_sw(miniq->block))
+ return ret;
+ }
+
tc_skb_cb(skb)->mru = 0;
tc_skb_cb(skb)->post_ct = false;
tcf_set_drop_reason(skb, *drop_reason);
@@ -3977,10 +4045,13 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
{
struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress);
enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_INGRESS;
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
int sch_ret;
if (!entry)
return skb;
+
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
@@ -4009,10 +4080,12 @@ ingress_verdict:
break;
}
*ret = NET_RX_SUCCESS;
+ bpf_net_ctx_clear(bpf_net_ctx);
return NULL;
case TC_ACT_SHOT:
kfree_skb_reason(skb, drop_reason);
*ret = NET_RX_DROP;
+ bpf_net_ctx_clear(bpf_net_ctx);
return NULL;
/* used by tc_run */
case TC_ACT_STOLEN:
@@ -4022,8 +4095,10 @@ ingress_verdict:
fallthrough;
case TC_ACT_CONSUMED:
*ret = NET_RX_SUCCESS;
+ bpf_net_ctx_clear(bpf_net_ctx);
return NULL;
}
+ bpf_net_ctx_clear(bpf_net_ctx);
return skb;
}
@@ -4033,11 +4108,14 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
{
struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress);
enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_EGRESS;
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
int sch_ret;
if (!entry)
return skb;
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
+
/* qdisc_skb_cb(skb)->pkt_len & tcx_set_ingress() was
* already set by the caller.
*/
@@ -4053,10 +4131,12 @@ egress_verdict:
/* No need to push/pop skb's mac_header here on egress! */
skb_do_redirect(skb);
*ret = NET_XMIT_SUCCESS;
+ bpf_net_ctx_clear(bpf_net_ctx);
return NULL;
case TC_ACT_SHOT:
kfree_skb_reason(skb, drop_reason);
*ret = NET_XMIT_DROP;
+ bpf_net_ctx_clear(bpf_net_ctx);
return NULL;
/* used by tc_run */
case TC_ACT_STOLEN:
@@ -4066,8 +4146,10 @@ egress_verdict:
fallthrough;
case TC_ACT_CONSUMED:
*ret = NET_XMIT_SUCCESS;
+ bpf_net_ctx_clear(bpf_net_ctx);
return NULL;
}
+ bpf_net_ctx_clear(bpf_net_ctx);
return skb;
}
@@ -4410,8 +4492,8 @@ EXPORT_SYMBOL(__dev_direct_xmit);
/*************************************************************************
* Receiver routines
*************************************************************************/
+static DEFINE_PER_CPU(struct task_struct *, backlog_napi);
-unsigned int sysctl_skb_defer_max __read_mostly = 64;
int weight_p __read_mostly = 64; /* old backlog weight */
int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */
@@ -4433,18 +4515,16 @@ static inline void ____napi_schedule(struct softnet_data *sd,
*/
thread = READ_ONCE(napi->thread);
if (thread) {
- /* Avoid doing set_bit() if the thread is in
- * INTERRUPTIBLE state, cause napi_thread_wait()
- * makes sure to proceed with napi polling
- * if the thread is explicitly woken from here.
- */
- if (READ_ONCE(thread->__state) != TASK_INTERRUPTIBLE)
- set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
+ if (use_backlog_threads() && thread == raw_cpu_read(backlog_napi))
+ goto use_local_napi;
+
+ set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
wake_up_process(thread);
return;
}
}
+use_local_napi:
list_add_tail(&napi->poll_list, &sd->poll_list);
WRITE_ONCE(napi->list_owner, smp_processor_id());
/* If not called from net_rx_action()
@@ -4466,12 +4546,13 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
struct rps_dev_flow *rflow, u16 next_cpu)
{
if (next_cpu < nr_cpu_ids) {
+ u32 head;
#ifdef CONFIG_RFS_ACCEL
struct netdev_rx_queue *rxqueue;
struct rps_dev_flow_table *flow_table;
struct rps_dev_flow *old_rflow;
- u32 flow_id;
u16 rxq_index;
+ u32 flow_id;
int rc;
/* Should we steer this flow to a different hardware queue? */
@@ -4493,16 +4574,16 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
goto out;
old_rflow = rflow;
rflow = &flow_table->flows[flow_id];
- rflow->filter = rc;
- if (old_rflow->filter == rflow->filter)
- old_rflow->filter = RPS_NO_FILTER;
+ WRITE_ONCE(rflow->filter, rc);
+ if (old_rflow->filter == rc)
+ WRITE_ONCE(old_rflow->filter, RPS_NO_FILTER);
out:
#endif
- rflow->last_qtail =
- per_cpu(softnet_data, next_cpu).input_queue_head;
+ head = READ_ONCE(per_cpu(softnet_data, next_cpu).input_queue_head);
+ rps_input_queue_tail_save(&rflow->last_qtail, head);
}
- rflow->cpu = next_cpu;
+ WRITE_ONCE(rflow->cpu, next_cpu);
return rflow;
}
@@ -4581,7 +4662,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
*/
if (unlikely(tcpu != next_cpu) &&
(tcpu >= nr_cpu_ids || !cpu_online(tcpu) ||
- ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
+ ((int)(READ_ONCE(per_cpu(softnet_data, tcpu).input_queue_head) -
rflow->last_qtail)) >= 0)) {
tcpu = next_cpu;
rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
@@ -4635,9 +4716,9 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
if (flow_table && flow_id <= flow_table->mask) {
rflow = &flow_table->flows[flow_id];
cpu = READ_ONCE(rflow->cpu);
- if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
- ((int)(per_cpu(softnet_data, cpu).input_queue_head -
- rflow->last_qtail) <
+ if (READ_ONCE(rflow->filter) == filter_id && cpu < nr_cpu_ids &&
+ ((int)(READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head) -
+ READ_ONCE(rflow->last_qtail)) <
(int)(10 * flow_table->mask)))
expire = false;
}
@@ -4684,6 +4765,11 @@ static void napi_schedule_rps(struct softnet_data *sd)
#ifdef CONFIG_RPS
if (sd != mysd) {
+ if (use_backlog_threads()) {
+ __napi_schedule_irqoff(&sd->backlog);
+ return;
+ }
+
sd->rps_ipi_next = mysd->rps_ipi_list;
mysd->rps_ipi_list = sd;
@@ -4698,6 +4784,23 @@ static void napi_schedule_rps(struct softnet_data *sd)
__napi_schedule_irqoff(&mysd->backlog);
}
+void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu)
+{
+ unsigned long flags;
+
+ if (use_backlog_threads()) {
+ backlog_lock_irq_save(sd, &flags);
+
+ if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state))
+ __napi_schedule_irqoff(&sd->backlog);
+
+ backlog_unlock_irq_restore(sd, &flags);
+
+ } else if (!cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) {
+ smp_call_function_single_async(cpu, &sd->defer_csd);
+ }
+}
+
#ifdef CONFIG_NET_FLOW_LIMIT
int netdev_flow_limit_table_len __read_mostly = (1 << 12);
#endif
@@ -4749,37 +4852,45 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
struct softnet_data *sd;
unsigned long flags;
unsigned int qlen;
+ int max_backlog;
+ u32 tail;
- reason = SKB_DROP_REASON_NOT_SPECIFIED;
+ reason = SKB_DROP_REASON_DEV_READY;
+ if (!netif_running(skb->dev))
+ goto bad_dev;
+
+ reason = SKB_DROP_REASON_CPU_BACKLOG;
sd = &per_cpu(softnet_data, cpu);
- rps_lock_irqsave(sd, &flags);
- if (!netif_running(skb->dev))
- goto drop;
+ qlen = skb_queue_len_lockless(&sd->input_pkt_queue);
+ max_backlog = READ_ONCE(net_hotdata.max_backlog);
+ if (unlikely(qlen > max_backlog))
+ goto cpu_backlog_drop;
+ backlog_lock_irq_save(sd, &flags);
qlen = skb_queue_len(&sd->input_pkt_queue);
- if (qlen <= READ_ONCE(net_hotdata.max_backlog) &&
- !skb_flow_limit(skb, qlen)) {
- if (qlen) {
-enqueue:
- __skb_queue_tail(&sd->input_pkt_queue, skb);
- input_queue_tail_incr_save(sd, qtail);
- rps_unlock_irq_restore(sd, &flags);
- return NET_RX_SUCCESS;
+ if (qlen <= max_backlog && !skb_flow_limit(skb, qlen)) {
+ if (!qlen) {
+ /* Schedule NAPI for backlog device. We can use
+ * non atomic operation as we own the queue lock.
+ */
+ if (!__test_and_set_bit(NAPI_STATE_SCHED,
+ &sd->backlog.state))
+ napi_schedule_rps(sd);
}
+ __skb_queue_tail(&sd->input_pkt_queue, skb);
+ tail = rps_input_queue_tail_incr(sd);
+ backlog_unlock_irq_restore(sd, &flags);
- /* Schedule NAPI for backlog device
- * We can use non atomic operation since we own the queue lock
- */
- if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state))
- napi_schedule_rps(sd);
- goto enqueue;
+ /* save the tail outside of the critical section */
+ rps_input_queue_tail_save(qtail, tail);
+ return NET_RX_SUCCESS;
}
- reason = SKB_DROP_REASON_CPU_BACKLOG;
-drop:
- sd->dropped++;
- rps_unlock_irq_restore(sd, &flags);
+ backlog_unlock_irq_restore(sd, &flags);
+cpu_backlog_drop:
+ atomic_inc(&sd->dropped);
+bad_dev:
dev_core_stats_rx_dropped_inc(skb->dev);
kfree_skb_reason(skb, reason);
return NET_RX_DROP;
@@ -5015,11 +5126,14 @@ static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb)
{
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
+
if (xdp_prog) {
struct xdp_buff xdp;
u32 act;
int err;
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
act = netif_receive_generic_xdp(pskb, &xdp, xdp_prog);
if (act != XDP_PASS) {
switch (act) {
@@ -5033,11 +5147,14 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb)
generic_xdp_tx(*pskb, xdp_prog);
break;
}
+ bpf_net_ctx_clear(bpf_net_ctx);
return XDP_DROP;
}
+ bpf_net_ctx_clear(bpf_net_ctx);
}
return XDP_PASS;
out_redir:
+ bpf_net_ctx_clear(bpf_net_ctx);
kfree_skb_reason(*pskb, SKB_DROP_REASON_XDP);
return XDP_DROP;
}
@@ -5153,7 +5270,7 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
trace_consume_skb(skb, net_tx_action);
else
trace_kfree_skb(skb, net_tx_action,
- get_kfree_skb_cb(skb)->reason);
+ get_kfree_skb_cb(skb)->reason, NULL);
if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
__kfree_skb(skb);
@@ -5844,23 +5961,25 @@ static void flush_backlog(struct work_struct *work)
local_bh_disable();
sd = this_cpu_ptr(&softnet_data);
- rps_lock_irq_disable(sd);
+ backlog_lock_irq_disable(sd);
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
if (skb->dev->reg_state == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->input_pkt_queue);
dev_kfree_skb_irq(skb);
- input_queue_head_incr(sd);
+ rps_input_queue_head_incr(sd);
}
}
- rps_unlock_irq_enable(sd);
+ backlog_unlock_irq_enable(sd);
+ local_lock_nested_bh(&softnet_data.process_queue_bh_lock);
skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
if (skb->dev->reg_state == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->process_queue);
kfree_skb(skb);
- input_queue_head_incr(sd);
+ rps_input_queue_head_incr(sd);
}
}
+ local_unlock_nested_bh(&softnet_data.process_queue_bh_lock);
local_bh_enable();
}
@@ -5870,14 +5989,14 @@ static bool flush_required(int cpu)
struct softnet_data *sd = &per_cpu(softnet_data, cpu);
bool do_flush;
- rps_lock_irq_disable(sd);
+ backlog_lock_irq_disable(sd);
/* as insertion into process_queue happens with the rps lock held,
* process_queue access may race only with dequeue
*/
do_flush = !skb_queue_empty(&sd->input_pkt_queue) ||
!skb_queue_empty_lockless(&sd->process_queue);
- rps_unlock_irq_enable(sd);
+ backlog_unlock_irq_enable(sd);
return do_flush;
#endif
@@ -5943,7 +6062,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
#ifdef CONFIG_RPS
struct softnet_data *remsd = sd->rps_ipi_list;
- if (remsd) {
+ if (!use_backlog_threads() && remsd) {
sd->rps_ipi_list = NULL;
local_irq_enable();
@@ -5958,7 +6077,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
- return sd->rps_ipi_list != NULL;
+ return !use_backlog_threads() && sd->rps_ipi_list;
#else
return false;
#endif
@@ -5982,17 +6101,22 @@ static int process_backlog(struct napi_struct *napi, int quota)
while (again) {
struct sk_buff *skb;
+ local_lock_nested_bh(&softnet_data.process_queue_bh_lock);
while ((skb = __skb_dequeue(&sd->process_queue))) {
+ local_unlock_nested_bh(&softnet_data.process_queue_bh_lock);
rcu_read_lock();
__netif_receive_skb(skb);
rcu_read_unlock();
- input_queue_head_incr(sd);
- if (++work >= quota)
+ if (++work >= quota) {
+ rps_input_queue_head_add(sd, work);
return work;
+ }
+ local_lock_nested_bh(&softnet_data.process_queue_bh_lock);
}
+ local_unlock_nested_bh(&softnet_data.process_queue_bh_lock);
- rps_lock_irq_disable(sd);
+ backlog_lock_irq_disable(sd);
if (skb_queue_empty(&sd->input_pkt_queue)) {
/*
* Inline a custom version of __napi_complete().
@@ -6002,15 +6126,19 @@ static int process_backlog(struct napi_struct *napi, int quota)
* We can use a plain write instead of clear_bit(),
* and we dont need an smp_mb() memory barrier.
*/
- napi->state = 0;
+ napi->state &= NAPIF_STATE_THREADED;
again = false;
} else {
+ local_lock_nested_bh(&softnet_data.process_queue_bh_lock);
skb_queue_splice_tail_init(&sd->input_pkt_queue,
&sd->process_queue);
+ local_unlock_nested_bh(&softnet_data.process_queue_bh_lock);
}
- rps_unlock_irq_enable(sd);
+ backlog_unlock_irq_enable(sd);
}
+ if (work)
+ rps_input_queue_head_add(sd, work);
return work;
}
@@ -6217,6 +6345,7 @@ enum {
static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
unsigned flags, u16 budget)
{
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
bool skip_schedule = false;
unsigned long timeout;
int rc;
@@ -6234,6 +6363,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state);
local_bh_disable();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
if (flags & NAPI_F_PREFER_BUSY_POLL) {
napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs);
@@ -6256,6 +6386,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
netpoll_poll_unlock(have_poll_lock);
if (rc == budget)
__busy_poll_stop(napi, skip_schedule);
+ bpf_net_ctx_clear(bpf_net_ctx);
local_bh_enable();
}
@@ -6265,6 +6396,7 @@ static void __napi_busy_loop(unsigned int napi_id,
{
unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
int (*napi_poll)(struct napi_struct *napi, int budget);
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
void *have_poll_lock = NULL;
struct napi_struct *napi;
@@ -6283,6 +6415,7 @@ restart:
int work = 0;
local_bh_disable();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
if (!napi_poll) {
unsigned long val = READ_ONCE(napi->state);
@@ -6313,6 +6446,7 @@ count:
__NET_ADD_STATS(dev_net(napi->dev),
LINUX_MIB_BUSYPOLLRXPACKETS, work);
skb_defer_free_flush(this_cpu_ptr(&softnet_data));
+ bpf_net_ctx_clear(bpf_net_ctx);
local_bh_enable();
if (!loop_end || loop_end(loop_end_arg, start_time))
@@ -6447,7 +6581,7 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
}
}
- dev->threaded = threaded;
+ WRITE_ONCE(dev->threaded, threaded);
/* Make sure kthread is created before THREADED bit
* is set.
@@ -6538,7 +6672,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
* threaded mode will not be enabled in napi_enable().
*/
if (dev->threaded && napi_kthread_create(napi))
- dev->threaded = 0;
+ dev->threaded = false;
netif_napi_set_irq(napi, -1);
}
EXPORT_SYMBOL(netif_napi_add_weight);
@@ -6716,8 +6850,6 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
static int napi_thread_wait(struct napi_struct *napi)
{
- bool woken = false;
-
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
@@ -6726,15 +6858,13 @@ static int napi_thread_wait(struct napi_struct *napi)
* Testing SCHED bit is not enough because SCHED bit might be
* set by some other busy poll thread or by napi_disable().
*/
- if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) {
+ if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state)) {
WARN_ON(!list_empty(&napi->poll_list));
__set_current_state(TASK_RUNNING);
return 0;
}
schedule();
- /* woken being true indicates this thread owns this napi. */
- woken = true;
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
@@ -6742,43 +6872,52 @@ static int napi_thread_wait(struct napi_struct *napi)
return -1;
}
-static int napi_threaded_poll(void *data)
+static void napi_threaded_poll_loop(struct napi_struct *napi)
{
- struct napi_struct *napi = data;
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
struct softnet_data *sd;
- void *have;
+ unsigned long last_qs = jiffies;
- while (!napi_thread_wait(napi)) {
- unsigned long last_qs = jiffies;
+ for (;;) {
+ bool repoll = false;
+ void *have;
- for (;;) {
- bool repoll = false;
+ local_bh_disable();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
- local_bh_disable();
- sd = this_cpu_ptr(&softnet_data);
- sd->in_napi_threaded_poll = true;
+ sd = this_cpu_ptr(&softnet_data);
+ sd->in_napi_threaded_poll = true;
- have = netpoll_poll_lock(napi);
- __napi_poll(napi, &repoll);
- netpoll_poll_unlock(have);
+ have = netpoll_poll_lock(napi);
+ __napi_poll(napi, &repoll);
+ netpoll_poll_unlock(have);
- sd->in_napi_threaded_poll = false;
- barrier();
+ sd->in_napi_threaded_poll = false;
+ barrier();
- if (sd_has_rps_ipi_waiting(sd)) {
- local_irq_disable();
- net_rps_action_and_irq_enable(sd);
- }
- skb_defer_free_flush(sd);
- local_bh_enable();
+ if (sd_has_rps_ipi_waiting(sd)) {
+ local_irq_disable();
+ net_rps_action_and_irq_enable(sd);
+ }
+ skb_defer_free_flush(sd);
+ bpf_net_ctx_clear(bpf_net_ctx);
+ local_bh_enable();
- if (!repoll)
- break;
+ if (!repoll)
+ break;
- rcu_softirq_qs_periodic(last_qs);
- cond_resched();
- }
+ rcu_softirq_qs_periodic(last_qs);
+ cond_resched();
}
+}
+
+static int napi_threaded_poll(void *data)
+{
+ struct napi_struct *napi = data;
+
+ while (!napi_thread_wait(napi))
+ napi_threaded_poll_loop(napi);
+
return 0;
}
@@ -6787,10 +6926,12 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
unsigned long time_limit = jiffies +
usecs_to_jiffies(READ_ONCE(net_hotdata.netdev_budget_usecs));
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
int budget = READ_ONCE(net_hotdata.netdev_budget);
LIST_HEAD(list);
LIST_HEAD(repoll);
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
start:
sd->in_net_rx_action = true;
local_irq_disable();
@@ -6843,7 +6984,8 @@ start:
sd->in_net_rx_action = false;
net_rps_action_and_irq_enable(sd);
-end:;
+end:
+ bpf_net_ctx_clear(bpf_net_ctx);
}
struct netdev_adjacent {
@@ -8459,27 +8601,29 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)
static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
{
unsigned int old_flags = dev->flags;
+ unsigned int promiscuity, flags;
kuid_t uid;
kgid_t gid;
ASSERT_RTNL();
- dev->flags |= IFF_PROMISC;
- dev->promiscuity += inc;
- if (dev->promiscuity == 0) {
+ promiscuity = dev->promiscuity + inc;
+ if (promiscuity == 0) {
/*
* Avoid overflow.
* If inc causes overflow, untouch promisc and return error.
*/
- if (inc < 0)
- dev->flags &= ~IFF_PROMISC;
- else {
- dev->promiscuity -= inc;
+ if (unlikely(inc > 0)) {
netdev_warn(dev, "promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n");
return -EOVERFLOW;
}
+ flags = old_flags & ~IFF_PROMISC;
+ } else {
+ flags = old_flags | IFF_PROMISC;
}
- if (dev->flags != old_flags) {
+ WRITE_ONCE(dev->promiscuity, promiscuity);
+ if (flags != old_flags) {
+ WRITE_ONCE(dev->flags, flags);
netdev_info(dev, "%s promiscuous mode\n",
dev->flags & IFF_PROMISC ? "entered" : "left");
if (audit_enabled) {
@@ -8530,25 +8674,27 @@ EXPORT_SYMBOL(dev_set_promiscuity);
static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
{
unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
+ unsigned int allmulti, flags;
ASSERT_RTNL();
- dev->flags |= IFF_ALLMULTI;
- dev->allmulti += inc;
- if (dev->allmulti == 0) {
+ allmulti = dev->allmulti + inc;
+ if (allmulti == 0) {
/*
* Avoid overflow.
* If inc causes overflow, untouch allmulti and return error.
*/
- if (inc < 0)
- dev->flags &= ~IFF_ALLMULTI;
- else {
- dev->allmulti -= inc;
+ if (unlikely(inc > 0)) {
netdev_warn(dev, "allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n");
return -EOVERFLOW;
}
+ flags = old_flags & ~IFF_ALLMULTI;
+ } else {
+ flags = old_flags | IFF_ALLMULTI;
}
- if (dev->flags ^ old_flags) {
+ WRITE_ONCE(dev->allmulti, allmulti);
+ if (flags != old_flags) {
+ WRITE_ONCE(dev->flags, flags);
netdev_info(dev, "%s allmulticast mode\n",
dev->flags & IFF_ALLMULTI ? "entered" : "left");
dev_change_rx_flags(dev, IFF_ALLMULTI);
@@ -8874,7 +9020,7 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)
return -ERANGE;
if (new_len != orig_len) {
- dev->tx_queue_len = new_len;
+ WRITE_ONCE(dev->tx_queue_len, new_len);
res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
res = notifier_to_errno(res);
if (res)
@@ -8888,7 +9034,7 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)
err_rollback:
netdev_err(dev, "refused to change device tx_queue_len\n");
- dev->tx_queue_len = orig_len;
+ WRITE_ONCE(dev->tx_queue_len, orig_len);
return res;
}
@@ -9134,7 +9280,7 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
netif_carrier_off(dev);
else
netif_carrier_on(dev);
- dev->proto_down = proto_down;
+ WRITE_ONCE(dev->proto_down, proto_down);
return 0;
}
@@ -9148,18 +9294,21 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
u32 value)
{
+ u32 proto_down_reason;
int b;
if (!mask) {
- dev->proto_down_reason = value;
+ proto_down_reason = value;
} else {
+ proto_down_reason = dev->proto_down_reason;
for_each_set_bit(b, &mask, 32) {
if (value & (1 << b))
- dev->proto_down_reason |= BIT(b);
+ proto_down_reason |= BIT(b);
else
- dev->proto_down_reason &= ~BIT(b);
+ proto_down_reason &= ~BIT(b);
}
}
+ WRITE_ONCE(dev->proto_down_reason, proto_down_reason);
}
struct bpf_xdp_link {
@@ -9763,6 +9912,15 @@ static void netdev_sync_lower_features(struct net_device *upper,
}
}
+static bool netdev_has_ip_or_hw_csum(netdev_features_t features)
+{
+ netdev_features_t ip_csum_mask = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+ bool ip_csum = (features & ip_csum_mask) == ip_csum_mask;
+ bool hw_csum = features & NETIF_F_HW_CSUM;
+
+ return ip_csum || hw_csum;
+}
+
static netdev_features_t netdev_fix_features(struct net_device *dev,
netdev_features_t features)
{
@@ -9844,15 +10002,9 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
features &= ~NETIF_F_LRO;
}
- if (features & NETIF_F_HW_TLS_TX) {
- bool ip_csum = (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) ==
- (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
- bool hw_csum = features & NETIF_F_HW_CSUM;
-
- if (!ip_csum && !hw_csum) {
- netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n");
- features &= ~NETIF_F_HW_TLS_TX;
- }
+ if ((features & NETIF_F_HW_TLS_TX) && !netdev_has_ip_or_hw_csum(features)) {
+ netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n");
+ features &= ~NETIF_F_HW_TLS_TX;
}
if ((features & NETIF_F_HW_TLS_RX) && !(features & NETIF_F_RXCSUM)) {
@@ -9860,6 +10012,11 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
features &= ~NETIF_F_HW_TLS_RX;
}
+ if ((features & NETIF_F_GSO_UDP_L4) && !netdev_has_ip_or_hw_csum(features)) {
+ netdev_dbg(dev, "Dropping USO feature since no CSUM feature.\n");
+ features &= ~NETIF_F_GSO_UDP_L4;
+ }
+
return features;
}
@@ -10193,6 +10350,10 @@ int register_netdevice(struct net_device *dev)
if (ret)
return ret;
+ /* rss ctx ID 0 is reserved for the default context, start from 1 */
+ xa_init_flags(&dev->ethtool->rss_ctx, XA_FLAGS_ALLOC1);
+ mutex_init(&dev->ethtool->rss_lock);
+
spin_lock_init(&dev->addr_list_lock);
netdev_set_addr_lockdep_class(dev);
@@ -10349,25 +10510,12 @@ err_free_name:
}
EXPORT_SYMBOL(register_netdevice);
-/**
- * init_dummy_netdev - init a dummy network device for NAPI
- * @dev: device to init
- *
- * This takes a network device structure and initialize the minimum
- * amount of fields so it can be used to schedule NAPI polls without
- * registering a full blown interface. This is to be used by drivers
- * that need to tie several hardware interfaces to a single NAPI
- * poll scheduler due to HW limitations.
+/* Initialize the core of a dummy net device.
+ * This is useful if you are calling this function after alloc_netdev(),
+ * since it does not memset the net_device fields.
*/
-void init_dummy_netdev(struct net_device *dev)
+static void init_dummy_netdev_core(struct net_device *dev)
{
- /* Clear everything. Note we don't initialize spinlocks
- * are they aren't supposed to be taken by any of the
- * NAPI code and this dummy netdev is supposed to be
- * only ever used for NAPI polls
- */
- memset(dev, 0, sizeof(struct net_device));
-
/* make sure we BUG if trying to hit standard
* register/unregister code path
*/
@@ -10388,8 +10536,28 @@ void init_dummy_netdev(struct net_device *dev)
* its refcount.
*/
}
-EXPORT_SYMBOL_GPL(init_dummy_netdev);
+/**
+ * init_dummy_netdev - init a dummy network device for NAPI
+ * @dev: device to init
+ *
+ * This takes a network device structure and initializes the minimum
+ * amount of fields so it can be used to schedule NAPI polls without
+ * registering a full blown interface. This is to be used by drivers
+ * that need to tie several hardware interfaces to a single NAPI
+ * poll scheduler due to HW limitations.
+ */
+void init_dummy_netdev(struct net_device *dev)
+{
+ /* Clear everything. Note we don't initialize spinlocks
+ * as they aren't supposed to be taken by any of the
+ * NAPI code and this dummy netdev is supposed to be
+ * only ever used for NAPI polls
+ */
+ memset(dev, 0, sizeof(struct net_device));
+ init_dummy_netdev_core(dev);
+}
+EXPORT_SYMBOL_GPL(init_dummy_netdev);
/**
* register_netdev - register a network device
@@ -10488,8 +10656,9 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
rebroadcast_time = jiffies;
}
+ rcu_barrier();
+
if (!wait) {
- rcu_barrier();
wait = WAIT_REFS_MIN_MSECS;
} else {
msleep(wait);
@@ -10603,6 +10772,54 @@ void netdev_run_todo(void)
wake_up(&netdev_unregistering_wq);
}
+/* Collate per-cpu network dstats statistics
+ *
+ * Read per-cpu network statistics from dev->dstats and populate the related
+ * fields in @s.
+ */
+static void dev_fetch_dstats(struct rtnl_link_stats64 *s,
+ const struct pcpu_dstats __percpu *dstats)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ u64 rx_packets, rx_bytes, rx_drops;
+ u64 tx_packets, tx_bytes, tx_drops;
+ const struct pcpu_dstats *stats;
+ unsigned int start;
+
+ stats = per_cpu_ptr(dstats, cpu);
+ do {
+ start = u64_stats_fetch_begin(&stats->syncp);
+ rx_packets = u64_stats_read(&stats->rx_packets);
+ rx_bytes = u64_stats_read(&stats->rx_bytes);
+ rx_drops = u64_stats_read(&stats->rx_drops);
+ tx_packets = u64_stats_read(&stats->tx_packets);
+ tx_bytes = u64_stats_read(&stats->tx_bytes);
+ tx_drops = u64_stats_read(&stats->tx_drops);
+ } while (u64_stats_fetch_retry(&stats->syncp, start));
+
+ s->rx_packets += rx_packets;
+ s->rx_bytes += rx_bytes;
+ s->rx_dropped += rx_drops;
+ s->tx_packets += tx_packets;
+ s->tx_bytes += tx_bytes;
+ s->tx_dropped += tx_drops;
+ }
+}
+
+/* ndo_get_stats64 implementation for dtstats-based accounting.
+ *
+ * Populate @s from dev->stats and dev->dstats. This is used internally by the
+ * core for NETDEV_PCPU_STAT_DSTAT-type stats collection.
+ */
+static void dev_get_dstats64(const struct net_device *dev,
+ struct rtnl_link_stats64 *s)
+{
+ netdev_stats_to_stats64(s, &dev->stats);
+ dev_fetch_dstats(s, dev->dstats);
+}
+
/* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has
* all the same fields in the same order as net_device_stats, with only
* the type differing, but rtnl_link_stats64 may have additional fields
@@ -10679,6 +10896,8 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
} else if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_TSTATS) {
dev_get_tstats64(dev, storage);
+ } else if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_DSTATS) {
+ dev_get_dstats64(dev, storage);
} else {
netdev_stats_to_stats64(storage, &dev->stats);
}
@@ -10796,13 +11015,6 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
}
EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on);
-void netdev_freemem(struct net_device *dev)
-{
- char *addr = (char *)dev - dev->padded;
-
- kvfree(addr);
-}
-
/**
* alloc_netdev_mqs - allocate network device
* @sizeof_priv: size of private data to allocate space for
@@ -10822,8 +11034,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
unsigned int txqs, unsigned int rxqs)
{
struct net_device *dev;
- unsigned int alloc_size;
- struct net_device *p;
BUG_ON(strlen(name) >= sizeof(dev->name));
@@ -10837,21 +11047,12 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
return NULL;
}
- alloc_size = sizeof(struct net_device);
- if (sizeof_priv) {
- /* ensure 32-byte alignment of private area */
- alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
- alloc_size += sizeof_priv;
- }
- /* ensure 32-byte alignment of whole construct */
- alloc_size += NETDEV_ALIGN - 1;
-
- p = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
- if (!p)
+ dev = kvzalloc(struct_size(dev, priv, sizeof_priv),
+ GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
+ if (!dev)
return NULL;
- dev = PTR_ALIGN(p, NETDEV_ALIGN);
- dev->padded = (char *)dev - (char *)p;
+ dev->priv_len = sizeof_priv;
ref_tracker_dir_init(&dev->refcnt_tracker, 128, name);
#ifdef CONFIG_PCPU_DEV_REFCNT
@@ -10915,6 +11116,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->real_num_rx_queues = rxqs;
if (netif_alloc_rx_queues(dev))
goto free_all;
+ dev->ethtool = kzalloc(sizeof(*dev->ethtool), GFP_KERNEL_ACCOUNT);
+ if (!dev->ethtool)
+ goto free_all;
strcpy(dev->name, name);
dev->name_assign_type = name_assign_type;
@@ -10935,7 +11139,7 @@ free_pcpu:
free_percpu(dev->pcpu_refcnt);
free_dev:
#endif
- netdev_freemem(dev);
+ kvfree(dev);
return NULL;
}
EXPORT_SYMBOL(alloc_netdev_mqs);
@@ -10965,6 +11169,7 @@ void free_netdev(struct net_device *dev)
return;
}
+ kfree(dev->ethtool);
netif_free_tx_queues(dev);
netif_free_rx_queues(dev);
@@ -10987,8 +11192,9 @@ void free_netdev(struct net_device *dev)
dev->xdp_bulkq = NULL;
/* Compatibility with error handling in drivers */
- if (dev->reg_state == NETREG_UNINITIALIZED) {
- netdev_freemem(dev);
+ if (dev->reg_state == NETREG_UNINITIALIZED ||
+ dev->reg_state == NETREG_DUMMY) {
+ kvfree(dev);
return;
}
@@ -11001,6 +11207,19 @@ void free_netdev(struct net_device *dev)
EXPORT_SYMBOL(free_netdev);
/**
+ * alloc_netdev_dummy - Allocate and initialize a dummy net device.
+ * @sizeof_priv: size of private data to allocate space for
+ *
+ * Return: the allocated net_device on success, NULL otherwise
+ */
+struct net_device *alloc_netdev_dummy(int sizeof_priv)
+{
+ return alloc_netdev(sizeof_priv, "dummy#", NET_NAME_UNKNOWN,
+ init_dummy_netdev_core);
+}
+EXPORT_SYMBOL_GPL(alloc_netdev_dummy);
+
+/**
* synchronize_net - Synchronize with packet receive processing
*
* Wait for packets currently being received to be done.
@@ -11016,6 +11235,34 @@ void synchronize_net(void)
}
EXPORT_SYMBOL(synchronize_net);
+static void netdev_rss_contexts_free(struct net_device *dev)
+{
+ struct ethtool_rxfh_context *ctx;
+ unsigned long context;
+
+ mutex_lock(&dev->ethtool->rss_lock);
+ xa_for_each(&dev->ethtool->rss_ctx, context, ctx) {
+ struct ethtool_rxfh_param rxfh;
+
+ rxfh.indir = ethtool_rxfh_context_indir(ctx);
+ rxfh.key = ethtool_rxfh_context_key(ctx);
+ rxfh.hfunc = ctx->hfunc;
+ rxfh.input_xfrm = ctx->input_xfrm;
+ rxfh.rss_context = context;
+ rxfh.rss_delete = true;
+
+ xa_erase(&dev->ethtool->rss_ctx, context);
+ if (dev->ethtool_ops->create_rxfh_context)
+ dev->ethtool_ops->remove_rxfh_context(dev, ctx,
+ context, NULL);
+ else
+ dev->ethtool_ops->set_rxfh(dev, &rxfh, NULL);
+ kfree(ctx);
+ }
+ xa_destroy(&dev->ethtool->rss_ctx);
+ mutex_unlock(&dev->ethtool->rss_lock);
+}
+
/**
* unregister_netdevice_queue - remove device from the kernel
* @dev: device
@@ -11119,11 +11366,15 @@ void unregister_netdevice_many_notify(struct list_head *head,
netdev_name_node_alt_flush(dev);
netdev_name_node_free(dev->name_node);
+ netdev_rss_contexts_free(dev);
+
call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
+ mutex_destroy(&dev->ethtool->rss_lock);
+
if (skb)
rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh);
@@ -11303,8 +11554,12 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
dev_net_set(dev, net);
dev->ifindex = new_ifindex;
- if (new_name[0]) /* Rename the netdev to prepared name */
+ if (new_name[0]) {
+ /* Rename the netdev to prepared name */
+ write_seqlock_bh(&netdev_rename_lock);
strscpy(dev->name, new_name, IFNAMSIZ);
+ write_sequnlock_bh(&netdev_rename_lock);
+ }
/* Fixup kobjects */
dev_set_uevent_suppress(&dev->dev, 1);
@@ -11379,7 +11634,7 @@ static int dev_cpu_dead(unsigned int oldcpu)
list_del_init(&napi->poll_list);
if (napi->poll == process_backlog)
- napi->state = 0;
+ napi->state &= NAPIF_STATE_THREADED;
else
____napi_schedule(sd, napi);
}
@@ -11387,21 +11642,23 @@ static int dev_cpu_dead(unsigned int oldcpu)
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_enable();
+ if (!use_backlog_threads()) {
#ifdef CONFIG_RPS
- remsd = oldsd->rps_ipi_list;
- oldsd->rps_ipi_list = NULL;
+ remsd = oldsd->rps_ipi_list;
+ oldsd->rps_ipi_list = NULL;
#endif
- /* send out pending IPI's on offline CPU */
- net_rps_send_ipi(remsd);
+ /* send out pending IPI's on offline CPU */
+ net_rps_send_ipi(remsd);
+ }
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->process_queue))) {
netif_rx(skb);
- input_queue_head_incr(oldsd);
+ rps_input_queue_head_incr(oldsd);
}
while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
netif_rx(skb);
- input_queue_head_incr(oldsd);
+ rps_input_queue_head_incr(oldsd);
}
return 0;
@@ -11718,7 +11975,7 @@ static int net_page_pool_create(int cpuid)
struct page_pool_params page_pool_params = {
.pool_size = SYSTEM_PERCPU_PAGE_POOL_SIZE,
.flags = PP_FLAG_SYSTEM_POOL,
- .nid = NUMA_NO_NODE,
+ .nid = cpu_to_mem(cpuid),
};
struct page_pool *pp_ptr;
@@ -11731,6 +11988,38 @@ static int net_page_pool_create(int cpuid)
return 0;
}
+static int backlog_napi_should_run(unsigned int cpu)
+{
+ struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
+ struct napi_struct *napi = &sd->backlog;
+
+ return test_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
+}
+
+static void run_backlog_napi(unsigned int cpu)
+{
+ struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
+
+ napi_threaded_poll_loop(&sd->backlog);
+}
+
+static void backlog_napi_setup(unsigned int cpu)
+{
+ struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
+ struct napi_struct *napi = &sd->backlog;
+
+ napi->thread = this_cpu_read(backlog_napi);
+ set_bit(NAPI_STATE_THREADED, &napi->state);
+}
+
+static struct smp_hotplug_thread backlog_threads = {
+ .store = &backlog_napi,
+ .thread_should_run = backlog_napi_should_run,
+ .thread_fn = run_backlog_napi,
+ .thread_comm = "backlog_napi/%u",
+ .setup = backlog_napi_setup,
+};
+
/*
* This is called single threaded during boot, so no need
* to take the rtnl semaphore.
@@ -11782,10 +12071,13 @@ static int __init net_dev_init(void)
init_gro_hash(&sd->backlog);
sd->backlog.poll = process_backlog;
sd->backlog.weight = weight_p;
+ INIT_LIST_HEAD(&sd->backlog.poll_list);
if (net_page_pool_create(i))
goto out;
}
+ if (use_backlog_threads())
+ smpboot_register_percpu_thread(&backlog_threads);
dev_boot_phase = 0;
@@ -11811,6 +12103,10 @@ static int __init net_dev_init(void)
NULL, dev_cpu_dead);
WARN_ON(rc < 0);
rc = 0;
+
+ /* avoid static key IPIs to isolated CPUs */
+ if (housekeeping_enabled(HK_TYPE_MISC))
+ net_enable_timestamp();
out:
if (rc < 0) {
for_each_possible_cpu(i) {
diff --git a/net/core/dev.h b/net/core/dev.h
index 2bcaf8eee50c..5654325c5b71 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -4,11 +4,9 @@
#include <linux/types.h>
#include <linux/rwsem.h>
+#include <linux/netdevice.h>
struct net;
-struct net_device;
-struct netdev_bpf;
-struct netdev_phys_item_id;
struct netlink_ext_ack;
struct cpumask;
@@ -38,7 +36,6 @@ int dev_addr_init(struct net_device *dev);
void dev_addr_check(struct net_device *dev);
/* sysctls not referred to from outside net/core/ */
-extern unsigned int sysctl_skb_defer_max;
extern int netdev_unregister_timeout_secs;
extern int weight_p;
extern int dev_weight_rx_bias;
@@ -150,4 +147,45 @@ static inline void xdp_do_check_flushed(struct napi_struct *napi) { }
#endif
struct napi_struct *napi_by_id(unsigned int napi_id);
+void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);
+
+#define XMIT_RECURSION_LIMIT 8
+
+#ifndef CONFIG_PREEMPT_RT
+static inline bool dev_xmit_recursion(void)
+{
+ return unlikely(__this_cpu_read(softnet_data.xmit.recursion) >
+ XMIT_RECURSION_LIMIT);
+}
+
+static inline void dev_xmit_recursion_inc(void)
+{
+ __this_cpu_inc(softnet_data.xmit.recursion);
+}
+
+static inline void dev_xmit_recursion_dec(void)
+{
+ __this_cpu_dec(softnet_data.xmit.recursion);
+}
+#else
+static inline bool dev_xmit_recursion(void)
+{
+ return unlikely(current->net_xmit.recursion > XMIT_RECURSION_LIMIT);
+}
+
+static inline void dev_xmit_recursion_inc(void)
+{
+ current->net_xmit.recursion++;
+}
+
+static inline void dev_xmit_recursion_dec(void)
+{
+ current->net_xmit.recursion--;
+}
+#endif
+
+int dev_set_hwtstamp_phylib(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack);
+
#endif
diff --git a/net/core/dev_addr_lists_test.c b/net/core/dev_addr_lists_test.c
index 4dbd0dc6aea2..8e1dba825e94 100644
--- a/net/core/dev_addr_lists_test.c
+++ b/net/core/dev_addr_lists_test.c
@@ -49,7 +49,6 @@ static int dev_addr_test_init(struct kunit *test)
KUNIT_FAIL(test, "Can't register netdev %d", err);
}
- rtnl_lock();
return 0;
}
@@ -57,7 +56,6 @@ static void dev_addr_test_exit(struct kunit *test)
{
struct net_device *netdev = test->priv;
- rtnl_unlock();
unregister_netdev(netdev);
free_netdev(netdev);
}
@@ -67,6 +65,7 @@ static void dev_addr_test_basic(struct kunit *test)
struct net_device *netdev = test->priv;
u8 addr[ETH_ALEN];
+ rtnl_lock();
KUNIT_EXPECT_TRUE(test, !!netdev->dev_addr);
memset(addr, 2, sizeof(addr));
@@ -76,6 +75,7 @@ static void dev_addr_test_basic(struct kunit *test)
memset(addr, 3, sizeof(addr));
dev_addr_set(netdev, addr);
KUNIT_EXPECT_MEMEQ(test, netdev->dev_addr, addr, sizeof(addr));
+ rtnl_unlock();
}
static void dev_addr_test_sync_one(struct kunit *test)
@@ -86,6 +86,7 @@ static void dev_addr_test_sync_one(struct kunit *test)
datp = netdev_priv(netdev);
+ rtnl_lock();
memset(addr, 1, sizeof(addr));
eth_hw_addr_set(netdev, addr);
@@ -103,6 +104,7 @@ static void dev_addr_test_sync_one(struct kunit *test)
* considered synced and we overwrite in place.
*/
KUNIT_EXPECT_EQ(test, 0, datp->addr_seen);
+ rtnl_unlock();
}
static void dev_addr_test_add_del(struct kunit *test)
@@ -114,6 +116,7 @@ static void dev_addr_test_add_del(struct kunit *test)
datp = netdev_priv(netdev);
+ rtnl_lock();
for (i = 1; i < 4; i++) {
memset(addr, i, sizeof(addr));
KUNIT_EXPECT_EQ(test, 0, dev_addr_add(netdev, addr,
@@ -143,6 +146,7 @@ static void dev_addr_test_add_del(struct kunit *test)
__hw_addr_sync_dev(&netdev->dev_addrs, netdev, dev_addr_test_sync,
dev_addr_test_unsync);
KUNIT_EXPECT_EQ(test, 1, datp->addr_seen);
+ rtnl_unlock();
}
static void dev_addr_test_del_main(struct kunit *test)
@@ -150,6 +154,7 @@ static void dev_addr_test_del_main(struct kunit *test)
struct net_device *netdev = test->priv;
u8 addr[ETH_ALEN];
+ rtnl_lock();
memset(addr, 1, sizeof(addr));
eth_hw_addr_set(netdev, addr);
@@ -161,6 +166,7 @@ static void dev_addr_test_del_main(struct kunit *test)
NETDEV_HW_ADDR_T_LAN));
KUNIT_EXPECT_EQ(test, -ENOENT, dev_addr_del(netdev, addr,
NETDEV_HW_ADDR_T_LAN));
+ rtnl_unlock();
}
static void dev_addr_test_add_set(struct kunit *test)
@@ -172,6 +178,7 @@ static void dev_addr_test_add_set(struct kunit *test)
datp = netdev_priv(netdev);
+ rtnl_lock();
/* There is no external API like dev_addr_add_excl(),
* so shuffle the tree a little bit and exploit aliasing.
*/
@@ -191,6 +198,7 @@ static void dev_addr_test_add_set(struct kunit *test)
__hw_addr_sync_dev(&netdev->dev_addrs, netdev, dev_addr_test_sync,
dev_addr_test_unsync);
KUNIT_EXPECT_EQ(test, 0xffff, datp->addr_seen);
+ rtnl_unlock();
}
static void dev_addr_test_add_excl(struct kunit *test)
@@ -199,6 +207,7 @@ static void dev_addr_test_add_excl(struct kunit *test)
u8 addr[ETH_ALEN];
int i;
+ rtnl_lock();
for (i = 0; i < 10; i++) {
memset(addr, i, sizeof(addr));
KUNIT_EXPECT_EQ(test, 0, dev_uc_add_excl(netdev, addr));
@@ -213,6 +222,7 @@ static void dev_addr_test_add_excl(struct kunit *test)
memset(addr, i, sizeof(addr));
KUNIT_EXPECT_EQ(test, -EEXIST, dev_uc_add_excl(netdev, addr));
}
+ rtnl_unlock();
}
static struct kunit_case dev_addr_test_cases[] = {
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 9a66cf5015f2..8592c052c0f4 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -259,9 +259,7 @@ static int dev_eth_ioctl(struct net_device *dev,
* @dev: Network device
* @cfg: Timestamping configuration structure
*
- * Helper for enforcing a common policy that phylib timestamping, if available,
- * should take precedence in front of hardware timestamping provided by the
- * netdev.
+ * Helper for calling the default hardware provider timestamping.
*
* Note: phy_mii_ioctl() only handles SIOCSHWTSTAMP (not SIOCGHWTSTAMP), and
* there only exists a phydev->mii_ts->hwtstamp() method. So this will return
@@ -271,7 +269,7 @@ static int dev_eth_ioctl(struct net_device *dev,
static int dev_get_hwtstamp_phylib(struct net_device *dev,
struct kernel_hwtstamp_config *cfg)
{
- if (phy_has_hwtstamp(dev->phydev))
+ if (phy_is_default_hwtstamp(dev->phydev))
return phy_hwtstamp_get(dev->phydev, cfg);
return dev->netdev_ops->ndo_hwtstamp_get(dev, cfg);
@@ -327,7 +325,7 @@ int dev_set_hwtstamp_phylib(struct net_device *dev,
struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
- bool phy_ts = phy_has_hwtstamp(dev->phydev);
+ bool phy_ts = phy_is_default_hwtstamp(dev->phydev);
struct kernel_hwtstamp_config old_cfg = {};
bool changed = false;
int err;
@@ -363,7 +361,6 @@ int dev_set_hwtstamp_phylib(struct net_device *dev,
return 0;
}
-EXPORT_SYMBOL_GPL(dev_set_hwtstamp_phylib);
static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr)
{
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index b0f221d658be..2e0ae3328232 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -74,7 +74,7 @@ struct net_dm_hw_entries {
};
struct per_cpu_dm_data {
- spinlock_t lock; /* Protects 'skb', 'hw_entries' and
+ raw_spinlock_t lock; /* Protects 'skb', 'hw_entries' and
* 'send_timer'
*/
union {
@@ -109,7 +109,8 @@ static u32 net_dm_queue_len = 1000;
struct net_dm_alert_ops {
void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
void *location,
- enum skb_drop_reason reason);
+ enum skb_drop_reason reason,
+ struct sock *rx_sk);
void (*napi_poll_probe)(void *ignore, struct napi_struct *napi,
int work, int budget);
void (*work_item_func)(struct work_struct *work);
@@ -168,9 +169,9 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
err:
mod_timer(&data->send_timer, jiffies + HZ / 10);
out:
- spin_lock_irqsave(&data->lock, flags);
+ raw_spin_lock_irqsave(&data->lock, flags);
swap(data->skb, skb);
- spin_unlock_irqrestore(&data->lock, flags);
+ raw_spin_unlock_irqrestore(&data->lock, flags);
if (skb) {
struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
@@ -225,7 +226,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
local_irq_save(flags);
data = this_cpu_ptr(&dm_cpu_data);
- spin_lock(&data->lock);
+ raw_spin_lock(&data->lock);
dskb = data->skb;
if (!dskb)
@@ -259,12 +260,13 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
}
out:
- spin_unlock_irqrestore(&data->lock, flags);
+ raw_spin_unlock_irqrestore(&data->lock, flags);
}
static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb,
void *location,
- enum skb_drop_reason reason)
+ enum skb_drop_reason reason,
+ struct sock *rx_sk)
{
trace_drop_common(skb, location);
}
@@ -314,9 +316,9 @@ net_dm_hw_reset_per_cpu_data(struct per_cpu_dm_data *hw_data)
mod_timer(&hw_data->send_timer, jiffies + HZ / 10);
}
- spin_lock_irqsave(&hw_data->lock, flags);
+ raw_spin_lock_irqsave(&hw_data->lock, flags);
swap(hw_data->hw_entries, hw_entries);
- spin_unlock_irqrestore(&hw_data->lock, flags);
+ raw_spin_unlock_irqrestore(&hw_data->lock, flags);
return hw_entries;
}
@@ -448,7 +450,7 @@ net_dm_hw_trap_summary_probe(void *ignore, const struct devlink *devlink,
return;
hw_data = this_cpu_ptr(&dm_hw_cpu_data);
- spin_lock_irqsave(&hw_data->lock, flags);
+ raw_spin_lock_irqsave(&hw_data->lock, flags);
hw_entries = hw_data->hw_entries;
if (!hw_entries)
@@ -477,7 +479,7 @@ net_dm_hw_trap_summary_probe(void *ignore, const struct devlink *devlink,
}
out:
- spin_unlock_irqrestore(&hw_data->lock, flags);
+ raw_spin_unlock_irqrestore(&hw_data->lock, flags);
}
static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
@@ -491,7 +493,8 @@ static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
struct sk_buff *skb,
void *location,
- enum skb_drop_reason reason)
+ enum skb_drop_reason reason,
+ struct sock *rx_sk)
{
ktime_t tstamp = ktime_get_real();
struct per_cpu_dm_data *data;
@@ -1673,7 +1676,7 @@ static struct notifier_block dropmon_net_notifier = {
static void __net_dm_cpu_data_init(struct per_cpu_dm_data *data)
{
- spin_lock_init(&data->lock);
+ raw_spin_lock_init(&data->lock);
skb_queue_head_init(&data->drop_queue);
u64_stats_init(&data->stats.syncp);
}
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
index 0ccfd5fa5cb9..70c634b9e7b0 100644
--- a/net/core/dst_cache.c
+++ b/net/core/dst_cache.c
@@ -27,6 +27,7 @@ struct dst_cache_pcpu {
static void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache,
struct dst_entry *dst, u32 cookie)
{
+ DEBUG_NET_WARN_ON_ONCE(!in_softirq());
dst_release(dst_cache->dst);
if (dst)
dst_hold(dst);
@@ -40,6 +41,7 @@ static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
{
struct dst_entry *dst;
+ DEBUG_NET_WARN_ON_ONCE(!in_softirq());
dst = idst->dst;
if (!dst)
goto fail;
@@ -47,7 +49,8 @@ static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
/* the cache already hold a dst reference; it can't go away */
dst_hold(dst);
- if (unlikely(!time_after(idst->refresh_ts, dst_cache->reset_ts) ||
+ if (unlikely(!time_after(idst->refresh_ts,
+ READ_ONCE(dst_cache->reset_ts)) ||
(dst->obsolete && !dst->ops->check(dst, idst->cookie)))) {
dst_cache_per_cpu_dst_set(idst, NULL, 0);
dst_release(dst);
@@ -83,7 +86,7 @@ struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr)
return NULL;
*saddr = idst->in_saddr.s_addr;
- return container_of(dst, struct rtable, dst);
+ return dst_rtable(dst);
}
EXPORT_SYMBOL_GPL(dst_cache_get_ip4);
@@ -111,8 +114,8 @@ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
return;
idst = this_cpu_ptr(dst_cache->cache);
- dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
- rt6_get_cookie((struct rt6_info *)dst));
+ dst_cache_per_cpu_dst_set(idst, dst,
+ rt6_get_cookie(dst_rt6_info(dst)));
idst->in6_saddr = *saddr;
}
EXPORT_SYMBOL_GPL(dst_cache_set_ip6);
@@ -170,7 +173,7 @@ void dst_cache_reset_now(struct dst_cache *dst_cache)
if (!dst_cache->cache)
return;
- dst_cache->reset_ts = jiffies;
+ dst_cache_reset(dst_cache);
for_each_possible_cpu(i) {
struct dst_cache_pcpu *idst = per_cpu_ptr(dst_cache->cache, i);
struct dst_entry *dst = idst->dst;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 3f933ffcefc3..6ebffbc63236 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -1142,10 +1142,10 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
struct fib_rules_ops *ops;
- int idx = 0, family;
+ int err, idx = 0, family;
if (cb->strict_check) {
- int err = fib_valid_dumprule_req(nlh, cb->extack);
+ err = fib_valid_dumprule_req(nlh, cb->extack);
if (err < 0)
return err;
@@ -1158,17 +1158,17 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
if (ops == NULL)
return -EAFNOSUPPORT;
- dump_rules(skb, cb, ops);
-
- return skb->len;
+ return dump_rules(skb, cb, ops);
}
+ err = 0;
rcu_read_lock();
list_for_each_entry_rcu(ops, &net->rules_ops, list) {
if (idx < cb->args[0] || !try_module_get(ops->owner))
goto skip;
- if (dump_rules(skb, cb, ops) < 0)
+ err = dump_rules(skb, cb, ops);
+ if (err < 0)
break;
cb->args[1] = 0;
@@ -1178,7 +1178,7 @@ skip:
rcu_read_unlock();
cb->args[0] = idx;
- return skb->len;
+ return err;
}
static void notify_rule_change(int event, struct fib_rule *rule,
@@ -1293,7 +1293,8 @@ static int __init fib_rules_init(void)
int err;
rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, 0);
+ rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule,
+ RTNL_FLAG_DUMP_UNLOCKED);
err = register_pernet_subsys(&fib_rules_net_ops);
if (err < 0)
diff --git a/net/core/filter.c b/net/core/filter.c
index ae5254f712c9..f3c72cf86099 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -87,6 +87,9 @@
#include "dev.h"
+/* Keep the struct bpf_fib_lookup small so that it fits into a cacheline */
+static_assert(sizeof(struct bpf_fib_lookup) == 64, "struct bpf_fib_lookup size check");
+
static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
@@ -1655,13 +1658,21 @@ struct bpf_scratchpad {
__be32 diff[MAX_BPF_STACK / sizeof(__be32)];
u8 buff[MAX_BPF_STACK];
};
+ local_lock_t bh_lock;
};
-static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
+static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
static inline int __bpf_try_make_writable(struct sk_buff *skb,
unsigned int write_len)
{
+#ifdef CONFIG_DEBUG_NET
+ /* Avoid a splat in pskb_may_pull_reason() */
+ if (write_len > INT_MAX)
+ return -EINVAL;
+#endif
return skb_ensure_writable(skb, write_len);
}
@@ -2013,6 +2024,7 @@ BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
u32 diff_size = from_size + to_size;
int i, j = 0;
+ __wsum ret;
/* This is quite flexible, some examples:
*
@@ -2026,12 +2038,15 @@ BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
diff_size > sizeof(sp->diff)))
return -EINVAL;
+ local_lock_nested_bh(&bpf_sp.bh_lock);
for (i = 0; i < from_size / sizeof(__be32); i++, j++)
sp->diff[j] = ~from[i];
for (i = 0; i < to_size / sizeof(__be32); i++, j++)
sp->diff[j] = to[i];
- return csum_partial(sp->diff, diff_size, seed);
+ ret = csum_partial(sp->diff, diff_size, seed);
+ local_unlock_nested_bh(&bpf_sp.bh_lock);
+ return ret;
}
static const struct bpf_func_proto bpf_csum_diff_proto = {
@@ -2215,7 +2230,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
rcu_read_lock();
if (!nh) {
dst = skb_dst(skb);
- nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
+ nexthop = rt6_nexthop(dst_rt6_info(dst),
&ipv6_hdr(skb)->daddr);
} else {
nexthop = &nh->ipv6_nh;
@@ -2271,12 +2286,12 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
err = bpf_out_neigh_v6(net, skb, dev, nh);
if (unlikely(net_xmit_eval(err)))
- dev->stats.tx_errors++;
+ DEV_STATS_INC(dev, tx_errors);
else
ret = NET_XMIT_SUCCESS;
goto out_xmit;
out_drop:
- dev->stats.tx_errors++;
+ DEV_STATS_INC(dev, tx_errors);
kfree_skb(skb);
out_xmit:
return ret;
@@ -2314,8 +2329,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
rcu_read_lock();
if (!nh) {
- struct dst_entry *dst = skb_dst(skb);
- struct rtable *rt = container_of(dst, struct rtable, dst);
+ struct rtable *rt = skb_rtable(skb);
neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
} else if (nh->nh_family == AF_INET6) {
@@ -2378,12 +2392,12 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
err = bpf_out_neigh_v4(net, skb, dev, nh);
if (unlikely(net_xmit_eval(err)))
- dev->stats.tx_errors++;
+ DEV_STATS_INC(dev, tx_errors);
else
ret = NET_XMIT_SUCCESS;
goto out_xmit;
out_drop:
- dev->stats.tx_errors++;
+ DEV_STATS_INC(dev, tx_errors);
kfree_skb(skb);
out_xmit:
return ret;
@@ -2469,9 +2483,6 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
.arg3_type = ARG_ANYTHING,
};
-DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
-EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
-
static struct net_device *skb_get_peer_dev(struct net_device *dev)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -2484,7 +2495,7 @@ static struct net_device *skb_get_peer_dev(struct net_device *dev)
int skb_do_redirect(struct sk_buff *skb)
{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
struct net *net = dev_net(skb->dev);
struct net_device *dev;
u32 flags = ri->flags;
@@ -2517,7 +2528,7 @@ out_drop:
BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
return TC_ACT_SHOT;
@@ -2538,7 +2549,7 @@ static const struct bpf_func_proto bpf_redirect_proto = {
BPF_CALL_2(bpf_redirect_peer, u32, ifindex, u64, flags)
{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
if (unlikely(flags))
return TC_ACT_SHOT;
@@ -2560,7 +2571,7 @@ static const struct bpf_func_proto bpf_redirect_peer_proto = {
BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params,
int, plen, u64, flags)
{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
if (unlikely((plen && plen < sizeof(*params)) || flags))
return TC_ACT_SHOT;
@@ -3537,13 +3548,20 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
if (skb_is_gso(skb)) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
- /* Due to header grow, MSS needs to be downgraded. */
- if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
- skb_decrease_gso_size(shinfo, len_diff);
-
/* Header must be checked, and gso_segs recomputed. */
shinfo->gso_type |= gso_type;
shinfo->gso_segs = 0;
+
+ /* Due to header growth, MSS needs to be downgraded.
+ * There is a BUG_ON() when segmenting the frag_list with
+ * head_frag true, so linearize the skb after downgrading
+ * the MSS.
+ */
+ if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) {
+ skb_decrease_gso_size(shinfo, len_diff);
+ if (shinfo->frag_list)
+ return skb_linearize(skb);
+ }
}
return 0;
@@ -4266,50 +4284,50 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
*/
void xdp_do_flush(void)
{
- __dev_flush();
- __cpu_map_flush();
- __xsk_map_flush();
+ struct list_head *lh_map, *lh_dev, *lh_xsk;
+
+ bpf_net_ctx_get_all_used_flush_lists(&lh_map, &lh_dev, &lh_xsk);
+ if (lh_dev)
+ __dev_flush(lh_dev);
+ if (lh_map)
+ __cpu_map_flush(lh_map);
+ if (lh_xsk)
+ __xsk_map_flush(lh_xsk);
}
EXPORT_SYMBOL_GPL(xdp_do_flush);
#if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL)
void xdp_do_check_flushed(struct napi_struct *napi)
{
- bool ret;
+ struct list_head *lh_map, *lh_dev, *lh_xsk;
+ bool missed = false;
- ret = dev_check_flush();
- ret |= cpu_map_check_flush();
- ret |= xsk_map_check_flush();
+ bpf_net_ctx_get_all_used_flush_lists(&lh_map, &lh_dev, &lh_xsk);
+ if (lh_dev) {
+ __dev_flush(lh_dev);
+ missed = true;
+ }
+ if (lh_map) {
+ __cpu_map_flush(lh_map);
+ missed = true;
+ }
+ if (lh_xsk) {
+ __xsk_map_flush(lh_xsk);
+ missed = true;
+ }
- WARN_ONCE(ret, "Missing xdp_do_flush() invocation after NAPI by %ps\n",
+ WARN_ONCE(missed, "Missing xdp_do_flush() invocation after NAPI by %ps\n",
napi->poll);
}
#endif
-void bpf_clear_redirect_map(struct bpf_map *map)
-{
- struct bpf_redirect_info *ri;
- int cpu;
-
- for_each_possible_cpu(cpu) {
- ri = per_cpu_ptr(&bpf_redirect_info, cpu);
- /* Avoid polluting remote cacheline due to writes if
- * not needed. Once we pass this test, we need the
- * cmpxchg() to make sure it hasn't been changed in
- * the meantime by remote CPU.
- */
- if (unlikely(READ_ONCE(ri->map) == map))
- cmpxchg(&ri->map, map, NULL);
- }
-}
-
DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key);
u32 xdp_master_redirect(struct xdp_buff *xdp)
{
+ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
struct net_device *master, *slave;
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev);
slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp);
@@ -4381,7 +4399,7 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
map = READ_ONCE(ri->map);
/* The map pointer is cleared when the map is being torn
- * down by bpf_clear_redirect_map()
+ * down by dev_map_free()
*/
if (unlikely(!map)) {
err = -ENOENT;
@@ -4426,7 +4444,7 @@ err:
int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
enum bpf_map_type map_type = ri->map_type;
if (map_type == BPF_MAP_TYPE_XSKMAP)
@@ -4440,7 +4458,7 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect);
int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp,
struct xdp_frame *xdpf, struct bpf_prog *xdp_prog)
{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
enum bpf_map_type map_type = ri->map_type;
if (map_type == BPF_MAP_TYPE_XSKMAP)
@@ -4457,7 +4475,7 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
enum bpf_map_type map_type, u32 map_id,
u32 flags)
{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
struct bpf_map *map;
int err;
@@ -4469,7 +4487,7 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
map = READ_ONCE(ri->map);
/* The map pointer is cleared when the map is being torn
- * down by bpf_clear_redirect_map()
+ * down by dev_map_free()
*/
if (unlikely(!map)) {
err = -ENOENT;
@@ -4511,7 +4529,7 @@ err:
int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
enum bpf_map_type map_type = ri->map_type;
void *fwd = ri->tgt_value;
u32 map_id = ri->map_id;
@@ -4547,7 +4565,7 @@ err:
BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
if (unlikely(flags))
return XDP_ABORTED;
@@ -4684,7 +4702,7 @@ set_compat:
to->tunnel_tos = info->key.tos;
to->tunnel_ttl = info->key.ttl;
if (flags & BPF_F_TUNINFO_FLAGS)
- to->tunnel_flags = info->key.tun_flags;
+ to->tunnel_flags = ip_tunnel_flags_to_be16(info->key.tun_flags);
else
to->tunnel_ext = 0;
@@ -4727,7 +4745,7 @@ BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size)
int err;
if (unlikely(!info ||
- !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) {
+ !ip_tunnel_is_options_present(info->key.tun_flags))) {
err = -ENOENT;
goto err_clear;
}
@@ -4797,15 +4815,15 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
memset(info, 0, sizeof(*info));
info->mode = IP_TUNNEL_INFO_TX;
- info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
- if (flags & BPF_F_DONT_FRAGMENT)
- info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
- if (flags & BPF_F_ZERO_CSUM_TX)
- info->key.tun_flags &= ~TUNNEL_CSUM;
- if (flags & BPF_F_SEQ_NUMBER)
- info->key.tun_flags |= TUNNEL_SEQ;
- if (flags & BPF_F_NO_TUNNEL_KEY)
- info->key.tun_flags &= ~TUNNEL_KEY;
+ __set_bit(IP_TUNNEL_NOCACHE_BIT, info->key.tun_flags);
+ __assign_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, info->key.tun_flags,
+ flags & BPF_F_DONT_FRAGMENT);
+ __assign_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags,
+ !(flags & BPF_F_ZERO_CSUM_TX));
+ __assign_bit(IP_TUNNEL_SEQ_BIT, info->key.tun_flags,
+ flags & BPF_F_SEQ_NUMBER);
+ __assign_bit(IP_TUNNEL_KEY_BIT, info->key.tun_flags,
+ !(flags & BPF_F_NO_TUNNEL_KEY));
info->key.tun_id = cpu_to_be64(from->tunnel_id);
info->key.tos = from->tunnel_tos;
@@ -4843,13 +4861,15 @@ BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
{
struct ip_tunnel_info *info = skb_tunnel_info(skb);
const struct metadata_dst *md = this_cpu_ptr(md_dst);
+ IP_TUNNEL_DECLARE_FLAGS(present) = { };
if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
return -EINVAL;
if (unlikely(size > IP_TUNNEL_OPTS_MAX))
return -ENOMEM;
- ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT);
+ ip_tunnel_set_options_present(present);
+ ip_tunnel_info_opts_set(info, from, size, present);
return 0;
}
@@ -5906,7 +5926,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
} else {
- fl4.flowi4_mark = 0;
+ if (flags & BPF_FIB_LOOKUP_MARK)
+ fl4.flowi4_mark = params->mark;
+ else
+ fl4.flowi4_mark = 0;
fl4.flowi4_secid = 0;
fl4.flowi4_tun_key.tun_id = 0;
fl4.flowi4_uid = sock_net_uid(net, NULL);
@@ -6049,7 +6072,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
strict);
} else {
- fl6.flowi6_mark = 0;
+ if (flags & BPF_FIB_LOOKUP_MARK)
+ fl6.flowi6_mark = params->mark;
+ else
+ fl6.flowi6_mark = 0;
fl6.flowi6_secid = 0;
fl6.flowi6_tun_key.tun_id = 0;
fl6.flowi6_uid = sock_net_uid(net, NULL);
@@ -6127,7 +6153,7 @@ set_fwd_params:
#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
- BPF_FIB_LOOKUP_SRC)
+ BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_MARK)
BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
struct bpf_fib_lookup *, params, int, plen, u32, flags)
@@ -6440,6 +6466,7 @@ BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
void *srh_tlvs, *srh_end, *ptr;
int srhoff = 0;
+ lockdep_assert_held(&srh_state->bh_lock);
if (srh == NULL)
return -EINVAL;
@@ -6496,6 +6523,7 @@ BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
int hdroff = 0;
int err;
+ lockdep_assert_held(&srh_state->bh_lock);
switch (action) {
case SEG6_LOCAL_ACTION_END_X:
if (!seg6_bpf_has_valid_srh(skb))
@@ -6572,6 +6600,7 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
int srhoff = 0;
int ret;
+ lockdep_assert_held(&srh_state->bh_lock);
if (unlikely(srh == NULL))
return -EINVAL;
@@ -6805,7 +6834,7 @@ static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = {
.ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -6824,7 +6853,7 @@ static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -6843,7 +6872,7 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -6867,7 +6896,7 @@ static const struct bpf_func_proto bpf_tc_skc_lookup_tcp_proto = {
.ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -6891,7 +6920,7 @@ static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = {
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -6915,7 +6944,7 @@ static const struct bpf_func_proto bpf_tc_sk_lookup_udp_proto = {
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -6953,7 +6982,7 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -6977,7 +7006,7 @@ static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
.ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -7001,7 +7030,7 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -7021,7 +7050,7 @@ static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
.ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -7040,7 +7069,7 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -7059,7 +7088,7 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -7716,17 +7745,21 @@ BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb,
return -EOPNOTSUPP;
switch (tstamp_type) {
- case BPF_SKB_TSTAMP_DELIVERY_MONO:
+ case BPF_SKB_CLOCK_REALTIME:
+ skb->tstamp = tstamp;
+ skb->tstamp_type = SKB_CLOCK_REALTIME;
+ break;
+ case BPF_SKB_CLOCK_MONOTONIC:
if (!tstamp)
return -EINVAL;
skb->tstamp = tstamp;
- skb->mono_delivery_time = 1;
+ skb->tstamp_type = SKB_CLOCK_MONOTONIC;
break;
- case BPF_SKB_TSTAMP_UNSPEC:
- if (tstamp)
+ case BPF_SKB_CLOCK_TAI:
+ if (!tstamp)
return -EINVAL;
- skb->tstamp = 0;
- skb->mono_delivery_time = 0;
+ skb->tstamp = tstamp;
+ skb->tstamp_type = SKB_CLOCK_TAI;
break;
default:
return -EINVAL;
@@ -8364,8 +8397,6 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_event_output_data_proto;
case BPF_FUNC_get_current_uid_gid:
return &bpf_get_current_uid_gid_proto;
- case BPF_FUNC_get_current_pid_tgid:
- return &bpf_get_current_pid_tgid_proto;
case BPF_FUNC_sk_storage_get:
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
@@ -9379,16 +9410,17 @@ static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si,
{
__u8 value_reg = si->dst_reg;
__u8 skb_reg = si->src_reg;
- /* AX is needed because src_reg and dst_reg could be the same */
- __u8 tmp_reg = BPF_REG_AX;
-
- *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
- SKB_BF_MONO_TC_OFFSET);
- *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
- SKB_MONO_DELIVERY_TIME_MASK, 2);
- *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC);
- *insn++ = BPF_JMP_A(1);
- *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO);
+ BUILD_BUG_ON(__SKB_CLOCK_MAX != (int)BPF_SKB_CLOCK_TAI);
+ BUILD_BUG_ON(SKB_CLOCK_REALTIME != (int)BPF_SKB_CLOCK_REALTIME);
+ BUILD_BUG_ON(SKB_CLOCK_MONOTONIC != (int)BPF_SKB_CLOCK_MONOTONIC);
+ BUILD_BUG_ON(SKB_CLOCK_TAI != (int)BPF_SKB_CLOCK_TAI);
+ *insn++ = BPF_LDX_MEM(BPF_B, value_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, value_reg, SKB_TSTAMP_TYPE_MASK);
+#ifdef __BIG_ENDIAN_BITFIELD
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, value_reg, SKB_TSTAMP_TYPE_RSHIFT);
+#else
+ BUILD_BUG_ON(!(SKB_TSTAMP_TYPE_MASK & 0x1));
+#endif
return insn;
}
@@ -9431,11 +9463,12 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
__u8 tmp_reg = BPF_REG_AX;
*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
- *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
- TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK);
- *insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg,
- TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2);
- /* skb->tc_at_ingress && skb->mono_delivery_time,
+ /* check if ingress mask bits is set */
+ *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1);
+ *insn++ = BPF_JMP_A(4);
+ *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, SKB_TSTAMP_TYPE_MASK, 1);
+ *insn++ = BPF_JMP_A(2);
+ /* skb->tc_at_ingress && skb->tstamp_type,
* read 0 as the (rcv) timestamp.
*/
*insn++ = BPF_MOV64_IMM(value_reg, 0);
@@ -9460,7 +9493,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
* the bpf prog is aware the tstamp could have delivery time.
* Thus, write skb->tstamp as is if tstamp_type_access is true.
* Otherwise, writing at ingress will have to clear the
- * mono_delivery_time bit also.
+ * skb->tstamp_type bit also.
*/
if (!prog->tstamp_type_access) {
__u8 tmp_reg = BPF_REG_AX;
@@ -9470,8 +9503,8 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1);
/* goto <store> */
*insn++ = BPF_JMP_A(2);
- /* <clear>: mono_delivery_time */
- *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK);
+ /* <clear>: skb->tstamp_type */
+ *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_TSTAMP_TYPE_MASK);
*insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET);
}
#endif
@@ -11027,7 +11060,6 @@ const struct bpf_verifier_ops lwt_seg6local_verifier_ops = {
};
const struct bpf_prog_ops lwt_seg6local_prog_ops = {
- .test_run = bpf_prog_test_run_skb,
};
const struct bpf_verifier_ops cg_sock_verifier_ops = {
@@ -11845,28 +11877,34 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
}
__bpf_kfunc_start_defs();
-__bpf_kfunc int bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags,
- struct bpf_dynptr_kern *ptr__uninit)
+__bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags,
+ struct bpf_dynptr *ptr__uninit)
{
+ struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit;
+ struct sk_buff *skb = (struct sk_buff *)s;
+
if (flags) {
- bpf_dynptr_set_null(ptr__uninit);
+ bpf_dynptr_set_null(ptr);
return -EINVAL;
}
- bpf_dynptr_init(ptr__uninit, skb, BPF_DYNPTR_TYPE_SKB, 0, skb->len);
+ bpf_dynptr_init(ptr, skb, BPF_DYNPTR_TYPE_SKB, 0, skb->len);
return 0;
}
-__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags,
- struct bpf_dynptr_kern *ptr__uninit)
+__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_md *x, u64 flags,
+ struct bpf_dynptr *ptr__uninit)
{
+ struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit;
+ struct xdp_buff *xdp = (struct xdp_buff *)x;
+
if (flags) {
- bpf_dynptr_set_null(ptr__uninit);
+ bpf_dynptr_set_null(ptr);
return -EINVAL;
}
- bpf_dynptr_init(ptr__uninit, xdp, BPF_DYNPTR_TYPE_XDP, 0, xdp_get_buff_len(xdp));
+ bpf_dynptr_init(ptr, xdp, BPF_DYNPTR_TYPE_XDP, 0, xdp_get_buff_len(xdp));
return 0;
}
@@ -11892,10 +11930,11 @@ __bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern,
return 0;
}
-__bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct sk_buff *skb, struct sock *sk,
+__bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct __sk_buff *s, struct sock *sk,
struct bpf_tcp_req_attrs *attrs, int attrs__sz)
{
#if IS_ENABLED(CONFIG_SYN_COOKIES)
+ struct sk_buff *skb = (struct sk_buff *)s;
const struct request_sock_ops *ops;
struct inet_request_sock *ireq;
struct tcp_request_sock *treq;
@@ -11990,16 +12029,17 @@ __bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct sk_buff *skb, struct sock *sk,
__bpf_kfunc_end_defs();
-int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
- struct bpf_dynptr_kern *ptr__uninit)
+int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
+ struct bpf_dynptr *ptr__uninit)
{
+ struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit;
int err;
err = bpf_dynptr_from_skb(skb, flags, ptr__uninit);
if (err)
return err;
- bpf_dynptr_set_rdonly(ptr__uninit);
+ bpf_dynptr_set_rdonly(ptr);
return 0;
}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 272f09251343..0e638a37aa09 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -299,9 +299,10 @@ void skb_flow_dissect_meta(const struct sk_buff *skb,
EXPORT_SYMBOL(skb_flow_dissect_meta);
static void
-skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
- struct flow_dissector *flow_dissector,
- void *target_container)
+skb_flow_dissect_set_enc_control(enum flow_dissector_key_id type,
+ u32 ctrl_flags,
+ struct flow_dissector *flow_dissector,
+ void *target_container)
{
struct flow_dissector_key_control *ctrl;
@@ -312,6 +313,7 @@ skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
FLOW_DISSECTOR_KEY_ENC_CONTROL,
target_container);
ctrl->addr_type = type;
+ ctrl->flags = ctrl_flags;
}
void
@@ -367,6 +369,7 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
{
struct ip_tunnel_info *info;
struct ip_tunnel_key *key;
+ u32 ctrl_flags = 0;
/* A quick check to see if there might be something to do. */
if (!dissector_uses_key(flow_dissector,
@@ -391,11 +394,20 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
key = &info->key;
+ if (test_bit(IP_TUNNEL_CSUM_BIT, key->tun_flags))
+ ctrl_flags |= FLOW_DIS_F_TUNNEL_CSUM;
+ if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags))
+ ctrl_flags |= FLOW_DIS_F_TUNNEL_DONT_FRAGMENT;
+ if (test_bit(IP_TUNNEL_OAM_BIT, key->tun_flags))
+ ctrl_flags |= FLOW_DIS_F_TUNNEL_OAM;
+ if (test_bit(IP_TUNNEL_CRIT_OPT_BIT, key->tun_flags))
+ ctrl_flags |= FLOW_DIS_F_TUNNEL_CRIT_OPT;
+
switch (ip_tunnel_info_af(info)) {
case AF_INET:
- skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV4_ADDRS,
- flow_dissector,
- target_container);
+ skb_flow_dissect_set_enc_control(FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ ctrl_flags, flow_dissector,
+ target_container);
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
struct flow_dissector_key_ipv4_addrs *ipv4;
@@ -408,9 +420,9 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
}
break;
case AF_INET6:
- skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV6_ADDRS,
- flow_dissector,
- target_container);
+ skb_flow_dissect_set_enc_control(FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ ctrl_flags, flow_dissector,
+ target_container);
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
struct flow_dissector_key_ipv6_addrs *ipv6;
@@ -422,6 +434,10 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
ipv6->dst = key->u.ipv6.dst;
}
break;
+ default:
+ skb_flow_dissect_set_enc_control(0, ctrl_flags, flow_dissector,
+ target_container);
+ break;
}
if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
@@ -455,17 +471,25 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) {
struct flow_dissector_key_enc_opts *enc_opt;
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
+ u32 val;
enc_opt = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_ENC_OPTS,
target_container);
- if (info->options_len) {
- enc_opt->len = info->options_len;
- ip_tunnel_info_opts_get(enc_opt->data, info);
- enc_opt->dst_opt_type = info->key.tun_flags &
- TUNNEL_OPTIONS_PRESENT;
- }
+ if (!info->options_len)
+ return;
+
+ enc_opt->len = info->options_len;
+ ip_tunnel_info_opts_get(enc_opt->data, info);
+
+ ip_tunnel_set_options_present(flags);
+ ip_tunnel_flags_and(flags, info->key.tun_flags, flags);
+
+ val = find_next_bit(flags, __IP_TUNNEL_FLAG_NUM,
+ IP_TUNNEL_GENEVE_OPT_BIT);
+ enc_opt->dst_opt_type = val < __IP_TUNNEL_FLAG_NUM ? val : 0;
}
}
EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
@@ -1093,7 +1117,7 @@ bool __skb_flow_dissect(const struct net *net,
}
}
- WARN_ON_ONCE(!net);
+ DEBUG_NET_WARN_ON_ONCE(!net);
if (net) {
enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
struct bpf_prog_array *run_array;
@@ -1784,6 +1808,13 @@ u32 flow_hash_from_keys(struct flow_keys *keys)
}
EXPORT_SYMBOL(flow_hash_from_keys);
+u32 flow_hash_from_keys_seed(struct flow_keys *keys,
+ const siphash_key_t *keyval)
+{
+ return __flow_hash_from_keys(keys, keyval);
+}
+EXPORT_SYMBOL(flow_hash_from_keys_seed);
+
static inline u32 ___skb_get_hash(const struct sk_buff *skb,
struct flow_keys *keys,
const siphash_key_t *keyval)
@@ -1823,22 +1854,23 @@ EXPORT_SYMBOL(make_flow_keys_digest);
static struct flow_dissector flow_keys_dissector_symmetric __read_mostly;
-u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
+u32 __skb_get_hash_symmetric_net(const struct net *net, const struct sk_buff *skb)
{
struct flow_keys keys;
__flow_hash_secret_init();
memset(&keys, 0, sizeof(keys));
- __skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric,
+ __skb_flow_dissect(net, skb, &flow_keys_dissector_symmetric,
&keys, NULL, 0, 0, 0, 0);
return __flow_hash_from_keys(&keys, &hashrnd);
}
-EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
+EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric_net);
/**
- * __skb_get_hash: calculate a flow hash
+ * __skb_get_hash_net: calculate a flow hash
+ * @net: associated network namespace, derived from @skb if NULL
* @skb: sk_buff to calculate flow hash from
*
* This function calculates a flow hash based on src/dst addresses
@@ -1846,18 +1878,24 @@ EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
* on success, zero indicates no valid hash. Also, sets l4_hash in skb
* if hash is a canonical 4-tuple hash over transport ports.
*/
-void __skb_get_hash(struct sk_buff *skb)
+void __skb_get_hash_net(const struct net *net, struct sk_buff *skb)
{
struct flow_keys keys;
u32 hash;
+ memset(&keys, 0, sizeof(keys));
+
+ __skb_flow_dissect(net, skb, &flow_keys_dissector,
+ &keys, NULL, 0, 0, 0,
+ FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+
__flow_hash_secret_init();
- hash = ___skb_get_hash(skb, &keys, &hashrnd);
+ hash = __flow_hash_from_keys(&keys, &hashrnd);
__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
}
-EXPORT_SYMBOL(__skb_get_hash);
+EXPORT_SYMBOL(__skb_get_hash_net);
__u32 skb_get_hash_perturb(const struct sk_buff *skb,
const siphash_key_t *perturb)
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index fae9c4694186..412816076b8b 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -206,7 +206,7 @@ void gen_kill_estimator(struct net_rate_estimator __rcu **rate_est)
{
struct net_rate_estimator *est;
- est = xchg((__force struct net_rate_estimator **)rate_est, NULL);
+ est = unrcu_pointer(xchg(rate_est, NULL));
if (est) {
timer_shutdown_sync(&est->timer);
kfree_rcu(est, rcu);
diff --git a/net/core/gro.c b/net/core/gro.c
index c7901253a1a8..b3b43de1a650 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -3,6 +3,7 @@
#include <net/dst_metadata.h>
#include <net/busy_poll.h>
#include <trace/events/net.h>
+#include <linux/skbuff_ref.h>
#define MAX_GRO_SKBS 8
@@ -230,6 +231,33 @@ done:
return 0;
}
+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
+{
+ if (unlikely(p->len + skb->len >= 65536))
+ return -E2BIG;
+
+ if (NAPI_GRO_CB(p)->last == p)
+ skb_shinfo(p)->frag_list = skb;
+ else
+ NAPI_GRO_CB(p)->last->next = skb;
+
+ skb_pull(skb, skb_gro_offset(skb));
+
+ NAPI_GRO_CB(p)->last = skb;
+ NAPI_GRO_CB(p)->count++;
+ p->data_len += skb->len;
+
+ /* sk ownership - if any - completely transferred to the aggregated packet */
+ skb->destructor = NULL;
+ skb->sk = NULL;
+ p->truesize += skb->truesize;
+ p->len += skb->len;
+
+ NAPI_GRO_CB(skb)->same_flow = 1;
+
+ return 0;
+}
+
static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
{
@@ -330,8 +358,6 @@ static void gro_list_prepare(const struct list_head *head,
list_for_each_entry(p, head, list) {
unsigned long diffs;
- NAPI_GRO_CB(p)->flush = 0;
-
if (hash != skb_get_hash_raw(p)) {
NAPI_GRO_CB(p)->same_flow = 0;
continue;
@@ -471,7 +497,6 @@ found_ptype:
sizeof(u32))); /* Avoid slow unaligned acc */
*(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0;
NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb);
- NAPI_GRO_CB(skb)->is_atomic = 1;
NAPI_GRO_CB(skb)->count = 1;
if (unlikely(skb_is_gso(skb))) {
NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs;
diff --git a/net/core/hotdata.c b/net/core/hotdata.c
index c8a7a451c18a..d0aaaaa556f2 100644
--- a/net/core/hotdata.c
+++ b/net/core/hotdata.c
@@ -1,9 +1,9 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-#include <net/hotdata.h>
#include <linux/cache.h>
#include <linux/jiffies.h>
#include <linux/list.h>
-
+#include <net/hotdata.h>
+#include <net/proto_memory.h>
struct net_hotdata net_hotdata __cacheline_aligned = {
.offload_base = LIST_HEAD_INIT(net_hotdata.offload_base),
@@ -18,5 +18,8 @@ struct net_hotdata net_hotdata __cacheline_aligned = {
.max_backlog = 1000,
.dev_tx_weight = 64,
.dev_rx_weight = 64,
+ .sysctl_max_skb_frags = MAX_SKB_FRAGS,
+ .sysctl_skb_defer_max = 64,
+ .sysctl_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE
};
EXPORT_SYMBOL(net_hotdata);
diff --git a/net/core/ieee8021q_helpers.c b/net/core/ieee8021q_helpers.c
new file mode 100644
index 000000000000..759a9b9f3f89
--- /dev/null
+++ b/net/core/ieee8021q_helpers.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2024 Pengutronix, Oleksij Rempel <kernel@pengutronix.de>
+
+#include <linux/array_size.h>
+#include <linux/printk.h>
+#include <linux/types.h>
+#include <net/dscp.h>
+#include <net/ieee8021q.h>
+
+/* The following arrays map Traffic Types (TT) to traffic classes (TC) for
+ * different number of queues as shown in the example provided by
+ * IEEE 802.1Q-2022 in Annex I "I.3 Traffic type to traffic class mapping" and
+ * Table I-1 "Traffic type to traffic class mapping".
+ */
+static const u8 ieee8021q_8queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0,
+ [IEEE8021Q_TT_BE] = 1,
+ [IEEE8021Q_TT_EE] = 2,
+ [IEEE8021Q_TT_CA] = 3,
+ [IEEE8021Q_TT_VI] = 4,
+ [IEEE8021Q_TT_VO] = 5,
+ [IEEE8021Q_TT_IC] = 6,
+ [IEEE8021Q_TT_NC] = 7,
+};
+
+static const u8 ieee8021q_7queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0,
+ [IEEE8021Q_TT_BE] = 1,
+ [IEEE8021Q_TT_EE] = 2,
+ [IEEE8021Q_TT_CA] = 3,
+ [IEEE8021Q_TT_VI] = 4, [IEEE8021Q_TT_VO] = 4,
+ [IEEE8021Q_TT_IC] = 5,
+ [IEEE8021Q_TT_NC] = 6,
+};
+
+static const u8 ieee8021q_6queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0,
+ [IEEE8021Q_TT_BE] = 1,
+ [IEEE8021Q_TT_EE] = 2, [IEEE8021Q_TT_CA] = 2,
+ [IEEE8021Q_TT_VI] = 3, [IEEE8021Q_TT_VO] = 3,
+ [IEEE8021Q_TT_IC] = 4,
+ [IEEE8021Q_TT_NC] = 5,
+};
+
+static const u8 ieee8021q_5queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0, [IEEE8021Q_TT_BE] = 0,
+ [IEEE8021Q_TT_EE] = 1, [IEEE8021Q_TT_CA] = 1,
+ [IEEE8021Q_TT_VI] = 2, [IEEE8021Q_TT_VO] = 2,
+ [IEEE8021Q_TT_IC] = 3,
+ [IEEE8021Q_TT_NC] = 4,
+};
+
+static const u8 ieee8021q_4queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0, [IEEE8021Q_TT_BE] = 0,
+ [IEEE8021Q_TT_EE] = 1, [IEEE8021Q_TT_CA] = 1,
+ [IEEE8021Q_TT_VI] = 2, [IEEE8021Q_TT_VO] = 2,
+ [IEEE8021Q_TT_IC] = 3, [IEEE8021Q_TT_NC] = 3,
+};
+
+static const u8 ieee8021q_3queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0, [IEEE8021Q_TT_BE] = 0,
+ [IEEE8021Q_TT_EE] = 0, [IEEE8021Q_TT_CA] = 0,
+ [IEEE8021Q_TT_VI] = 1, [IEEE8021Q_TT_VO] = 1,
+ [IEEE8021Q_TT_IC] = 2, [IEEE8021Q_TT_NC] = 2,
+};
+
+static const u8 ieee8021q_2queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0, [IEEE8021Q_TT_BE] = 0,
+ [IEEE8021Q_TT_EE] = 0, [IEEE8021Q_TT_CA] = 0,
+ [IEEE8021Q_TT_VI] = 1, [IEEE8021Q_TT_VO] = 1,
+ [IEEE8021Q_TT_IC] = 1, [IEEE8021Q_TT_NC] = 1,
+};
+
+static const u8 ieee8021q_1queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0, [IEEE8021Q_TT_BE] = 0,
+ [IEEE8021Q_TT_EE] = 0, [IEEE8021Q_TT_CA] = 0,
+ [IEEE8021Q_TT_VI] = 0, [IEEE8021Q_TT_VO] = 0,
+ [IEEE8021Q_TT_IC] = 0, [IEEE8021Q_TT_NC] = 0,
+};
+
+/**
+ * ieee8021q_tt_to_tc - Map IEEE 802.1Q Traffic Type to Traffic Class
+ * @tt: IEEE 802.1Q Traffic Type
+ * @num_queues: Number of queues
+ *
+ * This function maps an IEEE 802.1Q Traffic Type to a Traffic Class (TC) based
+ * on the number of queues configured on the NIC. The mapping is based on the
+ * example provided by IEEE 802.1Q-2022 in Annex I "I.3 Traffic type to traffic
+ * class mapping" and Table I-1 "Traffic type to traffic class mapping".
+ *
+ * Return: Traffic Class corresponding to the given Traffic Type or negative
+ * value in case of error.
+ */
+int ieee8021q_tt_to_tc(enum ieee8021q_traffic_type tt, unsigned int num_queues)
+{
+ if (tt < 0 || tt >= IEEE8021Q_TT_MAX) {
+ pr_err("Requested Traffic Type (%d) is out of range (%d)\n", tt,
+ IEEE8021Q_TT_MAX);
+ return -EINVAL;
+ }
+
+ switch (num_queues) {
+ case 8:
+ compiletime_assert(ARRAY_SIZE(ieee8021q_8queue_tt_tc_map) !=
+ IEEE8021Q_TT_MAX - 1,
+ "ieee8021q_8queue_tt_tc_map != max - 1");
+ return ieee8021q_8queue_tt_tc_map[tt];
+ case 7:
+ compiletime_assert(ARRAY_SIZE(ieee8021q_7queue_tt_tc_map) !=
+ IEEE8021Q_TT_MAX - 1,
+ "ieee8021q_7queue_tt_tc_map != max - 1");
+
+ return ieee8021q_7queue_tt_tc_map[tt];
+ case 6:
+ compiletime_assert(ARRAY_SIZE(ieee8021q_6queue_tt_tc_map) !=
+ IEEE8021Q_TT_MAX - 1,
+ "ieee8021q_6queue_tt_tc_map != max - 1");
+
+ return ieee8021q_6queue_tt_tc_map[tt];
+ case 5:
+ compiletime_assert(ARRAY_SIZE(ieee8021q_5queue_tt_tc_map) !=
+ IEEE8021Q_TT_MAX - 1,
+ "ieee8021q_5queue_tt_tc_map != max - 1");
+
+ return ieee8021q_5queue_tt_tc_map[tt];
+ case 4:
+ compiletime_assert(ARRAY_SIZE(ieee8021q_4queue_tt_tc_map) !=
+ IEEE8021Q_TT_MAX - 1,
+ "ieee8021q_4queue_tt_tc_map != max - 1");
+
+ return ieee8021q_4queue_tt_tc_map[tt];
+ case 3:
+ compiletime_assert(ARRAY_SIZE(ieee8021q_3queue_tt_tc_map) !=
+ IEEE8021Q_TT_MAX - 1,
+ "ieee8021q_3queue_tt_tc_map != max - 1");
+
+ return ieee8021q_3queue_tt_tc_map[tt];
+ case 2:
+ compiletime_assert(ARRAY_SIZE(ieee8021q_2queue_tt_tc_map) !=
+ IEEE8021Q_TT_MAX - 1,
+ "ieee8021q_2queue_tt_tc_map != max - 1");
+
+ return ieee8021q_2queue_tt_tc_map[tt];
+ case 1:
+ compiletime_assert(ARRAY_SIZE(ieee8021q_1queue_tt_tc_map) !=
+ IEEE8021Q_TT_MAX - 1,
+ "ieee8021q_1queue_tt_tc_map != max - 1");
+
+ return ieee8021q_1queue_tt_tc_map[tt];
+ }
+
+ pr_err("Invalid number of queues %d\n", num_queues);
+
+ return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(ieee8021q_tt_to_tc);
+
+/**
+ * ietf_dscp_to_ieee8021q_tt - Map IETF DSCP to IEEE 802.1Q Traffic Type
+ * @dscp: IETF DSCP value
+ *
+ * This function maps an IETF DSCP value to an IEEE 802.1Q Traffic Type (TT).
+ * Since there is no corresponding mapping between DSCP and IEEE 802.1Q Traffic
+ * Type, this function is inspired by the RFC8325 documentation which describe
+ * the mapping between DSCP and 802.11 User Priority (UP) values.
+ *
+ * Return: IEEE 802.1Q Traffic Type corresponding to the given DSCP value
+ */
+int ietf_dscp_to_ieee8021q_tt(u8 dscp)
+{
+ switch (dscp) {
+ case DSCP_CS0:
+ /* Comment from RFC8325:
+ * [RFC4594], Section 4.8, recommends High-Throughput Data be marked
+ * AF1x (that is, AF11, AF12, and AF13, according to the rules defined
+ * in [RFC2475]).
+ *
+ * By default (as described in Section 2.3), High-Throughput Data will
+ * map to UP 1 and, thus, to the Background Access Category (AC_BK),
+ * which is contrary to the intent expressed in [RFC4594].
+
+ * Unfortunately, there really is no corresponding fit for the High-
+ * Throughput Data service class within the constrained 4 Access
+ * Category [IEEE.802.11-2016] model. If the High-Throughput Data
+ * service class is assigned to the Best Effort Access Category (AC_BE),
+ * then it would contend with Low-Latency Data (while [RFC4594]
+ * recommends a distinction in servicing between these service classes)
+ * as well as with the default service class; alternatively, if it is
+ * assigned to the Background Access Category (AC_BK), then it would
+ * receive a less-then-best-effort service and contend with Low-Priority
+ * Data (as discussed in Section 4.2.10).
+ *
+ * As such, since there is no directly corresponding fit for the High-
+ * Throughout Data service class within the [IEEE.802.11-2016] model, it
+ * is generally RECOMMENDED to map High-Throughput Data to UP 0, thereby
+ * admitting it to the Best Effort Access Category (AC_BE).
+ *
+ * Note: The above text is from RFC8325 which is describing the mapping
+ * between DSCP and 802.11 User Priority (UP) values. The mapping
+ * between UP and IEEE 802.1Q Traffic Type is not defined in the RFC but
+ * the 802.11 AC_BK and AC_BE are closely related to the IEEE 802.1Q
+ * Traffic Types BE and BK.
+ */
+ case DSCP_AF11:
+ case DSCP_AF12:
+ case DSCP_AF13:
+ return IEEE8021Q_TT_BE;
+ /* Comment from RFC8325:
+ * RFC3662 and RFC4594 both recommend Low-Priority Data be marked
+ * with DSCP CS1. The Low-Priority Data service class loosely
+ * corresponds to the [IEEE.802.11-2016] Background Access Category
+ */
+ case DSCP_CS1:
+ return IEEE8021Q_TT_BK;
+ case DSCP_CS2:
+ case DSCP_AF21:
+ case DSCP_AF22:
+ case DSCP_AF23:
+ return IEEE8021Q_TT_EE;
+ case DSCP_CS3:
+ case DSCP_AF31:
+ case DSCP_AF32:
+ case DSCP_AF33:
+ return IEEE8021Q_TT_CA;
+ case DSCP_CS4:
+ case DSCP_AF41:
+ case DSCP_AF42:
+ case DSCP_AF43:
+ return IEEE8021Q_TT_VI;
+ case DSCP_CS5:
+ case DSCP_EF:
+ case DSCP_VOICE_ADMIT:
+ return IEEE8021Q_TT_VO;
+ case DSCP_CS6:
+ return IEEE8021Q_TT_IC;
+ case DSCP_CS7:
+ return IEEE8021Q_TT_NC;
+ }
+
+ return SIMPLE_IETF_DSCP_TO_IEEE8021Q_TT(dscp);
+}
+EXPORT_SYMBOL_GPL(ietf_dscp_to_ieee8021q_tt);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 8ec35194bfcb..ab150641142a 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -148,9 +148,9 @@ static void linkwatch_schedule_work(int urgent)
* override the existing timer.
*/
if (test_bit(LW_URGENT, &linkwatch_flags))
- mod_delayed_work(system_wq, &linkwatch_work, 0);
+ mod_delayed_work(system_unbound_wq, &linkwatch_work, 0);
else
- schedule_delayed_work(&linkwatch_work, delay);
+ queue_delayed_work(system_unbound_wq, &linkwatch_work, delay);
}
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 4a0797f0a154..afb05f58b64c 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -38,13 +38,14 @@ static inline struct bpf_lwt *bpf_lwt_lwtunnel(struct lwtunnel_state *lwt)
static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
struct dst_entry *dst, bool can_redirect)
{
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
int ret;
- /* Migration disable and BH disable are needed to protect per-cpu
- * redirect_info between BPF prog and skb_do_redirect().
+ /* Disabling BH is needed to protect per-CPU bpf_redirect_info between
+ * BPF prog and skb_do_redirect().
*/
- migrate_disable();
local_bh_disable();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
bpf_compute_data_pointers(skb);
ret = bpf_prog_run_save_cb(lwt->prog, skb);
@@ -77,8 +78,8 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
break;
}
+ bpf_net_ctx_clear(bpf_net_ctx);
local_bh_enable();
- migrate_enable();
return ret;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 552719c3bbc3..a6fe88eca939 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -734,7 +734,9 @@ out_neigh_release:
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
struct net_device *dev, bool want_ref)
{
- return ___neigh_create(tbl, pkey, dev, 0, false, want_ref);
+ bool exempt_from_gc = !!(dev->flags & IFF_LOOPBACK);
+
+ return ___neigh_create(tbl, pkey, dev, 0, exempt_from_gc, want_ref);
}
EXPORT_SYMBOL(__neigh_create);
@@ -1769,7 +1771,7 @@ static void neigh_parms_destroy(struct neigh_parms *parms)
static struct lock_class_key neigh_table_proxy_queue_class;
-static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
+static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
void neigh_table_init(int index, struct neigh_table *tbl)
{
@@ -1826,13 +1828,19 @@ void neigh_table_init(int index, struct neigh_table *tbl)
tbl->last_flush = now;
tbl->last_rand = now + tbl->parms.reachable_time * 20;
- neigh_tables[index] = tbl;
+ rcu_assign_pointer(neigh_tables[index], tbl);
}
EXPORT_SYMBOL(neigh_table_init);
+/*
+ * Only called from ndisc_cleanup(), which means this is dead code
+ * because we no longer can unload IPv6 module.
+ */
int neigh_table_clear(int index, struct neigh_table *tbl)
{
- neigh_tables[index] = NULL;
+ RCU_INIT_POINTER(neigh_tables[index], NULL);
+ synchronize_rcu();
+
/* It is not clean... Fix it to unload IPv6 module safely */
cancel_delayed_work_sync(&tbl->managed_work);
cancel_delayed_work_sync(&tbl->gc_work);
@@ -1864,10 +1872,10 @@ static struct neigh_table *neigh_find_table(int family)
switch (family) {
case AF_INET:
- tbl = neigh_tables[NEIGH_ARP_TABLE];
+ tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ARP_TABLE]);
break;
case AF_INET6:
- tbl = neigh_tables[NEIGH_ND_TABLE];
+ tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ND_TABLE]);
break;
}
@@ -2331,7 +2339,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
ndtmsg = nlmsg_data(nlh);
for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
- tbl = neigh_tables[tidx];
+ tbl = rcu_dereference_rtnl(neigh_tables[tidx]);
if (!tbl)
continue;
if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
@@ -2519,7 +2527,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
struct neigh_parms *p;
- tbl = neigh_tables[tidx];
+ tbl = rcu_dereference_rtnl(neigh_tables[tidx]);
if (!tbl)
continue;
@@ -2674,7 +2682,7 @@ static bool neigh_master_filtered(struct net_device *dev, int master_idx)
if (!master_idx)
return false;
- master = dev ? netdev_master_upper_dev_get(dev) : NULL;
+ master = dev ? netdev_master_upper_dev_get_rcu(dev) : NULL;
/* 0 is already used to denote NDA_MASTER wasn't passed, therefore need another
* invalid value for ifindex to denote "no master".
@@ -2707,7 +2715,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
{
struct net *net = sock_net(skb->sk);
struct neighbour *n;
- int rc, h, s_h = cb->args[1];
+ int err = 0, h, s_h = cb->args[1];
int idx, s_idx = idx = cb->args[2];
struct neigh_hash_table *nht;
unsigned int flags = NLM_F_MULTI;
@@ -2715,7 +2723,6 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
if (filter->dev_idx || filter->master_idx)
flags |= NLM_F_DUMP_FILTERED;
- rcu_read_lock();
nht = rcu_dereference(tbl->nht);
for (h = s_h; h < (1 << nht->hash_shift); h++) {
@@ -2729,23 +2736,19 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
neigh_master_filtered(n->dev, filter->master_idx))
goto next;
- if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWNEIGH,
- flags) < 0) {
- rc = -1;
+ err = neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNEIGH, flags);
+ if (err < 0)
goto out;
- }
next:
idx++;
}
}
- rc = skb->len;
out:
- rcu_read_unlock();
cb->args[1] = h;
cb->args[2] = idx;
- return rc;
+ return err;
}
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
@@ -2754,7 +2757,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
{
struct pneigh_entry *n;
struct net *net = sock_net(skb->sk);
- int rc, h, s_h = cb->args[3];
+ int err = 0, h, s_h = cb->args[3];
int idx, s_idx = idx = cb->args[4];
unsigned int flags = NLM_F_MULTI;
@@ -2772,11 +2775,11 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
neigh_master_filtered(n->dev, filter->master_idx))
goto next;
- if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWNEIGH, flags, tbl) < 0) {
+ err = pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNEIGH, flags, tbl);
+ if (err < 0) {
read_unlock_bh(&tbl->lock);
- rc = -1;
goto out;
}
next:
@@ -2785,12 +2788,10 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
}
read_unlock_bh(&tbl->lock);
- rc = skb->len;
out:
cb->args[3] = h;
cb->args[4] = idx;
- return rc;
-
+ return err;
}
static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
@@ -2878,8 +2879,9 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
s_t = cb->args[0];
+ rcu_read_lock();
for (t = 0; t < NEIGH_NR_TABLES; t++) {
- tbl = neigh_tables[t];
+ tbl = rcu_dereference(neigh_tables[t]);
if (!tbl)
continue;
@@ -2895,9 +2897,10 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
if (err < 0)
break;
}
+ rcu_read_unlock();
cb->args[0] = t;
- return skb->len;
+ return err;
}
static int neigh_valid_get_req(const struct nlmsghdr *nlh,
@@ -3143,14 +3146,15 @@ int neigh_xmit(int index, struct net_device *dev,
const void *addr, struct sk_buff *skb)
{
int err = -EAFNOSUPPORT;
+
if (likely(index < NEIGH_NR_TABLES)) {
struct neigh_table *tbl;
struct neighbour *neigh;
- tbl = neigh_tables[index];
- if (!tbl)
- goto out;
rcu_read_lock();
+ tbl = rcu_dereference(neigh_tables[index]);
+ if (!tbl)
+ goto out_unlock;
if (index == NEIGH_ARP_TABLE) {
u32 key = *((u32 *)addr);
@@ -3166,6 +3170,7 @@ int neigh_xmit(int index, struct net_device *dev,
goto out_kfree_skb;
}
err = READ_ONCE(neigh->output)(neigh, skb);
+out_unlock:
rcu_read_unlock();
}
else if (index == NEIGH_LINK_TABLE) {
@@ -3538,7 +3543,7 @@ EXPORT_SYMBOL(neigh_app_ns);
#ifdef CONFIG_SYSCTL
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
-static int proc_unres_qlen(struct ctl_table *ctl, int write,
+static int proc_unres_qlen(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int size, ret;
@@ -3573,7 +3578,7 @@ static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
rcu_read_unlock();
}
-static void neigh_proc_update(struct ctl_table *ctl, int write)
+static void neigh_proc_update(const struct ctl_table *ctl, int write)
{
struct net_device *dev = ctl->extra1;
struct neigh_parms *p = ctl->extra2;
@@ -3590,7 +3595,7 @@ static void neigh_proc_update(struct ctl_table *ctl, int write)
neigh_copy_dflt_parms(net, p, index);
}
-static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
+static int neigh_proc_dointvec_zero_intmax(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -3605,7 +3610,7 @@ static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
return ret;
}
-static int neigh_proc_dointvec_ms_jiffies_positive(struct ctl_table *ctl, int write,
+static int neigh_proc_dointvec_ms_jiffies_positive(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table tmp = *ctl;
@@ -3621,7 +3626,7 @@ static int neigh_proc_dointvec_ms_jiffies_positive(struct ctl_table *ctl, int wr
return ret;
}
-int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer,
+int neigh_proc_dointvec(const struct ctl_table *ctl, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
@@ -3631,7 +3636,7 @@ int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer,
}
EXPORT_SYMBOL(neigh_proc_dointvec);
-int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, void *buffer,
+int neigh_proc_dointvec_jiffies(const struct ctl_table *ctl, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
@@ -3641,7 +3646,7 @@ int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, void *buffer,
}
EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
-static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
+static int neigh_proc_dointvec_userhz_jiffies(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -3651,7 +3656,7 @@ static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
return ret;
}
-int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
+int neigh_proc_dointvec_ms_jiffies(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
@@ -3661,7 +3666,7 @@ int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
}
EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
-static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
+static int neigh_proc_dointvec_unres_qlen(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -3671,7 +3676,7 @@ static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
return ret;
}
-static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
+static int neigh_proc_base_reachable_time(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -3728,7 +3733,7 @@ static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
static struct neigh_sysctl_table {
struct ctl_table_header *sysctl_header;
- struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
+ struct ctl_table neigh_vars[NEIGH_VAR_MAX];
} neigh_sysctl_template __read_mostly = {
.neigh_vars = {
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
@@ -3779,7 +3784,6 @@ static struct neigh_sysctl_table {
.extra2 = SYSCTL_INT_MAX,
.proc_handler = proc_dointvec_minmax,
},
- {},
},
};
@@ -3807,8 +3811,6 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
if (dev) {
dev_name_source = dev->name;
/* Terminate the table early */
- memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
- sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1;
} else {
struct neigh_table *tbl = p->tbl;
@@ -3889,7 +3891,8 @@ static int __init neigh_init(void)
{
rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);
+ rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info,
+ RTNL_FLAG_DUMP_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
0);
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index a97eceb84e61..fa6d3969734a 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -144,7 +144,8 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
seq_printf(seq,
"%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x "
"%08x %08x\n",
- sd->processed, sd->dropped, sd->time_squeeze, 0,
+ sd->processed, atomic_read(&sd->dropped),
+ sd->time_squeeze, 0,
0, 0, 0, 0, /* was fastroute */
0, /* was cpu_collision */
sd->received_rps, flow_limit_count,
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index e3d7a8cfa20b..0e2084ce7b75 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -605,13 +605,13 @@ static ssize_t threaded_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
ssize_t ret = -EINVAL;
- if (!rtnl_trylock())
- return restart_syscall();
+ rcu_read_lock();
if (dev_isalive(netdev))
- ret = sysfs_emit(buf, fmt_dec, netdev->threaded);
+ ret = sysfs_emit(buf, fmt_dec, READ_ONCE(netdev->threaded));
+
+ rcu_read_unlock();
- rtnl_unlock();
return ret;
}
@@ -1419,7 +1419,7 @@ static ssize_t bql_show_stall_thrs(struct netdev_queue *queue, char *buf)
{
struct dql *dql = &queue->dql;
- return sprintf(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs));
+ return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs));
}
static ssize_t bql_set_stall_thrs(struct netdev_queue *queue,
@@ -1451,7 +1451,7 @@ static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init =
static ssize_t bql_show_stall_max(struct netdev_queue *queue, char *buf)
{
- return sprintf(buf, "%u\n", READ_ONCE(queue->dql.stall_max));
+ return sysfs_emit(buf, "%u\n", READ_ONCE(queue->dql.stall_max));
}
static ssize_t bql_set_stall_max(struct netdev_queue *queue,
@@ -1468,7 +1468,7 @@ static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf)
{
struct dql *dql = &queue->dql;
- return sprintf(buf, "%lu\n", dql->stall_cnt);
+ return sysfs_emit(buf, "%lu\n", dql->stall_cnt);
}
static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init =
@@ -2028,7 +2028,7 @@ static void netdev_release(struct device *d)
* device is dead and about to be freed.
*/
kfree(rcu_access_pointer(dev->ifalias));
- netdev_freemem(dev);
+ kvfree(dev);
}
static const void *net_namespace(const struct device *d)
@@ -2046,7 +2046,7 @@ static void net_get_ownership(const struct device *d, kuid_t *uid, kgid_t *gid)
net_ns_get_ownership(net, uid, gid);
}
-static struct class net_class __ro_after_init = {
+static const struct class net_class = {
.name = "net",
.dev_release = netdev_release,
.dev_groups = net_class_groups,
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index f0540c557515..6a823ba906c6 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -69,12 +69,15 @@ DEFINE_COOKIE(net_cookie);
static struct net_generic *net_alloc_generic(void)
{
+ unsigned int gen_ptrs = READ_ONCE(max_gen_ptrs);
+ unsigned int generic_size;
struct net_generic *ng;
- unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
+
+ generic_size = offsetof(struct net_generic, ptr[gen_ptrs]);
ng = kzalloc(generic_size, GFP_KERNEL);
if (ng)
- ng->s.len = max_gen_ptrs;
+ ng->s.len = gen_ptrs;
return ng;
}
@@ -690,11 +693,16 @@ EXPORT_SYMBOL_GPL(__put_net);
* get_net_ns - increment the refcount of the network namespace
* @ns: common namespace (net)
*
- * Returns the net's common namespace.
+ * Returns the net's common namespace or ERR_PTR() if ref is zero.
*/
struct ns_common *get_net_ns(struct ns_common *ns)
{
- return &get_net(container_of(ns, struct net, ns))->ns;
+ struct net *net;
+
+ net = maybe_get_net(container_of(ns, struct net, ns));
+ if (net)
+ return &net->ns;
+ return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL_GPL(get_net_ns);
@@ -1090,7 +1098,7 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
end:
if (net_cb.fillargs.add_ref)
put_net(net_cb.tgt_net);
- return err < 0 ? err : skb->len;
+ return err;
}
static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
@@ -1205,7 +1213,8 @@ void __init net_ns_init(void)
rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL,
RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
- RTNL_FLAG_DOIT_UNLOCKED);
+ RTNL_FLAG_DOIT_UNLOCKED |
+ RTNL_FLAG_DUMP_UNLOCKED);
}
static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list)
@@ -1307,7 +1316,11 @@ static int register_pernet_operations(struct list_head *list,
if (error < 0)
return error;
*ops->id = error;
- max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1);
+ /* This does not require READ_ONCE as writers already hold
+ * pernet_ops_rwsem. But WRITE_ONCE is needed to protect
+ * net_alloc_generic.
+ */
+ WRITE_ONCE(max_gen_ptrs, max(max_gen_ptrs, *ops->id + 1));
}
error = __register_pernet_operations(list, ops);
if (error) {
diff --git a/net/core/gso_test.c b/net/core/net_test.c
index 358c44680d91..9c3a590865d2 100644
--- a/net/core/gso_test.c
+++ b/net/core/net_test.c
@@ -1,6 +1,9 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <kunit/test.h>
+
+/* GSO */
+
#include <linux/skbuff.h>
static const char hdr[] = "abcdefgh";
@@ -258,17 +261,127 @@ free_gso_skb:
consume_skb(skb);
}
-static struct kunit_case gso_test_cases[] = {
- KUNIT_CASE_PARAM(gso_test_func, gso_test_gen_params),
- {}
+/* IP tunnel flags */
+
+#include <net/ip_tunnels.h>
+
+struct ip_tunnel_flags_test {
+ const char *name;
+
+ const u16 *src_bits;
+ const u16 *exp_bits;
+ u8 src_num;
+ u8 exp_num;
+
+ __be16 exp_val;
+ bool exp_comp;
+};
+
+#define IP_TUNNEL_FLAGS_TEST(n, src, comp, eval, exp) { \
+ .name = (n), \
+ .src_bits = (src), \
+ .src_num = ARRAY_SIZE(src), \
+ .exp_comp = (comp), \
+ .exp_val = (eval), \
+ .exp_bits = (exp), \
+ .exp_num = ARRAY_SIZE(exp), \
+}
+
+/* These are __be16-compatible and can be compared as is */
+static const u16 ip_tunnel_flags_1[] = {
+ IP_TUNNEL_KEY_BIT,
+ IP_TUNNEL_STRICT_BIT,
+ IP_TUNNEL_ERSPAN_OPT_BIT,
+};
+
+/* Due to the previous flags design limitation, setting either
+ * ``IP_TUNNEL_CSUM_BIT`` (on Big Endian) or ``IP_TUNNEL_DONT_FRAGMENT_BIT``
+ * (on Little) also sets VTI/ISATAP bit. In the bitmap implementation, they
+ * correspond to ``BIT(16)``, which is bigger than ``U16_MAX``, but still is
+ * backward-compatible.
+ */
+#ifdef __LITTLE_ENDIAN
+#define IP_TUNNEL_CONFLICT_BIT IP_TUNNEL_DONT_FRAGMENT_BIT
+#else
+#define IP_TUNNEL_CONFLICT_BIT IP_TUNNEL_CSUM_BIT
+#endif
+
+static const u16 ip_tunnel_flags_2_src[] = {
+ IP_TUNNEL_CONFLICT_BIT,
+};
+
+static const u16 ip_tunnel_flags_2_exp[] = {
+ IP_TUNNEL_CONFLICT_BIT,
+ IP_TUNNEL_SIT_ISATAP_BIT,
};
-static struct kunit_suite gso_test_suite = {
- .name = "net_core_gso",
- .test_cases = gso_test_cases,
+/* Bits 17 and higher are not compatible with __be16 flags */
+static const u16 ip_tunnel_flags_3_src[] = {
+ IP_TUNNEL_VXLAN_OPT_BIT,
+ 17,
+ 18,
+ 20,
};
-kunit_test_suite(gso_test_suite);
+static const u16 ip_tunnel_flags_3_exp[] = {
+ IP_TUNNEL_VXLAN_OPT_BIT,
+};
+
+static const struct ip_tunnel_flags_test ip_tunnel_flags_test[] = {
+ IP_TUNNEL_FLAGS_TEST("compat", ip_tunnel_flags_1, true,
+ cpu_to_be16(BIT(IP_TUNNEL_KEY_BIT) |
+ BIT(IP_TUNNEL_STRICT_BIT) |
+ BIT(IP_TUNNEL_ERSPAN_OPT_BIT)),
+ ip_tunnel_flags_1),
+ IP_TUNNEL_FLAGS_TEST("conflict", ip_tunnel_flags_2_src, true,
+ VTI_ISVTI, ip_tunnel_flags_2_exp),
+ IP_TUNNEL_FLAGS_TEST("new", ip_tunnel_flags_3_src, false,
+ cpu_to_be16(BIT(IP_TUNNEL_VXLAN_OPT_BIT)),
+ ip_tunnel_flags_3_exp),
+};
+
+static void
+ip_tunnel_flags_test_case_to_desc(const struct ip_tunnel_flags_test *t,
+ char *desc)
+{
+ strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE);
+}
+KUNIT_ARRAY_PARAM(ip_tunnel_flags_test, ip_tunnel_flags_test,
+ ip_tunnel_flags_test_case_to_desc);
+
+static void ip_tunnel_flags_test_run(struct kunit *test)
+{
+ const struct ip_tunnel_flags_test *t = test->param_value;
+ IP_TUNNEL_DECLARE_FLAGS(src) = { };
+ IP_TUNNEL_DECLARE_FLAGS(exp) = { };
+ IP_TUNNEL_DECLARE_FLAGS(out);
+
+ for (u32 j = 0; j < t->src_num; j++)
+ __set_bit(t->src_bits[j], src);
+ for (u32 j = 0; j < t->exp_num; j++)
+ __set_bit(t->exp_bits[j], exp);
+
+ KUNIT_ASSERT_EQ(test, t->exp_comp,
+ ip_tunnel_flags_is_be16_compat(src));
+ KUNIT_ASSERT_EQ(test, (__force u16)t->exp_val,
+ (__force u16)ip_tunnel_flags_to_be16(src));
+
+ ip_tunnel_flags_from_be16(out, t->exp_val);
+ KUNIT_ASSERT_TRUE(test, __ipt_flag_op(bitmap_equal, exp, out));
+}
+
+static struct kunit_case net_test_cases[] = {
+ KUNIT_CASE_PARAM(gso_test_func, gso_test_gen_params),
+ KUNIT_CASE_PARAM(ip_tunnel_flags_test_run,
+ ip_tunnel_flags_test_gen_params),
+ { },
+};
+
+static struct kunit_suite net_test_suite = {
+ .name = "net_core",
+ .test_cases = net_test_cases,
+};
+kunit_test_suite(net_test_suite);
+MODULE_DESCRIPTION("KUnit tests for networking core");
MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("KUnit tests for segmentation offload");
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index 8d8ace9ef87f..8350a0afa9ec 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -70,6 +70,7 @@ static const struct nla_policy netdev_napi_get_dump_nl_policy[NETDEV_A_NAPI_IFIN
/* NETDEV_CMD_QSTATS_GET - dump */
static const struct nla_policy netdev_qstats_get_nl_policy[NETDEV_A_QSTATS_SCOPE + 1] = {
+ [NETDEV_A_QSTATS_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
[NETDEV_A_QSTATS_SCOPE] = NLA_POLICY_MASK(NLA_UINT, 0x1),
};
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 7004b3399c2b..05f9515d2c05 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -59,22 +59,22 @@ XDP_METADATA_KFUNC_xxx
nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES,
xdp_rx_meta, NETDEV_A_DEV_PAD) ||
nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES,
- xsk_features, NETDEV_A_DEV_PAD)) {
- genlmsg_cancel(rsp, hdr);
- return -EINVAL;
- }
+ xsk_features, NETDEV_A_DEV_PAD))
+ goto err_cancel_msg;
if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
- netdev->xdp_zc_max_segs)) {
- genlmsg_cancel(rsp, hdr);
- return -EINVAL;
- }
+ netdev->xdp_zc_max_segs))
+ goto err_cancel_msg;
}
genlmsg_end(rsp, hdr);
return 0;
+
+err_cancel_msg:
+ genlmsg_cancel(rsp, hdr);
+ return -EMSGSIZE;
}
static void
@@ -489,7 +489,17 @@ netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx)
{
if (netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_PACKETS, rx->packets) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes) ||
- netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail))
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROPS, rx->hw_drops) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, rx->hw_drop_overruns) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, rx->csum_unnecessary) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_NONE, rx->csum_none) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_BAD, rx->csum_bad) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_PACKETS, rx->hw_gro_packets) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_BYTES, rx->hw_gro_bytes) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS, rx->hw_gro_wire_packets) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES, rx->hw_gro_wire_bytes) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS, rx->hw_drop_ratelimits))
return -EMSGSIZE;
return 0;
}
@@ -498,7 +508,18 @@ static int
netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx)
{
if (netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_PACKETS, tx->packets) ||
- netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes))
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROPS, tx->hw_drops) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_ERRORS, tx->hw_drop_errors) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_CSUM_NONE, tx->csum_none) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_NEEDS_CSUM, tx->needs_csum) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_PACKETS, tx->hw_gso_packets) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_BYTES, tx->hw_gso_bytes) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, tx->hw_gso_wire_packets) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, tx->hw_gso_wire_bytes) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, tx->hw_drop_ratelimits) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_STOP, tx->stop) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_WAKE, tx->wake))
return -EMSGSIZE;
return 0;
}
@@ -639,6 +660,24 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int
+netdev_nl_qstats_get_dump_one(struct net_device *netdev, unsigned int scope,
+ struct sk_buff *skb, const struct genl_info *info,
+ struct netdev_nl_dump_ctx *ctx)
+{
+ if (!netdev->stat_ops)
+ return 0;
+
+ switch (scope) {
+ case 0:
+ return netdev_nl_stats_by_netdev(netdev, skb, info);
+ case NETDEV_QSTATS_SCOPE_QUEUE:
+ return netdev_nl_stats_by_queue(netdev, skb, info, ctx);
+ }
+
+ return -EINVAL; /* Should not happen, per netlink policy */
+}
+
int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
@@ -646,6 +685,7 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
const struct genl_info *info = genl_info_dump(cb);
struct net *net = sock_net(skb->sk);
struct net_device *netdev;
+ unsigned int ifindex;
unsigned int scope;
int err = 0;
@@ -653,21 +693,28 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
if (info->attrs[NETDEV_A_QSTATS_SCOPE])
scope = nla_get_uint(info->attrs[NETDEV_A_QSTATS_SCOPE]);
- rtnl_lock();
- for_each_netdev_dump(net, netdev, ctx->ifindex) {
- if (!netdev->stat_ops)
- continue;
+ ifindex = 0;
+ if (info->attrs[NETDEV_A_QSTATS_IFINDEX])
+ ifindex = nla_get_u32(info->attrs[NETDEV_A_QSTATS_IFINDEX]);
- switch (scope) {
- case 0:
- err = netdev_nl_stats_by_netdev(netdev, skb, info);
- break;
- case NETDEV_QSTATS_SCOPE_QUEUE:
- err = netdev_nl_stats_by_queue(netdev, skb, info, ctx);
- break;
+ rtnl_lock();
+ if (ifindex) {
+ netdev = __dev_get_by_index(net, ifindex);
+ if (netdev && netdev->stat_ops) {
+ err = netdev_nl_qstats_get_dump_one(netdev, scope, skb,
+ info, ctx);
+ } else {
+ NL_SET_BAD_ATTR(info->extack,
+ info->attrs[NETDEV_A_QSTATS_IFINDEX]);
+ err = netdev ? -EOPNOTSUPP : -ENODEV;
+ }
+ } else {
+ for_each_netdev_dump(net, netdev, ctx->ifindex) {
+ err = netdev_nl_qstats_get_dump_one(netdev, scope, skb,
+ info, ctx);
+ if (err < 0)
+ break;
}
- if (err < 0)
- break;
}
rtnl_unlock();
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 543007f159f9..55bcacf67df3 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -316,7 +316,7 @@ static int netpoll_owner_active(struct net_device *dev)
struct napi_struct *napi;
list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) {
- if (napi->poll_owner == smp_processor_id())
+ if (READ_ONCE(napi->poll_owner) == smp_processor_id())
return 1;
}
return 0;
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index dd364d738c00..2abe6e919224 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -5,6 +5,7 @@
* Copyright (C) 2016 Red Hat, Inc.
*/
+#include <linux/error-injection.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -123,9 +124,9 @@ int page_pool_ethtool_stats_get_count(void)
}
EXPORT_SYMBOL(page_pool_ethtool_stats_get_count);
-u64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
+u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats)
{
- struct page_pool_stats *pool_stats = stats;
+ const struct page_pool_stats *pool_stats = stats;
*data++ = pool_stats->alloc_stats.fast;
*data++ = pool_stats->alloc_stats.slow;
@@ -172,19 +173,30 @@ static void page_pool_producer_unlock(struct page_pool *pool,
spin_unlock_bh(&pool->ring.producer_lock);
}
+static void page_pool_struct_check(void)
+{
+ CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_users);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_page);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_offset);
+ CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag,
+ PAGE_POOL_FRAG_GROUP_ALIGN);
+}
+
static int page_pool_init(struct page_pool *pool,
const struct page_pool_params *params,
int cpuid)
{
unsigned int ring_qsize = 1024; /* Default */
+ page_pool_struct_check();
+
memcpy(&pool->p, &params->fast, sizeof(pool->p));
memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
pool->cpuid = cpuid;
/* Validate only known flags were used */
- if (pool->p.flags & ~(PP_FLAG_ALL))
+ if (pool->slow.flags & ~PP_FLAG_ALL)
return -EINVAL;
if (pool->p.pool_size)
@@ -198,22 +210,26 @@ static int page_pool_init(struct page_pool *pool,
* DMA_BIDIRECTIONAL is for allowing page used for DMA sending,
* which is the XDP_TX use-case.
*/
- if (pool->p.flags & PP_FLAG_DMA_MAP) {
+ if (pool->slow.flags & PP_FLAG_DMA_MAP) {
if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
(pool->p.dma_dir != DMA_BIDIRECTIONAL))
return -EINVAL;
+
+ pool->dma_map = true;
}
- if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) {
+ if (pool->slow.flags & PP_FLAG_DMA_SYNC_DEV) {
/* In order to request DMA-sync-for-device the page
* needs to be mapped
*/
- if (!(pool->p.flags & PP_FLAG_DMA_MAP))
+ if (!(pool->slow.flags & PP_FLAG_DMA_MAP))
return -EINVAL;
if (!pool->p.max_len)
return -EINVAL;
+ pool->dma_sync = true;
+
/* pool->p.offset has to be set according to the address
* offset used by the DMA engine to start copying rx data
*/
@@ -222,7 +238,7 @@ static int page_pool_init(struct page_pool *pool,
pool->has_init_callback = !!pool->slow.init_callback;
#ifdef CONFIG_PAGE_POOL_STATS
- if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL)) {
+ if (!(pool->slow.flags & PP_FLAG_SYSTEM_POOL)) {
pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
if (!pool->recycle_stats)
return -ENOMEM;
@@ -232,12 +248,13 @@ static int page_pool_init(struct page_pool *pool,
* (also percpu) page pool instance.
*/
pool->recycle_stats = &pp_system_recycle_stats;
+ pool->system = true;
}
#endif
if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) {
#ifdef CONFIG_PAGE_POOL_STATS
- if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL))
+ if (!pool->system)
free_percpu(pool->recycle_stats);
#endif
return -ENOMEM;
@@ -248,7 +265,7 @@ static int page_pool_init(struct page_pool *pool,
/* Driver calling page_pool_create() also call page_pool_destroy() */
refcount_set(&pool->user_cnt, 1);
- if (pool->p.flags & PP_FLAG_DMA_MAP)
+ if (pool->dma_map)
get_device(pool->p.dev);
return 0;
@@ -258,11 +275,11 @@ static void page_pool_uninit(struct page_pool *pool)
{
ptr_ring_cleanup(&pool->ring, NULL);
- if (pool->p.flags & PP_FLAG_DMA_MAP)
+ if (pool->dma_map)
put_device(pool->p.dev);
#ifdef CONFIG_PAGE_POOL_STATS
- if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL))
+ if (!pool->system)
free_percpu(pool->recycle_stats);
#endif
}
@@ -311,19 +328,18 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
}
EXPORT_SYMBOL(page_pool_create);
-static void page_pool_return_page(struct page_pool *pool, struct page *page);
+static void page_pool_return_page(struct page_pool *pool, netmem_ref netmem);
-noinline
-static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
+static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool)
{
struct ptr_ring *r = &pool->ring;
- struct page *page;
+ netmem_ref netmem;
int pref_nid; /* preferred NUMA node */
/* Quicker fallback, avoid locks when ring is empty */
if (__ptr_ring_empty(r)) {
alloc_stat_inc(pool, empty);
- return NULL;
+ return 0;
}
/* Softirq guarantee CPU and thus NUMA node is stable. This,
@@ -338,64 +354,74 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
/* Refill alloc array, but only if NUMA match */
do {
- page = __ptr_ring_consume(r);
- if (unlikely(!page))
+ netmem = (__force netmem_ref)__ptr_ring_consume(r);
+ if (unlikely(!netmem))
break;
- if (likely(page_to_nid(page) == pref_nid)) {
- pool->alloc.cache[pool->alloc.count++] = page;
+ if (likely(page_to_nid(netmem_to_page(netmem)) == pref_nid)) {
+ pool->alloc.cache[pool->alloc.count++] = netmem;
} else {
/* NUMA mismatch;
* (1) release 1 page to page-allocator and
* (2) break out to fallthrough to alloc_pages_node.
* This limit stress on page buddy alloactor.
*/
- page_pool_return_page(pool, page);
+ page_pool_return_page(pool, netmem);
alloc_stat_inc(pool, waive);
- page = NULL;
+ netmem = 0;
break;
}
} while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);
/* Return last page */
if (likely(pool->alloc.count > 0)) {
- page = pool->alloc.cache[--pool->alloc.count];
+ netmem = pool->alloc.cache[--pool->alloc.count];
alloc_stat_inc(pool, refill);
}
- return page;
+ return netmem;
}
/* fast path */
-static struct page *__page_pool_get_cached(struct page_pool *pool)
+static netmem_ref __page_pool_get_cached(struct page_pool *pool)
{
- struct page *page;
+ netmem_ref netmem;
/* Caller MUST guarantee safe non-concurrent access, e.g. softirq */
if (likely(pool->alloc.count)) {
/* Fast-path */
- page = pool->alloc.cache[--pool->alloc.count];
+ netmem = pool->alloc.cache[--pool->alloc.count];
alloc_stat_inc(pool, fast);
} else {
- page = page_pool_refill_alloc_cache(pool);
+ netmem = page_pool_refill_alloc_cache(pool);
}
- return page;
+ return netmem;
}
-static void page_pool_dma_sync_for_device(struct page_pool *pool,
- struct page *page,
- unsigned int dma_sync_size)
+static void __page_pool_dma_sync_for_device(const struct page_pool *pool,
+ netmem_ref netmem,
+ u32 dma_sync_size)
{
- dma_addr_t dma_addr = page_pool_get_dma_addr(page);
+#if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC)
+ dma_addr_t dma_addr = page_pool_get_dma_addr_netmem(netmem);
dma_sync_size = min(dma_sync_size, pool->p.max_len);
- dma_sync_single_range_for_device(pool->p.dev, dma_addr,
- pool->p.offset, dma_sync_size,
- pool->p.dma_dir);
+ __dma_sync_single_for_device(pool->p.dev, dma_addr + pool->p.offset,
+ dma_sync_size, pool->p.dma_dir);
+#endif
}
-static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
+static __always_inline void
+page_pool_dma_sync_for_device(const struct page_pool *pool,
+ netmem_ref netmem,
+ u32 dma_sync_size)
+{
+ if (pool->dma_sync && dma_dev_need_sync(pool->p.dev))
+ __page_pool_dma_sync_for_device(pool, netmem, dma_sync_size);
+}
+
+static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem)
{
dma_addr_t dma;
@@ -404,32 +430,32 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
* into page private data (i.e 32bit cpu with 64bit DMA caps)
* This mapping is kept for lifetime of page, until leaving pool.
*/
- dma = dma_map_page_attrs(pool->p.dev, page, 0,
- (PAGE_SIZE << pool->p.order),
- pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC |
- DMA_ATTR_WEAK_ORDERING);
+ dma = dma_map_page_attrs(pool->p.dev, netmem_to_page(netmem), 0,
+ (PAGE_SIZE << pool->p.order), pool->p.dma_dir,
+ DMA_ATTR_SKIP_CPU_SYNC |
+ DMA_ATTR_WEAK_ORDERING);
if (dma_mapping_error(pool->p.dev, dma))
return false;
- if (page_pool_set_dma_addr(page, dma))
+ if (page_pool_set_dma_addr_netmem(netmem, dma))
goto unmap_failed;
- if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
- page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
+ page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len);
return true;
unmap_failed:
- WARN_ON_ONCE("unexpected DMA address, please report to netdev@");
+ WARN_ONCE(1, "unexpected DMA address, please report to netdev@");
dma_unmap_page_attrs(pool->p.dev, dma,
PAGE_SIZE << pool->p.order, pool->p.dma_dir,
DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
return false;
}
-static void page_pool_set_pp_info(struct page_pool *pool,
- struct page *page)
+static void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
{
+ struct page *page = netmem_to_page(netmem);
+
page->pp = pool;
page->pp_magic |= PP_SIGNATURE;
@@ -439,13 +465,15 @@ static void page_pool_set_pp_info(struct page_pool *pool,
* is dirtying the same cache line as the page->pp_magic above, so
* the overhead is negligible.
*/
- page_pool_fragment_page(page, 1);
+ page_pool_fragment_netmem(netmem, 1);
if (pool->has_init_callback)
- pool->slow.init_callback(page, pool->slow.init_arg);
+ pool->slow.init_callback(netmem, pool->slow.init_arg);
}
-static void page_pool_clear_pp_info(struct page *page)
+static void page_pool_clear_pp_info(netmem_ref netmem)
{
+ struct page *page = netmem_to_page(netmem);
+
page->pp_magic = 0;
page->pp = NULL;
}
@@ -460,35 +488,34 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
if (unlikely(!page))
return NULL;
- if ((pool->p.flags & PP_FLAG_DMA_MAP) &&
- unlikely(!page_pool_dma_map(pool, page))) {
+ if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page)))) {
put_page(page);
return NULL;
}
alloc_stat_inc(pool, slow_high_order);
- page_pool_set_pp_info(pool, page);
+ page_pool_set_pp_info(pool, page_to_netmem(page));
/* Track how many pages are held 'in-flight' */
pool->pages_state_hold_cnt++;
- trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
+ trace_page_pool_state_hold(pool, page_to_netmem(page),
+ pool->pages_state_hold_cnt);
return page;
}
/* slow path */
-noinline
-static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
- gfp_t gfp)
+static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool,
+ gfp_t gfp)
{
const int bulk = PP_ALLOC_CACHE_REFILL;
- unsigned int pp_flags = pool->p.flags;
unsigned int pp_order = pool->p.order;
- struct page *page;
+ bool dma_map = pool->dma_map;
+ netmem_ref netmem;
int i, nr_pages;
/* Don't support bulk alloc for high-order pages */
if (unlikely(pp_order))
- return __page_pool_alloc_page_order(pool, gfp);
+ return page_to_netmem(__page_pool_alloc_page_order(pool, gfp));
/* Unnecessary as alloc cache is empty, but guarantees zero count */
if (unlikely(pool->alloc.count > 0))
@@ -497,59 +524,66 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
/* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */
memset(&pool->alloc.cache, 0, sizeof(void *) * bulk);
- nr_pages = alloc_pages_bulk_array_node(gfp, pool->p.nid, bulk,
- pool->alloc.cache);
+ nr_pages = alloc_pages_bulk_array_node(gfp,
+ pool->p.nid, bulk,
+ (struct page **)pool->alloc.cache);
if (unlikely(!nr_pages))
- return NULL;
+ return 0;
/* Pages have been filled into alloc.cache array, but count is zero and
* page element have not been (possibly) DMA mapped.
*/
for (i = 0; i < nr_pages; i++) {
- page = pool->alloc.cache[i];
- if ((pp_flags & PP_FLAG_DMA_MAP) &&
- unlikely(!page_pool_dma_map(pool, page))) {
- put_page(page);
+ netmem = pool->alloc.cache[i];
+ if (dma_map && unlikely(!page_pool_dma_map(pool, netmem))) {
+ put_page(netmem_to_page(netmem));
continue;
}
- page_pool_set_pp_info(pool, page);
- pool->alloc.cache[pool->alloc.count++] = page;
+ page_pool_set_pp_info(pool, netmem);
+ pool->alloc.cache[pool->alloc.count++] = netmem;
/* Track how many pages are held 'in-flight' */
pool->pages_state_hold_cnt++;
- trace_page_pool_state_hold(pool, page,
+ trace_page_pool_state_hold(pool, netmem,
pool->pages_state_hold_cnt);
}
/* Return last page */
if (likely(pool->alloc.count > 0)) {
- page = pool->alloc.cache[--pool->alloc.count];
+ netmem = pool->alloc.cache[--pool->alloc.count];
alloc_stat_inc(pool, slow);
} else {
- page = NULL;
+ netmem = 0;
}
/* When page just alloc'ed is should/must have refcnt 1. */
- return page;
+ return netmem;
}
/* For using page_pool replace: alloc_pages() API calls, but provide
* synchronization guarantee for allocation side.
*/
-struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
+netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp)
{
- struct page *page;
+ netmem_ref netmem;
/* Fast-path: Get a page from cache */
- page = __page_pool_get_cached(pool);
- if (page)
- return page;
+ netmem = __page_pool_get_cached(pool);
+ if (netmem)
+ return netmem;
/* Slow-path: cache empty, do real allocation */
- page = __page_pool_alloc_pages_slow(pool, gfp);
- return page;
+ netmem = __page_pool_alloc_pages_slow(pool, gfp);
+ return netmem;
+}
+EXPORT_SYMBOL(page_pool_alloc_netmem);
+
+struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
+{
+ return netmem_to_page(page_pool_alloc_netmem(pool, gfp));
}
EXPORT_SYMBOL(page_pool_alloc_pages);
+ALLOW_ERROR_INJECTION(page_pool_alloc_pages, NULL);
/* Calculate distance between two u32 values, valid if distance is below 2^(31)
* https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution
@@ -575,24 +609,24 @@ s32 page_pool_inflight(const struct page_pool *pool, bool strict)
return inflight;
}
-static __always_inline
-void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
+static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
+ netmem_ref netmem)
{
dma_addr_t dma;
- if (!(pool->p.flags & PP_FLAG_DMA_MAP))
+ if (!pool->dma_map)
/* Always account for inflight pages, even if we didn't
* map them
*/
return;
- dma = page_pool_get_dma_addr(page);
+ dma = page_pool_get_dma_addr_netmem(netmem);
/* When page is unmapped, it cannot be returned to our pool */
dma_unmap_page_attrs(pool->p.dev, dma,
PAGE_SIZE << pool->p.order, pool->p.dma_dir,
DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
- page_pool_set_dma_addr(page, 0);
+ page_pool_set_dma_addr_netmem(netmem, 0);
}
/* Disconnects a page (from a page_pool). API users can have a need
@@ -600,35 +634,34 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
* a regular page (that will eventually be returned to the normal
* page-allocator via put_page).
*/
-void page_pool_return_page(struct page_pool *pool, struct page *page)
+void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
{
int count;
- __page_pool_release_page_dma(pool, page);
-
- page_pool_clear_pp_info(page);
+ __page_pool_release_page_dma(pool, netmem);
/* This may be the last page returned, releasing the pool, so
* it is not safe to reference pool afterwards.
*/
count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
- trace_page_pool_state_release(pool, page, count);
+ trace_page_pool_state_release(pool, netmem, count);
- put_page(page);
+ page_pool_clear_pp_info(netmem);
+ put_page(netmem_to_page(netmem));
/* An optimization would be to call __free_pages(page, pool->p.order)
* knowing page is not part of page-cache (thus avoiding a
* __page_cache_release() call).
*/
}
-static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
+static bool page_pool_recycle_in_ring(struct page_pool *pool, netmem_ref netmem)
{
int ret;
/* BH protection not needed if current is softirq */
if (in_softirq())
- ret = ptr_ring_produce(&pool->ring, page);
+ ret = ptr_ring_produce(&pool->ring, (__force void *)netmem);
else
- ret = ptr_ring_produce_bh(&pool->ring, page);
+ ret = ptr_ring_produce_bh(&pool->ring, (__force void *)netmem);
if (!ret) {
recycle_stat_inc(pool, ring);
@@ -643,7 +676,7 @@ static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
*
* Caller must provide appropriate safe context.
*/
-static bool page_pool_recycle_in_cache(struct page *page,
+static bool page_pool_recycle_in_cache(netmem_ref netmem,
struct page_pool *pool)
{
if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) {
@@ -652,24 +685,25 @@ static bool page_pool_recycle_in_cache(struct page *page,
}
/* Caller MUST have verified/know (page_ref_count(page) == 1) */
- pool->alloc.cache[pool->alloc.count++] = page;
+ pool->alloc.cache[pool->alloc.count++] = netmem;
recycle_stat_inc(pool, cached);
return true;
}
-static bool __page_pool_page_can_be_recycled(const struct page *page)
+static bool __page_pool_page_can_be_recycled(netmem_ref netmem)
{
- return page_ref_count(page) == 1 && !page_is_pfmemalloc(page);
+ return page_ref_count(netmem_to_page(netmem)) == 1 &&
+ !page_is_pfmemalloc(netmem_to_page(netmem));
}
/* If the page refcnt == 1, this will try to recycle the page.
- * if PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for
+ * If pool->dma_sync is set, we'll try to sync the DMA area for
* the configured size min(dma_sync_size, pool->max_len).
* If the page refcnt != 1, then the page will be returned to memory
* subsystem.
*/
-static __always_inline struct page *
-__page_pool_put_page(struct page_pool *pool, struct page *page,
+static __always_inline netmem_ref
+__page_pool_put_page(struct page_pool *pool, netmem_ref netmem,
unsigned int dma_sync_size, bool allow_direct)
{
lockdep_assert_no_hardirq();
@@ -683,19 +717,16 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
* page is NOT reusable when allocated when system is under
* some pressure. (page_is_pfmemalloc)
*/
- if (likely(__page_pool_page_can_be_recycled(page))) {
+ if (likely(__page_pool_page_can_be_recycled(netmem))) {
/* Read barrier done in page_ref_count / READ_ONCE */
- if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
- page_pool_dma_sync_for_device(pool, page,
- dma_sync_size);
+ page_pool_dma_sync_for_device(pool, netmem, dma_sync_size);
- if (allow_direct && in_softirq() &&
- page_pool_recycle_in_cache(page, pool))
- return NULL;
+ if (allow_direct && page_pool_recycle_in_cache(netmem, pool))
+ return 0;
/* Page found as candidate for recycling */
- return page;
+ return netmem;
}
/* Fallback/non-XDP mode: API user have elevated refcnt.
*
@@ -711,21 +742,56 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
* will be invoking put_page.
*/
recycle_stat_inc(pool, released_refcnt);
- page_pool_return_page(pool, page);
+ page_pool_return_page(pool, netmem);
- return NULL;
+ return 0;
}
-void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
- unsigned int dma_sync_size, bool allow_direct)
+static bool page_pool_napi_local(const struct page_pool *pool)
+{
+ const struct napi_struct *napi;
+ u32 cpuid;
+
+ if (unlikely(!in_softirq()))
+ return false;
+
+ /* Allow direct recycle if we have reasons to believe that we are
+ * in the same context as the consumer would run, so there's
+ * no possible race.
+ * __page_pool_put_page() makes sure we're not in hardirq context
+ * and interrupts are enabled prior to accessing the cache.
+ */
+ cpuid = smp_processor_id();
+ if (READ_ONCE(pool->cpuid) == cpuid)
+ return true;
+
+ napi = READ_ONCE(pool->p.napi);
+
+ return napi && READ_ONCE(napi->list_owner) == cpuid;
+}
+
+void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem,
+ unsigned int dma_sync_size, bool allow_direct)
{
- page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
- if (page && !page_pool_recycle_in_ring(pool, page)) {
+ if (!allow_direct)
+ allow_direct = page_pool_napi_local(pool);
+
+ netmem =
+ __page_pool_put_page(pool, netmem, dma_sync_size, allow_direct);
+ if (netmem && !page_pool_recycle_in_ring(pool, netmem)) {
/* Cache full, fallback to free pages */
recycle_stat_inc(pool, ring_full);
- page_pool_return_page(pool, page);
+ page_pool_return_page(pool, netmem);
}
}
+EXPORT_SYMBOL(page_pool_put_unrefed_netmem);
+
+void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
+ unsigned int dma_sync_size, bool allow_direct)
+{
+ page_pool_put_unrefed_netmem(pool, page_to_netmem(page), dma_sync_size,
+ allow_direct);
+}
EXPORT_SYMBOL(page_pool_put_unrefed_page);
/**
@@ -747,22 +813,25 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count)
{
int i, bulk_len = 0;
+ bool allow_direct;
bool in_softirq;
+ allow_direct = page_pool_napi_local(pool);
+
for (i = 0; i < count; i++) {
- struct page *page = virt_to_head_page(data[i]);
+ netmem_ref netmem = page_to_netmem(virt_to_head_page(data[i]));
/* It is not the last user for the page frag case */
- if (!page_pool_is_last_ref(page))
+ if (!page_pool_is_last_ref(netmem))
continue;
- page = __page_pool_put_page(pool, page, -1, false);
+ netmem = __page_pool_put_page(pool, netmem, -1, allow_direct);
/* Approved for bulk recycling in ptr_ring cache */
- if (page)
- data[bulk_len++] = page;
+ if (netmem)
+ data[bulk_len++] = (__force void *)netmem;
}
- if (unlikely(!bulk_len))
+ if (!bulk_len)
return;
/* Bulk producer into ptr_ring page_pool cache */
@@ -785,100 +854,106 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
* since put_page() with refcnt == 1 can be an expensive operation
*/
for (; i < bulk_len; i++)
- page_pool_return_page(pool, data[i]);
+ page_pool_return_page(pool, (__force netmem_ref)data[i]);
}
EXPORT_SYMBOL(page_pool_put_page_bulk);
-static struct page *page_pool_drain_frag(struct page_pool *pool,
- struct page *page)
+static netmem_ref page_pool_drain_frag(struct page_pool *pool,
+ netmem_ref netmem)
{
long drain_count = BIAS_MAX - pool->frag_users;
/* Some user is still using the page frag */
- if (likely(page_pool_unref_page(page, drain_count)))
- return NULL;
-
- if (__page_pool_page_can_be_recycled(page)) {
- if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
- page_pool_dma_sync_for_device(pool, page, -1);
+ if (likely(page_pool_unref_netmem(netmem, drain_count)))
+ return 0;
- return page;
+ if (__page_pool_page_can_be_recycled(netmem)) {
+ page_pool_dma_sync_for_device(pool, netmem, -1);
+ return netmem;
}
- page_pool_return_page(pool, page);
- return NULL;
+ page_pool_return_page(pool, netmem);
+ return 0;
}
static void page_pool_free_frag(struct page_pool *pool)
{
long drain_count = BIAS_MAX - pool->frag_users;
- struct page *page = pool->frag_page;
+ netmem_ref netmem = pool->frag_page;
- pool->frag_page = NULL;
+ pool->frag_page = 0;
- if (!page || page_pool_unref_page(page, drain_count))
+ if (!netmem || page_pool_unref_netmem(netmem, drain_count))
return;
- page_pool_return_page(pool, page);
+ page_pool_return_page(pool, netmem);
}
-struct page *page_pool_alloc_frag(struct page_pool *pool,
- unsigned int *offset,
- unsigned int size, gfp_t gfp)
+netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool,
+ unsigned int *offset, unsigned int size,
+ gfp_t gfp)
{
unsigned int max_size = PAGE_SIZE << pool->p.order;
- struct page *page = pool->frag_page;
+ netmem_ref netmem = pool->frag_page;
if (WARN_ON(size > max_size))
- return NULL;
+ return 0;
size = ALIGN(size, dma_get_cache_alignment());
*offset = pool->frag_offset;
- if (page && *offset + size > max_size) {
- page = page_pool_drain_frag(pool, page);
- if (page) {
+ if (netmem && *offset + size > max_size) {
+ netmem = page_pool_drain_frag(pool, netmem);
+ if (netmem) {
alloc_stat_inc(pool, fast);
goto frag_reset;
}
}
- if (!page) {
- page = page_pool_alloc_pages(pool, gfp);
- if (unlikely(!page)) {
- pool->frag_page = NULL;
- return NULL;
+ if (!netmem) {
+ netmem = page_pool_alloc_netmem(pool, gfp);
+ if (unlikely(!netmem)) {
+ pool->frag_page = 0;
+ return 0;
}
- pool->frag_page = page;
+ pool->frag_page = netmem;
frag_reset:
pool->frag_users = 1;
*offset = 0;
pool->frag_offset = size;
- page_pool_fragment_page(page, BIAS_MAX);
- return page;
+ page_pool_fragment_netmem(netmem, BIAS_MAX);
+ return netmem;
}
pool->frag_users++;
pool->frag_offset = *offset + size;
alloc_stat_inc(pool, fast);
- return page;
+ return netmem;
+}
+EXPORT_SYMBOL(page_pool_alloc_frag_netmem);
+
+struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
+ unsigned int size, gfp_t gfp)
+{
+ return netmem_to_page(page_pool_alloc_frag_netmem(pool, offset, size,
+ gfp));
}
EXPORT_SYMBOL(page_pool_alloc_frag);
static void page_pool_empty_ring(struct page_pool *pool)
{
- struct page *page;
+ netmem_ref netmem;
/* Empty recycle ring */
- while ((page = ptr_ring_consume_bh(&pool->ring))) {
+ while ((netmem = (__force netmem_ref)ptr_ring_consume_bh(&pool->ring))) {
/* Verify the refcnt invariant of cached pages */
- if (!(page_ref_count(page) == 1))
+ if (!(page_ref_count(netmem_to_page(netmem)) == 1))
pr_crit("%s() page_pool refcnt %d violation\n",
- __func__, page_ref_count(page));
+ __func__, netmem_ref_count(netmem));
- page_pool_return_page(pool, page);
+ page_pool_return_page(pool, netmem);
}
}
@@ -894,7 +969,7 @@ static void __page_pool_destroy(struct page_pool *pool)
static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
{
- struct page *page;
+ netmem_ref netmem;
if (pool->destroy_cnt)
return;
@@ -904,8 +979,8 @@ static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
* call concurrently.
*/
while (pool->alloc.count) {
- page = pool->alloc.cache[--pool->alloc.count];
- page_pool_return_page(pool, page);
+ netmem = pool->alloc.cache[--pool->alloc.count];
+ page_pool_return_page(pool, netmem);
}
}
@@ -959,17 +1034,17 @@ static void page_pool_release_retry(struct work_struct *wq)
}
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
- struct xdp_mem_info *mem)
+ const struct xdp_mem_info *mem)
{
refcount_inc(&pool->user_cnt);
pool->disconnect = disconnect;
pool->xdp_mem_id = mem->id;
}
-static void page_pool_disable_direct_recycling(struct page_pool *pool)
+void page_pool_disable_direct_recycling(struct page_pool *pool)
{
/* Disable direct recycling based on pool->cpuid.
- * Paired with READ_ONCE() in napi_pp_put_page().
+ * Paired with READ_ONCE() in page_pool_napi_local().
*/
WRITE_ONCE(pool->cpuid, -1);
@@ -979,11 +1054,12 @@ static void page_pool_disable_direct_recycling(struct page_pool *pool)
/* To avoid races with recycling and additional barriers make sure
* pool and NAPI are unlinked when NAPI is disabled.
*/
- WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state) ||
- READ_ONCE(pool->p.napi->list_owner) != -1);
+ WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state));
+ WARN_ON(READ_ONCE(pool->p.napi->list_owner) != -1);
WRITE_ONCE(pool->p.napi, NULL);
}
+EXPORT_SYMBOL(page_pool_disable_direct_recycling);
void page_pool_destroy(struct page_pool *pool)
{
@@ -1011,15 +1087,15 @@ EXPORT_SYMBOL(page_pool_destroy);
/* Caller must provide appropriate safe context, e.g. NAPI. */
void page_pool_update_nid(struct page_pool *pool, int new_nid)
{
- struct page *page;
+ netmem_ref netmem;
trace_page_pool_update_nid(pool, new_nid);
pool->p.nid = new_nid;
/* Flush pool alloc cache, as refill will check NUMA node */
while (pool->alloc.count) {
- page = pool->alloc.cache[--pool->alloc.count];
- page_pool_return_page(pool, page);
+ netmem = pool->alloc.cache[--pool->alloc.count];
+ page_pool_return_page(pool, netmem);
}
}
EXPORT_SYMBOL(page_pool_update_nid);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a3d7847ce69d..73fd7f543fd0 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1036,8 +1036,8 @@ static size_t rtnl_proto_down_size(const struct net_device *dev)
{
size_t size = nla_total_size(1);
- if (dev->proto_down_reason)
- size += nla_total_size(0) + nla_total_size(4);
+ /* Assume dev->proto_down_reason is not zero. */
+ size += nla_total_size(0) + nla_total_size(4);
return size;
}
@@ -1477,13 +1477,15 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb,
static u32 rtnl_xdp_prog_skb(struct net_device *dev)
{
const struct bpf_prog *generic_xdp_prog;
+ u32 res = 0;
- ASSERT_RTNL();
+ rcu_read_lock();
+ generic_xdp_prog = rcu_dereference(dev->xdp_prog);
+ if (generic_xdp_prog)
+ res = generic_xdp_prog->aux->id;
+ rcu_read_unlock();
- generic_xdp_prog = rtnl_dereference(dev->xdp_prog);
- if (!generic_xdp_prog)
- return 0;
- return generic_xdp_prog->aux->id;
+ return res;
}
static u32 rtnl_xdp_prog_drv(struct net_device *dev)
@@ -1603,7 +1605,8 @@ static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev)
upper_dev = netdev_master_upper_dev_get_rcu(dev);
if (upper_dev)
- ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex);
+ ret = nla_put_u32(skb, IFLA_MASTER,
+ READ_ONCE(upper_dev->ifindex));
rcu_read_unlock();
return ret;
@@ -1736,10 +1739,10 @@ static int rtnl_fill_proto_down(struct sk_buff *skb,
struct nlattr *pr;
u32 preason;
- if (nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
+ if (nla_put_u8(skb, IFLA_PROTO_DOWN, READ_ONCE(dev->proto_down)))
goto nla_put_failure;
- preason = dev->proto_down_reason;
+ preason = READ_ONCE(dev->proto_down_reason);
if (!preason)
return 0;
@@ -1812,6 +1815,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
u32 event, int *new_nsid, int new_ifindex,
int tgt_netnsid, gfp_t gfp)
{
+ char devname[IFNAMSIZ];
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
struct Qdisc *qdisc;
@@ -1824,41 +1828,51 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
ifm = nlmsg_data(nlh);
ifm->ifi_family = AF_UNSPEC;
ifm->__ifi_pad = 0;
- ifm->ifi_type = dev->type;
- ifm->ifi_index = dev->ifindex;
+ ifm->ifi_type = READ_ONCE(dev->type);
+ ifm->ifi_index = READ_ONCE(dev->ifindex);
ifm->ifi_flags = dev_get_flags(dev);
ifm->ifi_change = change;
if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid))
goto nla_put_failure;
- qdisc = rtnl_dereference(dev->qdisc);
- if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
- nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) ||
+ netdev_copy_name(dev, devname);
+ if (nla_put_string(skb, IFLA_IFNAME, devname))
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, IFLA_TXQLEN, READ_ONCE(dev->tx_queue_len)) ||
nla_put_u8(skb, IFLA_OPERSTATE,
- netif_running(dev) ? dev->operstate : IF_OPER_DOWN) ||
- nla_put_u8(skb, IFLA_LINKMODE, dev->link_mode) ||
- nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
- nla_put_u32(skb, IFLA_MIN_MTU, dev->min_mtu) ||
- nla_put_u32(skb, IFLA_MAX_MTU, dev->max_mtu) ||
- nla_put_u32(skb, IFLA_GROUP, dev->group) ||
- nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) ||
- nla_put_u32(skb, IFLA_ALLMULTI, dev->allmulti) ||
- nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) ||
- nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) ||
- nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) ||
- nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) ||
- nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE, dev->gso_ipv4_max_size) ||
- nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE, dev->gro_ipv4_max_size) ||
- nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) ||
- nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) ||
+ netif_running(dev) ? READ_ONCE(dev->operstate) :
+ IF_OPER_DOWN) ||
+ nla_put_u8(skb, IFLA_LINKMODE, READ_ONCE(dev->link_mode)) ||
+ nla_put_u32(skb, IFLA_MTU, READ_ONCE(dev->mtu)) ||
+ nla_put_u32(skb, IFLA_MIN_MTU, READ_ONCE(dev->min_mtu)) ||
+ nla_put_u32(skb, IFLA_MAX_MTU, READ_ONCE(dev->max_mtu)) ||
+ nla_put_u32(skb, IFLA_GROUP, READ_ONCE(dev->group)) ||
+ nla_put_u32(skb, IFLA_PROMISCUITY, READ_ONCE(dev->promiscuity)) ||
+ nla_put_u32(skb, IFLA_ALLMULTI, READ_ONCE(dev->allmulti)) ||
+ nla_put_u32(skb, IFLA_NUM_TX_QUEUES,
+ READ_ONCE(dev->num_tx_queues)) ||
+ nla_put_u32(skb, IFLA_GSO_MAX_SEGS,
+ READ_ONCE(dev->gso_max_segs)) ||
+ nla_put_u32(skb, IFLA_GSO_MAX_SIZE,
+ READ_ONCE(dev->gso_max_size)) ||
+ nla_put_u32(skb, IFLA_GRO_MAX_SIZE,
+ READ_ONCE(dev->gro_max_size)) ||
+ nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE,
+ READ_ONCE(dev->gso_ipv4_max_size)) ||
+ nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE,
+ READ_ONCE(dev->gro_ipv4_max_size)) ||
+ nla_put_u32(skb, IFLA_TSO_MAX_SIZE,
+ READ_ONCE(dev->tso_max_size)) ||
+ nla_put_u32(skb, IFLA_TSO_MAX_SEGS,
+ READ_ONCE(dev->tso_max_segs)) ||
#ifdef CONFIG_RPS
- nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
+ nla_put_u32(skb, IFLA_NUM_RX_QUEUES,
+ READ_ONCE(dev->num_rx_queues)) ||
#endif
put_master_ifindex(skb, dev) ||
nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
- (qdisc &&
- nla_put_string(skb, IFLA_QDISC, qdisc->ops->id)) ||
nla_put_ifalias(skb, dev) ||
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
atomic_read(&dev->carrier_up_count) +
@@ -1909,9 +1923,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
goto nla_put_failure;
}
- if (rtnl_fill_link_netnsid(skb, dev, src_net, gfp))
- goto nla_put_failure;
-
if (new_nsid &&
nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0)
goto nla_put_failure;
@@ -1924,6 +1935,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
goto nla_put_failure;
rcu_read_lock();
+ if (rtnl_fill_link_netnsid(skb, dev, src_net, GFP_ATOMIC))
+ goto nla_put_failure_rcu;
+ qdisc = rcu_dereference(dev->qdisc);
+ if (qdisc && nla_put_string(skb, IFLA_QDISC, qdisc->ops->id))
+ goto nla_put_failure_rcu;
if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
goto nla_put_failure_rcu;
if (rtnl_fill_link_ifmap(skb, dev))
@@ -2530,7 +2546,7 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) {
if (nla_type(attr) != IFLA_VF_VLAN_INFO ||
- nla_len(attr) < NLA_HDRLEN) {
+ nla_len(attr) < sizeof(struct ifla_vf_vlan_info)) {
return -EINVAL;
}
if (len >= MAX_VLAN_LIST_LEN)
@@ -3272,7 +3288,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
- dev = rtnl_dev_get(net, tb);
+ dev = rtnl_dev_get(tgt_net, tb);
else if (tb[IFLA_GROUP])
err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP]));
else
@@ -3953,22 +3969,28 @@ static int rtnl_dellinkprop(struct sk_buff *skb, struct nlmsghdr *nlh,
return rtnl_linkprop(RTM_DELLINKPROP, skb, nlh, extack);
}
-static u32 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
+static noinline_for_stack u32 rtnl_calcit(struct sk_buff *skb,
+ struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
size_t min_ifinfo_dump_size = 0;
- struct nlattr *tb[IFLA_MAX+1];
u32 ext_filter_mask = 0;
struct net_device *dev;
- int hdrlen;
+ struct nlattr *nla;
+ int hdrlen, rem;
/* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */
hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ?
sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
- if (nlmsg_parse_deprecated(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, NULL) >= 0) {
- if (tb[IFLA_EXT_MASK])
- ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+ if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
+ return NLMSG_GOODSIZE;
+
+ nla_for_each_attr_type(nla, IFLA_EXT_MASK,
+ nlmsg_attrdata(nlh, hdrlen),
+ nlmsg_attrlen(nlh, hdrlen), rem) {
+ if (nla_len(nla) == sizeof(u32))
+ ext_filter_mask = nla_get_u32(nla);
}
if (!ext_filter_mask)
@@ -5245,15 +5267,14 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (br_spec) {
- nla_for_each_nested(attr, br_spec, rem) {
- if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
- if (nla_len(attr) < sizeof(flags))
- return -EINVAL;
+ nla_for_each_nested_type(attr, IFLA_BRIDGE_FLAGS, br_spec,
+ rem) {
+ if (nla_len(attr) < sizeof(flags))
+ return -EINVAL;
- have_flags = true;
- flags = nla_get_u16(attr);
- break;
- }
+ have_flags = true;
+ flags = nla_get_u16(attr);
+ break;
}
}
@@ -5962,19 +5983,17 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh,
static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct netlink_ext_ack *extack = cb->extack;
- int h, s_h, err, s_idx, s_idxattr, s_prividx;
struct rtnl_stats_dump_filters filters;
struct net *net = sock_net(skb->sk);
unsigned int flags = NLM_F_MULTI;
struct if_stats_msg *ifsm;
- struct hlist_head *head;
+ struct {
+ unsigned long ifindex;
+ int idxattr;
+ int prividx;
+ } *ctx = (void *)cb->ctx;
struct net_device *dev;
- int idx = 0;
-
- s_h = cb->args[0];
- s_idx = cb->args[1];
- s_idxattr = cb->args[2];
- s_prividx = cb->args[3];
+ int err;
cb->seq = net->dev_base_seq;
@@ -5993,39 +6012,26 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (err)
return err;
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &net->dev_index_head[h];
- hlist_for_each_entry(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, 0,
- flags, &filters,
- &s_idxattr, &s_prividx,
- extack);
- /* If we ran out of room on the first message,
- * we're in trouble
- */
- WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, 0,
+ flags, &filters,
+ &ctx->idxattr, &ctx->prividx,
+ extack);
+ /* If we ran out of room on the first message,
+ * we're in trouble.
+ */
+ WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
- if (err < 0)
- goto out;
- s_prividx = 0;
- s_idxattr = 0;
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-cont:
- idx++;
- }
+ if (err < 0)
+ break;
+ ctx->prividx = 0;
+ ctx->idxattr = 0;
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
}
-out:
- cb->args[3] = s_prividx;
- cb->args[2] = s_idxattr;
- cb->args[1] = idx;
- cb->args[0] = h;
- return skb->len;
+ return err;
}
void rtnl_offload_xstats_notify(struct net_device *dev)
@@ -6484,6 +6490,52 @@ static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
/* Process one rtnetlink message. */
+static int rtnl_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ const bool needs_lock = !(cb->flags & RTNL_FLAG_DUMP_UNLOCKED);
+ rtnl_dumpit_func dumpit = cb->data;
+ int err;
+
+ /* Previous iteration have already finished, avoid calling->dumpit()
+ * again, it may not expect to be called after it reached the end.
+ */
+ if (!dumpit)
+ return 0;
+
+ if (needs_lock)
+ rtnl_lock();
+ err = dumpit(skb, cb);
+ if (needs_lock)
+ rtnl_unlock();
+
+ /* Old dump handlers used to send NLM_DONE as in a separate recvmsg().
+ * Some applications which parse netlink manually depend on this.
+ */
+ if (cb->flags & RTNL_FLAG_DUMP_SPLIT_NLM_DONE) {
+ if (err < 0 && err != -EMSGSIZE)
+ return err;
+ if (!err)
+ cb->data = NULL;
+
+ return skb->len;
+ }
+ return err;
+}
+
+static int rtnetlink_dump_start(struct sock *ssk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ struct netlink_dump_control *control)
+{
+ if (control->flags & RTNL_FLAG_DUMP_SPLIT_NLM_DONE ||
+ !(control->flags & RTNL_FLAG_DUMP_UNLOCKED)) {
+ WARN_ON(control->data);
+ control->data = control->dump;
+ control->dump = rtnl_dumpit;
+ }
+
+ return netlink_dump_start(ssk, skb, nlh, control);
+}
+
static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -6548,7 +6600,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
.module = owner,
.flags = flags,
};
- err = netlink_dump_start(rtnl, skb, nlh, &c);
+ err = rtnetlink_dump_start(rtnl, skb, nlh, &c);
/* netlink_dump_start() will keep a reference on
* module if dump is still in progress.
*/
@@ -6663,7 +6715,6 @@ static int __net_init rtnetlink_net_init(struct net *net)
struct netlink_kernel_cfg cfg = {
.groups = RTNLGRP_MAX,
.input = rtnetlink_rcv,
- .cb_mutex = &rtnl_mutex,
.flags = NL_CFG_F_NONROOT_RECV,
.bind = rtnetlink_bind,
};
@@ -6694,7 +6745,7 @@ void __init rtnetlink_init(void)
register_netdevice_notifier(&rtnetlink_dev_notifier);
rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
- rtnl_dump_ifinfo, 0);
+ rtnl_dump_ifinfo, RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0);
diff --git a/net/core/scm.c b/net/core/scm.c
index 9cd4b0a01cd6..4f6a14babe5a 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -89,6 +89,12 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
fpl->count_unix = 0;
fpl->max = SCM_MAX_FD;
fpl->user = NULL;
+#if IS_ENABLED(CONFIG_UNIX)
+ fpl->inflight = false;
+ fpl->dead = false;
+ fpl->edges = NULL;
+ INIT_LIST_HEAD(&fpl->vertices);
+#endif
}
fpp = &fpl->fp[fpl->count];
@@ -376,8 +382,14 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
if (new_fpl) {
for (i = 0; i < fpl->count; i++)
get_file(fpl->fp[i]);
+
new_fpl->max = new_fpl->count;
new_fpl->user = get_uid(fpl->user);
+#if IS_ENABLED(CONFIG_UNIX)
+ new_fpl->inflight = false;
+ new_fpl->edges = NULL;
+ INIT_LIST_HEAD(&new_fpl->vertices);
+#endif
}
return new_fpl;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4096e679f61c..83f8cd8aa2d1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -51,6 +51,7 @@
#endif
#include <linux/string.h>
#include <linux/skbuff.h>
+#include <linux/skbuff_ref.h>
#include <linux/splice.h>
#include <linux/cache.h>
#include <linux/rtnetlink.h>
@@ -108,9 +109,6 @@ static struct kmem_cache *skbuff_ext_cache __ro_after_init;
#define SKB_SMALL_HEAD_HEADROOM \
SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)
-int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
-EXPORT_SYMBOL(sysctl_max_skb_frags);
-
/* kcm_write_msgs() relies on casting paged frags to bio_vec to use
* iov_iter_bvec(). These static asserts ensure the cast is valid is long as the
* netmem is a page.
@@ -279,6 +277,7 @@ static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask)
#endif
struct napi_alloc_cache {
+ local_lock_t bh_lock;
struct page_frag_cache page;
struct page_frag_1k page_small;
unsigned int skb_count;
@@ -286,7 +285,9 @@ struct napi_alloc_cache {
};
static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
-static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
+static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
/* Double check that napi_get_frags() allocates skbs with
* skb->head being backed by slab, not a page fragment.
@@ -308,11 +309,16 @@ void napi_get_frags_check(struct napi_struct *napi)
void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+ void *data;
fragsz = SKB_DATA_ALIGN(fragsz);
- return __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC,
+ local_lock_nested_bh(&napi_alloc_cache.bh_lock);
+ data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC,
align_mask);
+ local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
+ return data;
+
}
EXPORT_SYMBOL(__napi_alloc_frag_align);
@@ -320,19 +326,15 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
{
void *data;
- fragsz = SKB_DATA_ALIGN(fragsz);
if (in_hardirq() || irqs_disabled()) {
struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache);
+ fragsz = SKB_DATA_ALIGN(fragsz);
data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC,
align_mask);
} else {
- struct napi_alloc_cache *nc;
-
local_bh_disable();
- nc = this_cpu_ptr(&napi_alloc_cache);
- data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC,
- align_mask);
+ data = __napi_alloc_frag_align(fragsz, align_mask);
local_bh_enable();
}
return data;
@@ -344,16 +346,20 @@ static struct sk_buff *napi_skb_cache_get(void)
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
struct sk_buff *skb;
+ local_lock_nested_bh(&napi_alloc_cache.bh_lock);
if (unlikely(!nc->skb_count)) {
nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
GFP_ATOMIC,
NAPI_SKB_CACHE_BULK,
nc->skb_cache);
- if (unlikely(!nc->skb_count))
+ if (unlikely(!nc->skb_count)) {
+ local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
return NULL;
+ }
}
skb = nc->skb_cache[--nc->skb_count];
+ local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
kasan_mempool_unpoison_object(skb, kmem_cache_size(net_hotdata.skbuff_cache));
return skb;
@@ -746,9 +752,13 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
pfmemalloc = nc->pfmemalloc;
} else {
local_bh_disable();
+ local_lock_nested_bh(&napi_alloc_cache.bh_lock);
+
nc = this_cpu_ptr(&napi_alloc_cache.page);
data = page_frag_alloc(nc, len, gfp_mask);
pfmemalloc = nc->pfmemalloc;
+
+ local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
local_bh_enable();
}
@@ -775,10 +785,9 @@ skb_fail:
EXPORT_SYMBOL(__netdev_alloc_skb);
/**
- * __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
+ * napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
* @napi: napi instance this buffer was allocated for
* @len: length to allocate
- * @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages
*
* Allocate a new sk_buff for use in NAPI receive. This buffer will
* attempt to allocate the head from a special reserved region used
@@ -787,9 +796,9 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
*
* %NULL is returned if there is no free memory.
*/
-struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
- gfp_t gfp_mask)
+struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
{
+ gfp_t gfp_mask = GFP_ATOMIC | __GFP_NOWARN;
struct napi_alloc_cache *nc;
struct sk_buff *skb;
bool pfmemalloc;
@@ -813,11 +822,11 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
goto skb_success;
}
- nc = this_cpu_ptr(&napi_alloc_cache);
-
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
+ local_lock_nested_bh(&napi_alloc_cache.bh_lock);
+ nc = this_cpu_ptr(&napi_alloc_cache);
if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) {
/* we are artificially inflating the allocation size, but
* that is not as bad as it may look like, as:
@@ -839,6 +848,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
data = page_frag_alloc(&nc->page, len, gfp_mask);
pfmemalloc = nc->page.pfmemalloc;
}
+ local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
if (unlikely(!data))
return NULL;
@@ -860,7 +870,7 @@ skb_success:
skb_fail:
return skb;
}
-EXPORT_SYMBOL(__napi_alloc_skb);
+EXPORT_SYMBOL(napi_alloc_skb);
void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem,
int off, int size, unsigned int truesize)
@@ -1005,10 +1015,9 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
EXPORT_SYMBOL(skb_cow_data_for_xdp);
#if IS_ENABLED(CONFIG_PAGE_POOL)
-bool napi_pp_put_page(struct page *page, bool napi_safe)
+bool napi_pp_put_page(netmem_ref netmem)
{
- bool allow_direct = false;
- struct page_pool *pp;
+ struct page *page = netmem_to_page(netmem);
page = compound_head(page);
@@ -1022,39 +1031,18 @@ bool napi_pp_put_page(struct page *page, bool napi_safe)
if (unlikely(!is_pp_page(page)))
return false;
- pp = page->pp;
-
- /* Allow direct recycle if we have reasons to believe that we are
- * in the same context as the consumer would run, so there's
- * no possible race.
- * __page_pool_put_page() makes sure we're not in hardirq context
- * and interrupts are enabled prior to accessing the cache.
- */
- if (napi_safe || in_softirq()) {
- const struct napi_struct *napi = READ_ONCE(pp->p.napi);
- unsigned int cpuid = smp_processor_id();
-
- allow_direct = napi && READ_ONCE(napi->list_owner) == cpuid;
- allow_direct |= READ_ONCE(pp->cpuid) == cpuid;
- }
-
- /* Driver set this to memory recycling info. Reset it on recycle.
- * This will *not* work for NIC using a split-page memory model.
- * The page will be returned to the pool here regardless of the
- * 'flipped' fragment being in use or not.
- */
- page_pool_put_full_page(pp, page, allow_direct);
+ page_pool_put_full_netmem(page->pp, page_to_netmem(page), false);
return true;
}
EXPORT_SYMBOL(napi_pp_put_page);
#endif
-static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
+static bool skb_pp_recycle(struct sk_buff *skb, void *data)
{
if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
return false;
- return napi_pp_put_page(virt_to_page(data), napi_safe);
+ return napi_pp_put_page(page_to_netmem(virt_to_page(data)));
}
/**
@@ -1096,12 +1084,12 @@ static void skb_kfree_head(void *head, unsigned int end_offset)
kfree(head);
}
-static void skb_free_head(struct sk_buff *skb, bool napi_safe)
+static void skb_free_head(struct sk_buff *skb)
{
unsigned char *head = skb->head;
if (skb->head_frag) {
- if (skb_pp_recycle(skb, head, napi_safe))
+ if (skb_pp_recycle(skb, head))
return;
skb_free_frag(head);
} else {
@@ -1109,8 +1097,7 @@ static void skb_free_head(struct sk_buff *skb, bool napi_safe)
}
}
-static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason,
- bool napi_safe)
+static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
int i;
@@ -1127,13 +1114,13 @@ static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason,
}
for (i = 0; i < shinfo->nr_frags; i++)
- napi_frag_unref(&shinfo->frags[i], skb->pp_recycle, napi_safe);
+ __skb_frag_unref(&shinfo->frags[i], skb->pp_recycle);
free_head:
if (shinfo->frag_list)
kfree_skb_list_reason(shinfo->frag_list, reason);
- skb_free_head(skb, napi_safe);
+ skb_free_head(skb);
exit:
/* When we clone an SKB we copy the reycling bit. The pp_recycle
* bit is only set on the head though, so in order to avoid races
@@ -1194,12 +1181,11 @@ void skb_release_head_state(struct sk_buff *skb)
}
/* Free everything but the sk_buff shell. */
-static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason,
- bool napi_safe)
+static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason)
{
skb_release_head_state(skb);
if (likely(skb->head))
- skb_release_data(skb, reason, napi_safe);
+ skb_release_data(skb, reason);
}
/**
@@ -1213,13 +1199,14 @@ static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason,
void __kfree_skb(struct sk_buff *skb)
{
- skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED, false);
+ skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED);
kfree_skbmem(skb);
}
EXPORT_SYMBOL(__kfree_skb);
static __always_inline
-bool __kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
+bool __sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb,
+ enum skb_drop_reason reason)
{
if (unlikely(!skb_unref(skb)))
return false;
@@ -1232,26 +1219,27 @@ bool __kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
if (reason == SKB_CONSUMED)
trace_consume_skb(skb, __builtin_return_address(0));
else
- trace_kfree_skb(skb, __builtin_return_address(0), reason);
+ trace_kfree_skb(skb, __builtin_return_address(0), reason, sk);
return true;
}
/**
- * kfree_skb_reason - free an sk_buff with special reason
+ * sk_skb_reason_drop - free an sk_buff with special reason
+ * @sk: the socket to receive @skb, or NULL if not applicable
* @skb: buffer to free
* @reason: reason why this skb is dropped
*
- * Drop a reference to the buffer and free it if the usage count has
- * hit zero. Meanwhile, pass the drop reason to 'kfree_skb'
- * tracepoint.
+ * Drop a reference to the buffer and free it if the usage count has hit
+ * zero. Meanwhile, pass the receiving socket and drop reason to
+ * 'kfree_skb' tracepoint.
*/
void __fix_address
-kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
+sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason reason)
{
- if (__kfree_skb_reason(skb, reason))
+ if (__sk_skb_reason_drop(sk, skb, reason))
__kfree_skb(skb);
}
-EXPORT_SYMBOL(kfree_skb_reason);
+EXPORT_SYMBOL(sk_skb_reason_drop);
#define KFREE_SKB_BULK_SIZE 16
@@ -1270,7 +1258,7 @@ static void kfree_skb_add_bulk(struct sk_buff *skb,
return;
}
- skb_release_all(skb, reason, false);
+ skb_release_all(skb, reason);
sa->skb_array[sa->skb_count++] = skb;
if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) {
@@ -1290,7 +1278,7 @@ kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason)
while (segs) {
struct sk_buff *next = segs->next;
- if (__kfree_skb_reason(segs, reason)) {
+ if (__sk_skb_reason_drop(NULL, segs, reason)) {
skb_poison_list(segs);
kfree_skb_add_bulk(segs, &sa, reason);
}
@@ -1331,22 +1319,28 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
has_trans = skb_transport_header_was_set(skb);
printk("%sskb len=%u headroom=%u headlen=%u tailroom=%u\n"
- "mac=(%d,%d) net=(%d,%d) trans=%d\n"
+ "mac=(%d,%d) mac_len=%u net=(%d,%d) trans=%d\n"
"shinfo(txflags=%u nr_frags=%u gso(size=%hu type=%u segs=%hu))\n"
- "csum(0x%x ip_summed=%u complete_sw=%u valid=%u level=%u)\n"
- "hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n",
+ "csum(0x%x start=%u offset=%u ip_summed=%u complete_sw=%u valid=%u level=%u)\n"
+ "hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n"
+ "priority=0x%x mark=0x%x alloc_cpu=%u vlan_all=0x%x\n"
+ "encapsulation=%d inner(proto=0x%04x, mac=%u, net=%u, trans=%u)\n",
level, skb->len, headroom, skb_headlen(skb), tailroom,
has_mac ? skb->mac_header : -1,
has_mac ? skb_mac_header_len(skb) : -1,
+ skb->mac_len,
skb->network_header,
has_trans ? skb_network_header_len(skb) : -1,
has_trans ? skb->transport_header : -1,
sh->tx_flags, sh->nr_frags,
sh->gso_size, sh->gso_type, sh->gso_segs,
- skb->csum, skb->ip_summed, skb->csum_complete_sw,
- skb->csum_valid, skb->csum_level,
+ skb->csum, skb->csum_start, skb->csum_offset, skb->ip_summed,
+ skb->csum_complete_sw, skb->csum_valid, skb->csum_level,
skb->hash, skb->sw_hash, skb->l4_hash,
- ntohs(skb->protocol), skb->pkt_type, skb->skb_iif);
+ ntohs(skb->protocol), skb->pkt_type, skb->skb_iif,
+ skb->priority, skb->mark, skb->alloc_cpu, skb->vlan_all,
+ skb->encapsulation, skb->inner_protocol, skb->inner_mac_header,
+ skb->inner_network_header, skb->inner_transport_header);
if (dev)
printk("%sdev name=%s feat=%pNF\n",
@@ -1444,7 +1438,7 @@ EXPORT_SYMBOL(consume_skb);
void __consume_stateless_skb(struct sk_buff *skb)
{
trace_consume_skb(skb, __builtin_return_address(0));
- skb_release_data(skb, SKB_CONSUMED, false);
+ skb_release_data(skb, SKB_CONSUMED);
kfree_skbmem(skb);
}
@@ -1456,6 +1450,7 @@ static void napi_skb_cache_put(struct sk_buff *skb)
if (!kasan_mempool_poison_object(skb))
return;
+ local_lock_nested_bh(&napi_alloc_cache.bh_lock);
nc->skb_cache[nc->skb_count++] = skb;
if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
@@ -1467,11 +1462,12 @@ static void napi_skb_cache_put(struct sk_buff *skb)
nc->skb_cache + NAPI_SKB_CACHE_HALF);
nc->skb_count = NAPI_SKB_CACHE_HALF;
}
+ local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
}
void __napi_kfree_skb(struct sk_buff *skb, enum skb_drop_reason reason)
{
- skb_release_all(skb, reason, true);
+ skb_release_all(skb, reason);
napi_skb_cache_put(skb);
}
@@ -1509,7 +1505,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget)
return;
}
- skb_release_all(skb, SKB_CONSUMED, !!budget);
+ skb_release_all(skb, SKB_CONSUMED);
napi_skb_cache_put(skb);
}
EXPORT_SYMBOL(napi_consume_skb);
@@ -1640,7 +1636,7 @@ EXPORT_SYMBOL_GPL(alloc_skb_for_msg);
*/
struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
{
- skb_release_all(dst, SKB_CONSUMED, false);
+ skb_release_all(dst, SKB_CONSUMED);
return __skb_clone(dst, src);
}
EXPORT_SYMBOL_GPL(skb_morph);
@@ -1708,7 +1704,7 @@ static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
return NULL;
}
- uarg->ubuf.callback = msg_zerocopy_callback;
+ uarg->ubuf.ops = &msg_zerocopy_ubuf_ops;
uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1;
uarg->len = 1;
uarg->bytelen = size;
@@ -1734,7 +1730,7 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
u32 bytelen, next;
/* there might be non MSG_ZEROCOPY users */
- if (uarg->callback != msg_zerocopy_callback)
+ if (uarg->ops != &msg_zerocopy_ubuf_ops)
return NULL;
/* realloc only when socket is locked (TCP, UDP cork),
@@ -1845,8 +1841,8 @@ release:
sock_put(sk);
}
-void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg,
- bool success)
+static void msg_zerocopy_complete(struct sk_buff *skb, struct ubuf_info *uarg,
+ bool success)
{
struct ubuf_info_msgzc *uarg_zc = uarg_to_msgzc(uarg);
@@ -1855,7 +1851,6 @@ void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg,
if (refcount_dec_and_test(&uarg->refcnt))
__msg_zerocopy_callback(uarg_zc);
}
-EXPORT_SYMBOL_GPL(msg_zerocopy_callback);
void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
{
@@ -1865,22 +1860,35 @@ void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
uarg_to_msgzc(uarg)->len--;
if (have_uref)
- msg_zerocopy_callback(NULL, uarg, true);
+ msg_zerocopy_complete(NULL, uarg, true);
}
EXPORT_SYMBOL_GPL(msg_zerocopy_put_abort);
+const struct ubuf_info_ops msg_zerocopy_ubuf_ops = {
+ .complete = msg_zerocopy_complete,
+};
+EXPORT_SYMBOL_GPL(msg_zerocopy_ubuf_ops);
+
int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
struct msghdr *msg, int len,
struct ubuf_info *uarg)
{
- struct ubuf_info *orig_uarg = skb_zcopy(skb);
int err, orig_len = skb->len;
- /* An skb can only point to one uarg. This edge case happens when
- * TCP appends to an skb, but zerocopy_realloc triggered a new alloc.
- */
- if (orig_uarg && uarg != orig_uarg)
- return -EEXIST;
+ if (uarg->ops->link_skb) {
+ err = uarg->ops->link_skb(skb, uarg);
+ if (err)
+ return err;
+ } else {
+ struct ubuf_info *orig_uarg = skb_zcopy(skb);
+
+ /* An skb can only point to one uarg. This edge case happens
+ * when TCP appends to an skb, but zerocopy_realloc triggered
+ * a new alloc.
+ */
+ if (orig_uarg && uarg != orig_uarg)
+ return -EEXIST;
+ }
err = __zerocopy_sg_from_iter(msg, sk, skb, &msg->msg_iter, len);
if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
@@ -2278,9 +2286,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
if (skb_has_frag_list(skb))
skb_clone_fraglist(skb);
- skb_release_data(skb, SKB_CONSUMED, false);
+ skb_release_data(skb, SKB_CONSUMED);
} else {
- skb_free_head(skb, false);
+ skb_free_head(skb);
}
off = (data + nhead) - skb->head;
@@ -4150,6 +4158,9 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
if (skb_zcopy(tgt) || skb_zcopy(skb))
return 0;
+ DEBUG_NET_WARN_ON_ONCE(tgt->pp_recycle != skb->pp_recycle);
+ DEBUG_NET_WARN_ON_ONCE(skb_cmp_decrypted(tgt, skb));
+
todo = shiftlen;
from = 0;
to = skb_shinfo(tgt)->nr_frags;
@@ -6586,12 +6597,12 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
skb_frag_ref(skb, i);
if (skb_has_frag_list(skb))
skb_clone_fraglist(skb);
- skb_release_data(skb, SKB_CONSUMED, false);
+ skb_release_data(skb, SKB_CONSUMED);
} else {
/* we can reuse existing recount- all we did was
* relocate values
*/
- skb_free_head(skb, false);
+ skb_free_head(skb);
}
skb->head = data;
@@ -6726,7 +6737,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
skb_kfree_head(data, size);
return -ENOMEM;
}
- skb_release_data(skb, SKB_CONSUMED, false);
+ skb_release_data(skb, SKB_CONSUMED);
skb->head = data;
skb->head_frag = 0;
@@ -7006,6 +7017,19 @@ free_now:
EXPORT_SYMBOL(__skb_ext_put);
#endif /* CONFIG_SKB_EXTENSIONS */
+static void kfree_skb_napi_cache(struct sk_buff *skb)
+{
+ /* if SKB is a clone, don't handle this case */
+ if (skb->fclone != SKB_FCLONE_UNAVAILABLE) {
+ __kfree_skb(skb);
+ return;
+ }
+
+ local_bh_disable();
+ __napi_kfree_skb(skb, SKB_CONSUMED);
+ local_bh_enable();
+}
+
/**
* skb_attempt_defer_free - queue skb for remote freeing
* @skb: buffer
@@ -7021,10 +7045,10 @@ void skb_attempt_defer_free(struct sk_buff *skb)
unsigned int defer_max;
bool kick;
- if (WARN_ON_ONCE(cpu >= nr_cpu_ids) ||
- !cpu_online(cpu) ||
- cpu == raw_smp_processor_id()) {
-nodefer: __kfree_skb(skb);
+ if (cpu == raw_smp_processor_id() ||
+ WARN_ON_ONCE(cpu >= nr_cpu_ids) ||
+ !cpu_online(cpu)) {
+nodefer: kfree_skb_napi_cache(skb);
return;
}
@@ -7032,7 +7056,7 @@ nodefer: __kfree_skb(skb);
DEBUG_NET_WARN_ON_ONCE(skb->destructor);
sd = &per_cpu(softnet_data, cpu);
- defer_max = READ_ONCE(sysctl_skb_defer_max);
+ defer_max = READ_ONCE(net_hotdata.sysctl_skb_defer_max);
if (READ_ONCE(sd->defer_count) >= defer_max)
goto nodefer;
@@ -7050,8 +7074,8 @@ nodefer: __kfree_skb(skb);
/* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
* if we are unlucky enough (this seems very unlikely).
*/
- if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1))
- smp_call_function_single_async(cpu, &sd->defer_csd);
+ if (unlikely(kick))
+ kick_defer_list_purge(sd, cpu);
}
static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
@@ -7084,7 +7108,7 @@ static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter,
ssize_t maxsize, gfp_t gfp)
{
- size_t frag_limit = READ_ONCE(sysctl_max_skb_frags);
+ size_t frag_limit = READ_ONCE(net_hotdata.sysctl_max_skb_frags);
struct page *pages[8], **ppages = pages;
ssize_t spliced = 0, ret = 0;
unsigned int i;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index fd20aae30be2..bbf40b999713 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -434,7 +434,8 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
page = sg_page(sge);
if (copied + copy > len)
copy = len - copied;
- copy = copy_page_to_iter(page, sge->offset, copy, iter);
+ if (copy)
+ copy = copy_page_to_iter(page, sge->offset, copy, iter);
if (!copy) {
copied = copied ? copied : -EFAULT;
goto out;
diff --git a/net/core/sock.c b/net/core/sock.c
index 0963689a5950..9abc4fe25953 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -127,6 +127,7 @@
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
+#include <net/proto_memory.h>
#include <linux/net_tstamp.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>
@@ -283,7 +284,6 @@ __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
-int sysctl_mem_pcpu_rsv __read_mostly = SK_MEMORY_PCPU_RESERVE;
int sysctl_tstamp_allow_data __read_mostly = 1;
@@ -1083,6 +1083,17 @@ bool sockopt_capable(int cap)
}
EXPORT_SYMBOL(sockopt_capable);
+static int sockopt_validate_clockid(__kernel_clockid_t value)
+{
+ switch (value) {
+ case CLOCK_REALTIME:
+ case CLOCK_MONOTONIC:
+ case CLOCK_TAI:
+ return 0;
+ }
+ return -EINVAL;
+}
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
@@ -1497,6 +1508,11 @@ set_sndbuf:
ret = -EPERM;
break;
}
+
+ ret = sockopt_validate_clockid(sk_txtime.clockid);
+ if (ret)
+ break;
+
sock_valbool_flag(sk, SOCK_TXTIME, true);
sk->sk_clockid = sk_txtime.clockid;
sk->sk_txtime_deadline_mode =
@@ -2262,7 +2278,12 @@ static void sk_init_common(struct sock *sk)
lockdep_set_class_and_name(&sk->sk_error_queue.lock,
af_elock_keys + sk->sk_family,
af_family_elock_key_strings[sk->sk_family]);
- lockdep_set_class_and_name(&sk->sk_callback_lock,
+ if (sk->sk_kern_sock)
+ lockdep_set_class_and_name(&sk->sk_callback_lock,
+ af_kern_callback_keys + sk->sk_family,
+ af_family_kern_clock_key_strings[sk->sk_family]);
+ else
+ lockdep_set_class_and_name(&sk->sk_callback_lock,
af_callback_keys + sk->sk_family,
af_family_clock_key_strings[sk->sk_family]);
}
@@ -2526,13 +2547,12 @@ EXPORT_SYMBOL(skb_set_owner_w);
static bool can_skb_orphan_partial(const struct sk_buff *skb)
{
-#ifdef CONFIG_TLS_DEVICE
/* Drivers depend on in-order delivery for crypto offload,
* partial orphan breaks out-of-order-OK logic.
*/
- if (skb->decrypted)
+ if (skb_is_decrypted(skb))
return false;
-#endif
+
return (skb->destructor == sock_wfree ||
(IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
}
@@ -3242,8 +3262,8 @@ int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
}
EXPORT_SYMBOL(sock_no_socketpair);
-int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+int sock_no_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
return -EOPNOTSUPP;
}
@@ -3338,7 +3358,7 @@ static void sock_def_error_report(struct sock *sk)
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
wake_up_interruptible_poll(&wq->wait, EPOLLERR);
- sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
+ sk_wake_async_rcu(sk, SOCK_WAKE_IO, POLL_ERR);
rcu_read_unlock();
}
@@ -3353,7 +3373,7 @@ void sock_def_readable(struct sock *sk)
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
EPOLLRDNORM | EPOLLRDBAND);
- sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+ sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
}
@@ -3373,7 +3393,7 @@ static void sock_def_write_space(struct sock *sk)
EPOLLWRNORM | EPOLLWRBAND);
/* Should agree with poll, otherwise some programs break */
- sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
rcu_read_unlock();
@@ -3398,7 +3418,7 @@ static void sock_def_write_space_wfree(struct sock *sk)
EPOLLWRNORM | EPOLLWRBAND);
/* Should agree with poll, otherwise some programs break */
- sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
}
@@ -3461,18 +3481,6 @@ void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
}
sk->sk_uid = uid;
- rwlock_init(&sk->sk_callback_lock);
- if (sk->sk_kern_sock)
- lockdep_set_class_and_name(
- &sk->sk_callback_lock,
- af_kern_callback_keys + sk->sk_family,
- af_family_kern_clock_key_strings[sk->sk_family]);
- else
- lockdep_set_class_and_name(
- &sk->sk_callback_lock,
- af_callback_keys + sk->sk_family,
- af_family_clock_key_strings[sk->sk_family]);
-
sk->sk_state_change = sock_def_wakeup;
sk->sk_data_ready = sock_def_readable;
sk->sk_write_space = sock_def_write_space;
@@ -3743,6 +3751,9 @@ void sk_common_release(struct sock *sk)
sk->sk_prot->unhash(sk);
+ if (sk->sk_socket)
+ sk->sk_socket->sk = NULL;
+
/*
* In this point socket cannot receive new packets, but it is possible
* that some packets are in flight because some CPU runs receiver and
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 654122838025..a08eed9b9142 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -18,7 +18,7 @@
static const struct sock_diag_handler __rcu *sock_diag_handlers[AF_MAX];
-static struct sock_diag_inet_compat __rcu *inet_rcv_compat;
+static const struct sock_diag_inet_compat __rcu *inet_rcv_compat;
static struct workqueue_struct *broadcast_wq;
@@ -187,8 +187,7 @@ void sock_diag_broadcast_destroy(struct sock *sk)
void sock_diag_register_inet_compat(const struct sock_diag_inet_compat *ptr)
{
- xchg((__force const struct sock_diag_inet_compat **)&inet_rcv_compat,
- ptr);
+ xchg(&inet_rcv_compat, RCU_INITIALIZER(ptr));
}
EXPORT_SYMBOL_GPL(sock_diag_register_inet_compat);
@@ -196,8 +195,7 @@ void sock_diag_unregister_inet_compat(const struct sock_diag_inet_compat *ptr)
{
const struct sock_diag_inet_compat *old;
- old = xchg((__force const struct sock_diag_inet_compat **)&inet_rcv_compat,
- NULL);
+ old = unrcu_pointer(xchg(&inet_rcv_compat, NULL));
WARN_ON_ONCE(old != ptr);
}
EXPORT_SYMBOL_GPL(sock_diag_unregister_inet_compat);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 8598466a3805..d3dbb92153f2 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -24,8 +24,16 @@ struct bpf_stab {
#define SOCK_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+/* This mutex is used to
+ * - protect race between prog/link attach/detach and link prog update, and
+ * - protect race between releasing and accessing map in bpf_link.
+ * A single global mutex lock is used since it is expected contention is low.
+ */
+static DEFINE_MUTEX(sockmap_mutex);
+
static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
- struct bpf_prog *old, u32 which);
+ struct bpf_prog *old, struct bpf_link *link,
+ u32 which);
static struct sk_psock_progs *sock_map_progs(struct bpf_map *map);
static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
@@ -71,7 +79,9 @@ int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog)
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
- ret = sock_map_prog_update(map, prog, NULL, attr->attach_type);
+ mutex_lock(&sockmap_mutex);
+ ret = sock_map_prog_update(map, prog, NULL, NULL, attr->attach_type);
+ mutex_unlock(&sockmap_mutex);
fdput(f);
return ret;
}
@@ -103,7 +113,9 @@ int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
goto put_prog;
}
- ret = sock_map_prog_update(map, NULL, prog, attr->attach_type);
+ mutex_lock(&sockmap_mutex);
+ ret = sock_map_prog_update(map, NULL, prog, NULL, attr->attach_type);
+ mutex_unlock(&sockmap_mutex);
put_prog:
bpf_prog_put(prog);
put_map:
@@ -411,9 +423,6 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
struct sock *sk;
int err = 0;
- if (irqs_disabled())
- return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
-
spin_lock_bh(&stab->lock);
sk = *psk;
if (!sk_test || sk_test == sk)
@@ -936,9 +945,6 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
struct bpf_shtab_elem *elem;
int ret = -ENOENT;
- if (irqs_disabled())
- return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
-
hash = sock_hash_bucket_hash(key, key_size);
bucket = sock_hash_select_bucket(htab, hash);
@@ -1460,55 +1466,84 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
return NULL;
}
-static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
- u32 which)
+static int sock_map_prog_link_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
+ struct bpf_link ***plink, u32 which)
{
struct sk_psock_progs *progs = sock_map_progs(map);
+ struct bpf_prog **cur_pprog;
+ struct bpf_link **cur_plink;
if (!progs)
return -EOPNOTSUPP;
switch (which) {
case BPF_SK_MSG_VERDICT:
- *pprog = &progs->msg_parser;
+ cur_pprog = &progs->msg_parser;
+ cur_plink = &progs->msg_parser_link;
break;
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
case BPF_SK_SKB_STREAM_PARSER:
- *pprog = &progs->stream_parser;
+ cur_pprog = &progs->stream_parser;
+ cur_plink = &progs->stream_parser_link;
break;
#endif
case BPF_SK_SKB_STREAM_VERDICT:
if (progs->skb_verdict)
return -EBUSY;
- *pprog = &progs->stream_verdict;
+ cur_pprog = &progs->stream_verdict;
+ cur_plink = &progs->stream_verdict_link;
break;
case BPF_SK_SKB_VERDICT:
if (progs->stream_verdict)
return -EBUSY;
- *pprog = &progs->skb_verdict;
+ cur_pprog = &progs->skb_verdict;
+ cur_plink = &progs->skb_verdict_link;
break;
default:
return -EOPNOTSUPP;
}
+ *pprog = cur_pprog;
+ if (plink)
+ *plink = cur_plink;
return 0;
}
+/* Handle the following four cases:
+ * prog_attach: prog != NULL, old == NULL, link == NULL
+ * prog_detach: prog == NULL, old != NULL, link == NULL
+ * link_attach: prog != NULL, old == NULL, link != NULL
+ * link_detach: prog == NULL, old != NULL, link != NULL
+ */
static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
- struct bpf_prog *old, u32 which)
+ struct bpf_prog *old, struct bpf_link *link,
+ u32 which)
{
struct bpf_prog **pprog;
+ struct bpf_link **plink;
int ret;
- ret = sock_map_prog_lookup(map, &pprog, which);
+ ret = sock_map_prog_link_lookup(map, &pprog, &plink, which);
if (ret)
return ret;
- if (old)
- return psock_replace_prog(pprog, prog, old);
+ /* for prog_attach/prog_detach/link_attach, return error if a bpf_link
+ * exists for that prog.
+ */
+ if ((!link || prog) && *plink)
+ return -EBUSY;
- psock_set_prog(pprog, prog);
- return 0;
+ if (old) {
+ ret = psock_replace_prog(pprog, prog, old);
+ if (!ret)
+ *plink = NULL;
+ } else {
+ psock_set_prog(pprog, prog);
+ if (link)
+ *plink = link;
+ }
+
+ return ret;
}
int sock_map_bpf_prog_query(const union bpf_attr *attr,
@@ -1533,7 +1568,7 @@ int sock_map_bpf_prog_query(const union bpf_attr *attr,
rcu_read_lock();
- ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
+ ret = sock_map_prog_link_lookup(map, &pprog, NULL, attr->query.attach_type);
if (ret)
goto end;
@@ -1639,19 +1674,23 @@ void sock_map_close(struct sock *sk, long timeout)
lock_sock(sk);
rcu_read_lock();
- psock = sk_psock_get(sk);
- if (unlikely(!psock)) {
- rcu_read_unlock();
- release_sock(sk);
- saved_close = READ_ONCE(sk->sk_prot)->close;
- } else {
+ psock = sk_psock(sk);
+ if (likely(psock)) {
saved_close = psock->saved_close;
sock_map_remove_links(sk, psock);
+ psock = sk_psock_get(sk);
+ if (unlikely(!psock))
+ goto no_psock;
rcu_read_unlock();
sk_psock_stop(psock);
release_sock(sk);
cancel_delayed_work_sync(&psock->work);
sk_psock_put(sk, psock);
+ } else {
+ saved_close = READ_ONCE(sk->sk_prot)->close;
+no_psock:
+ rcu_read_unlock();
+ release_sock(sk);
}
/* Make sure we do not recurse. This is a bug.
@@ -1663,6 +1702,196 @@ void sock_map_close(struct sock *sk, long timeout)
}
EXPORT_SYMBOL_GPL(sock_map_close);
+struct sockmap_link {
+ struct bpf_link link;
+ struct bpf_map *map;
+ enum bpf_attach_type attach_type;
+};
+
+static void sock_map_link_release(struct bpf_link *link)
+{
+ struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link);
+
+ mutex_lock(&sockmap_mutex);
+ if (!sockmap_link->map)
+ goto out;
+
+ WARN_ON_ONCE(sock_map_prog_update(sockmap_link->map, NULL, link->prog, link,
+ sockmap_link->attach_type));
+
+ bpf_map_put_with_uref(sockmap_link->map);
+ sockmap_link->map = NULL;
+out:
+ mutex_unlock(&sockmap_mutex);
+}
+
+static int sock_map_link_detach(struct bpf_link *link)
+{
+ sock_map_link_release(link);
+ return 0;
+}
+
+static void sock_map_link_dealloc(struct bpf_link *link)
+{
+ kfree(link);
+}
+
+/* Handle the following two cases:
+ * case 1: link != NULL, prog != NULL, old != NULL
+ * case 2: link != NULL, prog != NULL, old == NULL
+ */
+static int sock_map_link_update_prog(struct bpf_link *link,
+ struct bpf_prog *prog,
+ struct bpf_prog *old)
+{
+ const struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link);
+ struct bpf_prog **pprog, *old_link_prog;
+ struct bpf_link **plink;
+ int ret = 0;
+
+ mutex_lock(&sockmap_mutex);
+
+ /* If old prog is not NULL, ensure old prog is the same as link->prog. */
+ if (old && link->prog != old) {
+ ret = -EPERM;
+ goto out;
+ }
+ /* Ensure link->prog has the same type/attach_type as the new prog. */
+ if (link->prog->type != prog->type ||
+ link->prog->expected_attach_type != prog->expected_attach_type) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = sock_map_prog_link_lookup(sockmap_link->map, &pprog, &plink,
+ sockmap_link->attach_type);
+ if (ret)
+ goto out;
+
+ /* return error if the stored bpf_link does not match the incoming bpf_link. */
+ if (link != *plink) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ if (old) {
+ ret = psock_replace_prog(pprog, prog, old);
+ if (ret)
+ goto out;
+ } else {
+ psock_set_prog(pprog, prog);
+ }
+
+ bpf_prog_inc(prog);
+ old_link_prog = xchg(&link->prog, prog);
+ bpf_prog_put(old_link_prog);
+
+out:
+ mutex_unlock(&sockmap_mutex);
+ return ret;
+}
+
+static u32 sock_map_link_get_map_id(const struct sockmap_link *sockmap_link)
+{
+ u32 map_id = 0;
+
+ mutex_lock(&sockmap_mutex);
+ if (sockmap_link->map)
+ map_id = sockmap_link->map->id;
+ mutex_unlock(&sockmap_mutex);
+ return map_id;
+}
+
+static int sock_map_link_fill_info(const struct bpf_link *link,
+ struct bpf_link_info *info)
+{
+ const struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link);
+ u32 map_id = sock_map_link_get_map_id(sockmap_link);
+
+ info->sockmap.map_id = map_id;
+ info->sockmap.attach_type = sockmap_link->attach_type;
+ return 0;
+}
+
+static void sock_map_link_show_fdinfo(const struct bpf_link *link,
+ struct seq_file *seq)
+{
+ const struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link);
+ u32 map_id = sock_map_link_get_map_id(sockmap_link);
+
+ seq_printf(seq, "map_id:\t%u\n", map_id);
+ seq_printf(seq, "attach_type:\t%u\n", sockmap_link->attach_type);
+}
+
+static const struct bpf_link_ops sock_map_link_ops = {
+ .release = sock_map_link_release,
+ .dealloc = sock_map_link_dealloc,
+ .detach = sock_map_link_detach,
+ .update_prog = sock_map_link_update_prog,
+ .fill_link_info = sock_map_link_fill_info,
+ .show_fdinfo = sock_map_link_show_fdinfo,
+};
+
+int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ struct bpf_link_primer link_primer;
+ struct sockmap_link *sockmap_link;
+ enum bpf_attach_type attach_type;
+ struct bpf_map *map;
+ int ret;
+
+ if (attr->link_create.flags)
+ return -EINVAL;
+
+ map = bpf_map_get_with_uref(attr->link_create.target_fd);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+ if (map->map_type != BPF_MAP_TYPE_SOCKMAP && map->map_type != BPF_MAP_TYPE_SOCKHASH) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ sockmap_link = kzalloc(sizeof(*sockmap_link), GFP_USER);
+ if (!sockmap_link) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ attach_type = attr->link_create.attach_type;
+ bpf_link_init(&sockmap_link->link, BPF_LINK_TYPE_SOCKMAP, &sock_map_link_ops, prog);
+ sockmap_link->map = map;
+ sockmap_link->attach_type = attach_type;
+
+ ret = bpf_link_prime(&sockmap_link->link, &link_primer);
+ if (ret) {
+ kfree(sockmap_link);
+ goto out;
+ }
+
+ mutex_lock(&sockmap_mutex);
+ ret = sock_map_prog_update(map, prog, NULL, &sockmap_link->link, attach_type);
+ mutex_unlock(&sockmap_mutex);
+ if (ret) {
+ bpf_link_cleanup(&link_primer);
+ goto out;
+ }
+
+ /* Increase refcnt for the prog since when old prog is replaced with
+ * psock_replace_prog() and psock_set_prog() its refcnt will be decreased.
+ *
+ * Actually, we do not need to increase refcnt for the prog since bpf_link
+ * will hold a reference. But in order to have less complexity w.r.t.
+ * replacing/setting prog, let us increase the refcnt to make things simpler.
+ */
+ bpf_prog_inc(prog);
+
+ return bpf_link_settle(&link_primer);
+
+out:
+ bpf_map_put_with_uref(map);
+ return ret;
+}
+
static int sock_map_iter_attach_target(struct bpf_prog *prog,
union bpf_iter_link_info *linfo,
struct bpf_iter_aux_info *aux)
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 6973dda3abda..86a2476678c4 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -24,6 +24,7 @@
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
#include <net/hotdata.h>
+#include <net/proto_memory.h>
#include <net/rps.h>
#include "dev.h"
@@ -94,7 +95,7 @@ static struct cpumask *rps_default_mask_cow_alloc(struct net *net)
return rps_default_mask;
}
-static int rps_default_mask_sysctl(struct ctl_table *table, int write,
+static int rps_default_mask_sysctl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = (struct net *)table->data;
@@ -125,7 +126,7 @@ done:
return err;
}
-static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
+static int rps_sock_flow_sysctl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
unsigned int orig_size, size;
@@ -197,7 +198,7 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
#ifdef CONFIG_NET_FLOW_LIMIT
static DEFINE_MUTEX(flow_limit_update_mutex);
-static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
+static int flow_limit_cpu_sysctl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct sd_flow_limit *cur;
@@ -254,7 +255,7 @@ done:
return ret;
}
-static int flow_limit_table_len_sysctl(struct ctl_table *table, int write,
+static int flow_limit_table_len_sysctl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
unsigned int old, *ptr;
@@ -276,7 +277,7 @@ static int flow_limit_table_len_sysctl(struct ctl_table *table, int write,
#endif /* CONFIG_NET_FLOW_LIMIT */
#ifdef CONFIG_NET_SCHED
-static int set_default_qdisc(struct ctl_table *table, int write,
+static int set_default_qdisc(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
char id[IFNAMSIZ];
@@ -295,7 +296,7 @@ static int set_default_qdisc(struct ctl_table *table, int write,
}
#endif
-static int proc_do_dev_weight(struct ctl_table *table, int write,
+static int proc_do_dev_weight(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
static DEFINE_MUTEX(dev_weight_mutex);
@@ -313,7 +314,7 @@ static int proc_do_dev_weight(struct ctl_table *table, int write,
return ret;
}
-static int proc_do_rss_key(struct ctl_table *table, int write,
+static int proc_do_rss_key(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table fake_table;
@@ -326,7 +327,7 @@ static int proc_do_rss_key(struct ctl_table *table, int write,
}
#ifdef CONFIG_BPF_JIT
-static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
+static int proc_dointvec_minmax_bpf_enable(const struct ctl_table *table, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -359,7 +360,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
# ifdef CONFIG_HAVE_EBPF_JIT
static int
-proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
+proc_dointvec_minmax_bpf_restricted(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
if (!capable(CAP_SYS_ADMIN))
@@ -370,7 +371,7 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
# endif /* CONFIG_HAVE_EBPF_JIT */
static int
-proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write,
+proc_dolongvec_minmax_bpf_restricted(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
if (!capable(CAP_SYS_ADMIN))
@@ -382,40 +383,8 @@ proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write,
static struct ctl_table net_core_table[] = {
{
- .procname = "wmem_max",
- .data = &sysctl_wmem_max,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_sndbuf,
- },
- {
- .procname = "rmem_max",
- .data = &sysctl_rmem_max,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_rcvbuf,
- },
- {
- .procname = "wmem_default",
- .data = &sysctl_wmem_default,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_sndbuf,
- },
- {
- .procname = "rmem_default",
- .data = &sysctl_rmem_default,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_rcvbuf,
- },
- {
.procname = "mem_pcpu_rsv",
- .data = &sysctl_mem_pcpu_rsv,
+ .data = &net_hotdata.sysctl_mem_pcpu_rsv,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -595,7 +564,7 @@ static struct ctl_table net_core_table[] = {
},
{
.procname = "max_skb_frags",
- .data = &sysctl_max_skb_frags,
+ .data = &net_hotdata.sysctl_max_skb_frags,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -654,13 +623,12 @@ static struct ctl_table net_core_table[] = {
},
{
.procname = "skb_defer_max",
- .data = &sysctl_skb_defer_max,
+ .data = &net_hotdata.sysctl_skb_defer_max,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
- { }
};
static struct ctl_table netns_core_table[] = {
@@ -697,7 +665,41 @@ static struct ctl_table netns_core_table[] = {
.extra2 = SYSCTL_ONE,
.proc_handler = proc_dou8vec_minmax,
},
- { }
+ /* sysctl_core_net_init() will set the values after this
+ * to readonly in network namespaces
+ */
+ {
+ .procname = "wmem_max",
+ .data = &sysctl_wmem_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &min_sndbuf,
+ },
+ {
+ .procname = "rmem_max",
+ .data = &sysctl_rmem_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &min_rcvbuf,
+ },
+ {
+ .procname = "wmem_default",
+ .data = &sysctl_wmem_default,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &min_sndbuf,
+ },
+ {
+ .procname = "rmem_default",
+ .data = &sysctl_rmem_default,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &min_rcvbuf,
+ },
};
static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str)
@@ -715,20 +717,27 @@ __setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup);
static __net_init int sysctl_core_net_init(struct net *net)
{
- struct ctl_table *tbl, *tmp;
+ size_t table_size = ARRAY_SIZE(netns_core_table);
+ struct ctl_table *tbl;
tbl = netns_core_table;
if (!net_eq(net, &init_net)) {
+ int i;
tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
if (tbl == NULL)
goto err_dup;
- for (tmp = tbl; tmp->procname; tmp++)
- tmp->data += (char *)net - (char *)&init_net;
+ for (i = 0; i < table_size; ++i) {
+ if (tbl[i].data == &sysctl_wmem_max)
+ break;
+
+ tbl[i].data += (char *)net - (char *)&init_net;
+ }
+ for (; i < table_size; ++i)
+ tbl[i].mode &= ~0222;
}
- net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl,
- ARRAY_SIZE(netns_core_table));
+ net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, table_size);
if (net->core.sysctl_hdr == NULL)
goto err_reg;
@@ -743,7 +752,7 @@ err_dup:
static __net_exit void sysctl_core_net_exit(struct net *net)
{
- struct ctl_table *tbl;
+ const struct ctl_table *tbl;
tbl = net->core.sysctl_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->core.sysctl_hdr);
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 04840697fe79..3717fb152ecc 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -25,7 +25,8 @@ void skb_clone_tx_timestamp(struct sk_buff *skb)
struct sk_buff *clone;
unsigned int type;
- if (!skb->sk)
+ if (!skb->sk || !skb->dev ||
+ !phy_is_default_hwtstamp(skb->dev->phydev))
return;
type = classify(skb);
@@ -47,7 +48,7 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
struct mii_timestamper *mii_ts;
unsigned int type;
- if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->mii_ts)
+ if (!skb->dev || !phy_is_default_hwtstamp(skb->dev->phydev))
return false;
if (skb_headroom(skb) < ETH_HLEN)
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 41693154e426..bcc5551c6424 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -127,10 +127,8 @@ void xdp_unreg_mem_model(struct xdp_mem_info *mem)
return;
if (type == MEM_TYPE_PAGE_POOL) {
- rcu_read_lock();
- xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
+ xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
page_pool_destroy(xa->page_pool);
- rcu_read_unlock();
}
}
EXPORT_SYMBOL_GPL(xdp_unreg_mem_model);
@@ -295,10 +293,8 @@ static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem,
mutex_lock(&mem_id_lock);
ret = __mem_id_init_hash_table();
mutex_unlock(&mem_id_lock);
- if (ret < 0) {
- WARN_ON(1);
+ if (ret < 0)
return ERR_PTR(ret);
- }
}
xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 4d9823d6dced..d6b30700af67 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -353,6 +353,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
/**
* ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
* @sk: socket to perform estimator on
+ * @mrtt: measured RTT
*
* This code is almost identical with TCP's tcp_rtt_estimator(), since
* - it has a higher sampling frequency (recommended by RFC 1323),
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 44b033fe1ef6..5926159a6f20 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -24,6 +24,7 @@
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/netns/generic.h>
+#include <net/rstreason.h>
#include "ackvec.h"
#include "ccid.h"
@@ -521,7 +522,8 @@ out:
return err;
}
-static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
+static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb,
+ enum sk_rst_reason reason)
{
int err;
const struct iphdr *rxiph;
@@ -655,8 +657,11 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (dccp_v4_send_response(sk, req))
goto drop_and_free;
- inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
- reqsk_put(req);
+ if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT)))
+ reqsk_free(req);
+ else
+ reqsk_put(req);
+
return 0;
drop_and_free:
@@ -706,7 +711,7 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
reset:
- dccp_v4_ctl_send_reset(sk, skb);
+ dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
kfree_skb(skb);
return 0;
}
@@ -869,7 +874,7 @@ lookup:
if (nsk == sk) {
reqsk_put(req);
} else if (dccp_child_process(sk, nsk, skb)) {
- dccp_v4_ctl_send_reset(sk, skb);
+ dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
goto discard_and_relse;
} else {
sock_put(sk);
@@ -909,7 +914,7 @@ no_dccp_socket:
if (dh->dccph_type != DCCP_PKT_RESET) {
DCCP_SKB_CB(skb)->dccpd_reset_code =
DCCP_RESET_CODE_NO_CONNECTION;
- dccp_v4_ctl_send_reset(sk, skb);
+ dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
}
discard_it:
@@ -1039,7 +1044,7 @@ static void __net_exit dccp_v4_exit_net(struct net *net)
static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list)
{
- inet_twsk_purge(&dccp_hashinfo, AF_INET);
+ inet_twsk_purge(&dccp_hashinfo);
}
static struct pernet_operations dccp_v4_ops = {
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index ded07e09f813..da5dba120bc9 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -29,6 +29,7 @@
#include <net/secure_seq.h>
#include <net/netns/generic.h>
#include <net/sock.h>
+#include <net/rstreason.h>
#include "dccp.h"
#include "ipv6.h"
@@ -256,7 +257,8 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req)
kfree_skb(inet_rsk(req)->pktopts);
}
-static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
+static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb,
+ enum sk_rst_reason reason)
{
const struct ipv6hdr *rxip6h;
struct sk_buff *skb;
@@ -398,8 +400,11 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (dccp_v6_send_response(sk, req))
goto drop_and_free;
- inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
- reqsk_put(req);
+ if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT)))
+ reqsk_free(req);
+ else
+ reqsk_put(req);
+
return 0;
drop_and_free:
@@ -656,7 +661,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
reset:
- dccp_v6_ctl_send_reset(sk, skb);
+ dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
discard:
if (opt_skb != NULL)
__kfree_skb(opt_skb);
@@ -762,7 +767,7 @@ lookup:
if (nsk == sk) {
reqsk_put(req);
} else if (dccp_child_process(sk, nsk, skb)) {
- dccp_v6_ctl_send_reset(sk, skb);
+ dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
goto discard_and_relse;
} else {
sock_put(sk);
@@ -801,7 +806,7 @@ no_dccp_socket:
if (dh->dccph_type != DCCP_PKT_RESET) {
DCCP_SKB_CB(skb)->dccpd_reset_code =
DCCP_RESET_CODE_NO_CONNECTION;
- dccp_v6_ctl_send_reset(sk, skb);
+ dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
}
discard_it:
@@ -1119,15 +1124,9 @@ static void __net_exit dccp_v6_exit_net(struct net *net)
inet_ctl_sock_destroy(pn->v6_ctl_sk);
}
-static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list)
-{
- inet_twsk_purge(&dccp_hashinfo, AF_INET6);
-}
-
static struct pernet_operations dccp_v6_ops = {
.init = dccp_v6_init_net,
.exit = dccp_v6_exit_net,
- .exit_batch = dccp_v6_exit_batch,
.id = &dccp_v6_pernet_id,
.size = sizeof(struct dccp_v6_pernet),
};
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 64d805b27add..fecc8190064f 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -15,6 +15,7 @@
#include <net/sock.h>
#include <net/xfrm.h>
#include <net/inet_timewait_sock.h>
+#include <net/rstreason.h>
#include "ackvec.h"
#include "ccid.h"
@@ -53,17 +54,10 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
if (state == DCCP_TIME_WAIT)
timeo = DCCP_TIMEWAIT_LEN;
- /* tw_timer is pinned, so we need to make sure BH are disabled
- * in following section, otherwise timer handler could run before
- * we complete the initialization.
- */
- local_bh_disable();
- inet_twsk_schedule(tw, timeo);
/* Linkage updates.
* Note that access to tw after this point is illegal.
*/
- inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
- local_bh_enable();
+ inet_twsk_hashdance_schedule(tw, sk, &dccp_hashinfo, timeo);
} else {
/* Sorry, if we're out of memory, just CLOSE this
* socket up. We've got bigger problems than
@@ -202,7 +196,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
drop:
if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
- req->rsk_ops->send_reset(sk, skb);
+ req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
inet_csk_reqsk_queue_drop(sk, req);
out:
diff --git a/net/dccp/output.c b/net/dccp/output.c
index fd2eb148d24d..5c2e24f3c39b 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -204,7 +204,7 @@ void dccp_write_space(struct sock *sk)
wake_up_interruptible(&wq->wait);
/* Should agree with poll, otherwise some programs break */
if (sock_writeable(sk))
- sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
rcu_read_unlock();
}
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index ee8d4f5afa72..3fc474d6e57d 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -90,8 +90,6 @@ static struct ctl_table dccp_default_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_ms_jiffies,
},
-
- { }
};
static struct ctl_table_header *dccp_table_header;
diff --git a/net/devlink/core.c b/net/devlink/core.c
index 7f0b093208d7..f49cd83f1955 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -314,7 +314,7 @@ static void devlink_release(struct work_struct *work)
mutex_destroy(&devlink->lock);
lockdep_unregister_key(&devlink->lock_key);
put_device(devlink->dev);
- kfree(devlink);
+ kvfree(devlink);
}
void devlink_put(struct devlink *devlink)
@@ -420,7 +420,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
if (!devlink_reload_actions_valid(ops))
return NULL;
- devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
+ devlink = kvzalloc(struct_size(devlink, priv, priv_size), GFP_KERNEL);
if (!devlink)
return NULL;
@@ -455,7 +455,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
return devlink;
err_xa_alloc:
- kfree(devlink);
+ kvfree(devlink);
return NULL;
}
EXPORT_SYMBOL_GPL(devlink_alloc_ns);
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
index 19dbf540748a..13c73f50da3d 100644
--- a/net/devlink/dev.c
+++ b/net/devlink/dev.c
@@ -1202,23 +1202,19 @@ static void __devlink_compat_running_version(struct devlink *devlink,
if (err)
goto free_msg;
- nla_for_each_attr(nlattr, (void *)msg->data, msg->len, rem) {
+ nla_for_each_attr_type(nlattr, DEVLINK_ATTR_INFO_VERSION_RUNNING,
+ (void *)msg->data, msg->len, rem) {
const struct nlattr *kv;
int rem_kv;
- if (nla_type(nlattr) != DEVLINK_ATTR_INFO_VERSION_RUNNING)
- continue;
-
- nla_for_each_nested(kv, nlattr, rem_kv) {
- if (nla_type(kv) != DEVLINK_ATTR_INFO_VERSION_VALUE)
- continue;
-
+ nla_for_each_nested_type(kv, DEVLINK_ATTR_INFO_VERSION_VALUE,
+ nlattr, rem_kv) {
strlcat(buf, nla_data(kv), len);
strlcat(buf, " ", len);
}
}
free_msg:
- nlmsg_free(msg);
+ nlmsg_consume(msg);
}
void devlink_compat_running_version(struct devlink *devlink,
diff --git a/net/devlink/dpipe.c b/net/devlink/dpipe.c
index a72a9292efc5..55009b377447 100644
--- a/net/devlink/dpipe.c
+++ b/net/devlink/dpipe.c
@@ -839,7 +839,7 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled);
*/
int devl_dpipe_table_register(struct devlink *devlink,
const char *table_name,
- struct devlink_dpipe_table_ops *table_ops,
+ const struct devlink_dpipe_table_ops *table_ops,
void *priv, bool counter_control_extern)
{
struct devlink_dpipe_table *table;
diff --git a/net/devlink/param.c b/net/devlink/param.c
index 22bc3b500518..dcf0d1ccebba 100644
--- a/net/devlink/param.c
+++ b/net/devlink/param.c
@@ -158,11 +158,12 @@ static int devlink_param_get(struct devlink *devlink,
static int devlink_param_set(struct devlink *devlink,
const struct devlink_param *param,
- struct devlink_param_gset_ctx *ctx)
+ struct devlink_param_gset_ctx *ctx,
+ struct netlink_ext_ack *extack)
{
if (!param->set)
return -EOPNOTSUPP;
- return param->set(devlink, param->id, ctx);
+ return param->set(devlink, param->id, ctx, extack);
}
static int
@@ -571,7 +572,7 @@ static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
return -EOPNOTSUPP;
ctx.val = value;
ctx.cmode = cmode;
- err = devlink_param_set(devlink, param, &ctx);
+ err = devlink_param_set(devlink, param, &ctx, info->extack);
if (err)
return err;
}
diff --git a/net/devlink/port.c b/net/devlink/port.c
index 118d130d2afd..be9158b4453c 100644
--- a/net/devlink/port.c
+++ b/net/devlink/port.c
@@ -16,6 +16,7 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_
DEVLINK_PORT_FN_STATE_ACTIVE),
[DEVLINK_PORT_FN_ATTR_CAPS] =
NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
+ [DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] = { .type = NLA_U32 },
};
#define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port) \
@@ -182,6 +183,30 @@ static int devlink_port_fn_caps_fill(struct devlink_port *devlink_port,
return 0;
}
+static int devlink_port_fn_max_io_eqs_fill(struct devlink_port *port,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ bool *msg_updated)
+{
+ u32 max_io_eqs;
+ int err;
+
+ if (!port->ops->port_fn_max_io_eqs_get)
+ return 0;
+
+ err = port->ops->port_fn_max_io_eqs_get(port, &max_io_eqs, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+ err = nla_put_u32(msg, DEVLINK_PORT_FN_ATTR_MAX_IO_EQS, max_io_eqs);
+ if (err)
+ return err;
+ *msg_updated = true;
+ return 0;
+}
+
int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port)
{
if (devlink_nl_put_handle(msg, devlink_port->devlink))
@@ -410,6 +435,18 @@ static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
}
static int
+devlink_port_fn_max_io_eqs_set(struct devlink_port *devlink_port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ u32 max_io_eqs;
+
+ max_io_eqs = nla_get_u32(attr);
+ return devlink_port->ops->port_fn_max_io_eqs_set(devlink_port,
+ max_io_eqs, extack);
+}
+
+static int
devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
struct netlink_ext_ack *extack)
{
@@ -430,6 +467,9 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated);
if (err)
goto out;
+ err = devlink_port_fn_max_io_eqs_fill(port, msg, extack, &msg_updated);
+ if (err)
+ goto out;
err = devlink_rel_devlink_handle_put(msg, port->devlink,
port->rel_index,
DEVLINK_PORT_FN_ATTR_DEVLINK,
@@ -726,6 +766,12 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port,
}
}
}
+ if (tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] &&
+ !ops->port_fn_max_io_eqs_set) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS],
+ "Function does not support max_io_eqs setting");
+ return -EOPNOTSUPP;
+ }
return 0;
}
@@ -761,6 +807,13 @@ static int devlink_port_function_set(struct devlink_port *port,
return err;
}
+ attr = tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS];
+ if (attr) {
+ err = devlink_port_fn_max_io_eqs_set(port, attr, extack);
+ if (err)
+ return err;
+ }
+
/* Keep this as the last function attribute set, so that when
* multiple port function attributes are set along with state,
* Those can be applied first before activating the state.
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 8e698bea99a3..2dfe9063613f 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -129,7 +129,7 @@ config NET_DSA_TAG_RTL4_A
tristate "Tag driver for Realtek 4 byte protocol A tags"
help
Say Y or M if you want to enable support for tagging frames for the
- Realtek switches with 4 byte protocol A tags, sich as found in
+ Realtek switches with 4 byte protocol A tags, such as found in
the Realtek RTL8366RB.
config NET_DSA_TAG_RTL8_4
@@ -166,6 +166,12 @@ config NET_DSA_TAG_TRAILER
Say Y or M if you want to enable support for tagging frames at
with a trailed. e.g. Marvell 88E6060.
+config NET_DSA_TAG_VSC73XX_8021Q
+ tristate "Tag driver for Microchip/Vitesse VSC73xx family of switches, using VLAN"
+ help
+ Say Y or M if you want to enable support for tagging frames with a
+ custom VLAN-based header.
+
config NET_DSA_TAG_XRS700X
tristate "Tag driver for XRS700x switches"
help
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 8a1894a42552..555c07cfeb71 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_NET_DSA_TAG_RTL8_4) += tag_rtl8_4.o
obj-$(CONFIG_NET_DSA_TAG_RZN1_A5PSW) += tag_rzn1_a5psw.o
obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o
obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
+obj-$(CONFIG_NET_DSA_TAG_VSC73XX_8021Q) += tag_vsc73xx_8021q.o
obj-$(CONFIG_NET_DSA_TAG_XRS700X) += tag_xrs700x.o
# for tracing framework to find trace.h
diff --git a/net/dsa/devlink.c b/net/dsa/devlink.c
index 431bf52290a1..0aac887d0098 100644
--- a/net/dsa/devlink.c
+++ b/net/dsa/devlink.c
@@ -194,7 +194,8 @@ int dsa_devlink_param_get(struct devlink *dl, u32 id,
EXPORT_SYMBOL_GPL(dsa_devlink_param_get);
int dsa_devlink_param_set(struct devlink *dl, u32 id,
- struct devlink_param_gset_ctx *ctx)
+ struct devlink_param_gset_ctx *ctx,
+ struct netlink_ext_ack *extack)
{
struct dsa_switch *ds = dsa_devlink_to_ds(dl);
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 09d2f5d4b3dd..668c729946ea 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -1505,6 +1505,14 @@ static int dsa_switch_probe(struct dsa_switch *ds)
if (!ds->num_ports)
return -EINVAL;
+ if (ds->phylink_mac_ops) {
+ if (ds->ops->phylink_mac_select_pcs ||
+ ds->ops->phylink_mac_config ||
+ ds->ops->phylink_mac_link_down ||
+ ds->ops->phylink_mac_link_up)
+ return -EINVAL;
+ }
+
if (np) {
err = dsa_switch_parse_of(ds, np);
if (err)
diff --git a/net/dsa/port.c b/net/dsa/port.c
index c42dac87671b..25258b33e59e 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -1467,10 +1467,34 @@ int dsa_port_change_conduit(struct dsa_port *dp, struct net_device *conduit,
*/
dsa_user_unsync_ha(dev);
+ /* If live-changing, we also need to uninstall the user device address
+ * from the port FDB and the conduit interface.
+ */
+ if (dev->flags & IFF_UP)
+ dsa_user_host_uc_uninstall(dev);
+
err = dsa_port_assign_conduit(dp, conduit, extack, true);
if (err)
goto rewind_old_addrs;
+ /* If the port doesn't have its own MAC address and relies on the DSA
+ * conduit's one, inherit it again from the new DSA conduit.
+ */
+ if (is_zero_ether_addr(dp->mac))
+ eth_hw_addr_inherit(dev, conduit);
+
+ /* If live-changing, we need to install the user device address to the
+ * port FDB and the conduit interface.
+ */
+ if (dev->flags & IFF_UP) {
+ err = dsa_user_host_uc_install(dev, dev->dev_addr);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed to install host UC address");
+ goto rewind_addr_inherit;
+ }
+ }
+
dsa_user_sync_ha(dev);
if (vlan_filtering) {
@@ -1500,10 +1524,26 @@ rewind_new_vlan:
rewind_new_addrs:
dsa_user_unsync_ha(dev);
+ if (dev->flags & IFF_UP)
+ dsa_user_host_uc_uninstall(dev);
+
+rewind_addr_inherit:
+ if (is_zero_ether_addr(dp->mac))
+ eth_hw_addr_inherit(dev, old_conduit);
+
dsa_port_assign_conduit(dp, old_conduit, NULL, false);
/* Restore the objects on the old CPU port */
rewind_old_addrs:
+ if (dev->flags & IFF_UP) {
+ tmp = dsa_user_host_uc_install(dev, dev->dev_addr);
+ if (tmp) {
+ dev_err(ds->dev,
+ "port %d failed to restore host UC address: %pe\n",
+ dp->index, ERR_PTR(tmp));
+ }
+ }
+
dsa_user_sync_ha(dev);
if (vlan_filtering) {
@@ -1535,30 +1575,11 @@ void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
cpu_dp->tag_ops = tag_ops;
}
-static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp)
-{
- struct device_node *phy_dn;
- struct phy_device *phydev;
-
- phy_dn = of_parse_phandle(dp->dn, "phy-handle", 0);
- if (!phy_dn)
- return NULL;
-
- phydev = of_phy_find_device(phy_dn);
- if (!phydev) {
- of_node_put(phy_dn);
- return ERR_PTR(-EPROBE_DEFER);
- }
-
- of_node_put(phy_dn);
- return phydev;
-}
-
static struct phylink_pcs *
dsa_port_phylink_mac_select_pcs(struct phylink_config *config,
phy_interface_t interface)
{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+ struct dsa_port *dp = dsa_phylink_to_port(config);
struct phylink_pcs *pcs = ERR_PTR(-EOPNOTSUPP);
struct dsa_switch *ds = dp->ds;
@@ -1568,26 +1589,11 @@ dsa_port_phylink_mac_select_pcs(struct phylink_config *config,
return pcs;
}
-static int dsa_port_phylink_mac_prepare(struct phylink_config *config,
- unsigned int mode,
- phy_interface_t interface)
-{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
- struct dsa_switch *ds = dp->ds;
- int err = 0;
-
- if (ds->ops->phylink_mac_prepare)
- err = ds->ops->phylink_mac_prepare(ds, dp->index, mode,
- interface);
-
- return err;
-}
-
static void dsa_port_phylink_mac_config(struct phylink_config *config,
unsigned int mode,
const struct phylink_link_state *state)
{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+ struct dsa_port *dp = dsa_phylink_to_port(config);
struct dsa_switch *ds = dp->ds;
if (!ds->ops->phylink_mac_config)
@@ -1596,37 +1602,15 @@ static void dsa_port_phylink_mac_config(struct phylink_config *config,
ds->ops->phylink_mac_config(ds, dp->index, mode, state);
}
-static int dsa_port_phylink_mac_finish(struct phylink_config *config,
- unsigned int mode,
- phy_interface_t interface)
-{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
- struct dsa_switch *ds = dp->ds;
- int err = 0;
-
- if (ds->ops->phylink_mac_finish)
- err = ds->ops->phylink_mac_finish(ds, dp->index, mode,
- interface);
-
- return err;
-}
-
static void dsa_port_phylink_mac_link_down(struct phylink_config *config,
unsigned int mode,
phy_interface_t interface)
{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
- struct phy_device *phydev = NULL;
+ struct dsa_port *dp = dsa_phylink_to_port(config);
struct dsa_switch *ds = dp->ds;
- if (dsa_port_is_user(dp))
- phydev = dp->user->phydev;
-
- if (!ds->ops->phylink_mac_link_down) {
- if (ds->ops->adjust_link && phydev)
- ds->ops->adjust_link(ds, dp->index, phydev);
+ if (!ds->ops->phylink_mac_link_down)
return;
- }
ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface);
}
@@ -1638,14 +1622,11 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
int speed, int duplex,
bool tx_pause, bool rx_pause)
{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+ struct dsa_port *dp = dsa_phylink_to_port(config);
struct dsa_switch *ds = dp->ds;
- if (!ds->ops->phylink_mac_link_up) {
- if (ds->ops->adjust_link && phydev)
- ds->ops->adjust_link(ds, dp->index, phydev);
+ if (!ds->ops->phylink_mac_link_up)
return;
- }
ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev,
speed, duplex, tx_pause, rx_pause);
@@ -1653,15 +1634,14 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
static const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
.mac_select_pcs = dsa_port_phylink_mac_select_pcs,
- .mac_prepare = dsa_port_phylink_mac_prepare,
.mac_config = dsa_port_phylink_mac_config,
- .mac_finish = dsa_port_phylink_mac_finish,
.mac_link_down = dsa_port_phylink_mac_link_down,
.mac_link_up = dsa_port_phylink_mac_link_up,
};
int dsa_port_phylink_create(struct dsa_port *dp)
{
+ const struct phylink_mac_ops *mac_ops;
struct dsa_switch *ds = dp->ds;
phy_interface_t mode;
struct phylink *pl;
@@ -1685,8 +1665,12 @@ int dsa_port_phylink_create(struct dsa_port *dp)
}
}
- pl = phylink_create(&dp->pl_config, of_fwnode_handle(dp->dn),
- mode, &dsa_port_phylink_mac_ops);
+ mac_ops = &dsa_port_phylink_mac_ops;
+ if (ds->phylink_mac_ops)
+ mac_ops = ds->phylink_mac_ops;
+
+ pl = phylink_create(&dp->pl_config, of_fwnode_handle(dp->dn), mode,
+ mac_ops);
if (IS_ERR(pl)) {
pr_err("error creating PHYLINK: %ld\n", PTR_ERR(pl));
return PTR_ERR(pl);
@@ -1703,78 +1687,6 @@ void dsa_port_phylink_destroy(struct dsa_port *dp)
dp->pl = NULL;
}
-static int dsa_shared_port_setup_phy_of(struct dsa_port *dp, bool enable)
-{
- struct dsa_switch *ds = dp->ds;
- struct phy_device *phydev;
- int port = dp->index;
- int err = 0;
-
- phydev = dsa_port_get_phy_device(dp);
- if (!phydev)
- return 0;
-
- if (IS_ERR(phydev))
- return PTR_ERR(phydev);
-
- if (enable) {
- err = genphy_resume(phydev);
- if (err < 0)
- goto err_put_dev;
-
- err = genphy_read_status(phydev);
- if (err < 0)
- goto err_put_dev;
- } else {
- err = genphy_suspend(phydev);
- if (err < 0)
- goto err_put_dev;
- }
-
- if (ds->ops->adjust_link)
- ds->ops->adjust_link(ds, port, phydev);
-
- dev_dbg(ds->dev, "enabled port's phy: %s", phydev_name(phydev));
-
-err_put_dev:
- put_device(&phydev->mdio.dev);
- return err;
-}
-
-static int dsa_shared_port_fixed_link_register_of(struct dsa_port *dp)
-{
- struct device_node *dn = dp->dn;
- struct dsa_switch *ds = dp->ds;
- struct phy_device *phydev;
- int port = dp->index;
- phy_interface_t mode;
- int err;
-
- err = of_phy_register_fixed_link(dn);
- if (err) {
- dev_err(ds->dev,
- "failed to register the fixed PHY of port %d\n",
- port);
- return err;
- }
-
- phydev = of_phy_find_device(dn);
-
- err = of_get_phy_mode(dn, &mode);
- if (err)
- mode = PHY_INTERFACE_MODE_NA;
- phydev->interface = mode;
-
- genphy_read_status(phydev);
-
- if (ds->ops->adjust_link)
- ds->ops->adjust_link(ds, port, phydev);
-
- put_device(&phydev->mdio.dev);
-
- return 0;
-}
-
static int dsa_shared_port_phylink_register(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
@@ -1952,12 +1864,23 @@ static void dsa_shared_port_validate_of(struct dsa_port *dp,
dn, dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index);
}
+static void dsa_shared_port_link_down(struct dsa_port *dp)
+{
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->phylink_mac_ops && ds->phylink_mac_ops->mac_link_down)
+ ds->phylink_mac_ops->mac_link_down(&dp->pl_config, MLO_AN_FIXED,
+ PHY_INTERFACE_MODE_NA);
+ else if (ds->ops->phylink_mac_link_down)
+ ds->ops->phylink_mac_link_down(ds, dp->index, MLO_AN_FIXED,
+ PHY_INTERFACE_MODE_NA);
+}
+
int dsa_shared_port_link_register_of(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
bool missing_link_description;
bool missing_phy_mode;
- int port = dp->index;
dsa_shared_port_validate_of(dp, &missing_phy_mode,
&missing_link_description);
@@ -1967,46 +1890,28 @@ int dsa_shared_port_link_register_of(struct dsa_port *dp)
dsa_switches_apply_workarounds))
return -EINVAL;
- if (!ds->ops->adjust_link) {
- if (missing_link_description) {
- dev_warn(ds->dev,
- "Skipping phylink registration for %s port %d\n",
- dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index);
- } else {
- if (ds->ops->phylink_mac_link_down)
- ds->ops->phylink_mac_link_down(ds, port,
- MLO_AN_FIXED, PHY_INTERFACE_MODE_NA);
+ if (missing_link_description) {
+ dev_warn(ds->dev,
+ "Skipping phylink registration for %s port %d\n",
+ dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index);
+ } else {
+ dsa_shared_port_link_down(dp);
- return dsa_shared_port_phylink_register(dp);
- }
- return 0;
+ return dsa_shared_port_phylink_register(dp);
}
- dev_warn(ds->dev,
- "Using legacy PHYLIB callbacks. Please migrate to PHYLINK!\n");
-
- if (of_phy_is_fixed_link(dp->dn))
- return dsa_shared_port_fixed_link_register_of(dp);
- else
- return dsa_shared_port_setup_phy_of(dp, true);
+ return 0;
}
void dsa_shared_port_link_unregister_of(struct dsa_port *dp)
{
- struct dsa_switch *ds = dp->ds;
-
- if (!ds->ops->adjust_link && dp->pl) {
+ if (dp->pl) {
rtnl_lock();
phylink_disconnect_phy(dp->pl);
rtnl_unlock();
dsa_port_phylink_destroy(dp);
return;
}
-
- if (of_phy_is_fixed_link(dp->dn))
- of_phy_deregister_fixed_link(dp->dn);
- else
- dsa_shared_port_setup_phy_of(dp, false);
}
int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr,
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 71b26ae6db39..3ee53e28ec2e 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -286,7 +286,8 @@ int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds,
* be used for VLAN-unaware bridging.
*/
int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port,
- struct dsa_bridge bridge)
+ struct dsa_bridge bridge, bool *tx_fwd_offload,
+ struct netlink_ext_ack *extack)
{
struct dsa_port *dp = dsa_to_port(ds, port);
u16 standalone_vid, bridge_vid;
@@ -304,6 +305,8 @@ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port,
dsa_port_tag_8021q_vlan_del(dp, standalone_vid, false);
+ *tx_fwd_offload = true;
+
return 0;
}
EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_join);
@@ -468,8 +471,8 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
}
EXPORT_SYMBOL_GPL(dsa_8021q_xmit);
-struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit,
- int vbid)
+static struct net_device *
+dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit, int vbid)
{
struct dsa_port *cpu_dp = conduit->dsa_ptr;
struct dsa_switch_tree *dst = cpu_dp->dst;
@@ -495,30 +498,91 @@ struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit,
return NULL;
}
-EXPORT_SYMBOL_GPL(dsa_tag_8021q_find_port_by_vbid);
+struct net_device *dsa_tag_8021q_find_user(struct net_device *conduit,
+ int source_port, int switch_id,
+ int vid, int vbid)
+{
+ /* Always prefer precise source port information, if available */
+ if (source_port != -1 && switch_id != -1)
+ return dsa_conduit_find_user(conduit, switch_id, source_port);
+ else if (vbid >= 1)
+ return dsa_tag_8021q_find_port_by_vbid(conduit, vbid);
+
+ return dsa_find_designated_bridge_port_by_vid(conduit, vid);
+}
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_find_user);
+
+/**
+ * dsa_8021q_rcv - Decode source information from tag_8021q header
+ * @skb: RX socket buffer
+ * @source_port: pointer to storage for precise source port information.
+ * If this is known already from outside tag_8021q, the pre-initialized
+ * value is preserved. If not known, pass -1.
+ * @switch_id: similar to source_port.
+ * @vbid: pointer to storage for imprecise bridge ID. Must be pre-initialized
+ * with -1. If a positive value is returned, the source_port and switch_id
+ * are invalid.
+ * @vid: pointer to storage for original VID, in case tag_8021q decoding failed.
+ *
+ * If the packet has a tag_8021q header, decode it and set @source_port,
+ * @switch_id and @vbid, and strip the header. Otherwise set @vid and keep the
+ * header in the hwaccel area of the packet.
+ */
void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id,
- int *vbid)
+ int *vbid, int *vid)
{
- u16 vid, tci;
+ int tmp_source_port, tmp_switch_id, tmp_vbid;
+ __be16 vlan_proto;
+ u16 tmp_vid, tci;
if (skb_vlan_tag_present(skb)) {
+ vlan_proto = skb->vlan_proto;
tci = skb_vlan_tag_get(skb);
__vlan_hwaccel_clear_tag(skb);
} else {
+ struct vlan_ethhdr *hdr = vlan_eth_hdr(skb);
+
+ vlan_proto = hdr->h_vlan_proto;
skb_push_rcsum(skb, ETH_HLEN);
__skb_vlan_pop(skb, &tci);
skb_pull_rcsum(skb, ETH_HLEN);
}
- vid = tci & VLAN_VID_MASK;
+ tmp_vid = tci & VLAN_VID_MASK;
+ if (!vid_is_dsa_8021q(tmp_vid)) {
+ /* Not a tag_8021q frame, so return the VID to the
+ * caller for further processing, and put the tag back
+ */
+ if (vid)
+ *vid = tmp_vid;
+
+ __vlan_hwaccel_put_tag(skb, vlan_proto, tci);
+
+ return;
+ }
- *source_port = dsa_8021q_rx_source_port(vid);
- *switch_id = dsa_8021q_rx_switch_id(vid);
+ tmp_source_port = dsa_8021q_rx_source_port(tmp_vid);
+ tmp_switch_id = dsa_8021q_rx_switch_id(tmp_vid);
+ tmp_vbid = dsa_tag_8021q_rx_vbid(tmp_vid);
+
+ /* Precise source port information is unknown when receiving from a
+ * VLAN-unaware bridging domain, and tmp_source_port and tmp_switch_id
+ * are zeroes in this case.
+ *
+ * Preserve the source information from hardware-specific mechanisms,
+ * if available. This allows us to not overwrite a valid source port
+ * and switch ID with less precise values.
+ */
+ if (tmp_vbid == 0 && *source_port == -1)
+ *source_port = tmp_source_port;
+ if (tmp_vbid == 0 && *switch_id == -1)
+ *switch_id = tmp_switch_id;
if (vbid)
- *vbid = dsa_tag_8021q_rx_vbid(vid);
+ *vbid = tmp_vbid;
skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+ return;
}
EXPORT_SYMBOL_GPL(dsa_8021q_rcv);
diff --git a/net/dsa/tag_8021q.h b/net/dsa/tag_8021q.h
index 41f7167ac520..27b8906f99ec 100644
--- a/net/dsa/tag_8021q.h
+++ b/net/dsa/tag_8021q.h
@@ -14,10 +14,11 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
u16 tpid, u16 tci);
void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id,
- int *vbid);
+ int *vbid, int *vid);
-struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit,
- int vbid);
+struct net_device *dsa_tag_8021q_find_user(struct net_device *conduit,
+ int source_port, int switch_id,
+ int vid, int vbid);
int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
struct dsa_notifier_tag_8021q_vlan_info *info);
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index b059381310fe..8e8b1bef6af6 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -81,7 +81,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
{
int src_port, switch_id;
- dsa_8021q_rcv(skb, &src_port, &switch_id, NULL);
+ dsa_8021q_rcv(skb, &src_port, &switch_id, NULL, NULL);
skb->dev = dsa_conduit_find_user(netdev, switch_id, src_port);
if (!skb->dev)
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 1aba1d05c27a..3e902af7eea6 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -472,37 +472,14 @@ static bool sja1110_skb_has_inband_control_extension(const struct sk_buff *skb)
return ntohs(eth_hdr(skb)->h_proto) == ETH_P_SJA1110;
}
-/* If the VLAN in the packet is a tag_8021q one, set @source_port and
- * @switch_id and strip the header. Otherwise set @vid and keep it in the
- * packet.
- */
-static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port,
- int *switch_id, int *vbid, u16 *vid)
-{
- struct vlan_ethhdr *hdr = vlan_eth_hdr(skb);
- u16 vlan_tci;
-
- if (skb_vlan_tag_present(skb))
- vlan_tci = skb_vlan_tag_get(skb);
- else
- vlan_tci = ntohs(hdr->h_vlan_TCI);
-
- if (vid_is_dsa_8021q(vlan_tci & VLAN_VID_MASK))
- return dsa_8021q_rcv(skb, source_port, switch_id, vbid);
-
- /* Try our best with imprecise RX */
- *vid = vlan_tci & VLAN_VID_MASK;
-}
-
static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
struct net_device *netdev)
{
- int source_port = -1, switch_id = -1, vbid = -1;
+ int source_port = -1, switch_id = -1, vbid = -1, vid = -1;
struct sja1105_meta meta = {0};
struct ethhdr *hdr;
bool is_link_local;
bool is_meta;
- u16 vid;
hdr = eth_hdr(skb);
is_link_local = sja1105_is_link_local(skb);
@@ -524,37 +501,16 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
/* Normal data plane traffic and link-local frames are tagged with
* a tag_8021q VLAN which we have to strip
*/
- if (sja1105_skb_has_tag_8021q(skb)) {
- int tmp_source_port = -1, tmp_switch_id = -1;
-
- sja1105_vlan_rcv(skb, &tmp_source_port, &tmp_switch_id, &vbid,
- &vid);
- /* Preserve the source information from the INCL_SRCPT option,
- * if available. This allows us to not overwrite a valid source
- * port and switch ID with zeroes when receiving link-local
- * frames from a VLAN-unaware bridged port (non-zero vbid) or a
- * VLAN-aware bridged port (non-zero vid). Furthermore, the
- * tag_8021q source port information is only of trust when the
- * vbid is 0 (precise port). Otherwise, tmp_source_port and
- * tmp_switch_id will be zeroes.
- */
- if (vbid == 0 && source_port == -1)
- source_port = tmp_source_port;
- if (vbid == 0 && switch_id == -1)
- switch_id = tmp_switch_id;
- } else if (source_port == -1 && switch_id == -1) {
+ if (sja1105_skb_has_tag_8021q(skb))
+ dsa_8021q_rcv(skb, &source_port, &switch_id, &vbid, &vid);
+ else if (source_port == -1 && switch_id == -1)
/* Packets with no source information have no chance of
* getting accepted, drop them straight away.
*/
return NULL;
- }
- if (source_port != -1 && switch_id != -1)
- skb->dev = dsa_conduit_find_user(netdev, switch_id, source_port);
- else if (vbid >= 1)
- skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid);
- else
- skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid);
+ skb->dev = dsa_tag_8021q_find_user(netdev, source_port, switch_id,
+ vid, vbid);
if (!skb->dev) {
netdev_warn(netdev, "Couldn't decode source port\n");
return NULL;
@@ -677,9 +633,8 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
struct net_device *netdev)
{
- int source_port = -1, switch_id = -1, vbid = -1;
+ int source_port = -1, switch_id = -1, vbid = -1, vid = -1;
bool host_only = false;
- u16 vid = 0;
if (sja1110_skb_has_inband_control_extension(skb)) {
skb = sja1110_rcv_inband_control_extension(skb, &source_port,
@@ -691,14 +646,11 @@ static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
/* Packets with in-band control extensions might still have RX VLANs */
if (likely(sja1105_skb_has_tag_8021q(skb)))
- sja1105_vlan_rcv(skb, &source_port, &switch_id, &vbid, &vid);
-
- if (vbid >= 1)
- skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid);
- else if (source_port == -1 || switch_id == -1)
- skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid);
- else
- skb->dev = dsa_conduit_find_user(netdev, switch_id, source_port);
+ dsa_8021q_rcv(skb, &source_port, &switch_id, &vbid, &vid);
+
+ skb->dev = dsa_tag_8021q_find_user(netdev, source_port, switch_id,
+ vid, vbid);
+
if (!skb->dev) {
netdev_warn(netdev, "Couldn't decode source port\n");
return NULL;
diff --git a/net/dsa/tag_vsc73xx_8021q.c b/net/dsa/tag_vsc73xx_8021q.c
new file mode 100644
index 000000000000..af121a9aff7f
--- /dev/null
+++ b/net/dsa/tag_vsc73xx_8021q.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright (C) 2024 Pawel Dembicki <paweldembicki@gmail.com>
+ */
+#include <linux/dsa/8021q.h>
+
+#include "tag.h"
+#include "tag_8021q.h"
+
+#define VSC73XX_8021Q_NAME "vsc73xx-8021q"
+
+static struct sk_buff *
+vsc73xx_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct dsa_port *dp = dsa_user_to_port(netdev);
+ u16 queue_mapping = skb_get_queue_mapping(skb);
+ u16 tx_vid = dsa_tag_8021q_standalone_vid(dp);
+ u8 pcp;
+
+ if (skb->offload_fwd_mark) {
+ unsigned int bridge_num = dsa_port_bridge_num_get(dp);
+ struct net_device *br = dsa_port_bridge_dev_get(dp);
+
+ if (br_vlan_enabled(br))
+ return skb;
+
+ tx_vid = dsa_tag_8021q_bridge_vid(bridge_num);
+ }
+
+ pcp = netdev_txq_to_tc(netdev, queue_mapping);
+
+ return dsa_8021q_xmit(skb, netdev, ETH_P_8021Q,
+ ((pcp << VLAN_PRIO_SHIFT) | tx_vid));
+}
+
+static struct sk_buff *
+vsc73xx_rcv(struct sk_buff *skb, struct net_device *netdev)
+{
+ int src_port = -1, switch_id = -1, vbid = -1, vid = -1;
+
+ dsa_8021q_rcv(skb, &src_port, &switch_id, &vbid, &vid);
+
+ skb->dev = dsa_tag_8021q_find_user(netdev, src_port, switch_id,
+ vid, vbid);
+ if (!skb->dev) {
+ dev_warn_ratelimited(&netdev->dev,
+ "Couldn't decode source port\n");
+ return NULL;
+ }
+
+ dsa_default_offload_fwd_mark(skb);
+
+ return skb;
+}
+
+static const struct dsa_device_ops vsc73xx_8021q_netdev_ops = {
+ .name = VSC73XX_8021Q_NAME,
+ .proto = DSA_TAG_PROTO_VSC73XX_8021Q,
+ .xmit = vsc73xx_xmit,
+ .rcv = vsc73xx_rcv,
+ .needed_headroom = VLAN_HLEN,
+ .promisc_on_conduit = true,
+};
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("DSA tag driver for VSC73XX family of switches, using VLAN");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_VSC73XX_8021Q, VSC73XX_8021Q_NAME);
+
+module_dsa_tag_driver(vsc73xx_8021q_netdev_ops);
diff --git a/net/dsa/trace.h b/net/dsa/trace.h
index 567f29a39707..83f3e5f78491 100644
--- a/net/dsa/trace.h
+++ b/net/dsa/trace.h
@@ -39,8 +39,8 @@ DECLARE_EVENT_CLASS(dsa_port_addr_op_hw,
),
TP_fast_assign(
- __assign_str(dev, dev_name(dp->ds->dev));
- __assign_str(kind, dsa_port_kind(dp));
+ __assign_str(dev);
+ __assign_str(kind);
__entry->port = dp->index;
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
@@ -98,8 +98,8 @@ DECLARE_EVENT_CLASS(dsa_port_addr_op_refcount,
),
TP_fast_assign(
- __assign_str(dev, dev_name(dp->ds->dev));
- __assign_str(kind, dsa_port_kind(dp));
+ __assign_str(dev);
+ __assign_str(kind);
__entry->port = dp->index;
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
@@ -157,8 +157,8 @@ DECLARE_EVENT_CLASS(dsa_port_addr_del_not_found,
),
TP_fast_assign(
- __assign_str(dev, dev_name(dp->ds->dev));
- __assign_str(kind, dsa_port_kind(dp));
+ __assign_str(dev);
+ __assign_str(kind);
__entry->port = dp->index;
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
@@ -199,7 +199,7 @@ TRACE_EVENT(dsa_lag_fdb_add_hw,
),
TP_fast_assign(
- __assign_str(dev, lag_dev->name);
+ __assign_str(dev);
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
@@ -227,7 +227,7 @@ TRACE_EVENT(dsa_lag_fdb_add_bump,
),
TP_fast_assign(
- __assign_str(dev, lag_dev->name);
+ __assign_str(dev);
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
@@ -255,7 +255,7 @@ TRACE_EVENT(dsa_lag_fdb_del_hw,
),
TP_fast_assign(
- __assign_str(dev, lag_dev->name);
+ __assign_str(dev);
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
@@ -283,7 +283,7 @@ TRACE_EVENT(dsa_lag_fdb_del_drop,
),
TP_fast_assign(
- __assign_str(dev, lag_dev->name);
+ __assign_str(dev);
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
@@ -310,7 +310,7 @@ TRACE_EVENT(dsa_lag_fdb_del_not_found,
),
TP_fast_assign(
- __assign_str(dev, lag_dev->name);
+ __assign_str(dev);
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
@@ -338,8 +338,8 @@ DECLARE_EVENT_CLASS(dsa_vlan_op_hw,
),
TP_fast_assign(
- __assign_str(dev, dev_name(dp->ds->dev));
- __assign_str(kind, dsa_port_kind(dp));
+ __assign_str(dev);
+ __assign_str(kind);
__entry->port = dp->index;
__entry->vid = vlan->vid;
__entry->flags = vlan->flags;
@@ -383,8 +383,8 @@ DECLARE_EVENT_CLASS(dsa_vlan_op_refcount,
),
TP_fast_assign(
- __assign_str(dev, dev_name(dp->ds->dev));
- __assign_str(kind, dsa_port_kind(dp));
+ __assign_str(dev);
+ __assign_str(kind);
__entry->port = dp->index;
__entry->vid = vlan->vid;
__entry->flags = vlan->flags;
@@ -426,8 +426,8 @@ TRACE_EVENT(dsa_vlan_del_not_found,
),
TP_fast_assign(
- __assign_str(dev, dev_name(dp->ds->dev));
- __assign_str(kind, dsa_port_kind(dp));
+ __assign_str(dev);
+ __assign_str(kind);
__entry->port = dp->index;
__entry->vid = vlan->vid;
),
diff --git a/net/dsa/user.c b/net/dsa/user.c
index 16d395bb1a1f..f5adfa1d978a 100644
--- a/net/dsa/user.c
+++ b/net/dsa/user.c
@@ -355,60 +355,82 @@ static int dsa_user_get_iflink(const struct net_device *dev)
return READ_ONCE(dsa_user_to_conduit(dev)->ifindex);
}
-static int dsa_user_open(struct net_device *dev)
+int dsa_user_host_uc_install(struct net_device *dev, const u8 *addr)
{
struct net_device *conduit = dsa_user_to_conduit(dev);
struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
int err;
- err = dev_open(conduit, NULL);
- if (err < 0) {
- netdev_err(dev, "failed to open conduit %s\n", conduit->name);
- goto out;
- }
-
if (dsa_switch_supports_uc_filtering(ds)) {
- err = dsa_port_standalone_host_fdb_add(dp, dev->dev_addr, 0);
+ err = dsa_port_standalone_host_fdb_add(dp, addr, 0);
if (err)
goto out;
}
- if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr)) {
- err = dev_uc_add(conduit, dev->dev_addr);
+ if (!ether_addr_equal(addr, conduit->dev_addr)) {
+ err = dev_uc_add(conduit, addr);
if (err < 0)
goto del_host_addr;
}
- err = dsa_port_enable_rt(dp, dev->phydev);
- if (err)
- goto del_unicast;
-
return 0;
-del_unicast:
- if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr))
- dev_uc_del(conduit, dev->dev_addr);
del_host_addr:
if (dsa_switch_supports_uc_filtering(ds))
- dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0);
+ dsa_port_standalone_host_fdb_del(dp, addr, 0);
out:
return err;
}
-static int dsa_user_close(struct net_device *dev)
+void dsa_user_host_uc_uninstall(struct net_device *dev)
{
struct net_device *conduit = dsa_user_to_conduit(dev);
struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
- dsa_port_disable_rt(dp);
-
if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr))
dev_uc_del(conduit, dev->dev_addr);
if (dsa_switch_supports_uc_filtering(ds))
dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0);
+}
+
+static int dsa_user_open(struct net_device *dev)
+{
+ struct net_device *conduit = dsa_user_to_conduit(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
+ int err;
+
+ err = dev_open(conduit, NULL);
+ if (err < 0) {
+ netdev_err(dev, "failed to open conduit %s\n", conduit->name);
+ goto out;
+ }
+
+ err = dsa_user_host_uc_install(dev, dev->dev_addr);
+ if (err)
+ goto out;
+
+ err = dsa_port_enable_rt(dp, dev->phydev);
+ if (err)
+ goto out_del_host_uc;
+
+ return 0;
+
+out_del_host_uc:
+ dsa_user_host_uc_uninstall(dev);
+out:
+ return err;
+}
+
+static int dsa_user_close(struct net_device *dev)
+{
+ struct dsa_port *dp = dsa_user_to_port(dev);
+
+ dsa_port_disable_rt(dp);
+
+ dsa_user_host_uc_uninstall(dev);
return 0;
}
@@ -448,7 +470,6 @@ static void dsa_user_set_rx_mode(struct net_device *dev)
static int dsa_user_set_mac_address(struct net_device *dev, void *a)
{
- struct net_device *conduit = dsa_user_to_conduit(dev);
struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
struct sockaddr *addr = a;
@@ -470,34 +491,16 @@ static int dsa_user_set_mac_address(struct net_device *dev, void *a)
if (!(dev->flags & IFF_UP))
goto out_change_dev_addr;
- if (dsa_switch_supports_uc_filtering(ds)) {
- err = dsa_port_standalone_host_fdb_add(dp, addr->sa_data, 0);
- if (err)
- return err;
- }
-
- if (!ether_addr_equal(addr->sa_data, conduit->dev_addr)) {
- err = dev_uc_add(conduit, addr->sa_data);
- if (err < 0)
- goto del_unicast;
- }
-
- if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr))
- dev_uc_del(conduit, dev->dev_addr);
+ err = dsa_user_host_uc_install(dev, addr->sa_data);
+ if (err)
+ return err;
- if (dsa_switch_supports_uc_filtering(ds))
- dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0);
+ dsa_user_host_uc_uninstall(dev);
out_change_dev_addr:
eth_hw_addr_set(dev, addr->sa_data);
return 0;
-
-del_unicast:
- if (dsa_switch_supports_uc_filtering(ds))
- dsa_port_standalone_host_fdb_del(dp, addr->sa_data, 0);
-
- return err;
}
struct dsa_user_dump_ctx {
@@ -1726,7 +1729,7 @@ static int dsa_user_set_rxnfc(struct net_device *dev,
}
static int dsa_user_get_ts_info(struct net_device *dev,
- struct ethtool_ts_info *ts)
+ struct kernel_ethtool_ts_info *ts)
{
struct dsa_user_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->dp->ds;
@@ -2120,7 +2123,7 @@ int dsa_user_change_mtu(struct net_device *dev, int new_mtu)
if (err)
goto out_port_failed;
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
dsa_bridge_mtu_normalization(dp);
@@ -2137,6 +2140,32 @@ out_conduit_failed:
}
static int __maybe_unused
+dsa_user_dcbnl_set_apptrust(struct net_device *dev, u8 *sel, int nsel)
+{
+ struct dsa_port *dp = dsa_user_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+
+ if (!ds->ops->port_set_apptrust)
+ return -EOPNOTSUPP;
+
+ return ds->ops->port_set_apptrust(ds, port, sel, nsel);
+}
+
+static int __maybe_unused
+dsa_user_dcbnl_get_apptrust(struct net_device *dev, u8 *sel, int *nsel)
+{
+ struct dsa_port *dp = dsa_user_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+
+ if (!ds->ops->port_get_apptrust)
+ return -EOPNOTSUPP;
+
+ return ds->ops->port_get_apptrust(ds, port, sel, nsel);
+}
+
+static int __maybe_unused
dsa_user_dcbnl_set_default_prio(struct net_device *dev, struct dcb_app *app)
{
struct dsa_port *dp = dsa_user_to_port(dev);
@@ -2163,6 +2192,58 @@ dsa_user_dcbnl_set_default_prio(struct net_device *dev, struct dcb_app *app)
return 0;
}
+/* Update the DSCP prio entries on all user ports of the switch in case
+ * the switch supports global DSCP prio instead of per port DSCP prios.
+ */
+static int dsa_user_dcbnl_ieee_global_dscp_setdel(struct net_device *dev,
+ struct dcb_app *app, bool del)
+{
+ int (*setdel)(struct net_device *dev, struct dcb_app *app);
+ struct dsa_port *dp = dsa_user_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_port *other_dp;
+ int err, restore_err;
+
+ if (del)
+ setdel = dcb_ieee_delapp;
+ else
+ setdel = dcb_ieee_setapp;
+
+ dsa_switch_for_each_user_port(other_dp, ds) {
+ struct net_device *user = other_dp->user;
+
+ if (!user || user == dev)
+ continue;
+
+ err = setdel(user, app);
+ if (err)
+ goto err_try_to_restore;
+ }
+
+ return 0;
+
+err_try_to_restore:
+
+ /* Revert logic to restore previous state of app entries */
+ if (!del)
+ setdel = dcb_ieee_delapp;
+ else
+ setdel = dcb_ieee_setapp;
+
+ dsa_switch_for_each_user_port_continue_reverse(other_dp, ds) {
+ struct net_device *user = other_dp->user;
+
+ if (!user || user == dev)
+ continue;
+
+ restore_err = setdel(user, app);
+ if (restore_err)
+ netdev_err(user, "Failed to restore DSCP prio entry configuration\n");
+ }
+
+ return err;
+}
+
static int __maybe_unused
dsa_user_dcbnl_add_dscp_prio(struct net_device *dev, struct dcb_app *app)
{
@@ -2194,6 +2275,17 @@ dsa_user_dcbnl_add_dscp_prio(struct net_device *dev, struct dcb_app *app)
return err;
}
+ if (!ds->dscp_prio_mapping_is_global)
+ return 0;
+
+ err = dsa_user_dcbnl_ieee_global_dscp_setdel(dev, app, false);
+ if (err) {
+ if (ds->ops->port_del_dscp_prio)
+ ds->ops->port_del_dscp_prio(ds, port, dscp, new_prio);
+ dcb_ieee_delapp(dev, app);
+ return err;
+ }
+
return 0;
}
@@ -2264,6 +2356,18 @@ dsa_user_dcbnl_del_dscp_prio(struct net_device *dev, struct dcb_app *app)
return err;
}
+ if (!ds->dscp_prio_mapping_is_global)
+ return 0;
+
+ err = dsa_user_dcbnl_ieee_global_dscp_setdel(dev, app, true);
+ if (err) {
+ if (ds->ops->port_add_dscp_prio)
+ ds->ops->port_add_dscp_prio(ds, port, dscp,
+ app->priority);
+ dcb_ieee_setapp(dev, app);
+ return err;
+ }
+
return 0;
}
@@ -2376,6 +2480,8 @@ static const struct ethtool_ops dsa_user_ethtool_ops = {
static const struct dcbnl_rtnl_ops __maybe_unused dsa_user_dcbnl_ops = {
.ieee_setapp = dsa_user_dcbnl_ieee_setapp,
.ieee_delapp = dsa_user_dcbnl_ieee_delapp,
+ .dcbnl_setapptrust = dsa_user_dcbnl_set_apptrust,
+ .dcbnl_getapptrust = dsa_user_dcbnl_get_apptrust,
};
static void dsa_user_get_stats64(struct net_device *dev,
@@ -2445,7 +2551,7 @@ EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_change);
static void dsa_user_phylink_fixed_state(struct phylink_config *config,
struct phylink_link_state *state)
{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+ struct dsa_port *dp = dsa_phylink_to_port(config);
struct dsa_switch *ds = dp->ds;
/* No need to check that this operation is valid, the callback would
@@ -2776,12 +2882,6 @@ int dsa_user_change_conduit(struct net_device *dev, struct net_device *conduit,
ERR_PTR(err));
}
- /* If the port doesn't have its own MAC address and relies on the DSA
- * conduit's one, inherit it again from the new DSA conduit.
- */
- if (is_zero_ether_addr(dp->mac))
- eth_hw_addr_inherit(dev, conduit);
-
return 0;
out_revert_conduit_link:
diff --git a/net/dsa/user.h b/net/dsa/user.h
index 996069130bea..016884bead3c 100644
--- a/net/dsa/user.h
+++ b/net/dsa/user.h
@@ -42,6 +42,8 @@ int dsa_user_suspend(struct net_device *user_dev);
int dsa_user_resume(struct net_device *user_dev);
int dsa_user_register_notifier(void);
void dsa_user_unregister_notifier(void);
+int dsa_user_host_uc_install(struct net_device *dev, const u8 *addr);
+void dsa_user_host_uc_uninstall(struct net_device *dev);
void dsa_user_sync_ha(struct net_device *dev);
void dsa_user_unsync_ha(struct net_device *dev);
void dsa_user_setup_tagger(struct net_device *user);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 049c3adeb850..4e3651101b86 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -161,9 +161,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
skb->dev = dev;
skb_reset_mac_header(skb);
- eth = (struct ethhdr *)skb->data;
- skb_pull_inline(skb, ETH_HLEN);
-
+ eth = eth_skb_pull_mac(skb);
eth_skb_pkt_type(skb, dev);
/*
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 504f954a1b28..9a190635fe95 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -8,4 +8,4 @@ ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \
linkstate.o debug.o wol.o features.o privflags.o rings.o \
channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
tunnels.o fec.o eeprom.o stats.o phc_vclocks.o mm.o \
- module.o pse-pd.o plca.o mm.o
+ module.o cmis_fw_update.o cmis_cdb.o pse-pd.o plca.o mm.o
diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c
index 06a151165c31..f6f136ec7ddf 100644
--- a/net/ethtool/cabletest.c
+++ b/net/ethtool/cabletest.c
@@ -207,10 +207,6 @@ err:
}
EXPORT_SYMBOL_GPL(ethnl_cable_test_fault_length);
-struct cable_test_tdr_req_info {
- struct ethnl_req_info base;
-};
-
static const struct nla_policy cable_test_tdr_act_cfg_policy[] = {
[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST] = { .type = NLA_U32 },
[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST] = { .type = NLA_U32 },
diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c
index 7b4bbd674bae..cee188da54f8 100644
--- a/net/ethtool/channels.c
+++ b/net/ethtool/channels.c
@@ -171,11 +171,9 @@ ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info)
*/
if (ethtool_get_max_rxnfc_channel(dev, &max_rxnfc_in_use))
max_rxnfc_in_use = 0;
- if (!netif_is_rxfh_configured(dev) ||
- ethtool_get_max_rxfh_channel(dev, &max_rxfh_in_use))
- max_rxfh_in_use = 0;
+ max_rxfh_in_use = ethtool_get_max_rxfh_channel(dev);
if (channels.combined_count + channels.rx_count <= max_rxfh_in_use) {
- GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing indirection table settings");
+ GENL_SET_ERR_MSG_FMT(info, "requested channel counts are too low for existing indirection table (%d)", max_rxfh_in_use);
return -EINVAL;
}
if (channels.combined_count + channels.rx_count <= max_rxnfc_in_use) {
diff --git a/net/ethtool/cmis.h b/net/ethtool/cmis.h
new file mode 100644
index 000000000000..e71cc3e1b7eb
--- /dev/null
+++ b/net/ethtool/cmis.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#define ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH 120
+#define ETHTOOL_CMIS_CDB_CMD_PAGE 0x9F
+#define ETHTOOL_CMIS_CDB_PAGE_I2C_ADDR 0x50
+
+/**
+ * struct ethtool_cmis_cdb - CDB commands parameters
+ * @cmis_rev: CMIS revision major.
+ * @read_write_len_ext: Allowable additional number of byte octets to the LPL
+ * in a READ or a WRITE CDB commands.
+ * @max_completion_time: Maximum CDB command completion time in msec.
+ */
+struct ethtool_cmis_cdb {
+ u8 cmis_rev;
+ u8 read_write_len_ext;
+ u16 max_completion_time;
+};
+
+enum ethtool_cmis_cdb_cmd_id {
+ ETHTOOL_CMIS_CDB_CMD_QUERY_STATUS = 0x0000,
+ ETHTOOL_CMIS_CDB_CMD_MODULE_FEATURES = 0x0040,
+ ETHTOOL_CMIS_CDB_CMD_FW_MANAGMENT_FEATURES = 0x0041,
+ ETHTOOL_CMIS_CDB_CMD_START_FW_DOWNLOAD = 0x0101,
+ ETHTOOL_CMIS_CDB_CMD_WRITE_FW_BLOCK_LPL = 0x0103,
+ ETHTOOL_CMIS_CDB_CMD_COMPLETE_FW_DOWNLOAD = 0x0107,
+ ETHTOOL_CMIS_CDB_CMD_RUN_FW_IMAGE = 0x0109,
+ ETHTOOL_CMIS_CDB_CMD_COMMIT_FW_IMAGE = 0x010A,
+};
+
+/**
+ * struct ethtool_cmis_cdb_request - CDB commands request fields as decribed in
+ * the CMIS standard
+ * @id: Command ID.
+ * @epl_len: EPL memory length.
+ * @lpl_len: LPL memory length.
+ * @chk_code: Check code for the previous field and the payload.
+ * @resv1: Added to match the CMIS standard request continuity.
+ * @resv2: Added to match the CMIS standard request continuity.
+ * @payload: Payload for the CDB commands.
+ */
+struct ethtool_cmis_cdb_request {
+ __be16 id;
+ struct_group(body,
+ __be16 epl_len;
+ u8 lpl_len;
+ u8 chk_code;
+ u8 resv1;
+ u8 resv2;
+ u8 payload[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH];
+ );
+};
+
+#define CDB_F_COMPLETION_VALID BIT(0)
+#define CDB_F_STATUS_VALID BIT(1)
+#define CDB_F_MODULE_STATE_VALID BIT(2)
+
+/**
+ * struct ethtool_cmis_cdb_cmd_args - CDB commands execution arguments
+ * @req: CDB command fields as described in the CMIS standard.
+ * @max_duration: Maximum duration time for command completion in msec.
+ * @read_write_len_ext: Allowable additional number of byte octets to the LPL
+ * in a READ or a WRITE commands.
+ * @msleep_pre_rpl: Waiting time before checking reply in msec.
+ * @rpl_exp_len: Expected reply length in bytes.
+ * @flags: Validation flags for CDB commands.
+ * @err_msg: Error message to be sent to user space.
+ */
+struct ethtool_cmis_cdb_cmd_args {
+ struct ethtool_cmis_cdb_request req;
+ u16 max_duration;
+ u8 read_write_len_ext;
+ u8 msleep_pre_rpl;
+ u8 rpl_exp_len;
+ u8 flags;
+ char *err_msg;
+};
+
+/**
+ * struct ethtool_cmis_cdb_rpl_hdr - CDB commands reply header arguments
+ * @rpl_len: Reply length.
+ * @rpl_chk_code: Reply check code.
+ */
+struct ethtool_cmis_cdb_rpl_hdr {
+ u8 rpl_len;
+ u8 rpl_chk_code;
+};
+
+/**
+ * struct ethtool_cmis_cdb_rpl - CDB commands reply arguments
+ * @hdr: CDB commands reply header arguments.
+ * @payload: Payload for the CDB commands reply.
+ */
+struct ethtool_cmis_cdb_rpl {
+ struct ethtool_cmis_cdb_rpl_hdr hdr;
+ u8 payload[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH];
+};
+
+u32 ethtool_cmis_get_max_payload_size(u8 num_of_byte_octs);
+
+void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args,
+ enum ethtool_cmis_cdb_cmd_id cmd, u8 *pl,
+ u8 lpl_len, u16 max_duration,
+ u8 read_write_len_ext, u16 msleep_pre_rpl,
+ u8 rpl_exp_len, u8 flags);
+
+void ethtool_cmis_cdb_check_completion_flag(u8 cmis_rev, u8 *flags);
+
+void ethtool_cmis_page_init(struct ethtool_module_eeprom *page_data,
+ u8 page, u32 offset, u32 length);
+void ethtool_cmis_page_fini(struct ethtool_module_eeprom *page_data);
+
+struct ethtool_cmis_cdb *
+ethtool_cmis_cdb_init(struct net_device *dev,
+ const struct ethtool_module_fw_flash_params *params,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params);
+void ethtool_cmis_cdb_fini(struct ethtool_cmis_cdb *cdb);
+
+int ethtool_cmis_wait_for_cond(struct net_device *dev, u8 flags, u8 flag,
+ u16 max_duration, u32 offset,
+ bool (*cond_success)(u8), bool (*cond_fail)(u8), u8 *state);
+
+int ethtool_cmis_cdb_execute_cmd(struct net_device *dev,
+ struct ethtool_cmis_cdb_cmd_args *args);
diff --git a/net/ethtool/cmis_cdb.c b/net/ethtool/cmis_cdb.c
new file mode 100644
index 000000000000..1bb08783b60d
--- /dev/null
+++ b/net/ethtool/cmis_cdb.c
@@ -0,0 +1,602 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/ethtool.h>
+#include <linux/jiffies.h>
+
+#include "common.h"
+#include "module_fw.h"
+#include "cmis.h"
+
+/* For accessing the LPL field on page 9Fh, the allowable length extension is
+ * min(i, 15) byte octets where i specifies the allowable additional number of
+ * byte octets in a READ or a WRITE.
+ */
+u32 ethtool_cmis_get_max_payload_size(u8 num_of_byte_octs)
+{
+ return 8 * (1 + min_t(u8, num_of_byte_octs, 15));
+}
+
+void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args,
+ enum ethtool_cmis_cdb_cmd_id cmd, u8 *pl,
+ u8 lpl_len, u16 max_duration,
+ u8 read_write_len_ext, u16 msleep_pre_rpl,
+ u8 rpl_exp_len, u8 flags)
+{
+ args->req.id = cpu_to_be16(cmd);
+ args->req.lpl_len = lpl_len;
+ if (pl)
+ memcpy(args->req.payload, pl, args->req.lpl_len);
+
+ args->max_duration = max_duration;
+ args->read_write_len_ext =
+ ethtool_cmis_get_max_payload_size(read_write_len_ext);
+ args->msleep_pre_rpl = msleep_pre_rpl;
+ args->rpl_exp_len = rpl_exp_len;
+ args->flags = flags;
+ args->err_msg = NULL;
+}
+
+void ethtool_cmis_page_init(struct ethtool_module_eeprom *page_data,
+ u8 page, u32 offset, u32 length)
+{
+ page_data->page = page;
+ page_data->offset = offset;
+ page_data->length = length;
+ page_data->i2c_address = ETHTOOL_CMIS_CDB_PAGE_I2C_ADDR;
+}
+
+#define CMIS_REVISION_PAGE 0x00
+#define CMIS_REVISION_OFFSET 0x01
+
+struct cmis_rev_rpl {
+ u8 rev;
+};
+
+static u8 cmis_rev_rpl_major(struct cmis_rev_rpl *rpl)
+{
+ return rpl->rev >> 4;
+}
+
+static int cmis_rev_major_get(struct net_device *dev, u8 *rev_major)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_module_eeprom page_data = {0};
+ struct netlink_ext_ack extack = {};
+ struct cmis_rev_rpl rpl = {};
+ int err;
+
+ ethtool_cmis_page_init(&page_data, CMIS_REVISION_PAGE,
+ CMIS_REVISION_OFFSET, sizeof(rpl));
+ page_data.data = (u8 *)&rpl;
+
+ err = ops->get_module_eeprom_by_page(dev, &page_data, &extack);
+ if (err < 0) {
+ if (extack._msg)
+ netdev_err(dev, "%s\n", extack._msg);
+ return err;
+ }
+
+ *rev_major = cmis_rev_rpl_major(&rpl);
+
+ return 0;
+}
+
+#define CMIS_CDB_ADVERTISEMENT_PAGE 0x01
+#define CMIS_CDB_ADVERTISEMENT_OFFSET 0xA3
+
+/* Based on section 8.4.11 "CDB Messaging Support Advertisement" in CMIS
+ * standard revision 5.2.
+ */
+struct cmis_cdb_advert_rpl {
+ u8 inst_supported;
+ u8 read_write_len_ext;
+ u8 resv1;
+ u8 resv2;
+};
+
+static u8 cmis_cdb_advert_rpl_inst_supported(struct cmis_cdb_advert_rpl *rpl)
+{
+ return rpl->inst_supported >> 6;
+}
+
+static int cmis_cdb_advertisement_get(struct ethtool_cmis_cdb *cdb,
+ struct net_device *dev)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_module_eeprom page_data = {};
+ struct cmis_cdb_advert_rpl rpl = {};
+ struct netlink_ext_ack extack = {};
+ int err;
+
+ ethtool_cmis_page_init(&page_data, CMIS_CDB_ADVERTISEMENT_PAGE,
+ CMIS_CDB_ADVERTISEMENT_OFFSET, sizeof(rpl));
+ page_data.data = (u8 *)&rpl;
+
+ err = ops->get_module_eeprom_by_page(dev, &page_data, &extack);
+ if (err < 0) {
+ if (extack._msg)
+ netdev_err(dev, "%s\n", extack._msg);
+ return err;
+ }
+
+ if (!cmis_cdb_advert_rpl_inst_supported(&rpl))
+ return -EOPNOTSUPP;
+
+ cdb->read_write_len_ext = rpl.read_write_len_ext;
+
+ return 0;
+}
+
+#define CMIS_PASSWORD_ENTRY_PAGE 0x00
+#define CMIS_PASSWORD_ENTRY_OFFSET 0x7A
+
+struct cmis_password_entry_pl {
+ __be32 password;
+};
+
+/* See section 9.3.1 "CMD 0000h: Query Status" in CMIS standard revision 5.2.
+ * struct cmis_cdb_query_status_pl and struct cmis_cdb_query_status_rpl are
+ * structured layouts of the flat arrays,
+ * struct ethtool_cmis_cdb_request::payload and
+ * struct ethtool_cmis_cdb_rpl::payload respectively.
+ */
+struct cmis_cdb_query_status_pl {
+ u16 response_delay;
+};
+
+struct cmis_cdb_query_status_rpl {
+ u8 length;
+ u8 status;
+};
+
+static int
+cmis_cdb_validate_password(struct ethtool_cmis_cdb *cdb,
+ struct net_device *dev,
+ const struct ethtool_module_fw_flash_params *params,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct cmis_cdb_query_status_pl qs_pl = {0};
+ struct ethtool_module_eeprom page_data = {};
+ struct ethtool_cmis_cdb_cmd_args args = {};
+ struct cmis_password_entry_pl pe_pl = {};
+ struct cmis_cdb_query_status_rpl *rpl;
+ struct netlink_ext_ack extack = {};
+ int err;
+
+ ethtool_cmis_page_init(&page_data, CMIS_PASSWORD_ENTRY_PAGE,
+ CMIS_PASSWORD_ENTRY_OFFSET, sizeof(pe_pl));
+ page_data.data = (u8 *)&pe_pl;
+
+ pe_pl = *((struct cmis_password_entry_pl *)page_data.data);
+ pe_pl.password = params->password;
+ err = ops->set_module_eeprom_by_page(dev, &page_data, &extack);
+ if (err < 0) {
+ if (extack._msg)
+ netdev_err(dev, "%s\n", extack._msg);
+ return err;
+ }
+
+ ethtool_cmis_cdb_compose_args(&args, ETHTOOL_CMIS_CDB_CMD_QUERY_STATUS,
+ (u8 *)&qs_pl, sizeof(qs_pl), 0,
+ cdb->read_write_len_ext, 1000,
+ sizeof(*rpl),
+ CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
+
+ err = ethtool_cmis_cdb_execute_cmd(dev, &args);
+ if (err < 0) {
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "Query Status command failed",
+ args.err_msg);
+ return err;
+ }
+
+ rpl = (struct cmis_cdb_query_status_rpl *)args.req.payload;
+ if (!rpl->length || !rpl->status) {
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "Password was not accepted",
+ NULL);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Some CDB commands asserts the CDB completion flag only from CMIS
+ * revision 5. Therefore, check the relevant validity flag only when
+ * the revision supports it.
+ */
+void ethtool_cmis_cdb_check_completion_flag(u8 cmis_rev, u8 *flags)
+{
+ *flags |= cmis_rev >= 5 ? CDB_F_COMPLETION_VALID : 0;
+}
+
+#define CMIS_CDB_MODULE_FEATURES_RESV_DATA 34
+
+/* See section 9.4.1 "CMD 0040h: Module Features" in CMIS standard revision 5.2.
+ * struct cmis_cdb_module_features_rpl is structured layout of the flat
+ * array, ethtool_cmis_cdb_rpl::payload.
+ */
+struct cmis_cdb_module_features_rpl {
+ u8 resv1[CMIS_CDB_MODULE_FEATURES_RESV_DATA];
+ __be16 max_completion_time;
+};
+
+static u16
+cmis_cdb_module_features_completion_time(struct cmis_cdb_module_features_rpl *rpl)
+{
+ return be16_to_cpu(rpl->max_completion_time);
+}
+
+static int cmis_cdb_module_features_get(struct ethtool_cmis_cdb *cdb,
+ struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params)
+{
+ struct ethtool_cmis_cdb_cmd_args args = {};
+ struct cmis_cdb_module_features_rpl *rpl;
+ u8 flags = CDB_F_STATUS_VALID;
+ int err;
+
+ ethtool_cmis_cdb_check_completion_flag(cdb->cmis_rev, &flags);
+ ethtool_cmis_cdb_compose_args(&args,
+ ETHTOOL_CMIS_CDB_CMD_MODULE_FEATURES,
+ NULL, 0, 0, cdb->read_write_len_ext,
+ 1000, sizeof(*rpl), flags);
+
+ err = ethtool_cmis_cdb_execute_cmd(dev, &args);
+ if (err < 0) {
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "Module Features command failed",
+ args.err_msg);
+ return err;
+ }
+
+ rpl = (struct cmis_cdb_module_features_rpl *)args.req.payload;
+ cdb->max_completion_time =
+ cmis_cdb_module_features_completion_time(rpl);
+
+ return 0;
+}
+
+struct ethtool_cmis_cdb *
+ethtool_cmis_cdb_init(struct net_device *dev,
+ const struct ethtool_module_fw_flash_params *params,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params)
+{
+ struct ethtool_cmis_cdb *cdb;
+ int err;
+
+ cdb = kzalloc(sizeof(*cdb), GFP_KERNEL);
+ if (!cdb)
+ return ERR_PTR(-ENOMEM);
+
+ err = cmis_rev_major_get(dev, &cdb->cmis_rev);
+ if (err < 0)
+ goto err;
+
+ if (cdb->cmis_rev < 4) {
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "CMIS revision doesn't support module firmware flashing",
+ NULL);
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ err = cmis_cdb_advertisement_get(cdb, dev);
+ if (err < 0)
+ goto err;
+
+ if (params->password_valid) {
+ err = cmis_cdb_validate_password(cdb, dev, params, ntf_params);
+ if (err < 0)
+ goto err;
+ }
+
+ err = cmis_cdb_module_features_get(cdb, dev, ntf_params);
+ if (err < 0)
+ goto err;
+
+ return cdb;
+
+err:
+ ethtool_cmis_cdb_fini(cdb);
+ return ERR_PTR(err);
+}
+
+void ethtool_cmis_cdb_fini(struct ethtool_cmis_cdb *cdb)
+{
+ kfree(cdb);
+}
+
+static bool is_completed(u8 data)
+{
+ return !!(data & 0x40);
+}
+
+#define CMIS_CDB_STATUS_SUCCESS 0x01
+
+static bool status_success(u8 data)
+{
+ return data == CMIS_CDB_STATUS_SUCCESS;
+}
+
+#define CMIS_CDB_STATUS_FAIL 0x40
+
+static bool status_fail(u8 data)
+{
+ return data & CMIS_CDB_STATUS_FAIL;
+}
+
+struct cmis_wait_for_cond_rpl {
+ u8 state;
+};
+
+static int
+ethtool_cmis_module_poll(struct net_device *dev,
+ struct cmis_wait_for_cond_rpl *rpl, u32 offset,
+ bool (*cond_success)(u8), bool (*cond_fail)(u8))
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_module_eeprom page_data = {0};
+ struct netlink_ext_ack extack = {};
+ int err;
+
+ ethtool_cmis_page_init(&page_data, 0, offset, sizeof(rpl));
+ page_data.data = (u8 *)rpl;
+
+ err = ops->get_module_eeprom_by_page(dev, &page_data, &extack);
+ if (err < 0) {
+ if (extack._msg)
+ netdev_err_once(dev, "%s\n", extack._msg);
+ return -EBUSY;
+ }
+
+ if ((*cond_success)(rpl->state))
+ return 0;
+
+ if (*cond_fail && (*cond_fail)(rpl->state))
+ return -EIO;
+
+ return -EBUSY;
+}
+
+int ethtool_cmis_wait_for_cond(struct net_device *dev, u8 flags, u8 flag,
+ u16 max_duration, u32 offset,
+ bool (*cond_success)(u8), bool (*cond_fail)(u8),
+ u8 *state)
+{
+ struct cmis_wait_for_cond_rpl rpl = {};
+ unsigned long end;
+ int err;
+
+ if (!(flags & flag))
+ return 0;
+
+ if (max_duration == 0)
+ max_duration = U16_MAX;
+
+ end = jiffies + msecs_to_jiffies(max_duration);
+ do {
+ err = ethtool_cmis_module_poll(dev, &rpl, offset, cond_success,
+ cond_fail);
+ if (err != -EBUSY)
+ goto out;
+
+ msleep(20);
+ } while (time_before(jiffies, end));
+
+ err = ethtool_cmis_module_poll(dev, &rpl, offset, cond_success,
+ cond_fail);
+ if (err == -EBUSY)
+ err = -ETIMEDOUT;
+
+out:
+ *state = rpl.state;
+ return err;
+}
+
+#define CMIS_CDB_COMPLETION_FLAG_OFFSET 0x08
+
+static int cmis_cdb_wait_for_completion(struct net_device *dev,
+ struct ethtool_cmis_cdb_cmd_args *args)
+{
+ u8 flag;
+ int err;
+
+ /* Some vendors demand waiting time before checking completion flag
+ * in some CDB commands.
+ */
+ msleep(args->msleep_pre_rpl);
+
+ err = ethtool_cmis_wait_for_cond(dev, args->flags,
+ CDB_F_COMPLETION_VALID,
+ args->max_duration,
+ CMIS_CDB_COMPLETION_FLAG_OFFSET,
+ is_completed, NULL, &flag);
+ if (err < 0)
+ args->err_msg = "Completion Flag did not set on time";
+
+ return err;
+}
+
+#define CMIS_CDB_STATUS_OFFSET 0x25
+
+static void cmis_cdb_status_fail_msg_get(u8 status, char **err_msg)
+{
+ switch (status) {
+ case 0b10000001:
+ *err_msg = "CDB Status is in progress: Busy capturing command";
+ break;
+ case 0b10000010:
+ *err_msg =
+ "CDB Status is in progress: Busy checking/validating command";
+ break;
+ case 0b10000011:
+ *err_msg = "CDB Status is in progress: Busy executing";
+ break;
+ case 0b01000000:
+ *err_msg = "CDB status failed: no specific failure";
+ break;
+ case 0b01000010:
+ *err_msg =
+ "CDB status failed: Parameter range error or parameter not supported";
+ break;
+ case 0b01000101:
+ *err_msg = "CDB status failed: CdbChkCode error";
+ break;
+ default:
+ *err_msg = "Unknown failure reason";
+ }
+};
+
+static int cmis_cdb_wait_for_status(struct net_device *dev,
+ struct ethtool_cmis_cdb_cmd_args *args)
+{
+ u8 status;
+ int err;
+
+ /* Some vendors demand waiting time before checking status in some
+ * CDB commands.
+ */
+ msleep(args->msleep_pre_rpl);
+
+ err = ethtool_cmis_wait_for_cond(dev, args->flags, CDB_F_STATUS_VALID,
+ args->max_duration,
+ CMIS_CDB_STATUS_OFFSET,
+ status_success, status_fail, &status);
+ if (err < 0 && !args->err_msg)
+ cmis_cdb_status_fail_msg_get(status, &args->err_msg);
+
+ return err;
+}
+
+#define CMIS_CDB_REPLY_OFFSET 0x86
+
+static int cmis_cdb_process_reply(struct net_device *dev,
+ struct ethtool_module_eeprom *page_data,
+ struct ethtool_cmis_cdb_cmd_args *args)
+{
+ u8 rpl_hdr_len = sizeof(struct ethtool_cmis_cdb_rpl_hdr);
+ u8 rpl_exp_len = args->rpl_exp_len + rpl_hdr_len;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct netlink_ext_ack extack = {};
+ struct ethtool_cmis_cdb_rpl *rpl;
+ int err;
+
+ if (!args->rpl_exp_len)
+ return 0;
+
+ ethtool_cmis_page_init(page_data, ETHTOOL_CMIS_CDB_CMD_PAGE,
+ CMIS_CDB_REPLY_OFFSET, rpl_exp_len);
+ page_data->data = kmalloc(page_data->length, GFP_KERNEL);
+ if (!page_data->data)
+ return -ENOMEM;
+
+ err = ops->get_module_eeprom_by_page(dev, page_data, &extack);
+ if (err < 0) {
+ if (extack._msg)
+ netdev_err(dev, "%s\n", extack._msg);
+ goto out;
+ }
+
+ rpl = (struct ethtool_cmis_cdb_rpl *)page_data->data;
+ if ((args->rpl_exp_len > rpl->hdr.rpl_len + rpl_hdr_len) ||
+ !rpl->hdr.rpl_chk_code) {
+ err = -EIO;
+ goto out;
+ }
+
+ args->req.lpl_len = rpl->hdr.rpl_len;
+ memcpy(args->req.payload, rpl->payload, args->req.lpl_len);
+
+out:
+ kfree(page_data->data);
+ return err;
+}
+
+static int
+__ethtool_cmis_cdb_execute_cmd(struct net_device *dev,
+ struct ethtool_module_eeprom *page_data,
+ u8 page, u32 offset, u32 length, void *data)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct netlink_ext_ack extack = {};
+ int err;
+
+ ethtool_cmis_page_init(page_data, page, offset, length);
+ page_data->data = kmemdup(data, page_data->length, GFP_KERNEL);
+ if (!page_data->data)
+ return -ENOMEM;
+
+ err = ops->set_module_eeprom_by_page(dev, page_data, &extack);
+ if (err < 0) {
+ if (extack._msg)
+ netdev_err(dev, "%s\n", extack._msg);
+ }
+
+ kfree(page_data->data);
+ return err;
+}
+
+static u8 cmis_cdb_calc_checksum(const void *data, size_t size)
+{
+ const u8 *bytes = (const u8 *)data;
+ u8 checksum = 0;
+
+ for (size_t i = 0; i < size; i++)
+ checksum += bytes[i];
+
+ return ~checksum;
+}
+
+#define CMIS_CDB_CMD_ID_OFFSET 0x80
+
+int ethtool_cmis_cdb_execute_cmd(struct net_device *dev,
+ struct ethtool_cmis_cdb_cmd_args *args)
+{
+ struct ethtool_module_eeprom page_data = {};
+ u32 offset;
+ int err;
+
+ args->req.chk_code =
+ cmis_cdb_calc_checksum(&args->req, sizeof(args->req));
+
+ if (args->req.lpl_len > args->read_write_len_ext) {
+ args->err_msg = "LPL length is longer than CDB read write length extension allows";
+ return -EINVAL;
+ }
+
+ /* According to the CMIS standard, there are two options to trigger the
+ * CDB commands. The default option is triggering the command by writing
+ * the CMDID bytes. Therefore, the command will be split to 2 calls:
+ * First, with everything except the CMDID field and then the CMDID
+ * field.
+ */
+ offset = CMIS_CDB_CMD_ID_OFFSET +
+ offsetof(struct ethtool_cmis_cdb_request, body);
+ err = __ethtool_cmis_cdb_execute_cmd(dev, &page_data,
+ ETHTOOL_CMIS_CDB_CMD_PAGE, offset,
+ sizeof(args->req.body),
+ &args->req.body);
+ if (err < 0)
+ return err;
+
+ offset = CMIS_CDB_CMD_ID_OFFSET +
+ offsetof(struct ethtool_cmis_cdb_request, id);
+ err = __ethtool_cmis_cdb_execute_cmd(dev, &page_data,
+ ETHTOOL_CMIS_CDB_CMD_PAGE, offset,
+ sizeof(args->req.id),
+ &args->req.id);
+ if (err < 0)
+ return err;
+
+ err = cmis_cdb_wait_for_completion(dev, args);
+ if (err < 0)
+ return err;
+
+ err = cmis_cdb_wait_for_status(dev, args);
+ if (err < 0)
+ return err;
+
+ return cmis_cdb_process_reply(dev, &page_data, args);
+}
diff --git a/net/ethtool/cmis_fw_update.c b/net/ethtool/cmis_fw_update.c
new file mode 100644
index 000000000000..655ff5224ffa
--- /dev/null
+++ b/net/ethtool/cmis_fw_update.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/ethtool.h>
+#include <linux/firmware.h>
+
+#include "common.h"
+#include "module_fw.h"
+#include "cmis.h"
+
+struct cmis_fw_update_fw_mng_features {
+ u8 start_cmd_payload_size;
+ u16 max_duration_start;
+ u16 max_duration_write;
+ u16 max_duration_complete;
+};
+
+/* See section 9.4.2 "CMD 0041h: Firmware Management Features" in CMIS standard
+ * revision 5.2.
+ * struct cmis_cdb_fw_mng_features_rpl is a structured layout of the flat
+ * array, ethtool_cmis_cdb_rpl::payload.
+ */
+struct cmis_cdb_fw_mng_features_rpl {
+ u8 resv1;
+ u8 resv2;
+ u8 start_cmd_payload_size;
+ u8 resv3;
+ u8 read_write_len_ext;
+ u8 write_mechanism;
+ u8 resv4;
+ u8 resv5;
+ __be16 max_duration_start;
+ __be16 resv6;
+ __be16 max_duration_write;
+ __be16 max_duration_complete;
+ __be16 resv7;
+};
+
+enum cmis_cdb_fw_write_mechanism {
+ CMIS_CDB_FW_WRITE_MECHANISM_LPL = 0x01,
+ CMIS_CDB_FW_WRITE_MECHANISM_BOTH = 0x11,
+};
+
+static int
+cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb,
+ struct net_device *dev,
+ struct cmis_fw_update_fw_mng_features *fw_mng,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params)
+{
+ struct ethtool_cmis_cdb_cmd_args args = {};
+ struct cmis_cdb_fw_mng_features_rpl *rpl;
+ u8 flags = CDB_F_STATUS_VALID;
+ int err;
+
+ ethtool_cmis_cdb_check_completion_flag(cdb->cmis_rev, &flags);
+ ethtool_cmis_cdb_compose_args(&args,
+ ETHTOOL_CMIS_CDB_CMD_FW_MANAGMENT_FEATURES,
+ NULL, 0, cdb->max_completion_time,
+ cdb->read_write_len_ext, 1000,
+ sizeof(*rpl), flags);
+
+ err = ethtool_cmis_cdb_execute_cmd(dev, &args);
+ if (err < 0) {
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "FW Management Features command failed",
+ args.err_msg);
+ return err;
+ }
+
+ rpl = (struct cmis_cdb_fw_mng_features_rpl *)args.req.payload;
+ if (!(rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL ||
+ rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_BOTH)) {
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "Write LPL is not supported",
+ NULL);
+ return -EOPNOTSUPP;
+ }
+
+ /* Above, we used read_write_len_ext that we got from CDB
+ * advertisement. Update it with the value that we got from module
+ * features query, which is specific for Firmware Management Commands
+ * (IDs 0100h-01FFh).
+ */
+ cdb->read_write_len_ext = rpl->read_write_len_ext;
+ fw_mng->start_cmd_payload_size = rpl->start_cmd_payload_size;
+ fw_mng->max_duration_start = be16_to_cpu(rpl->max_duration_start);
+ fw_mng->max_duration_write = be16_to_cpu(rpl->max_duration_write);
+ fw_mng->max_duration_complete = be16_to_cpu(rpl->max_duration_complete);
+
+ return 0;
+}
+
+/* See section 9.7.2 "CMD 0101h: Start Firmware Download" in CMIS standard
+ * revision 5.2.
+ * struct cmis_cdb_start_fw_download_pl is a structured layout of the
+ * flat array, ethtool_cmis_cdb_request::payload.
+ */
+struct cmis_cdb_start_fw_download_pl {
+ __struct_group(cmis_cdb_start_fw_download_pl_h, head, /* no attrs */,
+ __be32 image_size;
+ __be32 resv1;
+ );
+ u8 vendor_data[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH -
+ sizeof(struct cmis_cdb_start_fw_download_pl_h)];
+};
+
+static int
+cmis_fw_update_start_download(struct ethtool_cmis_cdb *cdb,
+ struct ethtool_cmis_fw_update_params *fw_update,
+ struct cmis_fw_update_fw_mng_features *fw_mng)
+{
+ u8 vendor_data_size = fw_mng->start_cmd_payload_size;
+ struct cmis_cdb_start_fw_download_pl pl = {};
+ struct ethtool_cmis_cdb_cmd_args args = {};
+ u8 lpl_len;
+ int err;
+
+ pl.image_size = cpu_to_be32(fw_update->fw->size);
+ memcpy(pl.vendor_data, fw_update->fw->data, vendor_data_size);
+
+ lpl_len = offsetof(struct cmis_cdb_start_fw_download_pl,
+ vendor_data[vendor_data_size]);
+
+ ethtool_cmis_cdb_compose_args(&args,
+ ETHTOOL_CMIS_CDB_CMD_START_FW_DOWNLOAD,
+ (u8 *)&pl, lpl_len,
+ fw_mng->max_duration_start,
+ cdb->read_write_len_ext, 1000, 0,
+ CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
+
+ err = ethtool_cmis_cdb_execute_cmd(fw_update->dev, &args);
+ if (err < 0)
+ ethnl_module_fw_flash_ntf_err(fw_update->dev,
+ &fw_update->ntf_params,
+ "Start FW download command failed",
+ args.err_msg);
+
+ return err;
+}
+
+/* See section 9.7.4 "CMD 0103h: Write Firmware Block LPL" in CMIS standard
+ * revision 5.2.
+ * struct cmis_cdb_write_fw_block_lpl_pl is a structured layout of the
+ * flat array, ethtool_cmis_cdb_request::payload.
+ */
+struct cmis_cdb_write_fw_block_lpl_pl {
+ __be32 block_address;
+ u8 fw_block[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH - sizeof(__be32)];
+};
+
+static int
+cmis_fw_update_write_image(struct ethtool_cmis_cdb *cdb,
+ struct ethtool_cmis_fw_update_params *fw_update,
+ struct cmis_fw_update_fw_mng_features *fw_mng)
+{
+ u8 start = fw_mng->start_cmd_payload_size;
+ u32 offset, max_block_size, max_lpl_len;
+ u32 image_size = fw_update->fw->size;
+ int err;
+
+ max_lpl_len = min_t(u32,
+ ethtool_cmis_get_max_payload_size(cdb->read_write_len_ext),
+ ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH);
+ max_block_size =
+ max_lpl_len - sizeof_field(struct cmis_cdb_write_fw_block_lpl_pl,
+ block_address);
+
+ for (offset = start; offset < image_size; offset += max_block_size) {
+ struct cmis_cdb_write_fw_block_lpl_pl pl = {
+ .block_address = cpu_to_be32(offset - start),
+ };
+ struct ethtool_cmis_cdb_cmd_args args = {};
+ u32 block_size, lpl_len;
+
+ ethnl_module_fw_flash_ntf_in_progress(fw_update->dev,
+ &fw_update->ntf_params,
+ offset - start,
+ image_size);
+ block_size = min_t(u32, max_block_size, image_size - offset);
+ memcpy(pl.fw_block, &fw_update->fw->data[offset], block_size);
+ lpl_len = block_size +
+ sizeof_field(struct cmis_cdb_write_fw_block_lpl_pl,
+ block_address);
+
+ ethtool_cmis_cdb_compose_args(&args,
+ ETHTOOL_CMIS_CDB_CMD_WRITE_FW_BLOCK_LPL,
+ (u8 *)&pl, lpl_len,
+ fw_mng->max_duration_write,
+ cdb->read_write_len_ext, 1, 0,
+ CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
+
+ err = ethtool_cmis_cdb_execute_cmd(fw_update->dev, &args);
+ if (err < 0) {
+ ethnl_module_fw_flash_ntf_err(fw_update->dev,
+ &fw_update->ntf_params,
+ "Write FW block LPL command failed",
+ args.err_msg);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int
+cmis_fw_update_complete_download(struct ethtool_cmis_cdb *cdb,
+ struct net_device *dev,
+ struct cmis_fw_update_fw_mng_features *fw_mng,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params)
+{
+ struct ethtool_cmis_cdb_cmd_args args = {};
+ int err;
+
+ ethtool_cmis_cdb_compose_args(&args,
+ ETHTOOL_CMIS_CDB_CMD_COMPLETE_FW_DOWNLOAD,
+ NULL, 0, fw_mng->max_duration_complete,
+ cdb->read_write_len_ext, 1000, 0,
+ CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
+
+ err = ethtool_cmis_cdb_execute_cmd(dev, &args);
+ if (err < 0)
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "Complete FW download command failed",
+ args.err_msg);
+
+ return err;
+}
+
+static int
+cmis_fw_update_download_image(struct ethtool_cmis_cdb *cdb,
+ struct ethtool_cmis_fw_update_params *fw_update,
+ struct cmis_fw_update_fw_mng_features *fw_mng)
+{
+ int err;
+
+ err = cmis_fw_update_start_download(cdb, fw_update, fw_mng);
+ if (err < 0)
+ return err;
+
+ err = cmis_fw_update_write_image(cdb, fw_update, fw_mng);
+ if (err < 0)
+ return err;
+
+ err = cmis_fw_update_complete_download(cdb, fw_update->dev, fw_mng,
+ &fw_update->ntf_params);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+enum {
+ CMIS_MODULE_LOW_PWR = 1,
+ CMIS_MODULE_READY = 3,
+};
+
+static bool module_is_ready(u8 data)
+{
+ u8 state = (data >> 1) & 7;
+
+ return state == CMIS_MODULE_READY || state == CMIS_MODULE_LOW_PWR;
+}
+
+#define CMIS_MODULE_READY_MAX_DURATION_MSEC 1000
+#define CMIS_MODULE_STATE_OFFSET 3
+
+static int
+cmis_fw_update_wait_for_module_state(struct net_device *dev, u8 flags)
+{
+ u8 state;
+
+ return ethtool_cmis_wait_for_cond(dev, flags, CDB_F_MODULE_STATE_VALID,
+ CMIS_MODULE_READY_MAX_DURATION_MSEC,
+ CMIS_MODULE_STATE_OFFSET,
+ module_is_ready, NULL, &state);
+}
+
+/* See section 9.7.10 "CMD 0109h: Run Firmware Image" in CMIS standard
+ * revision 5.2.
+ * struct cmis_cdb_run_fw_image_pl is a structured layout of the flat
+ * array, ethtool_cmis_cdb_request::payload.
+ */
+struct cmis_cdb_run_fw_image_pl {
+ u8 resv1;
+ u8 image_to_run;
+ u16 delay_to_reset;
+};
+
+static int
+cmis_fw_update_run_image(struct ethtool_cmis_cdb *cdb, struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params)
+{
+ struct ethtool_cmis_cdb_cmd_args args = {};
+ struct cmis_cdb_run_fw_image_pl pl = {0};
+ int err;
+
+ ethtool_cmis_cdb_compose_args(&args, ETHTOOL_CMIS_CDB_CMD_RUN_FW_IMAGE,
+ (u8 *)&pl, sizeof(pl),
+ cdb->max_completion_time,
+ cdb->read_write_len_ext, 1000, 0,
+ CDB_F_MODULE_STATE_VALID);
+
+ err = ethtool_cmis_cdb_execute_cmd(dev, &args);
+ if (err < 0) {
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "Run image command failed",
+ args.err_msg);
+ return err;
+ }
+
+ err = cmis_fw_update_wait_for_module_state(dev, args.flags);
+ if (err < 0)
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "Module is not ready on time after reset",
+ NULL);
+
+ return err;
+}
+
+static int
+cmis_fw_update_commit_image(struct ethtool_cmis_cdb *cdb,
+ struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params)
+{
+ struct ethtool_cmis_cdb_cmd_args args = {};
+ int err;
+
+ ethtool_cmis_cdb_compose_args(&args,
+ ETHTOOL_CMIS_CDB_CMD_COMMIT_FW_IMAGE,
+ NULL, 0, cdb->max_completion_time,
+ cdb->read_write_len_ext, 1000, 0,
+ CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
+
+ err = ethtool_cmis_cdb_execute_cmd(dev, &args);
+ if (err < 0)
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "Commit image command failed",
+ args.err_msg);
+
+ return err;
+}
+
+static int cmis_fw_update_reset(struct net_device *dev)
+{
+ __u32 reset_data = ETH_RESET_PHY;
+
+ return dev->ethtool_ops->reset(dev, &reset_data);
+}
+
+void
+ethtool_cmis_fw_update(struct ethtool_cmis_fw_update_params *fw_update)
+{
+ struct ethnl_module_fw_flash_ntf_params *ntf_params =
+ &fw_update->ntf_params;
+ struct cmis_fw_update_fw_mng_features fw_mng = {0};
+ struct net_device *dev = fw_update->dev;
+ struct ethtool_cmis_cdb *cdb;
+ int err;
+
+ cdb = ethtool_cmis_cdb_init(dev, &fw_update->params, ntf_params);
+ if (IS_ERR(cdb))
+ goto err_send_ntf;
+
+ ethnl_module_fw_flash_ntf_start(dev, ntf_params);
+
+ err = cmis_fw_update_fw_mng_features_get(cdb, dev, &fw_mng, ntf_params);
+ if (err < 0)
+ goto err_cdb_fini;
+
+ err = cmis_fw_update_download_image(cdb, fw_update, &fw_mng);
+ if (err < 0)
+ goto err_cdb_fini;
+
+ err = cmis_fw_update_run_image(cdb, dev, ntf_params);
+ if (err < 0)
+ goto err_cdb_fini;
+
+ /* The CDB command "Run Firmware Image" resets the firmware, so the new
+ * one might have different settings.
+ * Free the old CDB instance, and init a new one.
+ */
+ ethtool_cmis_cdb_fini(cdb);
+
+ cdb = ethtool_cmis_cdb_init(dev, &fw_update->params, ntf_params);
+ if (IS_ERR(cdb))
+ goto err_send_ntf;
+
+ err = cmis_fw_update_commit_image(cdb, dev, ntf_params);
+ if (err < 0)
+ goto err_cdb_fini;
+
+ err = cmis_fw_update_reset(dev);
+ if (err < 0)
+ goto err_cdb_fini;
+
+ ethnl_module_fw_flash_ntf_complete(dev, ntf_params);
+ ethtool_cmis_cdb_fini(cdb);
+ return;
+
+err_cdb_fini:
+ ethtool_cmis_cdb_fini(cdb);
+err_send_ntf:
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params, NULL, NULL);
+}
diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c
index 83112c1a71ae..3e18ca1ccc5e 100644
--- a/net/ethtool/coalesce.c
+++ b/net/ethtool/coalesce.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/dim.h>
#include "netlink.h"
#include "common.h"
@@ -82,6 +83,14 @@ static int coalesce_prepare_data(const struct ethnl_req_info *req_base,
static int coalesce_reply_size(const struct ethnl_req_info *req_base,
const struct ethnl_reply_data *reply_base)
{
+ int modersz = nla_total_size(0) + /* _PROFILE_IRQ_MODERATION, nest */
+ nla_total_size(sizeof(u32)) + /* _IRQ_MODERATION_USEC */
+ nla_total_size(sizeof(u32)) + /* _IRQ_MODERATION_PKTS */
+ nla_total_size(sizeof(u32)); /* _IRQ_MODERATION_COMPS */
+
+ int total_modersz = nla_total_size(0) + /* _{R,T}X_PROFILE, nest */
+ modersz * NET_DIM_PARAMS_NUM_PROFILES;
+
return nla_total_size(sizeof(u32)) + /* _RX_USECS */
nla_total_size(sizeof(u32)) + /* _RX_MAX_FRAMES */
nla_total_size(sizeof(u32)) + /* _RX_USECS_IRQ */
@@ -108,7 +117,8 @@ static int coalesce_reply_size(const struct ethnl_req_info *req_base,
nla_total_size(sizeof(u8)) + /* _USE_CQE_MODE_RX */
nla_total_size(sizeof(u32)) + /* _TX_AGGR_MAX_BYTES */
nla_total_size(sizeof(u32)) + /* _TX_AGGR_MAX_FRAMES */
- nla_total_size(sizeof(u32)); /* _TX_AGGR_TIME_USECS */
+ nla_total_size(sizeof(u32)) + /* _TX_AGGR_TIME_USECS */
+ total_modersz * 2; /* _{R,T}X_PROFILE */
}
static bool coalesce_put_u32(struct sk_buff *skb, u16 attr_type, u32 val,
@@ -127,6 +137,74 @@ static bool coalesce_put_bool(struct sk_buff *skb, u16 attr_type, u32 val,
return nla_put_u8(skb, attr_type, !!val);
}
+/**
+ * coalesce_put_profile - fill reply with a nla nest with four child nla nests.
+ * @skb: socket buffer the message is stored in
+ * @attr_type: nest attr type ETHTOOL_A_COALESCE_*X_PROFILE
+ * @profile: data passed to userspace
+ * @coal_flags: modifiable parameters supported by the driver
+ *
+ * Put a dim profile nest attribute. Refer to ETHTOOL_A_PROFILE_IRQ_MODERATION.
+ *
+ * Return: 0 on success or a negative error code.
+ */
+static int coalesce_put_profile(struct sk_buff *skb, u16 attr_type,
+ const struct dim_cq_moder *profile,
+ u8 coal_flags)
+{
+ struct nlattr *profile_attr, *moder_attr;
+ int i, ret;
+
+ if (!profile || !coal_flags)
+ return 0;
+
+ profile_attr = nla_nest_start(skb, attr_type);
+ if (!profile_attr)
+ return -EMSGSIZE;
+
+ for (i = 0; i < NET_DIM_PARAMS_NUM_PROFILES; i++) {
+ moder_attr = nla_nest_start(skb,
+ ETHTOOL_A_PROFILE_IRQ_MODERATION);
+ if (!moder_attr) {
+ ret = -EMSGSIZE;
+ goto cancel_profile;
+ }
+
+ if (coal_flags & DIM_COALESCE_USEC) {
+ ret = nla_put_u32(skb, ETHTOOL_A_IRQ_MODERATION_USEC,
+ profile[i].usec);
+ if (ret)
+ goto cancel_moder;
+ }
+
+ if (coal_flags & DIM_COALESCE_PKTS) {
+ ret = nla_put_u32(skb, ETHTOOL_A_IRQ_MODERATION_PKTS,
+ profile[i].pkts);
+ if (ret)
+ goto cancel_moder;
+ }
+
+ if (coal_flags & DIM_COALESCE_COMPS) {
+ ret = nla_put_u32(skb, ETHTOOL_A_IRQ_MODERATION_COMPS,
+ profile[i].comps);
+ if (ret)
+ goto cancel_moder;
+ }
+
+ nla_nest_end(skb, moder_attr);
+ }
+
+ nla_nest_end(skb, profile_attr);
+
+ return 0;
+
+cancel_moder:
+ nla_nest_cancel(skb, moder_attr);
+cancel_profile:
+ nla_nest_cancel(skb, profile_attr);
+ return ret;
+}
+
static int coalesce_fill_reply(struct sk_buff *skb,
const struct ethnl_req_info *req_base,
const struct ethnl_reply_data *reply_base)
@@ -135,6 +213,8 @@ static int coalesce_fill_reply(struct sk_buff *skb,
const struct kernel_ethtool_coalesce *kcoal = &data->kernel_coalesce;
const struct ethtool_coalesce *coal = &data->coalesce;
u32 supported = data->supported_params;
+ struct dim_irq_moder *moder;
+ int ret = 0;
if (coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_USECS,
coal->rx_coalesce_usecs, supported) ||
@@ -192,11 +272,42 @@ static int coalesce_fill_reply(struct sk_buff *skb,
kcoal->tx_aggr_time_usecs, supported))
return -EMSGSIZE;
- return 0;
+ if (!req_base->dev || !req_base->dev->irq_moder)
+ return 0;
+
+ moder = req_base->dev->irq_moder;
+ rcu_read_lock();
+ if (moder->profile_flags & DIM_PROFILE_RX) {
+ ret = coalesce_put_profile(skb, ETHTOOL_A_COALESCE_RX_PROFILE,
+ rcu_dereference(moder->rx_profile),
+ moder->coal_flags);
+ if (ret)
+ goto out;
+ }
+
+ if (moder->profile_flags & DIM_PROFILE_TX)
+ ret = coalesce_put_profile(skb, ETHTOOL_A_COALESCE_TX_PROFILE,
+ rcu_dereference(moder->tx_profile),
+ moder->coal_flags);
+
+out:
+ rcu_read_unlock();
+ return ret;
}
/* COALESCE_SET */
+static const struct nla_policy coalesce_irq_moderation_policy[] = {
+ [ETHTOOL_A_IRQ_MODERATION_USEC] = { .type = NLA_U32 },
+ [ETHTOOL_A_IRQ_MODERATION_PKTS] = { .type = NLA_U32 },
+ [ETHTOOL_A_IRQ_MODERATION_COMPS] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy coalesce_profile_policy[] = {
+ [ETHTOOL_A_PROFILE_IRQ_MODERATION] =
+ NLA_POLICY_NESTED(coalesce_irq_moderation_policy),
+};
+
const struct nla_policy ethnl_coalesce_set_policy[] = {
[ETHTOOL_A_COALESCE_HEADER] =
NLA_POLICY_NESTED(ethnl_header_policy),
@@ -227,6 +338,10 @@ const struct nla_policy ethnl_coalesce_set_policy[] = {
[ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES] = { .type = NLA_U32 },
[ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES] = { .type = NLA_U32 },
[ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS] = { .type = NLA_U32 },
+ [ETHTOOL_A_COALESCE_RX_PROFILE] =
+ NLA_POLICY_NESTED(coalesce_profile_policy),
+ [ETHTOOL_A_COALESCE_TX_PROFILE] =
+ NLA_POLICY_NESTED(coalesce_profile_policy),
};
static int
@@ -234,6 +349,7 @@ ethnl_set_coalesce_validate(struct ethnl_req_info *req_info,
struct genl_info *info)
{
const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+ struct dim_irq_moder *irq_moder = req_info->dev->irq_moder;
struct nlattr **tb = info->attrs;
u32 supported_params;
u16 a;
@@ -243,6 +359,12 @@ ethnl_set_coalesce_validate(struct ethnl_req_info *req_info,
/* make sure that only supported parameters are present */
supported_params = ops->supported_coalesce_params;
+ if (irq_moder && irq_moder->profile_flags & DIM_PROFILE_RX)
+ supported_params |= ETHTOOL_COALESCE_RX_PROFILE;
+
+ if (irq_moder && irq_moder->profile_flags & DIM_PROFILE_TX)
+ supported_params |= ETHTOOL_COALESCE_TX_PROFILE;
+
for (a = ETHTOOL_A_COALESCE_RX_USECS; a < __ETHTOOL_A_COALESCE_CNT; a++)
if (tb[a] && !(supported_params & attr_to_mask(a))) {
NL_SET_ERR_MSG_ATTR(info->extack, tb[a],
@@ -253,6 +375,138 @@ ethnl_set_coalesce_validate(struct ethnl_req_info *req_info,
return 1;
}
+/**
+ * ethnl_update_irq_moder - update a specific field in the given profile
+ * @irq_moder: place that collects dim related information
+ * @irq_field: field in profile to modify
+ * @attr_type: attr type ETHTOOL_A_IRQ_MODERATION_*
+ * @tb: netlink attribute with new values or null
+ * @coal_bit: DIM_COALESCE_* bit from coal_flags
+ * @mod: pointer to bool for modification tracking
+ * @extack: netlink extended ack
+ *
+ * Return: 0 on success or a negative error code.
+ */
+static int ethnl_update_irq_moder(struct dim_irq_moder *irq_moder,
+ u16 *irq_field, u16 attr_type,
+ struct nlattr **tb,
+ u8 coal_bit, bool *mod,
+ struct netlink_ext_ack *extack)
+{
+ int ret = 0;
+ u32 val;
+
+ if (!tb[attr_type])
+ return 0;
+
+ if (irq_moder->coal_flags & coal_bit) {
+ val = nla_get_u32(tb[attr_type]);
+ if (*irq_field == val)
+ return 0;
+
+ *irq_field = val;
+ *mod = true;
+ } else {
+ NL_SET_BAD_ATTR(extack, tb[attr_type]);
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+/**
+ * ethnl_update_profile - get a profile nest with child nests from userspace.
+ * @dev: netdevice to update the profile
+ * @dst: profile get from the driver and modified by ethnl_update_profile.
+ * @nests: nest attr ETHTOOL_A_COALESCE_*X_PROFILE to set profile.
+ * @mod: pointer to bool for modification tracking
+ * @extack: Netlink extended ack
+ *
+ * Layout of nests:
+ * Nested ETHTOOL_A_COALESCE_*X_PROFILE attr
+ * Nested ETHTOOL_A_PROFILE_IRQ_MODERATION attr
+ * ETHTOOL_A_IRQ_MODERATION_USEC attr
+ * ETHTOOL_A_IRQ_MODERATION_PKTS attr
+ * ETHTOOL_A_IRQ_MODERATION_COMPS attr
+ * ...
+ * Nested ETHTOOL_A_PROFILE_IRQ_MODERATION attr
+ * ETHTOOL_A_IRQ_MODERATION_USEC attr
+ * ETHTOOL_A_IRQ_MODERATION_PKTS attr
+ * ETHTOOL_A_IRQ_MODERATION_COMPS attr
+ *
+ * Return: 0 on success or a negative error code.
+ */
+static int ethnl_update_profile(struct net_device *dev,
+ struct dim_cq_moder __rcu **dst,
+ const struct nlattr *nests,
+ bool *mod,
+ struct netlink_ext_ack *extack)
+{
+ int len_irq_moder = ARRAY_SIZE(coalesce_irq_moderation_policy);
+ struct nlattr *tb[ARRAY_SIZE(coalesce_irq_moderation_policy)];
+ struct dim_irq_moder *irq_moder = dev->irq_moder;
+ struct dim_cq_moder *new_profile, *old_profile;
+ int ret, rem, i = 0, len;
+ struct nlattr *nest;
+
+ if (!nests)
+ return 0;
+
+ if (!*dst)
+ return -EOPNOTSUPP;
+
+ old_profile = rtnl_dereference(*dst);
+ len = NET_DIM_PARAMS_NUM_PROFILES * sizeof(*old_profile);
+ new_profile = kmemdup(old_profile, len, GFP_KERNEL);
+ if (!new_profile)
+ return -ENOMEM;
+
+ nla_for_each_nested_type(nest, ETHTOOL_A_PROFILE_IRQ_MODERATION,
+ nests, rem) {
+ ret = nla_parse_nested(tb, len_irq_moder - 1, nest,
+ coalesce_irq_moderation_policy,
+ extack);
+ if (ret)
+ goto err_out;
+
+ ret = ethnl_update_irq_moder(irq_moder, &new_profile[i].usec,
+ ETHTOOL_A_IRQ_MODERATION_USEC,
+ tb, DIM_COALESCE_USEC,
+ mod, extack);
+ if (ret)
+ goto err_out;
+
+ ret = ethnl_update_irq_moder(irq_moder, &new_profile[i].pkts,
+ ETHTOOL_A_IRQ_MODERATION_PKTS,
+ tb, DIM_COALESCE_PKTS,
+ mod, extack);
+ if (ret)
+ goto err_out;
+
+ ret = ethnl_update_irq_moder(irq_moder, &new_profile[i].comps,
+ ETHTOOL_A_IRQ_MODERATION_COMPS,
+ tb, DIM_COALESCE_COMPS,
+ mod, extack);
+ if (ret)
+ goto err_out;
+
+ i++;
+ }
+
+ /* After the profile is modified, dim itself is a dynamic
+ * mechanism and will quickly fit to the appropriate
+ * coalescing parameters according to the new profile.
+ */
+ rcu_assign_pointer(*dst, new_profile);
+ kfree_rcu(old_profile, rcu);
+
+ return 0;
+
+err_out:
+ kfree(new_profile);
+ return ret;
+}
+
static int
__ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info,
bool *dual_change)
@@ -317,6 +571,22 @@ __ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info,
ethnl_update_u32(&kernel_coalesce.tx_aggr_time_usecs,
tb[ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS], &mod);
+ if (dev->irq_moder && dev->irq_moder->profile_flags & DIM_PROFILE_RX) {
+ ret = ethnl_update_profile(dev, &dev->irq_moder->rx_profile,
+ tb[ETHTOOL_A_COALESCE_RX_PROFILE],
+ &mod, info->extack);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (dev->irq_moder && dev->irq_moder->profile_flags & DIM_PROFILE_TX) {
+ ret = ethnl_update_profile(dev, &dev->irq_moder->tx_profile,
+ tb[ETHTOOL_A_COALESCE_TX_PROFILE],
+ &mod, info->extack);
+ if (ret < 0)
+ return ret;
+ }
+
/* Update operation modes */
ethnl_update_bool32(&coalesce.use_adaptive_rx_coalesce,
tb[ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX], &mod_mode);
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 6b2a360dcdf0..07032babd1b6 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -211,6 +211,7 @@ const char link_mode_names[][ETH_GSTRING_LEN] = {
__DEFINE_LINK_MODE_NAME(10, T1S, Full),
__DEFINE_LINK_MODE_NAME(10, T1S, Half),
__DEFINE_LINK_MODE_NAME(10, T1S_P2MP, Half),
+ __DEFINE_LINK_MODE_NAME(10, T1BRR, Full),
};
static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
@@ -251,6 +252,7 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
#define __LINK_MODE_LANES_T1S_P2MP 1
#define __LINK_MODE_LANES_VR8 8
#define __LINK_MODE_LANES_DR8_2 8
+#define __LINK_MODE_LANES_T1BRR 1
#define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \
[ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \
@@ -374,6 +376,7 @@ const struct link_mode_info link_mode_params[] = {
__DEFINE_LINK_MODE_PARAMS(10, T1S, Full),
__DEFINE_LINK_MODE_PARAMS(10, T1S, Half),
__DEFINE_LINK_MODE_PARAMS(10, T1S_P2MP, Half),
+ __DEFINE_LINK_MODE_PARAMS(10, T1BRR, Full),
};
static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS);
@@ -587,35 +590,64 @@ err_free_info:
return err;
}
-int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max)
+static u32 ethtool_get_max_rss_ctx_channel(struct net_device *dev)
+{
+ struct ethtool_rxfh_context *ctx;
+ unsigned long context;
+ u32 max_ring = 0;
+
+ mutex_lock(&dev->ethtool->rss_lock);
+ xa_for_each(&dev->ethtool->rss_ctx, context, ctx) {
+ u32 i, *tbl;
+
+ tbl = ethtool_rxfh_context_indir(ctx);
+ for (i = 0; i < ctx->indir_size; i++)
+ max_ring = max(max_ring, tbl[i]);
+ }
+ mutex_unlock(&dev->ethtool->rss_lock);
+
+ return max_ring;
+}
+
+u32 ethtool_get_max_rxfh_channel(struct net_device *dev)
{
struct ethtool_rxfh_param rxfh = {};
- u32 dev_size, current_max = 0;
+ u32 dev_size, current_max;
int ret;
+ /* While we do track whether RSS context has an indirection
+ * table explicitly set by the user, no driver looks at that bit.
+ * Assume drivers won't auto-regenerate the additional tables,
+ * to be safe.
+ */
+ current_max = ethtool_get_max_rss_ctx_channel(dev);
+
+ if (!netif_is_rxfh_configured(dev))
+ return current_max;
+
if (!dev->ethtool_ops->get_rxfh_indir_size ||
!dev->ethtool_ops->get_rxfh)
- return -EOPNOTSUPP;
+ return current_max;
dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
if (dev_size == 0)
- return -EOPNOTSUPP;
+ return current_max;
rxfh.indir = kcalloc(dev_size, sizeof(rxfh.indir[0]), GFP_USER);
if (!rxfh.indir)
- return -ENOMEM;
+ return U32_MAX;
ret = dev->ethtool_ops->get_rxfh(dev, &rxfh);
- if (ret)
- goto out;
+ if (ret) {
+ current_max = U32_MAX;
+ goto out_free;
+ }
while (dev_size--)
current_max = max(current_max, rxfh.indir[dev_size]);
- *max = current_max;
-
-out:
+out_free:
kfree(rxfh.indir);
- return ret;
+ return current_max;
}
int ethtool_check_ops(const struct ethtool_ops *ops)
@@ -629,7 +661,7 @@ int ethtool_check_ops(const struct ethtool_ops *ops)
return 0;
}
-int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info)
+int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info)
{
const struct ethtool_ops *ops = dev->ethtool_ops;
struct phy_device *phydev = dev->phydev;
@@ -637,7 +669,7 @@ int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info)
memset(info, 0, sizeof(*info));
info->cmd = ETHTOOL_GET_TS_INFO;
- if (phy_has_tsinfo(phydev))
+ if (phy_is_default_hwtstamp(phydev) && phy_has_tsinfo(phydev))
return phy_ts_info(phydev, info);
if (ops->get_ts_info)
return ops->get_ts_info(dev, info);
@@ -651,7 +683,7 @@ int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info)
int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index)
{
- struct ethtool_ts_info info = { };
+ struct kernel_ethtool_ts_info info = { };
int num = 0;
if (!__ethtool_get_ts_info(dev, &info))
@@ -661,7 +693,7 @@ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index)
}
EXPORT_SYMBOL(ethtool_get_phc_vclocks);
-int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info)
+int ethtool_get_ts_info_by_layer(struct net_device *dev, struct kernel_ethtool_ts_info *info)
{
return __ethtool_get_ts_info(dev, info);
}
@@ -712,3 +744,17 @@ ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size)
}
}
EXPORT_SYMBOL_GPL(ethtool_forced_speed_maps_init);
+
+void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id)
+{
+ struct ethtool_rxfh_context *ctx;
+
+ WARN_ONCE(!rtnl_is_locked() &&
+ !lockdep_is_held_type(&dev->ethtool->rss_lock, -1),
+ "RSS context lock assertion failed\n");
+
+ netdev_err(dev, "device error, RSS context %d lost\n", context_id);
+ ctx = xa_erase(&dev->ethtool->rss_ctx, context_id);
+ kfree(ctx);
+}
+EXPORT_SYMBOL(ethtool_rxfh_context_lost);
diff --git a/net/ethtool/common.h b/net/ethtool/common.h
index 28b8aaaf9bcb..863806fcf01a 100644
--- a/net/ethtool/common.h
+++ b/net/ethtool/common.h
@@ -42,9 +42,9 @@ int __ethtool_get_link(struct net_device *dev);
bool convert_legacy_settings_to_link_ksettings(
struct ethtool_link_ksettings *link_ksettings,
const struct ethtool_cmd *legacy_settings);
-int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max);
+u32 ethtool_get_max_rxfh_channel(struct net_device *dev);
int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max);
-int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info);
+int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info);
extern const struct ethtool_phy_ops *ethtool_phy_ops;
extern const struct ethtool_pse_ops *ethtool_pse_ops;
diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c
index 6209c3a9c8f7..3b8209e930fd 100644
--- a/net/ethtool/eeprom.c
+++ b/net/ethtool/eeprom.c
@@ -91,6 +91,12 @@ static int get_module_eeprom_by_page(struct net_device *dev,
{
const struct ethtool_ops *ops = dev->ethtool_ops;
+ if (dev->ethtool->module_fw_flash_in_progress) {
+ NL_SET_ERR_MSG(extack,
+ "Module firmware flashing is in progress");
+ return -EBUSY;
+ }
+
if (dev->sfp_bus)
return sfp_get_module_eeprom_by_page(dev->sfp_bus, page_data, extack);
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 5a55270aa86e..e18823bf2330 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -65,7 +65,8 @@ u32 ethtool_op_get_link(struct net_device *dev)
}
EXPORT_SYMBOL(ethtool_op_get_link);
-int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info)
+int ethtool_op_get_ts_info(struct net_device *dev,
+ struct kernel_ethtool_ts_info *info)
{
info->so_timestamping =
SOF_TIMESTAMPING_TX_SOFTWARE |
@@ -658,6 +659,9 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
if (!dev->ethtool_ops->get_link_ksettings)
return -EOPNOTSUPP;
+ if (dev->ethtool->module_fw_flash_in_progress)
+ return -EBUSY;
+
memset(&link_ksettings, 0, sizeof(link_ksettings));
err = dev->ethtool_ops->get_link_ksettings(dev, &link_ksettings);
if (err < 0)
@@ -1199,6 +1203,7 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
const struct ethtool_ops *ops = dev->ethtool_ops;
struct ethtool_rxfh_param rxfh_dev = {};
u32 user_indir_size, user_key_size;
+ struct ethtool_rxfh_context *ctx;
struct ethtool_rxfh rxfh;
u32 indir_bytes;
u8 *rss_config;
@@ -1246,11 +1251,26 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
if (user_key_size)
rxfh_dev.key = rss_config + indir_bytes;
- rxfh_dev.rss_context = rxfh.rss_context;
-
- ret = dev->ethtool_ops->get_rxfh(dev, &rxfh_dev);
- if (ret)
- goto out;
+ if (rxfh.rss_context) {
+ ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context);
+ if (!ctx) {
+ ret = -ENOENT;
+ goto out;
+ }
+ if (rxfh_dev.indir)
+ memcpy(rxfh_dev.indir, ethtool_rxfh_context_indir(ctx),
+ indir_bytes);
+ if (rxfh_dev.key)
+ memcpy(rxfh_dev.key, ethtool_rxfh_context_key(ctx),
+ user_key_size);
+ rxfh_dev.hfunc = ctx->hfunc;
+ rxfh_dev.input_xfrm = ctx->input_xfrm;
+ ret = 0;
+ } else {
+ ret = dev->ethtool_ops->get_rxfh(dev, &rxfh_dev);
+ if (ret)
+ goto out;
+ }
if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, hfunc),
&rxfh_dev.hfunc, sizeof(rxfh.hfunc))) {
@@ -1271,17 +1291,54 @@ out:
return ret;
}
+static struct ethtool_rxfh_context *
+ethtool_rxfh_ctx_alloc(const struct ethtool_ops *ops,
+ u32 indir_size, u32 key_size)
+{
+ size_t indir_bytes, flex_len, key_off, size;
+ struct ethtool_rxfh_context *ctx;
+ u32 priv_bytes, indir_max;
+ u16 key_max;
+
+ key_max = max(key_size, ops->rxfh_key_space);
+ indir_max = max(indir_size, ops->rxfh_indir_space);
+
+ priv_bytes = ALIGN(ops->rxfh_priv_size, sizeof(u32));
+ indir_bytes = array_size(indir_max, sizeof(u32));
+
+ key_off = size_add(priv_bytes, indir_bytes);
+ flex_len = size_add(key_off, key_max);
+ size = struct_size_t(struct ethtool_rxfh_context, data, flex_len);
+
+ ctx = kzalloc(size, GFP_KERNEL_ACCOUNT);
+ if (!ctx)
+ return NULL;
+
+ ctx->indir_size = indir_size;
+ ctx->key_size = key_size;
+ ctx->key_off = key_off;
+ ctx->priv_size = ops->rxfh_priv_size;
+
+ ctx->hfunc = ETH_RSS_HASH_NO_CHANGE;
+ ctx->input_xfrm = RXH_XFRM_NO_CHANGE;
+
+ return ctx;
+}
+
static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
void __user *useraddr)
{
u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
const struct ethtool_ops *ops = dev->ethtool_ops;
u32 dev_indir_size = 0, dev_key_size = 0, i;
+ u32 user_indir_len = 0, indir_bytes = 0;
struct ethtool_rxfh_param rxfh_dev = {};
+ struct ethtool_rxfh_context *ctx = NULL;
struct netlink_ext_ack *extack = NULL;
struct ethtool_rxnfc rx_rings;
struct ethtool_rxfh rxfh;
- u32 indir_bytes = 0;
+ bool locked = false; /* dev->ethtool->rss_lock taken */
+ bool create = false;
u8 *rss_config;
int ret;
@@ -1306,27 +1363,31 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (rxfh.input_xfrm && rxfh.input_xfrm != RXH_XFRM_SYM_XOR &&
rxfh.input_xfrm != RXH_XFRM_NO_CHANGE)
return -EINVAL;
- if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) &&
+ if (rxfh.input_xfrm != RXH_XFRM_NO_CHANGE &&
+ (rxfh.input_xfrm & RXH_XFRM_SYM_XOR) &&
!ops->cap_rss_sym_xor_supported)
return -EOPNOTSUPP;
+ create = rxfh.rss_context == ETH_RXFH_CONTEXT_ALLOC;
- /* If either indir, hash key or function is valid, proceed further.
- * Must request at least one change: indir size, hash key, function
- * or input transformation.
- */
if ((rxfh.indir_size &&
rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE &&
rxfh.indir_size != dev_indir_size) ||
- (rxfh.key_size && (rxfh.key_size != dev_key_size)) ||
+ (rxfh.key_size && rxfh.key_size != dev_key_size))
+ return -EINVAL;
+
+ /* Must request at least one change: indir size, hash key, function
+ * or input transformation.
+ * There's no need for any of it in case of context creation.
+ */
+ if (!create &&
(rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE &&
rxfh.key_size == 0 && rxfh.hfunc == ETH_RSS_HASH_NO_CHANGE &&
rxfh.input_xfrm == RXH_XFRM_NO_CHANGE))
return -EINVAL;
- if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
- indir_bytes = dev_indir_size * sizeof(rxfh_dev.indir[0]);
+ indir_bytes = dev_indir_size * sizeof(rxfh_dev.indir[0]);
- rss_config = kzalloc(indir_bytes + rxfh.key_size, GFP_USER);
+ rss_config = kzalloc(indir_bytes + dev_key_size, GFP_USER);
if (!rss_config)
return -ENOMEM;
@@ -1341,6 +1402,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
*/
if (rxfh.indir_size &&
rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) {
+ user_indir_len = indir_bytes;
rxfh_dev.indir = (u32 *)rss_config;
rxfh_dev.indir_size = dev_indir_size;
ret = ethtool_copy_validate_indir(rxfh_dev.indir,
@@ -1367,20 +1429,82 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
rxfh_dev.key_size = dev_key_size;
rxfh_dev.key = rss_config + indir_bytes;
if (copy_from_user(rxfh_dev.key,
- useraddr + rss_cfg_offset + indir_bytes,
+ useraddr + rss_cfg_offset + user_indir_len,
rxfh.key_size)) {
ret = -EFAULT;
goto out;
}
}
+ if (rxfh.rss_context) {
+ mutex_lock(&dev->ethtool->rss_lock);
+ locked = true;
+ }
+ if (create) {
+ if (rxfh_dev.rss_delete) {
+ ret = -EINVAL;
+ goto out;
+ }
+ ctx = ethtool_rxfh_ctx_alloc(ops, dev_indir_size, dev_key_size);
+ if (!ctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (ops->create_rxfh_context) {
+ u32 limit = ops->rxfh_max_num_contexts ?: U32_MAX;
+ u32 ctx_id;
+
+ /* driver uses new API, core allocates ID */
+ ret = xa_alloc(&dev->ethtool->rss_ctx, &ctx_id, ctx,
+ XA_LIMIT(1, limit - 1),
+ GFP_KERNEL_ACCOUNT);
+ if (ret < 0) {
+ kfree(ctx);
+ goto out;
+ }
+ WARN_ON(!ctx_id); /* can't happen */
+ rxfh.rss_context = ctx_id;
+ }
+ } else if (rxfh.rss_context) {
+ ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context);
+ if (!ctx) {
+ ret = -ENOENT;
+ goto out;
+ }
+ }
rxfh_dev.hfunc = rxfh.hfunc;
rxfh_dev.rss_context = rxfh.rss_context;
rxfh_dev.input_xfrm = rxfh.input_xfrm;
- ret = ops->set_rxfh(dev, &rxfh_dev, extack);
- if (ret)
+ if (rxfh.rss_context && ops->create_rxfh_context) {
+ if (create) {
+ ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev,
+ extack);
+ /* Make sure driver populates defaults */
+ WARN_ON_ONCE(!ret && !rxfh_dev.key &&
+ !memchr_inv(ethtool_rxfh_context_key(ctx),
+ 0, ctx->key_size));
+ } else if (rxfh_dev.rss_delete) {
+ ret = ops->remove_rxfh_context(dev, ctx,
+ rxfh.rss_context,
+ extack);
+ } else {
+ ret = ops->modify_rxfh_context(dev, ctx, &rxfh_dev,
+ extack);
+ }
+ } else {
+ ret = ops->set_rxfh(dev, &rxfh_dev, extack);
+ }
+ if (ret) {
+ if (create) {
+ /* failed to create, free our new tracking entry */
+ if (ops->create_rxfh_context)
+ xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context);
+ kfree(ctx);
+ }
goto out;
+ }
if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context),
&rxfh_dev.rss_context, sizeof(rxfh_dev.rss_context)))
@@ -1393,8 +1517,62 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
dev->priv_flags |= IFF_RXFH_CONFIGURED;
}
+ /* Update rss_ctx tracking */
+ if (create && !ops->create_rxfh_context) {
+ /* driver uses old API, it chose context ID */
+ if (WARN_ON(xa_load(&dev->ethtool->rss_ctx, rxfh_dev.rss_context))) {
+ /* context ID reused, our tracking is screwed */
+ kfree(ctx);
+ goto out;
+ }
+ /* Allocate the exact ID the driver gave us */
+ if (xa_is_err(xa_store(&dev->ethtool->rss_ctx, rxfh_dev.rss_context,
+ ctx, GFP_KERNEL))) {
+ kfree(ctx);
+ goto out;
+ }
+
+ /* Fetch the defaults for the old API, in the new API drivers
+ * should write defaults into ctx themselves.
+ */
+ rxfh_dev.indir = (u32 *)rss_config;
+ rxfh_dev.indir_size = dev_indir_size;
+
+ rxfh_dev.key = rss_config + indir_bytes;
+ rxfh_dev.key_size = dev_key_size;
+
+ ret = ops->get_rxfh(dev, &rxfh_dev);
+ if (WARN_ON(ret)) {
+ xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context);
+ kfree(ctx);
+ goto out;
+ }
+ }
+ if (rxfh_dev.rss_delete) {
+ WARN_ON(xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context) != ctx);
+ kfree(ctx);
+ } else if (ctx) {
+ if (rxfh_dev.indir) {
+ for (i = 0; i < dev_indir_size; i++)
+ ethtool_rxfh_context_indir(ctx)[i] = rxfh_dev.indir[i];
+ ctx->indir_configured =
+ rxfh.indir_size &&
+ rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE;
+ }
+ if (rxfh_dev.key) {
+ memcpy(ethtool_rxfh_context_key(ctx), rxfh_dev.key,
+ dev_key_size);
+ ctx->key_configured = !!rxfh.key_size;
+ }
+ if (rxfh_dev.hfunc != ETH_RSS_HASH_NO_CHANGE)
+ ctx->hfunc = rxfh_dev.hfunc;
+ if (rxfh_dev.input_xfrm != RXH_XFRM_NO_CHANGE)
+ ctx->input_xfrm = rxfh_dev.input_xfrm;
+ }
out:
+ if (locked)
+ mutex_unlock(&dev->ethtool->rss_lock);
kfree(rss_config);
return ret;
}
@@ -1449,6 +1627,9 @@ static int ethtool_reset(struct net_device *dev, char __user *useraddr)
if (!dev->ethtool_ops->reset)
return -EOPNOTSUPP;
+ if (dev->ethtool->module_fw_flash_in_progress)
+ return -EBUSY;
+
if (copy_from_user(&reset, useraddr, sizeof(reset)))
return -EFAULT;
@@ -1503,7 +1684,7 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
if (ret)
return ret;
- dev->wol_enabled = !!wol.wolopts;
+ dev->ethtool->wol_enabled = !!wol.wolopts;
ethtool_notify(dev, ETHTOOL_MSG_WOL_NTF, NULL);
return 0;
@@ -1923,9 +2104,7 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
* indirection table/rxnfc settings */
if (ethtool_get_max_rxnfc_channel(dev, &max_rxnfc_in_use))
max_rxnfc_in_use = 0;
- if (!netif_is_rxfh_configured(dev) ||
- ethtool_get_max_rxfh_channel(dev, &max_rxfh_in_use))
- max_rxfh_in_use = 0;
+ max_rxfh_in_use = ethtool_get_max_rxfh_channel(dev);
if (channels.combined_count + channels.rx_count <=
max_t(u64, max_rxnfc_in_use, max_rxfh_in_use))
return -EINVAL;
@@ -2220,7 +2399,7 @@ static int ethtool_get_phy_stats_ethtool(struct net_device *dev,
const struct ethtool_ops *ops = dev->ethtool_ops;
int n_stats, ret;
- if (!ops || !ops->get_sset_count || ops->get_ethtool_phy_stats)
+ if (!ops || !ops->get_sset_count || !ops->get_ethtool_phy_stats)
return -EOPNOTSUPP;
n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS);
@@ -2443,13 +2622,20 @@ out:
static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr)
{
- struct ethtool_ts_info info;
+ struct kernel_ethtool_ts_info kernel_info;
+ struct ethtool_ts_info info = {};
int err;
- err = __ethtool_get_ts_info(dev, &info);
+ err = __ethtool_get_ts_info(dev, &kernel_info);
if (err)
return err;
+ info.cmd = kernel_info.cmd;
+ info.so_timestamping = kernel_info.so_timestamping;
+ info.phc_index = kernel_info.phc_index;
+ info.tx_types = kernel_info.tx_types;
+ info.rx_filters = kernel_info.rx_filters;
+
if (copy_to_user(useraddr, &info, sizeof(info)))
return -EFAULT;
@@ -2462,6 +2648,9 @@ int ethtool_get_module_info_call(struct net_device *dev,
const struct ethtool_ops *ops = dev->ethtool_ops;
struct phy_device *phydev = dev->phydev;
+ if (dev->ethtool->module_fw_flash_in_progress)
+ return -EBUSY;
+
if (dev->sfp_bus)
return sfp_get_module_info(dev->sfp_bus, modinfo);
@@ -2499,6 +2688,9 @@ int ethtool_get_module_eeprom_call(struct net_device *dev,
const struct ethtool_ops *ops = dev->ethtool_ops;
struct phy_device *phydev = dev->phydev;
+ if (dev->ethtool->module_fw_flash_in_progress)
+ return -EBUSY;
+
if (dev->sfp_bus)
return sfp_get_module_eeprom(dev->sfp_bus, ee, data);
diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c
index b2de2108b356..34d76e87847d 100644
--- a/net/ethtool/linkstate.c
+++ b/net/ethtool/linkstate.c
@@ -37,6 +37,8 @@ static int linkstate_get_sqi(struct net_device *dev)
mutex_lock(&phydev->lock);
if (!phydev->drv || !phydev->drv->get_sqi)
ret = -EOPNOTSUPP;
+ else if (!phydev->link)
+ ret = -ENETDOWN;
else
ret = phydev->drv->get_sqi(phydev);
mutex_unlock(&phydev->lock);
@@ -55,6 +57,8 @@ static int linkstate_get_sqi_max(struct net_device *dev)
mutex_lock(&phydev->lock);
if (!phydev->drv || !phydev->drv->get_sqi_max)
ret = -EOPNOTSUPP;
+ else if (!phydev->link)
+ ret = -ENETDOWN;
else
ret = phydev->drv->get_sqi_max(phydev);
mutex_unlock(&phydev->lock);
@@ -62,6 +66,17 @@ static int linkstate_get_sqi_max(struct net_device *dev)
return ret;
};
+static bool linkstate_sqi_critical_error(int sqi)
+{
+ return sqi < 0 && sqi != -EOPNOTSUPP && sqi != -ENETDOWN;
+}
+
+static bool linkstate_sqi_valid(struct linkstate_reply_data *data)
+{
+ return data->sqi >= 0 && data->sqi_max >= 0 &&
+ data->sqi <= data->sqi_max;
+}
+
static int linkstate_get_link_ext_state(struct net_device *dev,
struct linkstate_reply_data *data)
{
@@ -93,12 +108,12 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
data->link = __ethtool_get_link(dev);
ret = linkstate_get_sqi(dev);
- if (ret < 0 && ret != -EOPNOTSUPP)
+ if (linkstate_sqi_critical_error(ret))
goto out;
data->sqi = ret;
ret = linkstate_get_sqi_max(dev);
- if (ret < 0 && ret != -EOPNOTSUPP)
+ if (linkstate_sqi_critical_error(ret))
goto out;
data->sqi_max = ret;
@@ -136,11 +151,10 @@ static int linkstate_reply_size(const struct ethnl_req_info *req_base,
len = nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */
+ 0;
- if (data->sqi != -EOPNOTSUPP)
- len += nla_total_size(sizeof(u32));
-
- if (data->sqi_max != -EOPNOTSUPP)
- len += nla_total_size(sizeof(u32));
+ if (linkstate_sqi_valid(data)) {
+ len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI */
+ len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI_MAX */
+ }
if (data->link_ext_state_provided)
len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_STATE */
@@ -164,13 +178,14 @@ static int linkstate_fill_reply(struct sk_buff *skb,
nla_put_u8(skb, ETHTOOL_A_LINKSTATE_LINK, !!data->link))
return -EMSGSIZE;
- if (data->sqi != -EOPNOTSUPP &&
- nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi))
- return -EMSGSIZE;
+ if (linkstate_sqi_valid(data)) {
+ if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi))
+ return -EMSGSIZE;
- if (data->sqi_max != -EOPNOTSUPP &&
- nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max))
- return -EMSGSIZE;
+ if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX,
+ data->sqi_max))
+ return -EMSGSIZE;
+ }
if (data->link_ext_state_provided) {
if (nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_STATE,
diff --git a/net/ethtool/module.c b/net/ethtool/module.c
index ceb575efc290..6988e07bdcd6 100644
--- a/net/ethtool/module.c
+++ b/net/ethtool/module.c
@@ -1,10 +1,14 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/ethtool.h>
+#include <linux/firmware.h>
+#include <linux/sfp.h>
+#include <net/devlink.h>
#include "netlink.h"
#include "common.h"
#include "bitset.h"
+#include "module_fw.h"
struct module_req_info {
struct ethnl_req_info base;
@@ -33,6 +37,12 @@ static int module_get_power_mode(struct net_device *dev,
if (!ops->get_module_power_mode)
return 0;
+ if (dev->ethtool->module_fw_flash_in_progress) {
+ NL_SET_ERR_MSG(extack,
+ "Module firmware flashing is in progress");
+ return -EBUSY;
+ }
+
return ops->get_module_power_mode(dev, &data->power, extack);
}
@@ -109,6 +119,12 @@ ethnl_set_module_validate(struct ethnl_req_info *req_info,
if (!tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY])
return 0;
+ if (req_info->dev->ethtool->module_fw_flash_in_progress) {
+ NL_SET_ERR_MSG(info->extack,
+ "Module firmware flashing is in progress");
+ return -EBUSY;
+ }
+
if (!ops->get_module_power_mode || !ops->set_module_power_mode) {
NL_SET_ERR_MSG_ATTR(info->extack,
tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY],
@@ -158,3 +174,381 @@ const struct ethnl_request_ops ethnl_module_request_ops = {
.set = ethnl_set_module,
.set_ntf_cmd = ETHTOOL_MSG_MODULE_NTF,
};
+
+/* MODULE_FW_FLASH_ACT */
+
+const struct nla_policy
+ethnl_module_fw_flash_act_policy[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD + 1] = {
+ [ETHTOOL_A_MODULE_FW_FLASH_HEADER] =
+ NLA_POLICY_NESTED(ethnl_header_policy),
+ [ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME] = { .type = NLA_NUL_STRING },
+ [ETHTOOL_A_MODULE_FW_FLASH_PASSWORD] = { .type = NLA_U32 },
+};
+
+static LIST_HEAD(module_fw_flash_work_list);
+static DEFINE_SPINLOCK(module_fw_flash_work_list_lock);
+
+static int
+module_flash_fw_work_list_add(struct ethtool_module_fw_flash *module_fw,
+ struct genl_info *info)
+{
+ struct ethtool_module_fw_flash *work;
+
+ /* First, check if already registered. */
+ spin_lock(&module_fw_flash_work_list_lock);
+ list_for_each_entry(work, &module_fw_flash_work_list, list) {
+ if (work->fw_update.ntf_params.portid == info->snd_portid &&
+ work->fw_update.dev == module_fw->fw_update.dev) {
+ spin_unlock(&module_fw_flash_work_list_lock);
+ return -EALREADY;
+ }
+ }
+
+ list_add_tail(&module_fw->list, &module_fw_flash_work_list);
+ spin_unlock(&module_fw_flash_work_list_lock);
+
+ return 0;
+}
+
+static void module_flash_fw_work_list_del(struct list_head *list)
+{
+ spin_lock(&module_fw_flash_work_list_lock);
+ list_del(list);
+ spin_unlock(&module_fw_flash_work_list_lock);
+}
+
+static void module_flash_fw_work(struct work_struct *work)
+{
+ struct ethtool_module_fw_flash *module_fw;
+
+ module_fw = container_of(work, struct ethtool_module_fw_flash, work);
+
+ ethtool_cmis_fw_update(&module_fw->fw_update);
+
+ module_flash_fw_work_list_del(&module_fw->list);
+ module_fw->fw_update.dev->ethtool->module_fw_flash_in_progress = false;
+ netdev_put(module_fw->fw_update.dev, &module_fw->dev_tracker);
+ release_firmware(module_fw->fw_update.fw);
+ kfree(module_fw);
+}
+
+#define MODULE_EEPROM_PHYS_ID_PAGE 0
+#define MODULE_EEPROM_PHYS_ID_I2C_ADDR 0x50
+
+static int module_flash_fw_work_init(struct ethtool_module_fw_flash *module_fw,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_module_eeprom page_data = {};
+ u8 phys_id;
+ int err;
+
+ /* Fetch the SFF-8024 Identifier Value. For all supported standards, it
+ * is located at I2C address 0x50, byte 0. See section 4.1 in SFF-8024,
+ * revision 4.9.
+ */
+ page_data.page = MODULE_EEPROM_PHYS_ID_PAGE;
+ page_data.offset = SFP_PHYS_ID;
+ page_data.length = sizeof(phys_id);
+ page_data.i2c_address = MODULE_EEPROM_PHYS_ID_I2C_ADDR;
+ page_data.data = &phys_id;
+
+ err = ops->get_module_eeprom_by_page(dev, &page_data, extack);
+ if (err < 0)
+ return err;
+
+ switch (phys_id) {
+ case SFF8024_ID_QSFP_DD:
+ case SFF8024_ID_OSFP:
+ case SFF8024_ID_DSFP:
+ case SFF8024_ID_QSFP_PLUS_CMIS:
+ case SFF8024_ID_SFP_DD_CMIS:
+ case SFF8024_ID_SFP_PLUS_CMIS:
+ INIT_WORK(&module_fw->work, module_flash_fw_work);
+ break;
+ default:
+ NL_SET_ERR_MSG(extack,
+ "Module type does not support firmware flashing");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+void ethnl_module_fw_flash_sock_destroy(struct ethnl_sock_priv *sk_priv)
+{
+ struct ethtool_module_fw_flash *work;
+
+ spin_lock(&module_fw_flash_work_list_lock);
+ list_for_each_entry(work, &module_fw_flash_work_list, list) {
+ if (work->fw_update.dev == sk_priv->dev &&
+ work->fw_update.ntf_params.portid == sk_priv->portid) {
+ work->fw_update.ntf_params.closed_sock = true;
+ break;
+ }
+ }
+ spin_unlock(&module_fw_flash_work_list_lock);
+}
+
+static int
+module_flash_fw_schedule(struct net_device *dev, const char *file_name,
+ struct ethtool_module_fw_flash_params *params,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ struct ethtool_cmis_fw_update_params *fw_update;
+ struct ethtool_module_fw_flash *module_fw;
+ int err;
+
+ module_fw = kzalloc(sizeof(*module_fw), GFP_KERNEL);
+ if (!module_fw)
+ return -ENOMEM;
+
+ fw_update = &module_fw->fw_update;
+ fw_update->params = *params;
+ err = request_firmware_direct(&fw_update->fw,
+ file_name, &dev->dev);
+ if (err) {
+ NL_SET_ERR_MSG(info->extack,
+ "Failed to request module firmware image");
+ goto err_free;
+ }
+
+ err = module_flash_fw_work_init(module_fw, dev, info->extack);
+ if (err < 0)
+ goto err_release_firmware;
+
+ dev->ethtool->module_fw_flash_in_progress = true;
+ netdev_hold(dev, &module_fw->dev_tracker, GFP_KERNEL);
+ fw_update->dev = dev;
+ fw_update->ntf_params.portid = info->snd_portid;
+ fw_update->ntf_params.seq = info->snd_seq;
+ fw_update->ntf_params.closed_sock = false;
+
+ err = ethnl_sock_priv_set(skb, dev, fw_update->ntf_params.portid,
+ ETHTOOL_SOCK_TYPE_MODULE_FW_FLASH);
+ if (err < 0)
+ goto err_release_firmware;
+
+ err = module_flash_fw_work_list_add(module_fw, info);
+ if (err < 0)
+ goto err_release_firmware;
+
+ schedule_work(&module_fw->work);
+
+ return 0;
+
+err_release_firmware:
+ release_firmware(fw_update->fw);
+err_free:
+ kfree(module_fw);
+ return err;
+}
+
+static int module_flash_fw(struct net_device *dev, struct nlattr **tb,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ struct ethtool_module_fw_flash_params params = {};
+ const char *file_name;
+ struct nlattr *attr;
+
+ if (GENL_REQ_ATTR_CHECK(info, ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME))
+ return -EINVAL;
+
+ file_name = nla_data(tb[ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME]);
+
+ attr = tb[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD];
+ if (attr) {
+ params.password = cpu_to_be32(nla_get_u32(attr));
+ params.password_valid = true;
+ }
+
+ return module_flash_fw_schedule(dev, file_name, &params, skb, info);
+}
+
+static int ethnl_module_fw_flash_validate(struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink_port *devlink_port = dev->devlink_port;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+
+ if (!ops->set_module_eeprom_by_page ||
+ !ops->get_module_eeprom_by_page) {
+ NL_SET_ERR_MSG(extack,
+ "Flashing module firmware is not supported by this device");
+ return -EOPNOTSUPP;
+ }
+
+ if (!ops->reset) {
+ NL_SET_ERR_MSG(extack,
+ "Reset module is not supported by this device, so flashing is not permitted");
+ return -EOPNOTSUPP;
+ }
+
+ if (dev->ethtool->module_fw_flash_in_progress) {
+ NL_SET_ERR_MSG(extack, "Module firmware flashing already in progress");
+ return -EBUSY;
+ }
+
+ if (dev->flags & IFF_UP) {
+ NL_SET_ERR_MSG(extack, "Netdevice is up, so flashing is not permitted");
+ return -EBUSY;
+ }
+
+ if (devlink_port && devlink_port->attrs.split) {
+ NL_SET_ERR_MSG(extack, "Can't perform firmware flashing on a split port");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int ethnl_act_module_fw_flash(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ethnl_req_info req_info = {};
+ struct nlattr **tb = info->attrs;
+ struct net_device *dev;
+ int ret;
+
+ ret = ethnl_parse_header_dev_get(&req_info,
+ tb[ETHTOOL_A_MODULE_FW_FLASH_HEADER],
+ genl_info_net(info), info->extack,
+ true);
+ if (ret < 0)
+ return ret;
+ dev = req_info.dev;
+
+ rtnl_lock();
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ goto out_rtnl;
+
+ ret = ethnl_module_fw_flash_validate(dev, info->extack);
+ if (ret < 0)
+ goto out_rtnl;
+
+ ret = module_flash_fw(dev, tb, skb, info);
+
+ ethnl_ops_complete(dev);
+
+out_rtnl:
+ rtnl_unlock();
+ ethnl_parse_header_dev_put(&req_info);
+ return ret;
+}
+
+/* MODULE_FW_FLASH_NTF */
+
+static int
+ethnl_module_fw_flash_ntf_put_err(struct sk_buff *skb, char *err_msg,
+ char *sub_err_msg)
+{
+ int err_msg_len, sub_err_msg_len, total_len;
+ struct nlattr *attr;
+
+ if (!err_msg)
+ return 0;
+
+ err_msg_len = strlen(err_msg);
+ total_len = err_msg_len + 2; /* For period and NUL. */
+
+ if (sub_err_msg) {
+ sub_err_msg_len = strlen(sub_err_msg);
+ total_len += sub_err_msg_len + 2; /* For ", ". */
+ }
+
+ attr = nla_reserve(skb, ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG,
+ total_len);
+ if (!attr)
+ return -ENOMEM;
+
+ if (sub_err_msg)
+ sprintf(nla_data(attr), "%s, %s.", err_msg, sub_err_msg);
+ else
+ sprintf(nla_data(attr), "%s.", err_msg);
+
+ return 0;
+}
+
+static void
+ethnl_module_fw_flash_ntf(struct net_device *dev,
+ enum ethtool_module_fw_flash_status status,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params,
+ char *err_msg, char *sub_err_msg,
+ u64 done, u64 total)
+{
+ struct sk_buff *skb;
+ void *hdr;
+ int ret;
+
+ if (ntf_params->closed_sock)
+ return;
+
+ skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return;
+
+ hdr = ethnl_unicast_put(skb, ntf_params->portid, ++ntf_params->seq,
+ ETHTOOL_MSG_MODULE_FW_FLASH_NTF);
+ if (!hdr)
+ goto err_skb;
+
+ ret = ethnl_fill_reply_header(skb, dev,
+ ETHTOOL_A_MODULE_FW_FLASH_HEADER);
+ if (ret < 0)
+ goto err_skb;
+
+ if (nla_put_u32(skb, ETHTOOL_A_MODULE_FW_FLASH_STATUS, status))
+ goto err_skb;
+
+ ret = ethnl_module_fw_flash_ntf_put_err(skb, err_msg, sub_err_msg);
+ if (ret < 0)
+ goto err_skb;
+
+ if (nla_put_uint(skb, ETHTOOL_A_MODULE_FW_FLASH_DONE, done))
+ goto err_skb;
+
+ if (nla_put_uint(skb, ETHTOOL_A_MODULE_FW_FLASH_TOTAL, total))
+ goto err_skb;
+
+ genlmsg_end(skb, hdr);
+ genlmsg_unicast(dev_net(dev), skb, ntf_params->portid);
+ return;
+
+err_skb:
+ nlmsg_free(skb);
+}
+
+void ethnl_module_fw_flash_ntf_err(struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *params,
+ char *err_msg, char *sub_err_msg)
+{
+ ethnl_module_fw_flash_ntf(dev, ETHTOOL_MODULE_FW_FLASH_STATUS_ERROR,
+ params, err_msg, sub_err_msg, 0, 0);
+}
+
+void
+ethnl_module_fw_flash_ntf_start(struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *params)
+{
+ ethnl_module_fw_flash_ntf(dev, ETHTOOL_MODULE_FW_FLASH_STATUS_STARTED,
+ params, NULL, NULL, 0, 0);
+}
+
+void
+ethnl_module_fw_flash_ntf_complete(struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *params)
+{
+ ethnl_module_fw_flash_ntf(dev, ETHTOOL_MODULE_FW_FLASH_STATUS_COMPLETED,
+ params, NULL, NULL, 0, 0);
+}
+
+void
+ethnl_module_fw_flash_ntf_in_progress(struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *params,
+ u64 done, u64 total)
+{
+ ethnl_module_fw_flash_ntf(dev,
+ ETHTOOL_MODULE_FW_FLASH_STATUS_IN_PROGRESS,
+ params, NULL, NULL, done, total);
+}
diff --git a/net/ethtool/module_fw.h b/net/ethtool/module_fw.h
new file mode 100644
index 000000000000..634543a12d0c
--- /dev/null
+++ b/net/ethtool/module_fw.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <uapi/linux/ethtool.h>
+#include "netlink.h"
+
+/**
+ * struct ethnl_module_fw_flash_ntf_params - module firmware flashing
+ * notifications parameters
+ * @portid: Netlink portid of sender.
+ * @seq: Sequence number of sender.
+ * @closed_sock: Indicates whether the socket was closed from user space.
+ */
+struct ethnl_module_fw_flash_ntf_params {
+ u32 portid;
+ u32 seq;
+ bool closed_sock;
+};
+
+/**
+ * struct ethtool_module_fw_flash_params - module firmware flashing parameters
+ * @password: Module password. Only valid when @pass_valid is set.
+ * @password_valid: Whether the module password is valid or not.
+ */
+struct ethtool_module_fw_flash_params {
+ __be32 password;
+ u8 password_valid:1;
+};
+
+/**
+ * struct ethtool_cmis_fw_update_params - CMIS firmware update specific
+ * parameters
+ * @dev: Pointer to the net_device to be flashed.
+ * @params: Module firmware flashing parameters.
+ * @ntf_params: Module firmware flashing notification parameters.
+ * @fw: Firmware to flash.
+ */
+struct ethtool_cmis_fw_update_params {
+ struct net_device *dev;
+ struct ethtool_module_fw_flash_params params;
+ struct ethnl_module_fw_flash_ntf_params ntf_params;
+ const struct firmware *fw;
+};
+
+/**
+ * struct ethtool_module_fw_flash - module firmware flashing
+ * @list: List node for &module_fw_flash_work_list.
+ * @dev_tracker: Refcount tracker for @dev.
+ * @work: The flashing firmware work.
+ * @fw_update: CMIS firmware update specific parameters.
+ */
+struct ethtool_module_fw_flash {
+ struct list_head list;
+ netdevice_tracker dev_tracker;
+ struct work_struct work;
+ struct ethtool_cmis_fw_update_params fw_update;
+};
+
+void ethnl_module_fw_flash_sock_destroy(struct ethnl_sock_priv *sk_priv);
+
+void
+ethnl_module_fw_flash_ntf_err(struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *params,
+ char *err_msg, char *sub_err_msg);
+void
+ethnl_module_fw_flash_ntf_start(struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *params);
+void
+ethnl_module_fw_flash_ntf_complete(struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *params);
+void
+ethnl_module_fw_flash_ntf_in_progress(struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *params,
+ u64 done, u64 total);
+
+void ethtool_cmis_fw_update(struct ethtool_cmis_fw_update_params *params);
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index bd04f28d5cf4..cb1eea00e349 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -4,6 +4,7 @@
#include <linux/ethtool_netlink.h>
#include <linux/pm_runtime.h>
#include "netlink.h"
+#include "module_fw.h"
static struct genl_family ethtool_genl_family;
@@ -30,6 +31,35 @@ const struct nla_policy ethnl_header_policy_stats[] = {
ETHTOOL_FLAGS_STATS),
};
+int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid,
+ enum ethnl_sock_type type)
+{
+ struct ethnl_sock_priv *sk_priv;
+
+ sk_priv = genl_sk_priv_get(&ethtool_genl_family, NETLINK_CB(skb).sk);
+ if (IS_ERR(sk_priv))
+ return PTR_ERR(sk_priv);
+
+ sk_priv->dev = dev;
+ sk_priv->portid = portid;
+ sk_priv->type = type;
+
+ return 0;
+}
+
+static void ethnl_sock_priv_destroy(void *priv)
+{
+ struct ethnl_sock_priv *sk_priv = priv;
+
+ switch (sk_priv->type) {
+ case ETHTOOL_SOCK_TYPE_MODULE_FW_FLASH:
+ ethnl_module_fw_flash_sock_destroy(sk_priv);
+ break;
+ default:
+ break;
+ }
+}
+
int ethnl_ops_begin(struct net_device *dev)
{
int ret;
@@ -239,6 +269,11 @@ void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd)
cmd);
}
+void *ethnl_unicast_put(struct sk_buff *skb, u32 portid, u32 seq, u8 cmd)
+{
+ return genlmsg_put(skb, portid, seq, &ethtool_genl_family, 0, cmd);
+}
+
int ethnl_multicast(struct sk_buff *skb, struct net_device *dev)
{
return genlmsg_multicast_netns(&ethtool_genl_family, dev_net(dev), skb,
@@ -760,10 +795,22 @@ static void ethnl_notify_features(struct netdev_notifier_info *info)
static int ethnl_netdev_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
+ struct netdev_notifier_info *info = ptr;
+ struct netlink_ext_ack *extack;
+ struct net_device *dev;
+
+ dev = netdev_notifier_info_to_dev(info);
+ extack = netdev_notifier_info_to_extack(info);
+
switch (event) {
case NETDEV_FEAT_CHANGE:
ethnl_notify_features(ptr);
break;
+ case NETDEV_PRE_UP:
+ if (dev->ethtool->module_fw_flash_in_progress) {
+ NL_SET_ERR_MSG(extack, "Can't set port up while flashing module firmware");
+ return NOTIFY_BAD;
+ }
}
return NOTIFY_DONE;
@@ -1125,6 +1172,13 @@ static const struct genl_ops ethtool_genl_ops[] = {
.policy = ethnl_mm_set_policy,
.maxattr = ARRAY_SIZE(ethnl_mm_set_policy) - 1,
},
+ {
+ .cmd = ETHTOOL_MSG_MODULE_FW_FLASH_ACT,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = ethnl_act_module_fw_flash,
+ .policy = ethnl_module_fw_flash_act_policy,
+ .maxattr = ARRAY_SIZE(ethnl_module_fw_flash_act_policy) - 1,
+ },
};
static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
@@ -1141,6 +1195,8 @@ static struct genl_family ethtool_genl_family __ro_after_init = {
.resv_start_op = ETHTOOL_MSG_MODULE_GET + 1,
.mcgrps = ethtool_nl_mcgrps,
.n_mcgrps = ARRAY_SIZE(ethtool_nl_mcgrps),
+ .sock_priv_size = sizeof(struct ethnl_sock_priv),
+ .sock_priv_destroy = ethnl_sock_priv_destroy,
};
/* module setup */
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 9a333a8d04c1..46ec273a87c5 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -21,6 +21,7 @@ struct sk_buff *ethnl_reply_init(size_t payload, struct net_device *dev, u8 cmd,
void **ehdrp);
void *ethnl_dump_put(struct sk_buff *skb, struct netlink_callback *cb, u8 cmd);
void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd);
+void *ethnl_unicast_put(struct sk_buff *skb, u32 portid, u32 seq, u8 cmd);
int ethnl_multicast(struct sk_buff *skb, struct net_device *dev);
/**
@@ -283,6 +284,19 @@ struct ethnl_reply_data {
int ethnl_ops_begin(struct net_device *dev);
void ethnl_ops_complete(struct net_device *dev);
+enum ethnl_sock_type {
+ ETHTOOL_SOCK_TYPE_MODULE_FW_FLASH,
+};
+
+struct ethnl_sock_priv {
+ struct net_device *dev;
+ u32 portid;
+ enum ethnl_sock_type type;
+};
+
+int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid,
+ enum ethnl_sock_type type);
+
/**
* struct ethnl_request_ops - unified handling of GET and SET requests
* @request_cmd: command id for request (GET)
@@ -441,6 +455,7 @@ extern const struct nla_policy ethnl_plca_set_cfg_policy[ETHTOOL_A_PLCA_MAX + 1]
extern const struct nla_policy ethnl_plca_get_status_policy[ETHTOOL_A_PLCA_HEADER + 1];
extern const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1];
extern const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1];
+extern const struct nla_policy ethnl_module_fw_flash_act_policy[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD + 1];
int ethnl_set_features(struct sk_buff *skb, struct genl_info *info);
int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info);
@@ -448,6 +463,7 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info);
int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info);
int ethnl_tunnel_info_start(struct netlink_callback *cb);
int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int ethnl_act_module_fw_flash(struct sk_buff *skb, struct genl_info *info);
extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN];
extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN];
diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
index cc478af77111..ff81aa749784 100644
--- a/net/ethtool/pse-pd.c
+++ b/net/ethtool/pse-pd.c
@@ -82,10 +82,60 @@ static int pse_reply_size(const struct ethnl_req_info *req_base,
len += nla_total_size(sizeof(u32)); /* _PODL_PSE_ADMIN_STATE */
if (st->podl_pw_status > 0)
len += nla_total_size(sizeof(u32)); /* _PODL_PSE_PW_D_STATUS */
+ if (st->c33_admin_state > 0)
+ len += nla_total_size(sizeof(u32)); /* _C33_PSE_ADMIN_STATE */
+ if (st->c33_pw_status > 0)
+ len += nla_total_size(sizeof(u32)); /* _C33_PSE_PW_D_STATUS */
+ if (st->c33_pw_class > 0)
+ len += nla_total_size(sizeof(u32)); /* _C33_PSE_PW_CLASS */
+ if (st->c33_actual_pw > 0)
+ len += nla_total_size(sizeof(u32)); /* _C33_PSE_ACTUAL_PW */
+ if (st->c33_ext_state_info.c33_pse_ext_state > 0) {
+ len += nla_total_size(sizeof(u32)); /* _C33_PSE_EXT_STATE */
+ if (st->c33_ext_state_info.__c33_pse_ext_substate > 0)
+ /* _C33_PSE_EXT_SUBSTATE */
+ len += nla_total_size(sizeof(u32));
+ }
+ if (st->c33_avail_pw_limit > 0)
+ /* _C33_AVAIL_PSE_PW_LIMIT */
+ len += nla_total_size(sizeof(u32));
+ if (st->c33_pw_limit_nb_ranges > 0)
+ /* _C33_PSE_PW_LIMIT_RANGES */
+ len += st->c33_pw_limit_nb_ranges *
+ (nla_total_size(0) +
+ nla_total_size(sizeof(u32)) * 2);
return len;
}
+static int pse_put_pw_limit_ranges(struct sk_buff *skb,
+ const struct pse_control_status *st)
+{
+ const struct ethtool_c33_pse_pw_limit_range *pw_limit_ranges;
+ int i;
+
+ pw_limit_ranges = st->c33_pw_limit_ranges;
+ for (i = 0; i < st->c33_pw_limit_nb_ranges; i++) {
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, ETHTOOL_A_C33_PSE_PW_LIMIT_MIN,
+ pw_limit_ranges->min) ||
+ nla_put_u32(skb, ETHTOOL_A_C33_PSE_PW_LIMIT_MAX,
+ pw_limit_ranges->max)) {
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+ }
+ nla_nest_end(skb, nest);
+ pw_limit_ranges++;
+ }
+
+ return 0;
+}
+
static int pse_fill_reply(struct sk_buff *skb,
const struct ethnl_req_info *req_base,
const struct ethnl_reply_data *reply_base)
@@ -103,9 +153,56 @@ static int pse_fill_reply(struct sk_buff *skb,
st->podl_pw_status))
return -EMSGSIZE;
+ if (st->c33_admin_state > 0 &&
+ nla_put_u32(skb, ETHTOOL_A_C33_PSE_ADMIN_STATE,
+ st->c33_admin_state))
+ return -EMSGSIZE;
+
+ if (st->c33_pw_status > 0 &&
+ nla_put_u32(skb, ETHTOOL_A_C33_PSE_PW_D_STATUS,
+ st->c33_pw_status))
+ return -EMSGSIZE;
+
+ if (st->c33_pw_class > 0 &&
+ nla_put_u32(skb, ETHTOOL_A_C33_PSE_PW_CLASS,
+ st->c33_pw_class))
+ return -EMSGSIZE;
+
+ if (st->c33_actual_pw > 0 &&
+ nla_put_u32(skb, ETHTOOL_A_C33_PSE_ACTUAL_PW,
+ st->c33_actual_pw))
+ return -EMSGSIZE;
+
+ if (st->c33_ext_state_info.c33_pse_ext_state > 0) {
+ if (nla_put_u32(skb, ETHTOOL_A_C33_PSE_EXT_STATE,
+ st->c33_ext_state_info.c33_pse_ext_state))
+ return -EMSGSIZE;
+
+ if (st->c33_ext_state_info.__c33_pse_ext_substate > 0 &&
+ nla_put_u32(skb, ETHTOOL_A_C33_PSE_EXT_SUBSTATE,
+ st->c33_ext_state_info.__c33_pse_ext_substate))
+ return -EMSGSIZE;
+ }
+
+ if (st->c33_avail_pw_limit > 0 &&
+ nla_put_u32(skb, ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT,
+ st->c33_avail_pw_limit))
+ return -EMSGSIZE;
+
+ if (st->c33_pw_limit_nb_ranges > 0 &&
+ pse_put_pw_limit_ranges(skb, st))
+ return -EMSGSIZE;
+
return 0;
}
+static void pse_cleanup_data(struct ethnl_reply_data *reply_base)
+{
+ const struct pse_reply_data *data = PSE_REPDATA(reply_base);
+
+ kfree(data->status.c33_pw_limit_ranges);
+}
+
/* PSE_SET */
const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = {
@@ -113,25 +210,19 @@ const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = {
[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] =
NLA_POLICY_RANGE(NLA_U32, ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED,
ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED),
+ [ETHTOOL_A_C33_PSE_ADMIN_CONTROL] =
+ NLA_POLICY_RANGE(NLA_U32, ETHTOOL_C33_PSE_ADMIN_STATE_DISABLED,
+ ETHTOOL_C33_PSE_ADMIN_STATE_ENABLED),
+ [ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT] = { .type = NLA_U32 },
};
static int
ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
{
- return !!info->attrs[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL];
-}
-
-static int
-ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info)
-{
struct net_device *dev = req_info->dev;
- struct pse_control_config config = {};
struct nlattr **tb = info->attrs;
struct phy_device *phydev;
- /* this values are already validated by the ethnl_pse_set_policy */
- config.admin_cotrol = nla_get_u32(tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL]);
-
phydev = dev->phydev;
if (!phydev) {
NL_SET_ERR_MSG(info->extack, "No PHY is attached");
@@ -143,8 +234,65 @@ ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info)
return -EOPNOTSUPP;
}
- /* Return errno directly - PSE has no notification */
- return pse_ethtool_set_config(phydev->psec, info->extack, &config);
+ if (tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] &&
+ !pse_has_podl(phydev->psec)) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL],
+ "setting PoDL PSE admin control not supported");
+ return -EOPNOTSUPP;
+ }
+ if (tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL] &&
+ !pse_has_c33(phydev->psec)) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL],
+ "setting C33 PSE admin control not supported");
+ return -EOPNOTSUPP;
+ }
+
+ return 1;
+}
+
+static int
+ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ struct net_device *dev = req_info->dev;
+ struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
+ int ret = 0;
+
+ phydev = dev->phydev;
+
+ if (tb[ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT]) {
+ unsigned int pw_limit;
+
+ pw_limit = nla_get_u32(tb[ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT]);
+ ret = pse_ethtool_set_pw_limit(phydev->psec, info->extack,
+ pw_limit);
+ if (ret)
+ return ret;
+ }
+
+ /* These values are already validated by the ethnl_pse_set_policy */
+ if (tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] ||
+ tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL]) {
+ struct pse_control_config config = {};
+
+ if (tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL])
+ config.podl_admin_control = nla_get_u32(tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL]);
+ if (tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL])
+ config.c33_admin_control = nla_get_u32(tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL]);
+
+ /* pse_ethtool_set_config() will do nothing if the config
+ * is zero
+ */
+ ret = pse_ethtool_set_config(phydev->psec, info->extack,
+ &config);
+ if (ret)
+ return ret;
+ }
+
+ /* Return errno or zero - PSE has no notification */
+ return ret;
}
const struct ethnl_request_ops ethnl_pse_request_ops = {
@@ -157,6 +305,7 @@ const struct ethnl_request_ops ethnl_pse_request_ops = {
.prepare_data = pse_prepare_data,
.reply_size = pse_reply_size,
.fill_reply = pse_fill_reply,
+ .cleanup_data = pse_cleanup_data,
.set_validate = ethnl_set_pse_validate,
.set = ethnl_set_pse,
diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
index 71679137eff2..5c4c4505ab9a 100644
--- a/net/ethtool/rss.c
+++ b/net/ethtool/rss.c
@@ -111,7 +111,8 @@ rss_reply_size(const struct ethnl_req_info *req_base,
const struct rss_reply_data *data = RSS_REPDATA(reply_base);
int len;
- len = nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */
+ len = nla_total_size(sizeof(u32)) + /* _RSS_CONTEXT */
+ nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */
nla_total_size(sizeof(u32)) + /* _RSS_INPUT_XFRM */
nla_total_size(sizeof(u32) * data->indir_size) + /* _RSS_INDIR */
nla_total_size(data->hkey_size); /* _RSS_HKEY */
@@ -124,6 +125,11 @@ rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base,
const struct ethnl_reply_data *reply_base)
{
const struct rss_reply_data *data = RSS_REPDATA(reply_base);
+ struct rss_req_info *request = RSS_REQINFO(req_base);
+
+ if (request->rss_context &&
+ nla_put_u32(skb, ETHTOOL_A_RSS_CONTEXT, request->rss_context))
+ return -EMSGSIZE;
if ((data->hfunc &&
nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) ||
diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c
index 9daed0aab162..03d12d6f79ca 100644
--- a/net/ethtool/tsinfo.c
+++ b/net/ethtool/tsinfo.c
@@ -12,15 +12,19 @@ struct tsinfo_req_info {
struct tsinfo_reply_data {
struct ethnl_reply_data base;
- struct ethtool_ts_info ts_info;
+ struct kernel_ethtool_ts_info ts_info;
+ struct ethtool_ts_stats stats;
};
#define TSINFO_REPDATA(__reply_base) \
container_of(__reply_base, struct tsinfo_reply_data, base)
+#define ETHTOOL_TS_STAT_CNT \
+ (__ETHTOOL_A_TS_STAT_CNT - (ETHTOOL_A_TS_STAT_UNSPEC + 1))
+
const struct nla_policy ethnl_tsinfo_get_policy[] = {
[ETHTOOL_A_TSINFO_HEADER] =
- NLA_POLICY_NESTED(ethnl_header_policy),
+ NLA_POLICY_NESTED(ethnl_header_policy_stats),
};
static int tsinfo_prepare_data(const struct ethnl_req_info *req_base,
@@ -34,6 +38,12 @@ static int tsinfo_prepare_data(const struct ethnl_req_info *req_base,
ret = ethnl_ops_begin(dev);
if (ret < 0)
return ret;
+ if (req_base->flags & ETHTOOL_FLAG_STATS) {
+ ethtool_stats_init((u64 *)&data->stats,
+ sizeof(data->stats) / sizeof(u64));
+ if (dev->ethtool_ops->get_ts_stats)
+ dev->ethtool_ops->get_ts_stats(dev, &data->stats);
+ }
ret = __ethtool_get_ts_info(dev, &data->ts_info);
ethnl_ops_complete(dev);
@@ -45,7 +55,7 @@ static int tsinfo_reply_size(const struct ethnl_req_info *req_base,
{
const struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base);
bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
- const struct ethtool_ts_info *ts_info = &data->ts_info;
+ const struct kernel_ethtool_ts_info *ts_info = &data->ts_info;
int len = 0;
int ret;
@@ -79,17 +89,54 @@ static int tsinfo_reply_size(const struct ethnl_req_info *req_base,
}
if (ts_info->phc_index >= 0)
len += nla_total_size(sizeof(u32)); /* _TSINFO_PHC_INDEX */
+ if (req_base->flags & ETHTOOL_FLAG_STATS)
+ len += nla_total_size(0) + /* _TSINFO_STATS */
+ nla_total_size_64bit(sizeof(u64)) * ETHTOOL_TS_STAT_CNT;
return len;
}
+static int tsinfo_put_stat(struct sk_buff *skb, u64 val, u16 attrtype)
+{
+ if (val == ETHTOOL_STAT_NOT_SET)
+ return 0;
+ if (nla_put_uint(skb, attrtype, val))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int tsinfo_put_stats(struct sk_buff *skb,
+ const struct ethtool_ts_stats *stats)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, ETHTOOL_A_TSINFO_STATS);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (tsinfo_put_stat(skb, stats->tx_stats.pkts,
+ ETHTOOL_A_TS_STAT_TX_PKTS) ||
+ tsinfo_put_stat(skb, stats->tx_stats.lost,
+ ETHTOOL_A_TS_STAT_TX_LOST) ||
+ tsinfo_put_stat(skb, stats->tx_stats.err,
+ ETHTOOL_A_TS_STAT_TX_ERR))
+ goto err_cancel;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+err_cancel:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
static int tsinfo_fill_reply(struct sk_buff *skb,
const struct ethnl_req_info *req_base,
const struct ethnl_reply_data *reply_base)
{
const struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base);
bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
- const struct ethtool_ts_info *ts_info = &data->ts_info;
+ const struct kernel_ethtool_ts_info *ts_info = &data->ts_info;
int ret;
if (ts_info->so_timestamping) {
@@ -119,6 +166,9 @@ static int tsinfo_fill_reply(struct sk_buff *skb,
if (ts_info->phc_index >= 0 &&
nla_put_u32(skb, ETHTOOL_A_TSINFO_PHC_INDEX, ts_info->phc_index))
return -EMSGSIZE;
+ if (req_base->flags & ETHTOOL_FLAG_STATS &&
+ tsinfo_put_stats(skb, &data->stats))
+ return -EMSGSIZE;
return 0;
}
diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c
index 0ed56c9ac1bc..a39d8000d808 100644
--- a/net/ethtool/wol.c
+++ b/net/ethtool/wol.c
@@ -137,7 +137,7 @@ ethnl_set_wol(struct ethnl_req_info *req_info, struct genl_info *info)
ret = dev->ethtool_ops->set_wol(dev, &wol);
if (ret)
return ret;
- dev->wol_enabled = !!wol.wolopts;
+ dev->ethtool->wol_enabled = !!wol.wolopts;
return 1;
}
diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c
index d697f68c598c..d6f52839827e 100644
--- a/net/handshake/tlshd.c
+++ b/net/handshake/tlshd.c
@@ -213,7 +213,6 @@ static int tls_handshake_accept(struct handshake_req *req,
if (!hdr)
goto out_cancel;
- ret = -EMSGSIZE;
ret = nla_put_s32(msg, HANDSHAKE_A_ACCEPT_SOCKFD, fd);
if (ret < 0)
goto out_cancel;
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index e9d45133d641..e4cc6b78dcfc 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -61,39 +61,42 @@ static bool hsr_check_carrier(struct hsr_port *master)
return false;
}
-static void hsr_check_announce(struct net_device *hsr_dev,
- unsigned char old_operstate)
+static void hsr_check_announce(struct net_device *hsr_dev)
{
struct hsr_priv *hsr;
hsr = netdev_priv(hsr_dev);
+ if (netif_running(hsr_dev) && netif_oper_up(hsr_dev)) {
+ /* Enable announce timer and start sending supervisory frames */
+ if (!timer_pending(&hsr->announce_timer)) {
+ hsr->announce_count = 0;
+ mod_timer(&hsr->announce_timer, jiffies +
+ msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL));
+ }
- if (READ_ONCE(hsr_dev->operstate) == IF_OPER_UP && old_operstate != IF_OPER_UP) {
- /* Went up */
- hsr->announce_count = 0;
- mod_timer(&hsr->announce_timer,
- jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL));
+ if (hsr->redbox && !timer_pending(&hsr->announce_proxy_timer))
+ mod_timer(&hsr->announce_proxy_timer, jiffies +
+ msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL) / 2);
+ } else {
+ /* Deactivate the announce timer */
+ timer_delete(&hsr->announce_timer);
+ if (hsr->redbox)
+ timer_delete(&hsr->announce_proxy_timer);
}
-
- if (READ_ONCE(hsr_dev->operstate) != IF_OPER_UP && old_operstate == IF_OPER_UP)
- /* Went down */
- del_timer(&hsr->announce_timer);
}
void hsr_check_carrier_and_operstate(struct hsr_priv *hsr)
{
struct hsr_port *master;
- unsigned char old_operstate;
bool has_carrier;
master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
/* netif_stacked_transfer_operstate() cannot be used here since
* it doesn't set IF_OPER_LOWERLAYERDOWN (?)
*/
- old_operstate = READ_ONCE(master->dev->operstate);
has_carrier = hsr_check_carrier(master);
hsr_set_operstate(master, has_carrier);
- hsr_check_announce(master->dev, old_operstate);
+ hsr_check_announce(master->dev);
}
int hsr_get_max_mtu(struct hsr_priv *hsr)
@@ -123,7 +126,7 @@ static int hsr_dev_change_mtu(struct net_device *dev, int new_mtu)
return -EINVAL;
}
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
@@ -146,6 +149,9 @@ static int hsr_dev_open(struct net_device *dev)
case HSR_PT_SLAVE_B:
designation = "Slave B";
break;
+ case HSR_PT_INTERLINK:
+ designation = "Interlink";
+ break;
default:
designation = "Unknown";
}
@@ -279,12 +285,14 @@ out:
return NULL;
}
-static void send_hsr_supervision_frame(struct hsr_port *master,
- unsigned long *interval)
+static void send_hsr_supervision_frame(struct hsr_port *port,
+ unsigned long *interval,
+ const unsigned char *addr)
{
- struct hsr_priv *hsr = master->hsr;
+ struct hsr_priv *hsr = port->hsr;
__u8 type = HSR_TLV_LIFE_CHECK;
struct hsr_sup_payload *hsr_sp;
+ struct hsr_sup_tlv *hsr_stlv;
struct hsr_sup_tag *hsr_stag;
struct sk_buff *skb;
@@ -295,9 +303,9 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
hsr->announce_count++;
}
- skb = hsr_init_skb(master);
+ skb = hsr_init_skb(port);
if (!skb) {
- netdev_warn_once(master->dev, "HSR: Could not send supervision frame\n");
+ netdev_warn_once(port->dev, "HSR: Could not send supervision frame\n");
return;
}
@@ -320,22 +328,34 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
hsr_stag->tlv.HSR_TLV_length = hsr->prot_version ?
sizeof(struct hsr_sup_payload) : 12;
- /* Payload: MacAddressA */
+ /* Payload: MacAddressA / SAN MAC from ProxyNodeTable */
hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
- ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr);
+ ether_addr_copy(hsr_sp->macaddress_A, addr);
+
+ if (hsr->redbox &&
+ hsr_is_node_in_db(&hsr->proxy_node_db, addr)) {
+ hsr_stlv = skb_put(skb, sizeof(struct hsr_sup_tlv));
+ hsr_stlv->HSR_TLV_type = PRP_TLV_REDBOX_MAC;
+ hsr_stlv->HSR_TLV_length = sizeof(struct hsr_sup_payload);
+
+ /* Payload: MacAddressRedBox */
+ hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
+ ether_addr_copy(hsr_sp->macaddress_A, hsr->macaddress_redbox);
+ }
if (skb_put_padto(skb, ETH_ZLEN)) {
spin_unlock_bh(&hsr->seqnr_lock);
return;
}
- hsr_forward_skb(skb, master);
+ hsr_forward_skb(skb, port);
spin_unlock_bh(&hsr->seqnr_lock);
return;
}
static void send_prp_supervision_frame(struct hsr_port *master,
- unsigned long *interval)
+ unsigned long *interval,
+ const unsigned char *addr)
{
struct hsr_priv *hsr = master->hsr;
struct hsr_sup_payload *hsr_sp;
@@ -385,7 +405,7 @@ static void hsr_announce(struct timer_list *t)
rcu_read_lock();
master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
- hsr->proto_ops->send_sv_frame(master, &interval);
+ hsr->proto_ops->send_sv_frame(master, &interval, master->dev->dev_addr);
if (is_admin_up(master->dev))
mod_timer(&hsr->announce_timer, jiffies + interval);
@@ -393,6 +413,37 @@ static void hsr_announce(struct timer_list *t)
rcu_read_unlock();
}
+/* Announce (supervision frame) timer function for RedBox
+ */
+static void hsr_proxy_announce(struct timer_list *t)
+{
+ struct hsr_priv *hsr = from_timer(hsr, t, announce_proxy_timer);
+ struct hsr_port *interlink;
+ unsigned long interval = 0;
+ struct hsr_node *node;
+
+ rcu_read_lock();
+ /* RedBOX sends supervisory frames to HSR network with MAC addresses
+ * of SAN nodes stored in ProxyNodeTable.
+ */
+ interlink = hsr_port_get_hsr(hsr, HSR_PT_INTERLINK);
+ list_for_each_entry_rcu(node, &hsr->proxy_node_db, mac_list) {
+ if (hsr_addr_is_redbox(hsr, node->macaddress_A))
+ continue;
+ hsr->proto_ops->send_sv_frame(interlink, &interval,
+ node->macaddress_A);
+ }
+
+ if (is_admin_up(interlink->dev)) {
+ if (!interval)
+ interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
+
+ mod_timer(&hsr->announce_proxy_timer, jiffies + interval);
+ }
+
+ rcu_read_unlock();
+}
+
void hsr_del_ports(struct hsr_priv *hsr)
{
struct hsr_port *port;
@@ -405,6 +456,10 @@ void hsr_del_ports(struct hsr_priv *hsr)
if (port)
hsr_del_port(port);
+ port = hsr_port_get_hsr(hsr, HSR_PT_INTERLINK);
+ if (port)
+ hsr_del_port(port);
+
port = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
if (port)
hsr_del_port(port);
@@ -534,8 +589,8 @@ static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = {
};
int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
- unsigned char multicast_spec, u8 protocol_version,
- struct netlink_ext_ack *extack)
+ struct net_device *interlink, unsigned char multicast_spec,
+ u8 protocol_version, struct netlink_ext_ack *extack)
{
bool unregister = false;
struct hsr_priv *hsr;
@@ -544,6 +599,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
hsr = netdev_priv(hsr_dev);
INIT_LIST_HEAD(&hsr->ports);
INIT_LIST_HEAD(&hsr->node_db);
+ INIT_LIST_HEAD(&hsr->proxy_node_db);
spin_lock_init(&hsr->list_lock);
eth_hw_addr_set(hsr_dev, slave[0]->dev_addr);
@@ -569,9 +625,12 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
/* Overflow soon to find bugs easier: */
hsr->sequence_nr = HSR_SEQNR_START;
hsr->sup_sequence_nr = HSR_SUP_SEQNR_START;
+ hsr->interlink_sequence_nr = HSR_SEQNR_START;
timer_setup(&hsr->announce_timer, hsr_announce, 0);
timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0);
+ timer_setup(&hsr->prune_proxy_timer, hsr_prune_proxy_nodes, 0);
+ timer_setup(&hsr->announce_proxy_timer, hsr_proxy_announce, 0);
ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr);
hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec;
@@ -604,6 +663,17 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
if (res)
goto err_unregister;
+ if (interlink) {
+ res = hsr_add_port(hsr, interlink, HSR_PT_INTERLINK, extack);
+ if (res)
+ goto err_unregister;
+
+ hsr->redbox = true;
+ ether_addr_copy(hsr->macaddress_redbox, interlink->dev_addr);
+ mod_timer(&hsr->prune_proxy_timer,
+ jiffies + msecs_to_jiffies(PRUNE_PROXY_PERIOD));
+ }
+
hsr_debugfs_init(hsr, hsr_dev);
mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD));
diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h
index 9060c92168f9..655284095b78 100644
--- a/net/hsr/hsr_device.h
+++ b/net/hsr/hsr_device.h
@@ -16,8 +16,8 @@
void hsr_del_ports(struct hsr_priv *hsr);
void hsr_dev_setup(struct net_device *dev);
int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
- unsigned char multicast_spec, u8 protocol_version,
- struct netlink_ext_ack *extack);
+ struct net_device *interlink, unsigned char multicast_spec,
+ u8 protocol_version, struct netlink_ext_ack *extack);
void hsr_check_carrier_and_operstate(struct hsr_priv *hsr);
int hsr_get_max_mtu(struct hsr_priv *hsr);
#endif /* __HSR_DEVICE_H */
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 5d68cb181695..b38060246e62 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -117,6 +117,35 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb)
return true;
}
+static bool is_proxy_supervision_frame(struct hsr_priv *hsr,
+ struct sk_buff *skb)
+{
+ struct hsr_sup_payload *payload;
+ struct ethhdr *eth_hdr;
+ u16 total_length = 0;
+
+ eth_hdr = (struct ethhdr *)skb_mac_header(skb);
+
+ /* Get the HSR protocol revision. */
+ if (eth_hdr->h_proto == htons(ETH_P_HSR))
+ total_length = sizeof(struct hsrv1_ethhdr_sp);
+ else
+ total_length = sizeof(struct hsrv0_ethhdr_sp);
+
+ if (!pskb_may_pull(skb, total_length + sizeof(struct hsr_sup_payload)))
+ return false;
+
+ skb_pull(skb, total_length);
+ payload = (struct hsr_sup_payload *)skb->data;
+ skb_push(skb, total_length);
+
+ /* For RedBox (HSR-SAN) check if we have received the supervision
+ * frame with MAC addresses from own ProxyNodeTable.
+ */
+ return hsr_is_node_in_db(&hsr->proxy_node_db,
+ payload->macaddress_A);
+}
+
static struct sk_buff *create_stripped_skb_hsr(struct sk_buff *skb_in,
struct hsr_frame_info *frame)
{
@@ -377,22 +406,79 @@ static int hsr_xmit(struct sk_buff *skb, struct hsr_port *port,
*/
ether_addr_copy(eth_hdr(skb)->h_source, port->dev->dev_addr);
}
+
+ /* When HSR node is used as RedBox - the frame received from HSR ring
+ * requires source MAC address (SA) replacement to one which can be
+ * recognized by SAN devices (otherwise, frames are dropped by switch)
+ */
+ if (port->type == HSR_PT_INTERLINK)
+ ether_addr_copy(eth_hdr(skb)->h_source,
+ port->hsr->macaddress_redbox);
+
return dev_queue_xmit(skb);
}
bool prp_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port)
{
return ((frame->port_rcv->type == HSR_PT_SLAVE_A &&
- port->type == HSR_PT_SLAVE_B) ||
+ port->type == HSR_PT_SLAVE_B) ||
(frame->port_rcv->type == HSR_PT_SLAVE_B &&
- port->type == HSR_PT_SLAVE_A));
+ port->type == HSR_PT_SLAVE_A));
}
bool hsr_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port)
{
+ struct sk_buff *skb;
+
if (port->dev->features & NETIF_F_HW_HSR_FWD)
return prp_drop_frame(frame, port);
+ /* RedBox specific frames dropping policies
+ *
+ * Do not send HSR supervisory frames to SAN devices
+ */
+ if (frame->is_supervision && port->type == HSR_PT_INTERLINK)
+ return true;
+
+ /* Do not forward to other HSR port (A or B) unicast frames which
+ * are addressed to interlink port (and are in the ProxyNodeTable).
+ */
+ skb = frame->skb_hsr;
+ if (skb && prp_drop_frame(frame, port) &&
+ is_unicast_ether_addr(eth_hdr(skb)->h_dest) &&
+ hsr_is_node_in_db(&port->hsr->proxy_node_db,
+ eth_hdr(skb)->h_dest)) {
+ return true;
+ }
+
+ /* Do not forward to port C (Interlink) frames from nodes A and B
+ * if DA is in NodeTable.
+ */
+ if ((frame->port_rcv->type == HSR_PT_SLAVE_A ||
+ frame->port_rcv->type == HSR_PT_SLAVE_B) &&
+ port->type == HSR_PT_INTERLINK) {
+ skb = frame->skb_hsr;
+ if (skb && is_unicast_ether_addr(eth_hdr(skb)->h_dest) &&
+ hsr_is_node_in_db(&port->hsr->node_db,
+ eth_hdr(skb)->h_dest)) {
+ return true;
+ }
+ }
+
+ /* Do not forward to port A and B unicast frames received on the
+ * interlink port if it is addressed to one of nodes registered in
+ * the ProxyNodeTable.
+ */
+ if ((port->type == HSR_PT_SLAVE_A || port->type == HSR_PT_SLAVE_B) &&
+ frame->port_rcv->type == HSR_PT_INTERLINK) {
+ skb = frame->skb_std;
+ if (skb && is_unicast_ether_addr(eth_hdr(skb)->h_dest) &&
+ hsr_is_node_in_db(&port->hsr->proxy_node_db,
+ eth_hdr(skb)->h_dest)) {
+ return true;
+ }
+ }
+
return false;
}
@@ -442,19 +528,21 @@ static void hsr_forward_do(struct hsr_frame_info *frame)
frame->sequence_nr))
continue;
- if (frame->is_supervision && port->type == HSR_PT_MASTER) {
+ if (frame->is_supervision && port->type == HSR_PT_MASTER &&
+ !frame->is_proxy_supervision) {
hsr_handle_sup_frame(frame);
continue;
}
/* Check if frame is to be dropped. Eg. for PRP no forward
- * between ports.
+ * between ports, or sending HSR supervision to RedBox.
*/
if (hsr->proto_ops->drop_frame &&
hsr->proto_ops->drop_frame(frame, port))
continue;
- if (port->type != HSR_PT_MASTER)
+ if (port->type == HSR_PT_SLAVE_A ||
+ port->type == HSR_PT_SLAVE_B)
skb = hsr->proto_ops->create_tagged_frame(frame, port);
else
skb = hsr->proto_ops->get_untagged_frame(frame, port);
@@ -469,7 +557,9 @@ static void hsr_forward_do(struct hsr_frame_info *frame)
hsr_deliver_master(skb, port->dev, frame->node_src);
} else {
if (!hsr_xmit(skb, port, frame))
- sent = true;
+ if (port->type == HSR_PT_SLAVE_A ||
+ port->type == HSR_PT_SLAVE_B)
+ sent = true;
}
}
}
@@ -503,10 +593,12 @@ static void handle_std_frame(struct sk_buff *skb,
frame->skb_prp = NULL;
frame->skb_std = skb;
- if (port->type != HSR_PT_MASTER) {
+ if (port->type != HSR_PT_MASTER)
frame->is_from_san = true;
- } else {
- /* Sequence nr for the master node */
+
+ if (port->type == HSR_PT_MASTER ||
+ port->type == HSR_PT_INTERLINK) {
+ /* Sequence nr for the master/interlink node */
lockdep_assert_held(&hsr->seqnr_lock);
frame->sequence_nr = hsr->sequence_nr;
hsr->sequence_nr++;
@@ -564,6 +656,7 @@ static int fill_frame_info(struct hsr_frame_info *frame,
{
struct hsr_priv *hsr = port->hsr;
struct hsr_vlan_ethhdr *vlan_hdr;
+ struct list_head *n_db;
struct ethhdr *ethhdr;
__be16 proto;
int ret;
@@ -574,9 +667,16 @@ static int fill_frame_info(struct hsr_frame_info *frame,
memset(frame, 0, sizeof(*frame));
frame->is_supervision = is_supervision_frame(port->hsr, skb);
- frame->node_src = hsr_get_node(port, &hsr->node_db, skb,
- frame->is_supervision,
- port->type);
+ if (frame->is_supervision && hsr->redbox)
+ frame->is_proxy_supervision =
+ is_proxy_supervision_frame(port->hsr, skb);
+
+ n_db = &hsr->node_db;
+ if (port->type == HSR_PT_INTERLINK)
+ n_db = &hsr->proxy_node_db;
+
+ frame->node_src = hsr_get_node(port, n_db, skb,
+ frame->is_supervision, port->type);
if (!frame->node_src)
return -1; /* Unknown node and !is_supervision, or no mem */
@@ -621,7 +721,7 @@ void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port)
/* Gets called for ingress frames as well as egress from master port.
* So check and increment stats for master port only here.
*/
- if (port->type == HSR_PT_MASTER) {
+ if (port->type == HSR_PT_MASTER || port->type == HSR_PT_INTERLINK) {
port->dev->stats.tx_packets++;
port->dev->stats.tx_bytes += skb->len;
}
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 26329db09210..73bc6f659812 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -36,6 +36,14 @@ static bool seq_nr_after(u16 a, u16 b)
#define seq_nr_before(a, b) seq_nr_after((b), (a))
#define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b)))
+bool hsr_addr_is_redbox(struct hsr_priv *hsr, unsigned char *addr)
+{
+ if (!hsr->redbox || !is_valid_ether_addr(hsr->macaddress_redbox))
+ return false;
+
+ return ether_addr_equal(addr, hsr->macaddress_redbox);
+}
+
bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr)
{
struct hsr_self_node *sn;
@@ -71,6 +79,14 @@ static struct hsr_node *find_node_by_addr_A(struct list_head *node_db,
return NULL;
}
+/* Check if node for a given MAC address is already present in data base
+ */
+bool hsr_is_node_in_db(struct list_head *node_db,
+ const unsigned char addr[ETH_ALEN])
+{
+ return !!find_node_by_addr_A(node_db, addr);
+}
+
/* Helper for device init; the self_node is used in hsr_rcv() to recognize
* frames from self that's been looped over the HSR ring.
*/
@@ -223,6 +239,15 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db,
}
}
+ /* Check if required node is not in proxy nodes table */
+ list_for_each_entry_rcu(node, &hsr->proxy_node_db, mac_list) {
+ if (ether_addr_equal(node->macaddress_A, ethhdr->h_source)) {
+ if (hsr->proto_ops->update_san_info)
+ hsr->proto_ops->update_san_info(node, is_sup);
+ return node;
+ }
+ }
+
/* Everyone may create a node entry, connected node to a HSR/PRP
* device.
*/
@@ -418,6 +443,10 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
node_dst = find_node_by_addr_A(&port->hsr->node_db,
eth_hdr(skb)->h_dest);
+ if (!node_dst && port->hsr->redbox)
+ node_dst = find_node_by_addr_A(&port->hsr->proxy_node_db,
+ eth_hdr(skb)->h_dest);
+
if (!node_dst) {
if (port->hsr->prot_version != PRP_V1 && net_ratelimit())
netdev_err(skb->dev, "%s: Unknown node\n", __func__);
@@ -561,6 +590,41 @@ void hsr_prune_nodes(struct timer_list *t)
jiffies + msecs_to_jiffies(PRUNE_PERIOD));
}
+void hsr_prune_proxy_nodes(struct timer_list *t)
+{
+ struct hsr_priv *hsr = from_timer(hsr, t, prune_proxy_timer);
+ unsigned long timestamp;
+ struct hsr_node *node;
+ struct hsr_node *tmp;
+
+ spin_lock_bh(&hsr->list_lock);
+ list_for_each_entry_safe(node, tmp, &hsr->proxy_node_db, mac_list) {
+ /* Don't prune RedBox node. */
+ if (hsr_addr_is_redbox(hsr, node->macaddress_A))
+ continue;
+
+ timestamp = node->time_in[HSR_PT_INTERLINK];
+
+ /* Prune old entries */
+ if (time_is_before_jiffies(timestamp +
+ msecs_to_jiffies(HSR_PROXY_NODE_FORGET_TIME))) {
+ hsr_nl_nodedown(hsr, node->macaddress_A);
+ if (!node->removed) {
+ list_del_rcu(&node->mac_list);
+ node->removed = true;
+ /* Note that we need to free this entry later: */
+ kfree_rcu(node, rcu_head);
+ }
+ }
+ }
+
+ spin_unlock_bh(&hsr->list_lock);
+
+ /* Restart timer */
+ mod_timer(&hsr->prune_proxy_timer,
+ jiffies + msecs_to_jiffies(PRUNE_PROXY_PERIOD));
+}
+
void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos,
unsigned char addr[ETH_ALEN])
{
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index b23556251d62..993fa950d814 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -22,6 +22,7 @@ struct hsr_frame_info {
struct hsr_node *node_src;
u16 sequence_nr;
bool is_supervision;
+ bool is_proxy_supervision;
bool is_vlan;
bool is_local_dest;
bool is_local_exclusive;
@@ -35,6 +36,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db,
enum hsr_port_type rx_port);
void hsr_handle_sup_frame(struct hsr_frame_info *frame);
bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr);
+bool hsr_addr_is_redbox(struct hsr_priv *hsr, unsigned char *addr);
void hsr_addr_subst_source(struct hsr_node *node, struct sk_buff *skb);
void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
@@ -46,6 +48,7 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
u16 sequence_nr);
void hsr_prune_nodes(struct timer_list *t);
+void hsr_prune_proxy_nodes(struct timer_list *t);
int hsr_create_self_node(struct hsr_priv *hsr,
const unsigned char addr_a[ETH_ALEN],
@@ -67,6 +70,9 @@ void prp_handle_san_frame(bool san, enum hsr_port_type port,
struct hsr_node *node);
void prp_update_san_info(struct hsr_node *node, bool is_sup);
+bool hsr_is_node_in_db(struct list_head *node_db,
+ const unsigned char addr[ETH_ALEN]);
+
struct hsr_node {
struct list_head mac_list;
/* Protect R/W access to seq_out */
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index 9756e657bab9..d7ae32473c41 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -96,7 +96,7 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
break; /* Handled in ndo_change_mtu() */
mtu_max = hsr_get_max_mtu(port->hsr);
master = hsr_port_get_hsr(port->hsr, HSR_PT_MASTER);
- master->dev->mtu = mtu_max;
+ WRITE_ONCE(master->dev->mtu, mtu_max);
break;
case NETDEV_UNREGISTER:
if (!is_hsr_master(dev)) {
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 18e01791ad79..ab1f8d35d9dc 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -21,6 +21,7 @@
*/
#define HSR_LIFE_CHECK_INTERVAL 2000 /* ms */
#define HSR_NODE_FORGET_TIME 60000 /* ms */
+#define HSR_PROXY_NODE_FORGET_TIME 60000 /* ms */
#define HSR_ANNOUNCE_INTERVAL 100 /* ms */
#define HSR_ENTRY_FORGET_TIME 400 /* ms */
@@ -35,6 +36,7 @@
* HSR_NODE_FORGET_TIME?
*/
#define PRUNE_PERIOD 3000 /* ms */
+#define PRUNE_PROXY_PERIOD 3000 /* ms */
#define HSR_TLV_EOT 0 /* End of TLVs */
#define HSR_TLV_ANNOUNCE 22
#define HSR_TLV_LIFE_CHECK 23
@@ -168,7 +170,8 @@ struct hsr_node;
struct hsr_proto_ops {
/* format and send supervision frame */
- void (*send_sv_frame)(struct hsr_port *port, unsigned long *interval);
+ void (*send_sv_frame)(struct hsr_port *port, unsigned long *interval,
+ const unsigned char addr[ETH_ALEN]);
void (*handle_san_frame)(bool san, enum hsr_port_type port,
struct hsr_node *node);
bool (*drop_frame)(struct hsr_frame_info *frame, struct hsr_port *port);
@@ -192,11 +195,15 @@ struct hsr_priv {
struct rcu_head rcu_head;
struct list_head ports;
struct list_head node_db; /* Known HSR nodes */
+ struct list_head proxy_node_db; /* RedBox HSR proxy nodes */
struct hsr_self_node __rcu *self_node; /* MACs of slaves */
struct timer_list announce_timer; /* Supervision frame dispatch */
+ struct timer_list announce_proxy_timer;
struct timer_list prune_timer;
+ struct timer_list prune_proxy_timer;
int announce_count;
u16 sequence_nr;
+ u16 interlink_sequence_nr; /* Interlink port seq_nr */
u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */
enum hsr_version prot_version; /* Indicate if HSRv0, HSRv1 or PRPv1 */
spinlock_t seqnr_lock; /* locking for sequence_nr */
@@ -209,6 +216,8 @@ struct hsr_priv {
* of lan_id
*/
bool fwd_offloaded; /* Forwarding offloaded to HW */
+ bool redbox; /* Device supports HSR RedBox */
+ unsigned char macaddress_redbox[ETH_ALEN];
unsigned char sup_multicast_addr[ETH_ALEN] __aligned(sizeof(u16));
/* Align to u16 boundary to avoid unaligned access
* in ether_addr_equal
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 78fe40eb9f01..f6ff0b61e08a 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -23,6 +23,7 @@ static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = {
[IFLA_HSR_SUPERVISION_ADDR] = { .len = ETH_ALEN },
[IFLA_HSR_SEQ_NR] = { .type = NLA_U16 },
[IFLA_HSR_PROTOCOL] = { .type = NLA_U8 },
+ [IFLA_HSR_INTERLINK] = { .type = NLA_U32 },
};
/* Here, it seems a netdevice has already been allocated for us, and the
@@ -35,8 +36,8 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
enum hsr_version proto_version;
unsigned char multicast_spec;
u8 proto = HSR_PROTOCOL_HSR;
- struct net_device *link[2];
+ struct net_device *link[2], *interlink = NULL;
if (!data) {
NL_SET_ERR_MSG_MOD(extack, "No slave devices specified");
return -EINVAL;
@@ -67,6 +68,20 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
return -EINVAL;
}
+ if (data[IFLA_HSR_INTERLINK])
+ interlink = __dev_get_by_index(src_net,
+ nla_get_u32(data[IFLA_HSR_INTERLINK]));
+
+ if (interlink && interlink == link[0]) {
+ NL_SET_ERR_MSG_MOD(extack, "Interlink and Slave1 are the same");
+ return -EINVAL;
+ }
+
+ if (interlink && interlink == link[1]) {
+ NL_SET_ERR_MSG_MOD(extack, "Interlink and Slave2 are the same");
+ return -EINVAL;
+ }
+
if (!data[IFLA_HSR_MULTICAST_SPEC])
multicast_spec = 0;
else
@@ -96,10 +111,17 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
}
}
- if (proto == HSR_PROTOCOL_PRP)
+ if (proto == HSR_PROTOCOL_PRP) {
proto_version = PRP_V1;
+ if (interlink) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Interlink only works with HSR");
+ return -EINVAL;
+ }
+ }
- return hsr_dev_finalize(dev, link, multicast_spec, proto_version, extack);
+ return hsr_dev_finalize(dev, link, interlink, multicast_spec,
+ proto_version, extack);
}
static void hsr_dellink(struct net_device *dev, struct list_head *head)
@@ -107,13 +129,16 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head)
struct hsr_priv *hsr = netdev_priv(dev);
del_timer_sync(&hsr->prune_timer);
+ del_timer_sync(&hsr->prune_proxy_timer);
del_timer_sync(&hsr->announce_timer);
+ timer_delete_sync(&hsr->announce_proxy_timer);
hsr_debugfs_term(hsr);
hsr_del_ports(hsr);
hsr_del_self_node(hsr);
hsr_del_nodes(&hsr->node_db);
+ hsr_del_nodes(&hsr->proxy_node_db);
unregister_netdevice_queue(dev, head);
}
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index 1b6457f357bd..af6cf64a00e0 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -55,6 +55,7 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
protocol = eth_hdr(skb)->h_proto;
if (!(port->dev->features & NETIF_F_HW_HSR_TAG_RM) &&
+ port->type != HSR_PT_INTERLINK &&
hsr->proto_ops->invalid_dan_ingress_frame &&
hsr->proto_ops->invalid_dan_ingress_frame(protocol))
goto finish_pass;
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 6dd960ec558c..867d637d86f0 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -130,7 +130,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
goto err;
fq->q.stamp = skb->tstamp;
- fq->q.mono_delivery_time = skb->mono_delivery_time;
+ fq->q.tstamp_type = skb->tstamp_type;
if (frag_type == LOWPAN_DISPATCH_FRAG1)
fq->q.flags |= INET_FRAG_FIRST_IN;
@@ -338,7 +338,6 @@ static struct ctl_table lowpan_frags_ns_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
/* secret interval has been deprecated */
@@ -351,7 +350,6 @@ static struct ctl_table lowpan_frags_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
@@ -370,10 +368,8 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
goto err_alloc;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- table[0].procname = NULL;
+ if (net->user_ns != &init_user_ns)
table_size = 0;
- }
}
table[0].data = &ieee802154_lowpan->fqdir->high_thresh;
@@ -399,7 +395,7 @@ err_alloc:
static void __net_exit lowpan_frags_ns_sysctl_unregister(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h
index 62aa6465253a..591ce0a16fc0 100644
--- a/net/ieee802154/trace.h
+++ b/net/ieee802154/trace.h
@@ -75,7 +75,7 @@ TRACE_EVENT(802154_rdev_add_virtual_intf,
),
TP_fast_assign(
WPAN_PHY_ASSIGN;
- __assign_str(vir_intf_name, name ? name : "<noname>");
+ __assign_str(vir_intf_name);
__entry->type = type;
__entry->extended_addr = extended_addr;
),
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index fafb123f798b..b24d74616637 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -758,7 +758,9 @@ void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *new
sock_rps_record_flow(newsk);
WARN_ON(!((1 << newsk->sk_state) &
(TCPF_ESTABLISHED | TCPF_SYN_RECV |
- TCPF_CLOSE_WAIT | TCPF_CLOSE)));
+ TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 |
+ TCPF_CLOSING | TCPF_CLOSE_WAIT |
+ TCPF_CLOSE)));
if (test_bit(SOCK_SUPPORT_ZC, &sock->flags))
set_bit(SOCK_SUPPORT_ZC, &newsock->flags);
@@ -771,16 +773,16 @@ void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *new
* Accept a pending connection. The TCP layer now gives BSD semantics.
*/
-int inet_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+int inet_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sock *sk1 = sock->sk, *sk2;
- int err = -EINVAL;
/* IPV6_ADDRFORM can change sk->sk_prot under us. */
- sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, flags, &err, kern);
+ arg->err = -EINVAL;
+ sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, arg);
if (!sk2)
- return err;
+ return arg->err;
lock_sock(sk2);
__inet_accept(sock, newsock, sk2);
@@ -1072,6 +1074,7 @@ const struct proto_ops inet_stream_ops = {
#endif
.splice_eof = inet_splice_eof,
.splice_read = tcp_splice_read,
+ .set_peek_off = sk_set_peek_off,
.read_sock = tcp_read_sock,
.read_skb = tcp_read_skb,
.sendmsg_locked = tcp_sendmsg_locked,
@@ -1306,8 +1309,8 @@ static int inet_sk_reselect_saddr(struct sock *sk)
int inet_sk_rebuild_header(struct sock *sk)
{
+ struct rtable *rt = dst_rtable(__sk_dst_check(sk, 0));
struct inet_sock *inet = inet_sk(sk);
- struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
__be32 daddr;
struct ip_options_rcu *inet_opt;
struct flowi4 *fl4;
@@ -1481,7 +1484,6 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
struct sk_buff *p;
unsigned int hlen;
unsigned int off;
- unsigned int id;
int flush = 1;
int proto;
@@ -1507,13 +1509,10 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
goto out;
NAPI_GRO_CB(skb)->proto = proto;
- id = ntohl(*(__be32 *)&iph->id);
- flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF));
- id >>= 16;
+ flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (ntohl(*(__be32 *)&iph->id) & ~IP_DF));
list_for_each_entry(p, head, list) {
struct iphdr *iph2;
- u16 flush_id;
if (!NAPI_GRO_CB(p)->same_flow)
continue;
@@ -1530,49 +1529,10 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
NAPI_GRO_CB(p)->same_flow = 0;
continue;
}
-
- /* All fields must match except length and checksum. */
- NAPI_GRO_CB(p)->flush |=
- (iph->ttl ^ iph2->ttl) |
- (iph->tos ^ iph2->tos) |
- ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF));
-
- NAPI_GRO_CB(p)->flush |= flush;
-
- /* We need to store of the IP ID check to be included later
- * when we can verify that this packet does in fact belong
- * to a given flow.
- */
- flush_id = (u16)(id - ntohs(iph2->id));
-
- /* This bit of code makes it much easier for us to identify
- * the cases where we are doing atomic vs non-atomic IP ID
- * checks. Specifically an atomic check can return IP ID
- * values 0 - 0xFFFF, while a non-atomic check can only
- * return 0 or 0xFFFF.
- */
- if (!NAPI_GRO_CB(p)->is_atomic ||
- !(iph->frag_off & htons(IP_DF))) {
- flush_id ^= NAPI_GRO_CB(p)->count;
- flush_id = flush_id ? 0xFFFF : 0;
- }
-
- /* If the previous IP ID value was based on an atomic
- * datagram we can overwrite the value and ignore it.
- */
- if (NAPI_GRO_CB(skb)->is_atomic)
- NAPI_GRO_CB(p)->flush_id = flush_id;
- else
- NAPI_GRO_CB(p)->flush_id |= flush_id;
}
- NAPI_GRO_CB(skb)->is_atomic = !!(iph->frag_off & htons(IP_DF));
NAPI_GRO_CB(skb)->flush |= flush;
- skb_set_network_header(skb, off);
- /* The above will be needed by the transport layer if there is one
- * immediately following this IP hdr.
- */
- NAPI_GRO_CB(skb)->inner_network_offset = off;
+ NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = off;
/* Note : No need to call skb_gro_postpull_rcsum() here,
* as we already checked checksum over ipv4 header was 0
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 0d0d725b46ad..11c1519b3699 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -456,7 +456,8 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
/*unsigned long now; */
struct net *net = dev_net(dev);
- rt = ip_route_output(net, sip, tip, 0, l3mdev_master_ifindex_rcu(dev));
+ rt = ip_route_output(net, sip, tip, 0, l3mdev_master_ifindex_rcu(dev),
+ RT_SCOPE_UNIVERSE);
if (IS_ERR(rt))
return 1;
if (rt->dst.dev != dev) {
@@ -1002,6 +1003,55 @@ out_of_mem:
* User level interface (ioctl)
*/
+static struct net_device *arp_req_dev_by_name(struct net *net, struct arpreq *r,
+ bool getarp)
+{
+ struct net_device *dev;
+
+ if (getarp)
+ dev = dev_get_by_name_rcu(net, r->arp_dev);
+ else
+ dev = __dev_get_by_name(net, r->arp_dev);
+ if (!dev)
+ return ERR_PTR(-ENODEV);
+
+ /* Mmmm... It is wrong... ARPHRD_NETROM == 0 */
+ if (!r->arp_ha.sa_family)
+ r->arp_ha.sa_family = dev->type;
+
+ if ((r->arp_flags & ATF_COM) && r->arp_ha.sa_family != dev->type)
+ return ERR_PTR(-EINVAL);
+
+ return dev;
+}
+
+static struct net_device *arp_req_dev(struct net *net, struct arpreq *r)
+{
+ struct net_device *dev;
+ struct rtable *rt;
+ __be32 ip;
+
+ if (r->arp_dev[0])
+ return arp_req_dev_by_name(net, r, false);
+
+ if (r->arp_flags & ATF_PUBL)
+ return NULL;
+
+ ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+
+ rt = ip_route_output(net, ip, 0, 0, 0, RT_SCOPE_LINK);
+ if (IS_ERR(rt))
+ return ERR_CAST(rt);
+
+ dev = rt->dst.dev;
+ ip_rt_put(rt);
+
+ if (!dev)
+ return ERR_PTR(-EINVAL);
+
+ return dev;
+}
+
/*
* Set (create) an ARP cache entry.
*/
@@ -1022,11 +1072,8 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on)
static int arp_req_set_public(struct net *net, struct arpreq *r,
struct net_device *dev)
{
- __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
- if (mask && mask != htonl(0xFFFFFFFF))
- return -EINVAL;
if (!dev && (r->arp_flags & ATF_COM)) {
dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family,
r->arp_ha.sa_data);
@@ -1034,6 +1081,8 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
return -ENODEV;
}
if (mask) {
+ __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+
if (!pneigh_lookup(&arp_tbl, net, &ip, dev, 1))
return -ENOBUFS;
return 0;
@@ -1042,29 +1091,20 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
return arp_req_set_proxy(net, dev, 1);
}
-static int arp_req_set(struct net *net, struct arpreq *r,
- struct net_device *dev)
+static int arp_req_set(struct net *net, struct arpreq *r)
{
- __be32 ip;
struct neighbour *neigh;
+ struct net_device *dev;
+ __be32 ip;
int err;
+ dev = arp_req_dev(net, r);
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
+
if (r->arp_flags & ATF_PUBL)
return arp_req_set_public(net, r, dev);
- ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
- if (r->arp_flags & ATF_PERM)
- r->arp_flags |= ATF_COM;
- if (!dev) {
- struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0);
-
- if (IS_ERR(rt))
- return PTR_ERR(rt);
- dev = rt->dst.dev;
- ip_rt_put(rt);
- if (!dev)
- return -EINVAL;
- }
switch (dev->type) {
#if IS_ENABLED(CONFIG_FDDI)
case ARPHRD_FDDI:
@@ -1086,12 +1126,18 @@ static int arp_req_set(struct net *net, struct arpreq *r,
break;
}
+ ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+
neigh = __neigh_lookup_errno(&arp_tbl, &ip, dev);
err = PTR_ERR(neigh);
if (!IS_ERR(neigh)) {
unsigned int state = NUD_STALE;
- if (r->arp_flags & ATF_PERM)
+
+ if (r->arp_flags & ATF_PERM) {
+ r->arp_flags |= ATF_COM;
state = NUD_PERMANENT;
+ }
+
err = neigh_update(neigh, (r->arp_flags & ATF_COM) ?
r->arp_ha.sa_data : NULL, state,
NEIGH_UPDATE_F_OVERRIDE |
@@ -1115,27 +1161,40 @@ static unsigned int arp_state_to_flags(struct neighbour *neigh)
* Get an ARP cache entry.
*/
-static int arp_req_get(struct arpreq *r, struct net_device *dev)
+static int arp_req_get(struct net *net, struct arpreq *r)
{
__be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
struct neighbour *neigh;
- int err = -ENXIO;
+ struct net_device *dev;
+
+ if (!r->arp_dev[0])
+ return -ENODEV;
+
+ dev = arp_req_dev_by_name(net, r, true);
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
neigh = neigh_lookup(&arp_tbl, &ip, dev);
- if (neigh) {
- if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) {
- read_lock_bh(&neigh->lock);
- memcpy(r->arp_ha.sa_data, neigh->ha,
- min(dev->addr_len, sizeof(r->arp_ha.sa_data_min)));
- r->arp_flags = arp_state_to_flags(neigh);
- read_unlock_bh(&neigh->lock);
- r->arp_ha.sa_family = dev->type;
- strscpy(r->arp_dev, dev->name, sizeof(r->arp_dev));
- err = 0;
- }
+ if (!neigh)
+ return -ENXIO;
+
+ if (READ_ONCE(neigh->nud_state) & NUD_NOARP) {
neigh_release(neigh);
+ return -ENXIO;
}
- return err;
+
+ read_lock_bh(&neigh->lock);
+ memcpy(r->arp_ha.sa_data, neigh->ha,
+ min(dev->addr_len, sizeof(r->arp_ha.sa_data_min)));
+ r->arp_flags = arp_state_to_flags(neigh);
+ read_unlock_bh(&neigh->lock);
+
+ neigh_release(neigh);
+
+ r->arp_ha.sa_family = dev->type;
+ netdev_copy_name(dev, r->arp_dev);
+
+ return 0;
}
int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
@@ -1166,36 +1225,31 @@ int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
static int arp_req_delete_public(struct net *net, struct arpreq *r,
struct net_device *dev)
{
- __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
- if (mask == htonl(0xFFFFFFFF))
- return pneigh_delete(&arp_tbl, net, &ip, dev);
+ if (mask) {
+ __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
- if (mask)
- return -EINVAL;
+ return pneigh_delete(&arp_tbl, net, &ip, dev);
+ }
return arp_req_set_proxy(net, dev, 0);
}
-static int arp_req_delete(struct net *net, struct arpreq *r,
- struct net_device *dev)
+static int arp_req_delete(struct net *net, struct arpreq *r)
{
+ struct net_device *dev;
__be32 ip;
+ dev = arp_req_dev(net, r);
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
+
if (r->arp_flags & ATF_PUBL)
return arp_req_delete_public(net, r, dev);
ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
- if (!dev) {
- struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0);
- if (IS_ERR(rt))
- return PTR_ERR(rt);
- dev = rt->dst.dev;
- ip_rt_put(rt);
- if (!dev)
- return -EINVAL;
- }
+
return arp_invalidate(dev, ip, true);
}
@@ -1205,9 +1259,9 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
- int err;
struct arpreq r;
- struct net_device *dev = NULL;
+ __be32 *netmask;
+ int err;
switch (cmd) {
case SIOCDARP:
@@ -1230,42 +1284,34 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (!(r.arp_flags & ATF_PUBL) &&
(r.arp_flags & (ATF_NETMASK | ATF_DONTPUB)))
return -EINVAL;
+
+ netmask = &((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr;
if (!(r.arp_flags & ATF_NETMASK))
- ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr =
- htonl(0xFFFFFFFFUL);
- rtnl_lock();
- if (r.arp_dev[0]) {
- err = -ENODEV;
- dev = __dev_get_by_name(net, r.arp_dev);
- if (!dev)
- goto out;
-
- /* Mmmm... It is wrong... ARPHRD_NETROM==0 */
- if (!r.arp_ha.sa_family)
- r.arp_ha.sa_family = dev->type;
- err = -EINVAL;
- if ((r.arp_flags & ATF_COM) && r.arp_ha.sa_family != dev->type)
- goto out;
- } else if (cmd == SIOCGARP) {
- err = -ENODEV;
- goto out;
- }
+ *netmask = htonl(0xFFFFFFFFUL);
+ else if (*netmask && *netmask != htonl(0xFFFFFFFFUL))
+ return -EINVAL;
switch (cmd) {
case SIOCDARP:
- err = arp_req_delete(net, &r, dev);
+ rtnl_lock();
+ err = arp_req_delete(net, &r);
+ rtnl_unlock();
break;
case SIOCSARP:
- err = arp_req_set(net, &r, dev);
+ rtnl_lock();
+ err = arp_req_set(net, &r);
+ rtnl_unlock();
break;
case SIOCGARP:
- err = arp_req_get(&r, dev);
+ rcu_read_lock();
+ err = arp_req_get(net, &r);
+ rcu_read_unlock();
+
+ if (!err && copy_to_user(arg, &r, sizeof(r)))
+ err = -EFAULT;
break;
}
-out:
- rtnl_unlock();
- if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r)))
- err = -EFAULT;
+
return err;
}
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 7f518ea5f4ac..3f88d0961e5b 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -107,6 +107,9 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
case offsetof(struct tcp_sock, snd_cwnd_cnt):
end = offsetofend(struct tcp_sock, snd_cwnd_cnt);
break;
+ case offsetof(struct tcp_sock, snd_cwnd_stamp):
+ end = offsetofend(struct tcp_sock, snd_cwnd_stamp);
+ break;
case offsetof(struct tcp_sock, snd_ssthresh):
end = offsetofend(struct tcp_sock, snd_ssthresh);
break;
@@ -257,17 +260,17 @@ static int bpf_tcp_ca_check_member(const struct btf_type *t,
return 0;
}
-static int bpf_tcp_ca_reg(void *kdata)
+static int bpf_tcp_ca_reg(void *kdata, struct bpf_link *link)
{
return tcp_register_congestion_control(kdata);
}
-static void bpf_tcp_ca_unreg(void *kdata)
+static void bpf_tcp_ca_unreg(void *kdata, struct bpf_link *link)
{
tcp_unregister_congestion_control(kdata);
}
-static int bpf_tcp_ca_update(void *kdata, void *old_kdata)
+static int bpf_tcp_ca_update(void *kdata, void *old_kdata, struct bpf_link *link)
{
return tcp_update_congestion_control(kdata, old_kdata);
}
@@ -307,7 +310,8 @@ static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk)
return 0;
}
-static void bpf_tcp_ca_cong_control(struct sock *sk, const struct rate_sample *rs)
+static void bpf_tcp_ca_cong_control(struct sock *sk, u32 ack, int flag,
+ const struct rate_sample *rs)
{
}
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 8b17d83e5fde..8cc0e2f4159d 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1810,11 +1810,35 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
return CIPSO_V4_HDR_LEN + ret_val;
}
+static int cipso_v4_get_actual_opt_len(const unsigned char *data, int len)
+{
+ int iter = 0, optlen = 0;
+
+ /* determining the new total option length is tricky because of
+ * the padding necessary, the only thing i can think to do at
+ * this point is walk the options one-by-one, skipping the
+ * padding at the end to determine the actual option size and
+ * from there we can determine the new total option length
+ */
+ while (iter < len) {
+ if (data[iter] == IPOPT_END) {
+ break;
+ } else if (data[iter] == IPOPT_NOP) {
+ iter++;
+ } else {
+ iter += data[iter + 1];
+ optlen = iter;
+ }
+ }
+ return optlen;
+}
+
/**
* cipso_v4_sock_setattr - Add a CIPSO option to a socket
* @sk: the socket
* @doi_def: the CIPSO DOI to use
* @secattr: the specific security attributes of the socket
+ * @sk_locked: true if caller holds the socket lock
*
* Description:
* Set the CIPSO option on the given socket using the DOI definition and
@@ -1826,7 +1850,8 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
*/
int cipso_v4_sock_setattr(struct sock *sk,
const struct cipso_v4_doi *doi_def,
- const struct netlbl_lsm_secattr *secattr)
+ const struct netlbl_lsm_secattr *secattr,
+ bool sk_locked)
{
int ret_val = -EPERM;
unsigned char *buf = NULL;
@@ -1876,8 +1901,7 @@ int cipso_v4_sock_setattr(struct sock *sk,
sk_inet = inet_sk(sk);
- old = rcu_dereference_protected(sk_inet->inet_opt,
- lockdep_sock_is_held(sk));
+ old = rcu_dereference_protected(sk_inet->inet_opt, sk_locked);
if (inet_test_bit(IS_ICSK, sk)) {
sk_conn = inet_csk(sk);
if (old)
@@ -1952,7 +1976,7 @@ int cipso_v4_req_setattr(struct request_sock *req,
buf = NULL;
req_inet = inet_rsk(req);
- opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt);
+ opt = unrcu_pointer(xchg(&req_inet->ireq_opt, RCU_INITIALIZER(opt)));
if (opt)
kfree_rcu(opt, rcu);
@@ -1985,7 +2009,6 @@ static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr)
u8 cipso_len;
u8 cipso_off;
unsigned char *cipso_ptr;
- int iter;
int optlen_new;
cipso_off = opt->opt.cipso - sizeof(struct iphdr);
@@ -2005,19 +2028,8 @@ static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr)
memmove(cipso_ptr, cipso_ptr + cipso_len,
opt->opt.optlen - cipso_off - cipso_len);
- /* determining the new total option length is tricky because of
- * the padding necessary, the only thing i can think to do at
- * this point is walk the options one-by-one, skipping the
- * padding at the end to determine the actual option size and
- * from there we can determine the new total option length */
- iter = 0;
- optlen_new = 0;
- while (iter < opt->opt.optlen)
- if (opt->opt.__data[iter] != IPOPT_NOP) {
- iter += opt->opt.__data[iter + 1];
- optlen_new = iter;
- } else
- iter++;
+ optlen_new = cipso_v4_get_actual_opt_len(opt->opt.__data,
+ opt->opt.optlen);
hdr_delta = opt->opt.optlen;
opt->opt.optlen = (optlen_new + 3) & ~3;
hdr_delta -= opt->opt.optlen;
@@ -2237,7 +2249,8 @@ int cipso_v4_skbuff_setattr(struct sk_buff *skb,
*/
int cipso_v4_skbuff_delattr(struct sk_buff *skb)
{
- int ret_val;
+ int ret_val, cipso_len, hdr_len_actual, new_hdr_len_actual, new_hdr_len,
+ hdr_len_delta;
struct iphdr *iph;
struct ip_options *opt = &IPCB(skb)->opt;
unsigned char *cipso_ptr;
@@ -2250,16 +2263,37 @@ int cipso_v4_skbuff_delattr(struct sk_buff *skb)
if (ret_val < 0)
return ret_val;
- /* the easiest thing to do is just replace the cipso option with noop
- * options since we don't change the size of the packet, although we
- * still need to recalculate the checksum */
-
iph = ip_hdr(skb);
cipso_ptr = (unsigned char *)iph + opt->cipso;
- memset(cipso_ptr, IPOPT_NOOP, cipso_ptr[1]);
+ cipso_len = cipso_ptr[1];
+
+ hdr_len_actual = sizeof(struct iphdr) +
+ cipso_v4_get_actual_opt_len((unsigned char *)(iph + 1),
+ opt->optlen);
+ new_hdr_len_actual = hdr_len_actual - cipso_len;
+ new_hdr_len = (new_hdr_len_actual + 3) & ~3;
+ hdr_len_delta = (iph->ihl << 2) - new_hdr_len;
+
+ /* 1. shift any options after CIPSO to the left */
+ memmove(cipso_ptr, cipso_ptr + cipso_len,
+ new_hdr_len_actual - opt->cipso);
+ /* 2. move the whole IP header to its new place */
+ memmove((unsigned char *)iph + hdr_len_delta, iph, new_hdr_len_actual);
+ /* 3. adjust the skb layout */
+ skb_pull(skb, hdr_len_delta);
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ /* 4. re-fill new padding with IPOPT_END (may now be longer) */
+ memset((unsigned char *)iph + new_hdr_len_actual, IPOPT_END,
+ new_hdr_len - new_hdr_len_actual);
+
+ opt->optlen -= hdr_len_delta;
opt->cipso = 0;
opt->is_changed = 1;
-
+ if (hdr_len_delta != 0) {
+ iph->ihl = new_hdr_len >> 2;
+ iph_set_totlen(iph, skb->len);
+ }
ip_send_check(iph);
return 0;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 7a437f0d4190..d96f3e452fef 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -224,6 +224,7 @@ static struct in_ifaddr *inet_alloc_ifa(void)
static void inet_rcu_free_ifa(struct rcu_head *head)
{
struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
+
if (ifa->ifa_dev)
in_dev_put(ifa->ifa_dev);
kfree(ifa);
@@ -231,7 +232,11 @@ static void inet_rcu_free_ifa(struct rcu_head *head)
static void inet_free_ifa(struct in_ifaddr *ifa)
{
- call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
+ /* Our reference to ifa->ifa_dev must be freed ASAP
+ * to release the reference to the netdev the same way.
+ * in_dev_put() -> in_dev_finish_destroy() -> netdev_put()
+ */
+ call_rcu_hurry(&ifa->rcu_head, inet_rcu_free_ifa);
}
static void in_dev_free_rcu(struct rcu_head *head)
@@ -1683,6 +1688,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
struct nlmsghdr *nlh;
unsigned long tstamp;
u32 preferred, valid;
+ u32 flags;
nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
args->flags);
@@ -1692,7 +1698,13 @@ static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
ifm = nlmsg_data(nlh);
ifm->ifa_family = AF_INET;
ifm->ifa_prefixlen = ifa->ifa_prefixlen;
- ifm->ifa_flags = READ_ONCE(ifa->ifa_flags);
+
+ flags = READ_ONCE(ifa->ifa_flags);
+ /* Warning : ifm->ifa_flags is an __u8, it holds only 8 bits.
+ * The 32bit value is given in IFA_FLAGS attribute.
+ */
+ ifm->ifa_flags = (__u8)flags;
+
ifm->ifa_scope = ifa->ifa_scope;
ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
@@ -1701,7 +1713,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
goto nla_put_failure;
tstamp = READ_ONCE(ifa->ifa_tstamp);
- if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
+ if (!(flags & IFA_F_PERMANENT)) {
preferred = READ_ONCE(ifa->ifa_preferred_lft);
valid = READ_ONCE(ifa->ifa_valid_lft);
if (preferred != INFINITY_LIFE_TIME) {
@@ -1732,7 +1744,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
(ifa->ifa_proto &&
nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
- nla_put_u32(skb, IFA_FLAGS, ifm->ifa_flags) ||
+ nla_put_u32(skb, IFA_FLAGS, flags) ||
(ifa->ifa_rt_priority &&
nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp,
@@ -1875,10 +1887,11 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
goto done;
if (fillargs.ifindex) {
- err = -ENODEV;
dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
- if (!dev)
+ if (!dev) {
+ err = -ENODEV;
goto done;
+ }
in_dev = __in_dev_get_rcu(dev);
if (!in_dev)
goto done;
@@ -1890,7 +1903,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
cb->seq = inet_base_seq(tgt_net);
- for_each_netdev_dump(net, dev, ctx->ifindex) {
+ for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
in_dev = __in_dev_get_rcu(dev);
if (!in_dev)
continue;
@@ -2377,7 +2390,7 @@ static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
}
}
-static int devinet_conf_proc(struct ctl_table *ctl, int write,
+static int devinet_conf_proc(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int old_value = *(int *)ctl->data;
@@ -2429,7 +2442,7 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write,
return ret;
}
-static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
+static int devinet_sysctl_forward(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
@@ -2476,7 +2489,7 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
return ret;
}
-static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
+static int ipv4_doint_and_flush(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
@@ -2515,7 +2528,7 @@ static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
static struct devinet_sysctl_table {
struct ctl_table_header *sysctl_header;
- struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
+ struct ctl_table devinet_vars[IPV4_DEVCONF_MAX];
} devinet_sysctl = {
.devinet_vars = {
DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
@@ -2578,7 +2591,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
if (!t)
goto out;
- for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
+ for (i = 0; i < ARRAY_SIZE(t->devinet_vars); i++) {
t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
t->devinet_vars[i].extra1 = p;
t->devinet_vars[i].extra2 = net;
@@ -2652,7 +2665,6 @@ static struct ctl_table ctl_forward_entry[] = {
.extra1 = &ipv4_devconf,
.extra2 = &init_net,
},
- { },
};
#endif
@@ -2749,7 +2761,7 @@ err_alloc_all:
static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
- struct ctl_table *tbl;
+ const struct ctl_table *tbl;
tbl = net->ipv4.forw_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv4.forw_hdr);
@@ -2793,7 +2805,7 @@ void __init devinet_init(void)
rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr,
- RTNL_FLAG_DUMP_UNLOCKED);
+ RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
inet_netconf_dump_devconf,
RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index d33d12421814..47378ca41904 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -20,6 +20,7 @@
#include <net/udp.h>
#include <net/tcp.h>
#include <net/espintcp.h>
+#include <linux/skbuff_ref.h>
#include <linux/highmem.h>
@@ -114,7 +115,7 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
*/
if (req->src != req->dst)
for (sg = sg_next(req->src); sg; sg = sg_next(sg))
- skb_page_unref(skb, sg_page(sg), false);
+ skb_page_unref(sg_page(sg), skb->pp_recycle);
}
#ifdef CONFIG_INET_ESPINTCP
@@ -238,8 +239,7 @@ static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
#else
static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
{
- kfree_skb(skb);
-
+ WARN_ON(1);
return -EOPNOTSUPP;
}
#endif
@@ -347,8 +347,8 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb,
__be16 dport)
{
struct udphdr *uh;
- __be32 *udpdata32;
unsigned int len;
+ struct xfrm_offload *xo = xfrm_offload(skb);
len = skb->len + esp->tailen - skb_transport_offset(skb);
if (len + sizeof(struct iphdr) > IP_MAX_MTU)
@@ -360,13 +360,12 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb,
uh->len = htons(len);
uh->check = 0;
- *skb_mac_header(skb) = IPPROTO_UDP;
-
- if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) {
- udpdata32 = (__be32 *)(uh + 1);
- udpdata32[0] = udpdata32[1] = 0;
- return (struct ip_esp_hdr *)(udpdata32 + 2);
- }
+ /* For IPv4 ESP with UDP encapsulation, if xo is not null, the skb is in the crypto offload
+ * data path, which means that esp_output_udp_encap is called outside of the XFRM stack.
+ * In this case, the mac header doesn't point to the IPv4 protocol field, so don't set it.
+ */
+ if (!xo || encap_type != UDP_ENCAP_ESPINUDP)
+ *skb_mac_header(skb) = IPPROTO_UDP;
return (struct ip_esp_hdr *)(uh + 1);
}
@@ -423,7 +422,6 @@ static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb,
switch (encap_type) {
default:
case UDP_ENCAP_ESPINUDP:
- case UDP_ENCAP_ESPINUDP_NON_IKE:
esph = esp_output_udp_encap(skb, encap_type, esp, sport, dport);
break;
case TCP_ENCAP_ESPINTCP:
@@ -775,7 +773,6 @@ int esp_input_done2(struct sk_buff *skb, int err)
source = th->source;
break;
case UDP_ENCAP_ESPINUDP:
- case UDP_ENCAP_ESPINUDP_NON_IKE:
source = uh->source;
break;
default:
@@ -1179,9 +1176,6 @@ static int esp_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack)
case UDP_ENCAP_ESPINUDP:
x->props.header_len += sizeof(struct udphdr);
break;
- case UDP_ENCAP_ESPINUDP_NON_IKE:
- x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32);
- break;
#ifdef CONFIG_INET_ESPINTCP
case TCP_ENCAP_ESPINTCP:
/* only the length field, TCP encap is done by
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index b3271957ad9a..80c4ea0e12f4 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -56,6 +56,13 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
(xfrm_address_t *)&ip_hdr(skb)->daddr,
spi, IPPROTO_ESP, AF_INET);
+
+ if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) {
+ /* non-offload path will record the error and audit log */
+ xfrm_state_put(x);
+ x = NULL;
+ }
+
if (!x)
goto out_reset;
@@ -264,6 +271,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
struct esp_info esp;
bool hw_offload = true;
__u32 seq;
+ int encap_type = 0;
esp.inplace = true;
@@ -296,8 +304,10 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
esp.esph = ip_esp_hdr(skb);
+ if (x->encap)
+ encap_type = x->encap->encap_type;
- if (!hw_offload || !skb_is_gso(skb)) {
+ if (!hw_offload || !skb_is_gso(skb) || (hw_offload && encap_type == UDP_ENCAP_ESPINUDP)) {
esp.nfrags = esp_output_head(x, skb, &esp);
if (esp.nfrags < 0)
return esp.nfrags;
@@ -324,6 +334,18 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32));
+ if (hw_offload && encap_type == UDP_ENCAP_ESPINUDP) {
+ /* In the XFRM stack, the encapsulation protocol is set to iphdr->protocol by
+ * setting *skb_mac_header(skb) (see esp_output_udp_encap()) where skb->mac_header
+ * points to iphdr->protocol (see xfrm4_tunnel_encap_add()).
+ * However, in esp_xmit(), skb->mac_header doesn't point to iphdr->protocol.
+ * Therefore, the protocol field needs to be corrected.
+ */
+ ip_hdr(skb)->protocol = IPPROTO_UDP;
+
+ esph->seq_no = htonl(seq);
+ }
+
ip_hdr(skb)->tot_len = htons(skb->len);
ip_send_check(ip_hdr(skb));
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c484b1c0fc00..7ad2cafb9276 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1050,11 +1050,6 @@ next:
e++;
}
}
-
- /* Don't let NLM_DONE coalesce into a message, even if it could.
- * Some user space expects NLM_DONE in a separate recv().
- */
- err = skb->len;
out:
cb->args[1] = e;
@@ -1665,5 +1660,5 @@ void __init ip_fib_init(void)
rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0);
rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0);
rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib,
- RTNL_FLAG_DUMP_UNLOCKED);
+ RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 5eb1b8d302bb..2b57cd2b96e2 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -254,7 +254,7 @@ void free_fib_info(struct fib_info *fi)
return;
}
- call_rcu(&fi->rcu, free_fib_info_rcu);
+ call_rcu_hurry(&fi->rcu, free_fib_info_rcu);
}
EXPORT_SYMBOL_GPL(free_fib_info);
@@ -1030,7 +1030,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
bool ecn_ca = false;
nla_strscpy(tmp, nla, sizeof(tmp));
- val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
+ val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
} else {
if (nla_len(nla) != sizeof(u32))
return false;
@@ -1459,8 +1459,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL);
if (!fi)
goto failure;
- fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx,
- cfg->fc_mx_len, extack);
+ fi->fib_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len, extack);
if (IS_ERR(fi->fib_metrics)) {
err = PTR_ERR(fi->fib_metrics);
kfree(fi);
@@ -2270,6 +2269,15 @@ void fib_select_path(struct net *net, struct fib_result *res,
fib_select_default(fl4, res);
check_saddr:
- if (!fl4->saddr)
- fl4->saddr = fib_result_prefsrc(net, res);
+ if (!fl4->saddr) {
+ struct net_device *l3mdev;
+
+ l3mdev = dev_get_by_index_rcu(net, fl4->flowi4_l3mdev);
+
+ if (!l3mdev ||
+ l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) == l3mdev)
+ fl4->saddr = fib_result_prefsrc(net, res);
+ else
+ fl4->saddr = inet_select_addr(l3mdev, 0, RT_SCOPE_LINK);
+ }
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index f474106464d2..8f30e3f00b7f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1629,6 +1629,7 @@ set_result:
res->nhc = nhc;
res->type = fa->fa_type;
res->scope = fi->fib_scope;
+ res->dscp = fa->fa_dscp;
res->fi = fi;
res->table = tb;
res->fa_head = &n->leaf;
diff --git a/net/ipv4/fou_bpf.c b/net/ipv4/fou_bpf.c
index 06e5572f296f..54984f3170a8 100644
--- a/net/ipv4/fou_bpf.c
+++ b/net/ipv4/fou_bpf.c
@@ -64,7 +64,7 @@ __bpf_kfunc int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx,
info->encap.type = TUNNEL_ENCAP_NONE;
}
- if (info->key.tun_flags & TUNNEL_CSUM)
+ if (test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags))
info->encap.flags |= TUNNEL_ENCAP_FLAG_CSUM;
info->encap.sport = encap->sport;
diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c
index a8494f796dca..0abbc413e0fe 100644
--- a/net/ipv4/fou_core.c
+++ b/net/ipv4/fou_core.c
@@ -433,7 +433,7 @@ next_proto:
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[proto]);
- if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive))
+ if (!ops || !ops->callbacks.gro_receive)
goto out;
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 3757fd93523f..6701a98d9a9f 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -73,7 +73,7 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
return -EINVAL;
- tpi->flags = gre_flags_to_tnl_flags(greh->flags);
+ gre_flags_to_tnl_flags(tpi->flags, greh->flags);
hdr_len = gre_calc_hlen(tpi->flags);
if (!pskb_may_pull(skb, nhs + hdr_len))
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 437e782b9663..ab6d0d98dbc3 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -93,6 +93,8 @@
#include <net/ip_fib.h>
#include <net/l3mdev.h>
#include <net/addrconf.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/icmp.h>
/*
* Build xmit assembly blocks
@@ -483,6 +485,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
struct icmp_bxm *param)
{
struct net_device *route_lookup_dev;
+ struct dst_entry *dst, *dst2;
struct rtable *rt, *rt2;
struct flowi4 fl4_dec;
int err;
@@ -508,16 +511,17 @@ static struct rtable *icmp_route_lookup(struct net *net,
/* No need to clone since we're just using its address. */
rt2 = rt;
- rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
- flowi4_to_flowi(fl4), NULL, 0);
- if (!IS_ERR(rt)) {
+ dst = xfrm_lookup(net, &rt->dst,
+ flowi4_to_flowi(fl4), NULL, 0);
+ rt = dst_rtable(dst);
+ if (!IS_ERR(dst)) {
if (rt != rt2)
return rt;
- } else if (PTR_ERR(rt) == -EPERM) {
+ } else if (PTR_ERR(dst) == -EPERM) {
rt = NULL;
- } else
+ } else {
return rt;
-
+ }
err = xfrm_decode_session_reverse(net, skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
if (err)
goto relookup_failed;
@@ -551,19 +555,19 @@ static struct rtable *icmp_route_lookup(struct net *net,
if (err)
goto relookup_failed;
- rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst,
- flowi4_to_flowi(&fl4_dec), NULL,
- XFRM_LOOKUP_ICMP);
- if (!IS_ERR(rt2)) {
+ dst2 = xfrm_lookup(net, &rt2->dst, flowi4_to_flowi(&fl4_dec), NULL,
+ XFRM_LOOKUP_ICMP);
+ rt2 = dst_rtable(dst2);
+ if (!IS_ERR(dst2)) {
dst_release(&rt->dst);
memcpy(fl4, &fl4_dec, sizeof(*fl4));
rt = rt2;
- } else if (PTR_ERR(rt2) == -EPERM) {
+ } else if (PTR_ERR(dst2) == -EPERM) {
if (rt)
dst_release(&rt->dst);
return rt2;
} else {
- err = PTR_ERR(rt2);
+ err = PTR_ERR(dst2);
goto relookup_failed;
}
return rt;
@@ -768,6 +772,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
if (!fl4.saddr)
fl4.saddr = htonl(INADDR_DUMMY);
+ trace_icmp_send(skb_in, type, code);
+
icmp_push_reply(sk, &icmp_param, &fl4, &ipc, &rt);
ende:
ip_rt_put(rt);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 717e97a389a8..9bf09de6a2e7 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1842,7 +1842,8 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
if (!dev) {
struct rtable *rt = ip_route_output(net,
imr->imr_multiaddr.s_addr,
- 0, 0, 0);
+ 0, 0, 0,
+ RT_SCOPE_UNIVERSE);
if (!IS_ERR(rt)) {
dev = rt->dst.dev;
ip_rt_put(rt);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 3b38610958ee..64d07b842e73 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -661,7 +661,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
/*
* This will accept the next outstanding connection.
*/
-struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
+struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
@@ -680,7 +680,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
/* Find already established connection */
if (reqsk_queue_empty(queue)) {
- long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ long timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
/* If this is a non blocking socket don't sleep */
error = -EAGAIN;
@@ -692,6 +692,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
goto out_err;
}
req = reqsk_queue_remove(queue, sk);
+ arg->is_empty = reqsk_queue_empty(queue);
newsk = req->sk;
if (sk->sk_protocol == IPPROTO_TCP &&
@@ -745,7 +746,7 @@ out:
out_err:
newsk = NULL;
req = NULL;
- *err = error;
+ arg->err = error;
goto out;
}
EXPORT_SYMBOL(inet_csk_accept);
@@ -910,6 +911,64 @@ int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
}
EXPORT_SYMBOL(inet_rtx_syn_ack);
+static struct request_sock *
+reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener,
+ bool attach_listener)
+{
+ struct request_sock *req;
+
+ req = kmem_cache_alloc_noprof(ops->slab, GFP_ATOMIC | __GFP_NOWARN);
+ if (!req)
+ return NULL;
+ req->rsk_listener = NULL;
+ if (attach_listener) {
+ if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) {
+ kmem_cache_free(ops->slab, req);
+ return NULL;
+ }
+ req->rsk_listener = sk_listener;
+ }
+ req->rsk_ops = ops;
+ req_to_sk(req)->sk_prot = sk_listener->sk_prot;
+ sk_node_init(&req_to_sk(req)->sk_node);
+ sk_tx_queue_clear(req_to_sk(req));
+ req->saved_syn = NULL;
+ req->syncookie = 0;
+ req->timeout = 0;
+ req->num_timeout = 0;
+ req->num_retrans = 0;
+ req->sk = NULL;
+ refcount_set(&req->rsk_refcnt, 0);
+
+ return req;
+}
+#define reqsk_alloc(...) alloc_hooks(reqsk_alloc_noprof(__VA_ARGS__))
+
+struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
+ struct sock *sk_listener,
+ bool attach_listener)
+{
+ struct request_sock *req = reqsk_alloc(ops, sk_listener,
+ attach_listener);
+
+ if (req) {
+ struct inet_request_sock *ireq = inet_rsk(req);
+
+ ireq->ireq_opt = NULL;
+#if IS_ENABLED(CONFIG_IPV6)
+ ireq->pktopts = NULL;
+#endif
+ atomic64_set(&ireq->ir_cookie, 0);
+ ireq->ireq_state = TCP_NEW_SYN_RECV;
+ write_pnet(&ireq->ireq_net, sock_net(sk_listener));
+ ireq->ireq_family = sk_listener->sk_family;
+ req->timeout = TCP_TIMEOUT_INIT;
+ }
+
+ return req;
+}
+EXPORT_SYMBOL(inet_reqsk_alloc);
+
static struct request_sock *inet_reqsk_clone(struct request_sock *req,
struct sock *sk)
{
@@ -1121,25 +1180,34 @@ drop:
inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq);
}
-static void reqsk_queue_hash_req(struct request_sock *req,
+static bool reqsk_queue_hash_req(struct request_sock *req,
unsigned long timeout)
{
+ bool found_dup_sk = false;
+
+ if (!inet_ehash_insert(req_to_sk(req), NULL, &found_dup_sk))
+ return false;
+
+ /* The timer needs to be setup after a successful insertion. */
timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
mod_timer(&req->rsk_timer, jiffies + timeout);
- inet_ehash_insert(req_to_sk(req), NULL, NULL);
/* before letting lookups find us, make sure all req fields
* are committed to memory and refcnt initialized.
*/
smp_wmb();
refcount_set(&req->rsk_refcnt, 2 + 1);
+ return true;
}
-void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
+bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
unsigned long timeout)
{
- reqsk_queue_hash_req(req, timeout);
+ if (!reqsk_queue_hash_req(req, timeout))
+ return false;
+
inet_csk_reqsk_queue_added(sk);
+ return true;
}
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 7adace541fe2..9712cdb8087c 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -1383,6 +1383,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb,
req.sdiag_family = AF_UNSPEC; /* compatibility */
req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
req.idiag_ext = rc->idiag_ext;
+ req.pad = 0;
req.idiag_states = rc->idiag_states;
req.id = rc->id;
@@ -1398,6 +1399,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
req.sdiag_family = rc->idiag_family;
req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type);
req.idiag_ext = rc->idiag_ext;
+ req.pad = 0;
req.idiag_states = rc->idiag_states;
req.id = rc->id;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index c88c9034d630..d179a2c84222 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -175,7 +175,7 @@ static void fqdir_free_fn(struct work_struct *work)
}
}
-static DECLARE_WORK(fqdir_free_work, fqdir_free_fn);
+static DECLARE_DELAYED_WORK(fqdir_free_work, fqdir_free_fn);
static void fqdir_work_fn(struct work_struct *work)
{
@@ -184,7 +184,7 @@ static void fqdir_work_fn(struct work_struct *work)
rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL);
if (llist_add(&fqdir->free_list, &fqdir_free_list))
- queue_work(system_wq, &fqdir_free_work);
+ queue_delayed_work(system_wq, &fqdir_free_work, HZ);
}
int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net)
@@ -619,7 +619,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
skb_mark_not_on_list(head);
head->prev = NULL;
head->tstamp = q->stamp;
- head->mono_delivery_time = q->mono_delivery_time;
+ head->tstamp_type = q->tstamp_type;
if (sk)
refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index cf88eca5f1b4..48d0d494185b 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -565,7 +565,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
if (likely(inet_match(net, sk2, acookie, ports, dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
- if (twsk_unique(sk, sk2, twp))
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ tcp_twsk_unique(sk, sk2, twp))
break;
}
goto not_unique;
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index e8de45d34d56..337390ba85b4 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -92,13 +92,22 @@ static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
hlist_nulls_add_head_rcu(&tw->tw_node, list);
}
+static void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo)
+{
+ __inet_twsk_schedule(tw, timeo, false);
+}
+
/*
- * Enter the time wait state. This is called with locally disabled BH.
+ * Enter the time wait state.
* Essentially we whip up a timewait bucket, copy the relevant info into it
* from the SK, and mess with hash chains and list linkage.
+ *
+ * The caller must not access @tw anymore after this function returns.
*/
-void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
- struct inet_hashinfo *hashinfo)
+void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw,
+ struct sock *sk,
+ struct inet_hashinfo *hashinfo,
+ int timeo)
{
const struct inet_sock *inet = inet_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -114,6 +123,7 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
hashinfo->bhash_size)];
bhead2 = inet_bhashfn_portaddr(hashinfo, sk, twsk_net(tw), inet->inet_num);
+ local_bh_disable();
spin_lock(&bhead->lock);
spin_lock(&bhead2->lock);
@@ -129,26 +139,34 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
spin_lock(lock);
+ /* Step 2: Hash TW into tcp ehash chain */
inet_twsk_add_node_rcu(tw, &ehead->chain);
/* Step 3: Remove SK from hash chain */
if (__sk_nulls_del_node_init_rcu(sk))
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
- spin_unlock(lock);
+ /* Ensure above writes are committed into memory before updating the
+ * refcount.
+ * Provides ordering vs later refcount_inc().
+ */
+ smp_wmb();
/* tw_refcnt is set to 3 because we have :
* - one reference for bhash chain.
* - one reference for ehash chain.
* - one reference for timer.
- * We can use atomic_set() because prior spin_lock()/spin_unlock()
- * committed into memory all tw fields.
* Also note that after this point, we lost our implicit reference
* so we are not allowed to use tw anymore.
*/
refcount_set(&tw->tw_refcnt, 3);
+
+ inet_twsk_schedule(tw, timeo);
+
+ spin_unlock(lock);
+ local_bh_enable();
}
-EXPORT_SYMBOL_GPL(inet_twsk_hashdance);
+EXPORT_SYMBOL_GPL(inet_twsk_hashdance_schedule);
static void tw_timer_handler(struct timer_list *t)
{
@@ -192,7 +210,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
tw->tw_prot = sk->sk_prot_creator;
atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
twsk_net_set(tw, sock_net(sk));
- timer_setup(&tw->tw_timer, tw_timer_handler, TIMER_PINNED);
+ timer_setup(&tw->tw_timer, tw_timer_handler, 0);
/*
* Because we use RCU lookups, we should not set tw_refcnt
* to a non null value before everything is setup for this
@@ -217,7 +235,34 @@ EXPORT_SYMBOL_GPL(inet_twsk_alloc);
*/
void inet_twsk_deschedule_put(struct inet_timewait_sock *tw)
{
- if (del_timer_sync(&tw->tw_timer))
+ struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo;
+ spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
+
+ /* inet_twsk_purge() walks over all sockets, including tw ones,
+ * and removes them via inet_twsk_deschedule_put() after a
+ * refcount_inc_not_zero().
+ *
+ * inet_twsk_hashdance_schedule() must (re)init the refcount before
+ * arming the timer, i.e. inet_twsk_purge can obtain a reference to
+ * a twsk that did not yet schedule the timer.
+ *
+ * The ehash lock synchronizes these two:
+ * After acquiring the lock, the timer is always scheduled (else
+ * timer_shutdown returns false), because hashdance_schedule releases
+ * the ehash lock only after completing the timer initialization.
+ *
+ * Without grabbing the ehash lock, we get:
+ * 1) cpu x sets twsk refcount to 3
+ * 2) cpu y bumps refcount to 4
+ * 3) cpu y calls inet_twsk_deschedule_put() and shuts timer down
+ * 4) cpu x tries to start timer, but mod_timer is a noop post-shutdown
+ * -> timer refcount is never decremented.
+ */
+ spin_lock(lock);
+ /* Makes sure hashdance_schedule() has completed */
+ spin_unlock(lock);
+
+ if (timer_shutdown_sync(&tw->tw_timer))
inet_twsk_kill(tw);
inet_twsk_put(tw);
}
@@ -264,14 +309,18 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
/* Remove all non full sockets (TIME_WAIT and NEW_SYN_RECV) for dead netns */
-void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
+void inet_twsk_purge(struct inet_hashinfo *hashinfo)
{
+ struct inet_ehash_bucket *head = &hashinfo->ehash[0];
+ unsigned int ehash_mask = hashinfo->ehash_mask;
struct hlist_nulls_node *node;
unsigned int slot;
struct sock *sk;
- for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
- struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
+ for (slot = 0; slot <= ehash_mask; slot++, head++) {
+ if (hlist_nulls_empty(&head->chain))
+ continue;
+
restart_rcu:
cond_resched();
rcu_read_lock();
@@ -283,15 +332,13 @@ restart:
TCPF_NEW_SYN_RECV))
continue;
- if (sk->sk_family != family ||
- refcount_read(&sock_net(sk)->ns.count))
+ if (refcount_read(&sock_net(sk)->ns.count))
continue;
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
continue;
- if (unlikely(sk->sk_family != family ||
- refcount_read(&sock_net(sk)->ns.count))) {
+ if (refcount_read(&sock_net(sk)->ns.count)) {
sock_gen_put(sk);
goto restart;
}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fb947d1613fe..a92664a5ef2e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -355,7 +355,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
qp->iif = dev->ifindex;
qp->q.stamp = skb->tstamp;
- qp->q.mono_delivery_time = skb->mono_delivery_time;
+ qp->q.tstamp_type = skb->tstamp_type;
qp->q.meat += skb->len;
qp->ecn |= ecn;
add_frag_mem_limit(qp->q.fqdir, skb->truesize);
@@ -580,7 +580,6 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &dist_min,
},
- { }
};
/* secret interval has been deprecated */
@@ -593,7 +592,6 @@ static struct ctl_table ip4_frags_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
static int __net_init ip4_frags_ns_ctl_register(struct net *net)
@@ -632,7 +630,7 @@ err_alloc:
static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
table = net->ipv4.frags_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv4.frags_hdr);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 57ddcd8c62f6..ba205473522e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -265,6 +265,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
struct net *net = dev_net(skb->dev);
struct metadata_dst *tun_dst = NULL;
struct erspan_base_hdr *ershdr;
+ IP_TUNNEL_DECLARE_FLAGS(flags);
struct ip_tunnel_net *itn;
struct ip_tunnel *tunnel;
const struct iphdr *iph;
@@ -272,12 +273,14 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
int ver;
int len;
+ ip_tunnel_flags_copy(flags, tpi->flags);
+
itn = net_generic(net, erspan_net_id);
iph = ip_hdr(skb);
if (is_erspan_type1(gre_hdr_len)) {
ver = 0;
- tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
- tpi->flags | TUNNEL_NO_KEY,
+ __set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
iph->saddr, iph->daddr, 0);
} else {
if (unlikely(!pskb_may_pull(skb,
@@ -287,8 +290,8 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
ver = ershdr->ver;
iph = ip_hdr(skb);
- tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
- tpi->flags | TUNNEL_KEY,
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
iph->saddr, iph->daddr, tpi->key);
}
@@ -312,10 +315,9 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
struct ip_tunnel_info *info;
unsigned char *gh;
__be64 tun_id;
- __be16 flags;
- tpi->flags |= TUNNEL_KEY;
- flags = tpi->flags;
+ __set_bit(IP_TUNNEL_KEY_BIT, tpi->flags);
+ ip_tunnel_flags_copy(flags, tpi->flags);
tun_id = key32_to_tunnel_id(tpi->key);
tun_dst = ip_tun_rx_dst(skb, flags,
@@ -338,7 +340,8 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
ERSPAN_V2_MDSIZE);
info = &tun_dst->u.tun_info;
- info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
+ info->key.tun_flags);
info->options_len = sizeof(*md);
}
@@ -381,10 +384,13 @@ static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
tnl_params = &tunnel->parms.iph;
if (tunnel->collect_md || tnl_params->daddr == 0) {
- __be16 flags;
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
__be64 tun_id;
- flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ __set_bit(IP_TUNNEL_CSUM_BIT, flags);
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ ip_tunnel_flags_and(flags, tpi->flags, flags);
+
tun_id = key32_to_tunnel_id(tpi->key);
tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
if (!tun_dst)
@@ -464,12 +470,15 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- __be16 flags = tunnel->parms.o_flags;
+ IP_TUNNEL_DECLARE_FLAGS(flags);
+
+ ip_tunnel_flags_copy(flags, tunnel->parms.o_flags);
/* Push GRE header. */
gre_build_header(skb, tunnel->tun_hlen,
flags, proto, tunnel->parms.o_key,
- (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
+ test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
+ htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
@@ -483,10 +492,10 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
int tunnel_hlen;
- __be16 flags;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
@@ -500,14 +509,19 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
goto err_free_skb;
/* Push Tunnel header. */
- if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
+ if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ tunnel->parms.o_flags)))
goto err_free_skb;
- flags = tun_info->key.tun_flags &
- (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
+ __set_bit(IP_TUNNEL_CSUM_BIT, flags);
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ __set_bit(IP_TUNNEL_SEQ_BIT, flags);
+ ip_tunnel_flags_and(flags, tun_info->key.tun_flags, flags);
+
gre_build_header(skb, tunnel_hlen, flags, proto,
tunnel_id_to_key32(tun_info->key.tun_id),
- (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
+ test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
+ htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
@@ -521,6 +535,7 @@ err_free_skb:
static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
struct erspan_metadata *md;
@@ -536,7 +551,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb;
key = &tun_info->key;
- if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
+ if (!test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags))
goto err_free_skb;
if (tun_info->options_len < sizeof(*md))
goto err_free_skb;
@@ -589,8 +604,9 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb;
}
- gre_build_header(skb, 8, TUNNEL_SEQ,
- proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));
+ __set_bit(IP_TUNNEL_SEQ_BIT, flags);
+ gre_build_header(skb, 8, flags, proto, 0,
+ htonl(atomic_fetch_inc(&tunnel->o_seqno)));
ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
@@ -664,7 +680,8 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
tnl_params = &tunnel->parms.iph;
}
- if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
+ if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ tunnel->parms.o_flags)))
goto free_skb;
__gre_xmit(skb, dev, tnl_params, skb->protocol);
@@ -706,7 +723,7 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
/* Push ERSPAN header */
if (tunnel->erspan_ver == 0) {
proto = htons(ETH_P_ERSPAN);
- tunnel->parms.o_flags &= ~TUNNEL_SEQ;
+ __clear_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags);
} else if (tunnel->erspan_ver == 1) {
erspan_build_header(skb, ntohl(tunnel->parms.o_key),
tunnel->index,
@@ -721,7 +738,7 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
goto free_skb;
}
- tunnel->parms.o_flags &= ~TUNNEL_KEY;
+ __clear_bit(IP_TUNNEL_KEY_BIT, tunnel->parms.o_flags);
__gre_xmit(skb, dev, &tunnel->parms.iph, proto);
return NETDEV_TX_OK;
@@ -744,7 +761,8 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
return NETDEV_TX_OK;
}
- if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
+ if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ tunnel->parms.o_flags)))
goto free_skb;
if (skb_cow_head(skb, dev->needed_headroom))
@@ -762,7 +780,6 @@ free_skb:
static void ipgre_link_update(struct net_device *dev, bool set_mtu)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- __be16 flags;
int len;
len = tunnel->tun_hlen;
@@ -776,12 +793,11 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu)
dev->needed_headroom += len;
if (set_mtu)
- dev->mtu = max_t(int, dev->mtu - len, 68);
-
- flags = tunnel->parms.o_flags;
+ WRITE_ONCE(dev->mtu, max_t(int, dev->mtu - len, 68));
- if (flags & TUNNEL_SEQ ||
- (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
+ if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags) ||
+ (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
+ tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
dev->features &= ~NETIF_F_GSO_SOFTWARE;
dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
} else {
@@ -790,20 +806,29 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu)
}
}
-static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p,
+static int ipgre_tunnel_ctl(struct net_device *dev,
+ struct ip_tunnel_parm_kern *p,
int cmd)
{
+ __be16 i_flags, o_flags;
int err;
+ if (!ip_tunnel_flags_is_be16_compat(p->i_flags) ||
+ !ip_tunnel_flags_is_be16_compat(p->o_flags))
+ return -EOVERFLOW;
+
+ i_flags = ip_tunnel_flags_to_be16(p->i_flags);
+ o_flags = ip_tunnel_flags_to_be16(p->o_flags);
+
if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
- ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING)))
+ ((i_flags | o_flags) & (GRE_VERSION | GRE_ROUTING)))
return -EINVAL;
}
- p->i_flags = gre_flags_to_tnl_flags(p->i_flags);
- p->o_flags = gre_flags_to_tnl_flags(p->o_flags);
+ gre_flags_to_tnl_flags(p->i_flags, i_flags);
+ gre_flags_to_tnl_flags(p->o_flags, o_flags);
err = ip_tunnel_ctl(dev, p, cmd);
if (err)
@@ -812,15 +837,18 @@ static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p,
if (cmd == SIOCCHGTUNNEL) {
struct ip_tunnel *t = netdev_priv(dev);
- t->parms.i_flags = p->i_flags;
- t->parms.o_flags = p->o_flags;
+ ip_tunnel_flags_copy(t->parms.i_flags, p->i_flags);
+ ip_tunnel_flags_copy(t->parms.o_flags, p->o_flags);
if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
ipgre_link_update(dev, true);
}
- p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
- p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
+ i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
+ ip_tunnel_flags_from_be16(p->i_flags, i_flags);
+ o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
+ ip_tunnel_flags_from_be16(p->o_flags, o_flags);
+
return 0;
}
@@ -960,7 +988,6 @@ static void ipgre_tunnel_setup(struct net_device *dev)
static void __gre_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel;
- __be16 flags;
tunnel = netdev_priv(dev);
tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
@@ -972,14 +999,13 @@ static void __gre_tunnel_init(struct net_device *dev)
dev->features |= GRE_FEATURES | NETIF_F_LLTX;
dev->hw_features |= GRE_FEATURES;
- flags = tunnel->parms.o_flags;
-
/* TCP offload with GRE SEQ is not supported, nor can we support 2
* levels of outer headers requiring an update.
*/
- if (flags & TUNNEL_SEQ)
+ if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags))
return;
- if (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)
+ if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
+ tunnel->encap.type != TUNNEL_ENCAP_NONE)
return;
dev->features |= NETIF_F_GSO_SOFTWARE;
@@ -1136,7 +1162,7 @@ static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
static int ipgre_netlink_parms(struct net_device *dev,
struct nlattr *data[],
struct nlattr *tb[],
- struct ip_tunnel_parm *parms,
+ struct ip_tunnel_parm_kern *parms,
__u32 *fwmark)
{
struct ip_tunnel *t = netdev_priv(dev);
@@ -1152,10 +1178,12 @@ static int ipgre_netlink_parms(struct net_device *dev,
parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
if (data[IFLA_GRE_IFLAGS])
- parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
+ gre_flags_to_tnl_flags(parms->i_flags,
+ nla_get_be16(data[IFLA_GRE_IFLAGS]));
if (data[IFLA_GRE_OFLAGS])
- parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
+ gre_flags_to_tnl_flags(parms->o_flags,
+ nla_get_be16(data[IFLA_GRE_OFLAGS]));
if (data[IFLA_GRE_IKEY])
parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
@@ -1203,7 +1231,7 @@ static int ipgre_netlink_parms(struct net_device *dev,
static int erspan_netlink_parms(struct net_device *dev,
struct nlattr *data[],
struct nlattr *tb[],
- struct ip_tunnel_parm *parms,
+ struct ip_tunnel_parm_kern *parms,
__u32 *fwmark)
{
struct ip_tunnel *t = netdev_priv(dev);
@@ -1362,7 +1390,7 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- struct ip_tunnel_parm p;
+ struct ip_tunnel_parm_kern p;
__u32 fwmark = 0;
int err;
@@ -1380,7 +1408,7 @@ static int erspan_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- struct ip_tunnel_parm p;
+ struct ip_tunnel_parm_kern p;
__u32 fwmark = 0;
int err;
@@ -1399,8 +1427,8 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
+ struct ip_tunnel_parm_kern p;
__u32 fwmark = t->fwmark;
- struct ip_tunnel_parm p;
int err;
err = ipgre_newlink_encap_setup(dev, data);
@@ -1415,8 +1443,8 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
if (err < 0)
return err;
- t->parms.i_flags = p.i_flags;
- t->parms.o_flags = p.o_flags;
+ ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
+ ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);
ipgre_link_update(dev, !tb[IFLA_MTU]);
@@ -1428,8 +1456,8 @@ static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
+ struct ip_tunnel_parm_kern p;
__u32 fwmark = t->fwmark;
- struct ip_tunnel_parm p;
int err;
err = ipgre_newlink_encap_setup(dev, data);
@@ -1444,8 +1472,8 @@ static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
if (err < 0)
return err;
- t->parms.i_flags = p.i_flags;
- t->parms.o_flags = p.o_flags;
+ ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
+ ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);
return 0;
}
@@ -1501,8 +1529,10 @@ static size_t ipgre_get_size(const struct net_device *dev)
static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_parm *p = &t->parms;
- __be16 o_flags = p->o_flags;
+ struct ip_tunnel_parm_kern *p = &t->parms;
+ IP_TUNNEL_DECLARE_FLAGS(o_flags);
+
+ ip_tunnel_flags_copy(o_flags, p->o_flags);
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
nla_put_be16(skb, IFLA_GRE_IFLAGS,
@@ -1550,7 +1580,7 @@ static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (t->erspan_ver <= 2) {
if (t->erspan_ver != 0 && !t->collect_md)
- t->parms.o_flags |= TUNNEL_KEY;
+ __set_bit(IP_TUNNEL_KEY_BIT, t->parms.o_flags);
if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
goto nla_put_failure;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 5e9c8156656a..d6fbcbd2358a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -616,7 +616,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
dst = skb_dst(skb);
if (curr_dst != dst) {
hint = ip_extract_route_hint(net, skb,
- ((struct rtable *)dst)->rt_type);
+ dst_rtable(dst)->rt_type);
/* dispatch old sublist */
if (!list_empty(&sublist))
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 39229fd0601a..b90d0f78ac80 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -198,7 +198,7 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- struct rtable *rt = (struct rtable *)dst;
+ struct rtable *rt = dst_rtable(dst);
struct net_device *dev = dst->dev;
unsigned int hh_len = LL_RESERVED_SPACE(dev);
struct neighbour *neigh;
@@ -475,7 +475,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
goto packet_routed;
/* Make sure we can route this packet. */
- rt = (struct rtable *)__sk_dst_check(sk, 0);
+ rt = dst_rtable(__sk_dst_check(sk, 0));
if (!rt) {
__be32 daddr;
@@ -764,7 +764,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
{
struct iphdr *iph;
struct sk_buff *skb2;
- bool mono_delivery_time = skb->mono_delivery_time;
+ u8 tstamp_type = skb->tstamp_type;
struct rtable *rt = skb_rtable(skb);
unsigned int mtu, hlen, ll_rs;
struct ip_fraglist_iter iter;
@@ -856,7 +856,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
}
}
- skb_set_delivery_time(skb, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb, tstamp, tstamp_type);
err = output(net, sk, skb);
if (!err)
@@ -912,7 +912,7 @@ slow_path:
/*
* Put this fragment into the sending queue.
*/
- skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb2, tstamp, tstamp_type);
err = output(net, sk, skb2);
if (err)
goto fail;
@@ -971,7 +971,7 @@ static int __ip_append_data(struct sock *sk,
bool zc = false;
unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
int csummode = CHECKSUM_NONE;
- struct rtable *rt = (struct rtable *)cork->dst;
+ struct rtable *rt = dst_rtable(cork->dst);
bool paged, hold_tskey, extra_uref = false;
unsigned int wmem_alloc_delta = 0;
u32 tskey = 0;
@@ -1390,7 +1390,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
struct ip_options *opt = NULL;
- struct rtable *rt = (struct rtable *)cork->dst;
+ struct rtable *rt = dst_rtable(cork->dst);
struct iphdr *iph;
u8 pmtudisc, ttl;
__be16 df = 0;
@@ -1457,7 +1457,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority);
skb->mark = cork->mark;
- skb->tstamp = cork->transmit_time;
+ if (sk_is_tcp(sk))
+ skb_set_delivery_time(skb, cork->transmit_time, SKB_CLOCK_MONOTONIC);
+ else
+ skb_set_delivery_type_by_clockid(skb, cork->transmit_time, sk->sk_clockid);
/*
* Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
* on dst refcount
@@ -1649,7 +1652,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
arg->csumoffset) = csum_fold(csum_add(nskb->csum,
arg->csum));
nskb->ip_summed = CHECKSUM_NONE;
- nskb->mono_delivery_time = !!transmit_time;
+ if (transmit_time)
+ nskb->tstamp_type = SKB_CLOCK_MONOTONIC;
if (txhash)
skb_set_hash(nskb, txhash, PKT_HASH_TYPE_L4);
ip_push_pending_frames(sk, &fl4);
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 1b8d8ff9a237..5cffad42fe8c 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -56,17 +56,13 @@ static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
IP_TNL_HASH_BITS);
}
-static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
- __be16 flags, __be32 key)
+static bool ip_tunnel_key_match(const struct ip_tunnel_parm_kern *p,
+ const unsigned long *flags, __be32 key)
{
- if (p->i_flags & TUNNEL_KEY) {
- if (flags & TUNNEL_KEY)
- return key == p->i_key;
- else
- /* key expected, none present */
- return false;
- } else
- return !(flags & TUNNEL_KEY);
+ if (!test_bit(IP_TUNNEL_KEY_BIT, flags))
+ return !test_bit(IP_TUNNEL_KEY_BIT, p->i_flags);
+
+ return test_bit(IP_TUNNEL_KEY_BIT, p->i_flags) && p->i_key == key;
}
/* Fallback tunnel: no source, no destination, no key, no options
@@ -81,7 +77,7 @@ static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
Given src, dst and key, find appropriate for input tunnel.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
- int link, __be16 flags,
+ int link, const unsigned long *flags,
__be32 remote, __be32 local,
__be32 key)
{
@@ -143,7 +139,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
}
hlist_for_each_entry_rcu(t, head, hash_node) {
- if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
+ if ((!test_bit(IP_TUNNEL_NO_KEY_BIT, flags) &&
+ t->parms.i_key != key) ||
t->parms.iph.saddr != 0 ||
t->parms.iph.daddr != 0 ||
!(t->dev->flags & IFF_UP))
@@ -171,7 +168,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm_kern *parms)
{
unsigned int h;
__be32 remote;
@@ -182,7 +179,8 @@ static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
else
remote = 0;
- if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
+ if (!test_bit(IP_TUNNEL_KEY_BIT, parms->i_flags) &&
+ test_bit(IP_TUNNEL_VTI_BIT, parms->i_flags))
i_key = 0;
h = ip_tunnel_hash(i_key, remote);
@@ -206,17 +204,19 @@ static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
}
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
- struct ip_tunnel_parm *parms,
+ struct ip_tunnel_parm_kern *parms,
int type)
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
+ IP_TUNNEL_DECLARE_FLAGS(flags);
__be32 key = parms->i_key;
- __be16 flags = parms->i_flags;
int link = parms->link;
struct ip_tunnel *t = NULL;
struct hlist_head *head = ip_bucket(itn, parms);
+ ip_tunnel_flags_copy(flags, parms->i_flags);
+
hlist_for_each_entry_rcu(t, head, hash_node) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
@@ -230,7 +230,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
static struct net_device *__ip_tunnel_create(struct net *net,
const struct rtnl_link_ops *ops,
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm_kern *parms)
{
int err;
struct ip_tunnel *tunnel;
@@ -326,7 +326,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
static struct ip_tunnel *ip_tunnel_create(struct net *net,
struct ip_tunnel_net *itn,
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm_kern *parms)
{
struct ip_tunnel *nt;
struct net_device *dev;
@@ -386,15 +386,15 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
}
#endif
- if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
- ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
+ if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.i_flags) !=
+ test_bit(IP_TUNNEL_CSUM_BIT, tpi->flags)) {
DEV_STATS_INC(tunnel->dev, rx_crc_errors);
DEV_STATS_INC(tunnel->dev, rx_errors);
goto drop;
}
- if (tunnel->parms.i_flags&TUNNEL_SEQ) {
- if (!(tpi->flags&TUNNEL_SEQ) ||
+ if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.i_flags)) {
+ if (!test_bit(IP_TUNNEL_SEQ_BIT, tpi->flags) ||
(tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
DEV_STATS_INC(tunnel->dev, rx_errors);
@@ -543,7 +543,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
struct rt6_info *rt6;
__be32 daddr;
- rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
+ rt6 = skb_valid_dst(skb) ? dst_rt6_info(skb_dst(skb)) :
NULL;
daddr = md ? dst : tunnel->parms.iph.daddr;
@@ -638,7 +638,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
- if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
+ if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags))
df = htons(IP_DF);
if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
key->u.ipv4.dst, true)) {
@@ -871,7 +871,7 @@ EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
static void ip_tunnel_update(struct ip_tunnel_net *itn,
struct ip_tunnel *t,
struct net_device *dev,
- struct ip_tunnel_parm *p,
+ struct ip_tunnel_parm_kern *p,
bool set_mtu,
__u32 fwmark)
{
@@ -897,13 +897,14 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
t->fwmark = fwmark;
mtu = ip_tunnel_bind_dev(dev);
if (set_mtu)
- dev->mtu = mtu;
+ WRITE_ONCE(dev->mtu, mtu);
}
dst_cache_reset(&t->dst_cache);
netdev_state_change(dev);
}
-int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
+int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p,
+ int cmd)
{
int err = 0;
struct ip_tunnel *t = netdev_priv(dev);
@@ -927,10 +928,10 @@ int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
goto done;
if (p->iph.ttl)
p->iph.frag_off |= htons(IP_DF);
- if (!(p->i_flags & VTI_ISVTI)) {
- if (!(p->i_flags & TUNNEL_KEY))
+ if (!test_bit(IP_TUNNEL_VTI_BIT, p->i_flags)) {
+ if (!test_bit(IP_TUNNEL_KEY_BIT, p->i_flags))
p->i_key = 0;
- if (!(p->o_flags & TUNNEL_KEY))
+ if (!test_bit(IP_TUNNEL_KEY_BIT, p->o_flags))
p->o_key = 0;
}
@@ -1005,16 +1006,58 @@ done:
}
EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
+bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp,
+ const void __user *data)
+{
+ struct ip_tunnel_parm p;
+
+ if (copy_from_user(&p, data, sizeof(p)))
+ return false;
+
+ strscpy(kp->name, p.name);
+ kp->link = p.link;
+ ip_tunnel_flags_from_be16(kp->i_flags, p.i_flags);
+ ip_tunnel_flags_from_be16(kp->o_flags, p.o_flags);
+ kp->i_key = p.i_key;
+ kp->o_key = p.o_key;
+ memcpy(&kp->iph, &p.iph, min(sizeof(kp->iph), sizeof(p.iph)));
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_parm_from_user);
+
+bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp)
+{
+ struct ip_tunnel_parm p;
+
+ if (!ip_tunnel_flags_is_be16_compat(kp->i_flags) ||
+ !ip_tunnel_flags_is_be16_compat(kp->o_flags))
+ return false;
+
+ memset(&p, 0, sizeof(p));
+
+ strscpy(p.name, kp->name);
+ p.link = kp->link;
+ p.i_flags = ip_tunnel_flags_to_be16(kp->i_flags);
+ p.o_flags = ip_tunnel_flags_to_be16(kp->o_flags);
+ p.i_key = kp->i_key;
+ p.o_key = kp->o_key;
+ memcpy(&p.iph, &kp->iph, min(sizeof(p.iph), sizeof(kp->iph)));
+
+ return !copy_to_user(data, &p, sizeof(p));
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_parm_to_user);
+
int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
void __user *data, int cmd)
{
- struct ip_tunnel_parm p;
+ struct ip_tunnel_parm_kern p;
int err;
- if (copy_from_user(&p, data, sizeof(p)))
+ if (!ip_tunnel_parm_from_user(&p, data))
return -EFAULT;
err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
- if (!err && copy_to_user(data, &p, sizeof(p)))
+ if (!err && !ip_tunnel_parm_to_user(data, &p))
return -EFAULT;
return err;
}
@@ -1039,7 +1082,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
new_mtu = max_mtu;
}
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
@@ -1056,7 +1099,6 @@ static void ip_tunnel_dev_free(struct net_device *dev)
gro_cells_destroy(&tunnel->gro_cells);
dst_cache_destroy(&tunnel->dst_cache);
- free_percpu(dev->tstats);
}
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
@@ -1077,7 +1119,7 @@ struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- return tunnel->net;
+ return READ_ONCE(tunnel->net);
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);
@@ -1093,7 +1135,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
struct rtnl_link_ops *ops, char *devname)
{
struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
- struct ip_tunnel_parm parms;
+ struct ip_tunnel_parm_kern parms;
unsigned int i;
itn->rtnl_link_ops = ops;
@@ -1171,7 +1213,7 @@ void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
- struct ip_tunnel_parm *p, __u32 fwmark)
+ struct ip_tunnel_parm_kern *p, __u32 fwmark)
{
struct ip_tunnel *nt;
struct net *net = dev_net(dev);
@@ -1225,7 +1267,7 @@ err_register_netdevice:
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
- struct ip_tunnel_parm *p, __u32 fwmark)
+ struct ip_tunnel_parm_kern *p, __u32 fwmark)
{
struct ip_tunnel *t;
struct ip_tunnel *tunnel = netdev_priv(dev);
@@ -1270,20 +1312,15 @@ int ip_tunnel_init(struct net_device *dev)
dev->needs_free_netdev = true;
dev->priv_destructor = ip_tunnel_dev_free;
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
- if (err) {
- free_percpu(dev->tstats);
+ if (err)
return err;
- }
err = gro_cells_init(&tunnel->gro_cells, dev);
if (err) {
dst_cache_destroy(&tunnel->dst_cache);
- free_percpu(dev->tstats);
return err;
}
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 80ccd6661aa3..a3676155be78 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -125,6 +125,7 @@ EXPORT_SYMBOL_GPL(__iptunnel_pull_header);
struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
gfp_t flags)
{
+ IP_TUNNEL_DECLARE_FLAGS(tun_flags) = { };
struct metadata_dst *res;
struct ip_tunnel_info *dst, *src;
@@ -144,10 +145,10 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
sizeof(struct in6_addr));
else
dst->key.u.ipv4.dst = src->key.u.ipv4.src;
- dst->key.tun_flags = src->key.tun_flags;
+ ip_tunnel_flags_copy(dst->key.tun_flags, src->key.tun_flags);
dst->mode = src->mode | IP_TUNNEL_INFO_TX;
ip_tunnel_info_opts_set(dst, ip_tunnel_info_opts(src),
- src->options_len, 0);
+ src->options_len, tun_flags);
return res;
}
@@ -497,7 +498,7 @@ static int ip_tun_parse_opts_geneve(struct nlattr *attr,
opt->opt_class = nla_get_be16(attr);
attr = tb[LWTUNNEL_IP_OPT_GENEVE_TYPE];
opt->type = nla_get_u8(attr);
- info->key.tun_flags |= TUNNEL_GENEVE_OPT;
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags);
}
return sizeof(struct geneve_opt) + data_len;
@@ -525,7 +526,7 @@ static int ip_tun_parse_opts_vxlan(struct nlattr *attr,
attr = tb[LWTUNNEL_IP_OPT_VXLAN_GBP];
md->gbp = nla_get_u32(attr);
md->gbp &= VXLAN_GBP_MASK;
- info->key.tun_flags |= TUNNEL_VXLAN_OPT;
+ __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags);
}
return sizeof(struct vxlan_metadata);
@@ -574,7 +575,7 @@ static int ip_tun_parse_opts_erspan(struct nlattr *attr,
set_hwid(&md->u.md2, nla_get_u8(attr));
}
- info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags);
}
return sizeof(struct erspan_metadata);
@@ -585,7 +586,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info,
{
int err, rem, opt_len, opts_len = 0;
struct nlattr *nla;
- __be16 type = 0;
+ u32 type = 0;
if (!attr)
return 0;
@@ -598,7 +599,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info,
nla_for_each_attr(nla, nla_data(attr), nla_len(attr), rem) {
switch (nla_type(nla)) {
case LWTUNNEL_IP_OPTS_GENEVE:
- if (type && type != TUNNEL_GENEVE_OPT)
+ if (type && type != IP_TUNNEL_GENEVE_OPT_BIT)
return -EINVAL;
opt_len = ip_tun_parse_opts_geneve(nla, info, opts_len,
extack);
@@ -607,7 +608,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info,
opts_len += opt_len;
if (opts_len > IP_TUNNEL_OPTS_MAX)
return -EINVAL;
- type = TUNNEL_GENEVE_OPT;
+ type = IP_TUNNEL_GENEVE_OPT_BIT;
break;
case LWTUNNEL_IP_OPTS_VXLAN:
if (type)
@@ -617,7 +618,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info,
if (opt_len < 0)
return opt_len;
opts_len += opt_len;
- type = TUNNEL_VXLAN_OPT;
+ type = IP_TUNNEL_VXLAN_OPT_BIT;
break;
case LWTUNNEL_IP_OPTS_ERSPAN:
if (type)
@@ -627,7 +628,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info,
if (opt_len < 0)
return opt_len;
opts_len += opt_len;
- type = TUNNEL_ERSPAN_OPT;
+ type = IP_TUNNEL_ERSPAN_OPT_BIT;
break;
default:
return -EINVAL;
@@ -705,10 +706,16 @@ static int ip_tun_build_state(struct net *net, struct nlattr *attr,
if (tb[LWTUNNEL_IP_TOS])
tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]);
- if (tb[LWTUNNEL_IP_FLAGS])
- tun_info->key.tun_flags |=
- (nla_get_be16(tb[LWTUNNEL_IP_FLAGS]) &
- ~TUNNEL_OPTIONS_PRESENT);
+ if (tb[LWTUNNEL_IP_FLAGS]) {
+ IP_TUNNEL_DECLARE_FLAGS(flags);
+
+ ip_tunnel_flags_from_be16(flags,
+ nla_get_be16(tb[LWTUNNEL_IP_FLAGS]));
+ ip_tunnel_clear_options_present(flags);
+
+ ip_tunnel_flags_or(tun_info->key.tun_flags,
+ tun_info->key.tun_flags, flags);
+ }
tun_info->mode = IP_TUNNEL_INFO_TX;
tun_info->options_len = opt_len;
@@ -812,18 +819,18 @@ static int ip_tun_fill_encap_opts(struct sk_buff *skb, int type,
struct nlattr *nest;
int err = 0;
- if (!(tun_info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))
+ if (!ip_tunnel_is_options_present(tun_info->key.tun_flags))
return 0;
nest = nla_nest_start_noflag(skb, type);
if (!nest)
return -ENOMEM;
- if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT)
+ if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_info->key.tun_flags))
err = ip_tun_fill_encap_opts_geneve(skb, tun_info);
- else if (tun_info->key.tun_flags & TUNNEL_VXLAN_OPT)
+ else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_info->key.tun_flags))
err = ip_tun_fill_encap_opts_vxlan(skb, tun_info);
- else if (tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)
+ else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags))
err = ip_tun_fill_encap_opts_erspan(skb, tun_info);
if (err) {
@@ -846,7 +853,8 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb,
nla_put_in_addr(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) ||
nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) ||
nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) ||
- nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags) ||
+ nla_put_be16(skb, LWTUNNEL_IP_FLAGS,
+ ip_tunnel_flags_to_be16(tun_info->key.tun_flags)) ||
ip_tun_fill_encap_opts(skb, LWTUNNEL_IP_OPTS, tun_info))
return -ENOMEM;
@@ -857,11 +865,11 @@ static int ip_tun_opts_nlsize(struct ip_tunnel_info *info)
{
int opt_len;
- if (!(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))
+ if (!ip_tunnel_is_options_present(info->key.tun_flags))
return 0;
opt_len = nla_total_size(0); /* LWTUNNEL_IP_OPTS */
- if (info->key.tun_flags & TUNNEL_GENEVE_OPT) {
+ if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) {
struct geneve_opt *opt;
int offset = 0;
@@ -874,10 +882,10 @@ static int ip_tun_opts_nlsize(struct ip_tunnel_info *info)
/* OPT_GENEVE_DATA */
offset += sizeof(*opt) + opt->length * 4;
}
- } else if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
+ } else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags)) {
opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_VXLAN */
+ nla_total_size(4); /* OPT_VXLAN_GBP */
- } else if (info->key.tun_flags & TUNNEL_ERSPAN_OPT) {
+ } else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags)) {
struct erspan_metadata *md = ip_tunnel_info_opts(info);
opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_ERSPAN */
@@ -984,10 +992,17 @@ static int ip6_tun_build_state(struct net *net, struct nlattr *attr,
if (tb[LWTUNNEL_IP6_TC])
tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]);
- if (tb[LWTUNNEL_IP6_FLAGS])
- tun_info->key.tun_flags |=
- (nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]) &
- ~TUNNEL_OPTIONS_PRESENT);
+ if (tb[LWTUNNEL_IP6_FLAGS]) {
+ IP_TUNNEL_DECLARE_FLAGS(flags);
+ __be16 data;
+
+ data = nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]);
+ ip_tunnel_flags_from_be16(flags, data);
+ ip_tunnel_clear_options_present(flags);
+
+ ip_tunnel_flags_or(tun_info->key.tun_flags,
+ tun_info->key.tun_flags, flags);
+ }
tun_info->mode = IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6;
tun_info->options_len = opt_len;
@@ -1008,7 +1023,8 @@ static int ip6_tun_fill_encap_info(struct sk_buff *skb,
nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) ||
nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.tos) ||
nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.ttl) ||
- nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags) ||
+ nla_put_be16(skb, LWTUNNEL_IP6_FLAGS,
+ ip_tunnel_flags_to_be16(tun_info->key.tun_flags)) ||
ip_tun_fill_encap_opts(skb, LWTUNNEL_IP6_OPTS, tun_info))
return -ENOMEM;
@@ -1116,7 +1132,7 @@ bool ip_tunnel_netlink_encap_parms(struct nlattr *data[],
EXPORT_SYMBOL_GPL(ip_tunnel_netlink_encap_parms);
void ip_tunnel_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm_kern *parms)
{
if (data[IFLA_IPTUN_LINK])
parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
@@ -1139,8 +1155,12 @@ void ip_tunnel_netlink_parms(struct nlattr *data[],
if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
parms->iph.frag_off = htons(IP_DF);
- if (data[IFLA_IPTUN_FLAGS])
- parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]);
+ if (data[IFLA_IPTUN_FLAGS]) {
+ __be16 flags;
+
+ flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]);
+ ip_tunnel_flags_from_be16(parms->i_flags, flags);
+ }
if (data[IFLA_IPTUN_PROTO])
parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index ee587adb169f..14536da9f5dc 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -51,8 +51,11 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
const struct iphdr *iph = ip_hdr(skb);
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
- tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+ __set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
+
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
iph->saddr, iph->daddr, 0);
if (tunnel) {
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
@@ -167,7 +170,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
struct flowi *fl)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct ip_tunnel_parm *parms = &tunnel->parms;
+ struct ip_tunnel_parm_kern *parms = &tunnel->parms;
struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev; /* Device to other host */
int pkt_len = skb->len;
@@ -322,8 +325,11 @@ static int vti4_err(struct sk_buff *skb, u32 info)
const struct iphdr *iph = (const struct iphdr *)skb->data;
int protocol = iph->protocol;
struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
+
+ __set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
- tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
iph->daddr, iph->saddr, 0);
if (!tunnel)
return -1;
@@ -373,8 +379,9 @@ static int vti4_err(struct sk_buff *skb, u32 info)
}
static int
-vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
+vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd)
{
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
int err = 0;
if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
@@ -383,20 +390,26 @@ vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
return -EINVAL;
}
- if (!(p->i_flags & GRE_KEY))
+ if (!ip_tunnel_flags_is_be16_compat(p->i_flags) ||
+ !ip_tunnel_flags_is_be16_compat(p->o_flags))
+ return -EOVERFLOW;
+
+ if (!(ip_tunnel_flags_to_be16(p->i_flags) & GRE_KEY))
p->i_key = 0;
- if (!(p->o_flags & GRE_KEY))
+ if (!(ip_tunnel_flags_to_be16(p->o_flags) & GRE_KEY))
p->o_key = 0;
- p->i_flags = VTI_ISVTI;
+ __set_bit(IP_TUNNEL_VTI_BIT, flags);
+ ip_tunnel_flags_copy(p->i_flags, flags);
err = ip_tunnel_ctl(dev, p, cmd);
if (err)
return err;
if (cmd != SIOCDELTUNNEL) {
- p->i_flags |= GRE_KEY;
- p->o_flags |= GRE_KEY;
+ ip_tunnel_flags_from_be16(flags, GRE_KEY);
+ ip_tunnel_flags_or(p->i_flags, p->i_flags, flags);
+ ip_tunnel_flags_or(p->o_flags, p->o_flags, flags);
}
return 0;
}
@@ -531,7 +544,7 @@ static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
}
static void vti_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms,
+ struct ip_tunnel_parm_kern *parms,
__u32 *fwmark)
{
memset(parms, 0, sizeof(*parms));
@@ -541,7 +554,7 @@ static void vti_netlink_parms(struct nlattr *data[],
if (!data)
return;
- parms->i_flags = VTI_ISVTI;
+ __set_bit(IP_TUNNEL_VTI_BIT, parms->i_flags);
if (data[IFLA_VTI_LINK])
parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
@@ -566,7 +579,7 @@ static int vti_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- struct ip_tunnel_parm parms;
+ struct ip_tunnel_parm_kern parms;
__u32 fwmark = 0;
vti_netlink_parms(data, &parms, &fwmark);
@@ -578,8 +591,8 @@ static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
+ struct ip_tunnel_parm_kern p;
__u32 fwmark = t->fwmark;
- struct ip_tunnel_parm p;
vti_netlink_parms(data, &p, &fwmark);
return ip_tunnel_changelink(dev, tb, &p, fwmark);
@@ -606,7 +619,7 @@ static size_t vti_get_size(const struct net_device *dev)
static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_parm *p = &t->parms;
+ struct ip_tunnel_parm_kern *p = &t->parms;
if (nla_put_u32(skb, IFLA_VTI_LINK, p->link) ||
nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key) ||
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index f2696eaadbe6..923a2ef68c2f 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -130,13 +130,16 @@ static int ipip_err(struct sk_buff *skb, u32 info)
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
const struct iphdr *iph = (const struct iphdr *)skb->data;
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct ip_tunnel *t;
int err = 0;
- t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
- iph->daddr, iph->saddr, 0);
+ __set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
+
+ t = ip_tunnel_lookup(itn, skb->dev->ifindex, flags, iph->daddr,
+ iph->saddr, 0);
if (!t) {
err = -ENOENT;
goto out;
@@ -213,13 +216,16 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
{
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
struct metadata_dst *tun_dst = NULL;
struct ip_tunnel *tunnel;
const struct iphdr *iph;
+ __set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
+
iph = ip_hdr(skb);
- tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
- iph->saddr, iph->daddr, 0);
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags, iph->saddr,
+ iph->daddr, 0);
if (tunnel) {
const struct tnl_ptk_info *tpi;
@@ -238,7 +244,9 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
if (tunnel->collect_md) {
- tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
+ ip_tunnel_flags_zero(flags);
+
+ tun_dst = ip_tun_rx_dst(skb, flags, 0, 0);
if (!tun_dst)
return 0;
ip_tunnel_md_udp_encap(skb, &tun_dst->u.tun_info);
@@ -330,7 +338,7 @@ static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
}
static int
-ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
+ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd)
{
if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
if (p->iph.version != 4 ||
@@ -340,7 +348,8 @@ ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
}
p->i_key = p->o_key = 0;
- p->i_flags = p->o_flags = 0;
+ ip_tunnel_flags_zero(p->i_flags);
+ ip_tunnel_flags_zero(p->o_flags);
return ip_tunnel_ctl(dev, p, cmd);
}
@@ -405,8 +414,8 @@ static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
}
static void ipip_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms, bool *collect_md,
- __u32 *fwmark)
+ struct ip_tunnel_parm_kern *parms,
+ bool *collect_md, __u32 *fwmark)
{
memset(parms, 0, sizeof(*parms));
@@ -432,8 +441,8 @@ static int ipip_newlink(struct net *src_net, struct net_device *dev,
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ struct ip_tunnel_parm_kern p;
__u32 fwmark = 0;
if (ip_tunnel_netlink_encap_parms(data, &ipencap)) {
@@ -452,8 +461,8 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ struct ip_tunnel_parm_kern p;
bool collect_md;
__u32 fwmark = t->fwmark;
@@ -510,7 +519,7 @@ static size_t ipip_get_size(const struct net_device *dev)
static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct ip_tunnel_parm *parm = &tunnel->parms;
+ struct ip_tunnel_parm_kern *parm = &tunnel->parms;
if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index fd5c01c8489f..6c750bd13dd8 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -441,7 +441,7 @@ static bool ipmr_init_vif_indev(const struct net_device *dev)
static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
struct net_device *tunnel_dev, *new_dev;
- struct ip_tunnel_parm p = { };
+ struct ip_tunnel_parm_kern p = { };
int err;
tunnel_dev = __dev_get_by_name(net, "tunl0");
diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c
index 0e3ee1532848..8ddac1f595ed 100644
--- a/net/ipv4/metrics.c
+++ b/net/ipv4/metrics.c
@@ -7,7 +7,7 @@
#include <net/net_namespace.h>
#include <net/tcp.h>
-static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx,
+static int ip_metrics_convert(struct nlattr *fc_mx,
int fc_mx_len, u32 *metrics,
struct netlink_ext_ack *extack)
{
@@ -31,7 +31,7 @@ static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx,
char tmp[TCP_CA_NAME_MAX];
nla_strscpy(tmp, nla, sizeof(tmp));
- val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
+ val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
if (val == TCP_CA_UNSPEC) {
NL_SET_ERR_MSG(extack, "Unknown tcp congestion algorithm");
return -EINVAL;
@@ -63,7 +63,7 @@ static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx,
return 0;
}
-struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx,
+struct dst_metrics *ip_fib_metrics_init(struct nlattr *fc_mx,
int fc_mx_len,
struct netlink_ext_ack *extack)
{
@@ -77,7 +77,7 @@ struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx,
if (unlikely(!fib_metrics))
return ERR_PTR(-ENOMEM);
- err = ip_metrics_convert(net, fc_mx, fc_mx_len, fib_metrics->metrics,
+ err = ip_metrics_convert(fc_mx, fc_mx_len, fib_metrics->metrics,
extack);
if (!err) {
refcount_set(&fib_metrics->refcnt, 1);
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index b9062f4552ac..3ab908b74795 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -44,7 +44,7 @@ static int iptable_filter_table_init(struct net *net)
return -ENOMEM;
/* Entry 1 is the FORWARD hook */
((struct ipt_standard *)repl->entries)[1].target.verdict =
- forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
+ forward ? -NF_ACCEPT - 1 : NF_DROP - 1;
err = ipt_register_table(net, &packet_filter, repl, filter_ops);
kfree(repl);
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 4d42d0756fd7..a5db7c67d61b 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -145,25 +145,27 @@ static struct pernet_operations iptable_nat_net_ops = {
static int __init iptable_nat_init(void)
{
- int ret = xt_register_template(&nf_nat_ipv4_table,
- iptable_nat_table_init);
+ int ret;
+ /* net->gen->ptr[iptable_nat_net_id] must be allocated
+ * before calling iptable_nat_table_init().
+ */
+ ret = register_pernet_subsys(&iptable_nat_net_ops);
if (ret < 0)
return ret;
- ret = register_pernet_subsys(&iptable_nat_net_ops);
- if (ret < 0) {
- xt_unregister_template(&nf_nat_ipv4_table);
- return ret;
- }
+ ret = xt_register_template(&nf_nat_ipv4_table,
+ iptable_nat_table_init);
+ if (ret < 0)
+ unregister_pernet_subsys(&iptable_nat_net_ops);
return ret;
}
static void __exit iptable_nat_exit(void)
{
- unregister_pernet_subsys(&iptable_nat_net_ops);
xt_unregister_template(&nf_nat_ipv4_table);
+ unregister_pernet_subsys(&iptable_nat_net_ops);
}
module_init(iptable_nat_init);
diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c
index 69e331799604..73e66a088e25 100644
--- a/net/ipv4/netfilter/nf_tproxy_ipv4.c
+++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c
@@ -58,6 +58,8 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
laddr = 0;
indev = __in_dev_get_rcu(skb->dev);
+ if (!indev)
+ return daddr;
in_dev_for_each_ifa_rcu(ifa, indev) {
if (ifa->ifa_flags & IFA_F_SECONDARY)
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 535856b0f0ed..6b9787ee8601 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -888,9 +888,10 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nexthop *nh,
p = nla_data(nla);
for (i = 0; i < nhg->num_nh; ++i) {
- p->id = nhg->nh_entries[i].nh->id;
- p->weight = nhg->nh_entries[i].weight - 1;
- p += 1;
+ *p++ = (struct nexthop_grp) {
+ .id = nhg->nh_entries[i].nh->id,
+ .weight = nhg->nh_entries[i].weight - 1,
+ };
}
if (nhg->resilient && nla_put_nh_group_res(skb, nhg))
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 823306487a82..619ddc087957 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -946,7 +946,7 @@ static enum skb_drop_reason __ping_queue_rcv_skb(struct sock *sk,
pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n",
inet_sk(sk), inet_sk(sk)->inet_num, skb);
if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
- kfree_skb_reason(skb, reason);
+ sk_skb_reason_drop(sk, skb, reason);
pr_debug("ping_queue_rcv_skb -> failed\n");
return reason;
}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 914bc9c35cc7..40053a02bae1 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -33,6 +33,7 @@
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/mptcp.h>
+#include <net/proto_memory.h>
#include <net/udp.h>
#include <net/udplite.h>
#include <linux/bottom_half.h>
@@ -43,7 +44,7 @@
#include <net/sock.h>
#include <net/raw.h>
-#define TCPUDP_MIB_MAX max_t(u32, UDP_MIB_MAX, TCP_MIB_MAX)
+#define TCPUDP_MIB_MAX MAX_T(u32, UDP_MIB_MAX, TCP_MIB_MAX)
/*
* Report socket allocation statistics [mea@utu.fi]
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 4cb43401e0e0..474dfd263c8b 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -301,7 +301,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
ipv4_pktinfo_prepare(sk, skb, true);
if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
- kfree_skb_reason(skb, reason);
+ sk_skb_reason_drop(sk, skb, reason);
return NET_RX_DROP;
}
@@ -312,7 +312,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
{
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY);
+ sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_XFRM_POLICY);
return NET_RX_DROP;
}
nf_reset_ct(skb);
@@ -360,7 +360,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
skb->protocol = htons(ETH_P_IP);
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark;
- skb->tstamp = sockc->transmit_time;
+ skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid);
skb_dst_set(skb, &rt->dst);
*rtp = NULL;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b814fdab19f7..13c0f1d455f3 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -106,9 +106,6 @@
#include "fib_lookup.h"
-#define RT_FL_TOS(oldflp4) \
- ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
-
#define RT_GC_TIMEOUT (300*HZ)
#define DEFAULT_MIN_PMTU (512 + 20 + 20)
@@ -132,7 +129,8 @@ struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
INDIRECT_CALLABLE_SCOPE
unsigned int ipv4_mtu(const struct dst_entry *dst);
-static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
+static void ipv4_negative_advice(struct sock *sk,
+ struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu,
@@ -498,15 +496,6 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
}
EXPORT_SYMBOL(__ip_select_ident);
-static void ip_rt_fix_tos(struct flowi4 *fl4)
-{
- __u8 tos = RT_FL_TOS(fl4);
-
- fl4->flowi4_tos = tos & IPTOS_RT_MASK;
- if (tos & RTO_ONLINK)
- fl4->flowi4_scope = RT_SCOPE_LINK;
-}
-
static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
const struct sock *sk, const struct iphdr *iph,
int oif, __u8 tos, u8 prot, u32 mark,
@@ -831,28 +820,21 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
u32 mark = skb->mark;
__u8 tos = iph->tos;
- rt = (struct rtable *) dst;
+ rt = dst_rtable(dst);
__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
__ip_do_redirect(rt, skb, &fl4, true);
}
-static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
+static void ipv4_negative_advice(struct sock *sk,
+ struct dst_entry *dst)
{
- struct rtable *rt = (struct rtable *)dst;
- struct dst_entry *ret = dst;
+ struct rtable *rt = dst_rtable(dst);
- if (rt) {
- if (dst->obsolete > 0) {
- ip_rt_put(rt);
- ret = NULL;
- } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
- rt->dst.expires) {
- ip_rt_put(rt);
- ret = NULL;
- }
- }
- return ret;
+ if ((dst->obsolete > 0) ||
+ (rt->rt_flags & RTCF_REDIRECTED) ||
+ rt->dst.expires)
+ sk_dst_reset(sk);
}
/*
@@ -1056,7 +1038,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu,
bool confirm_neigh)
{
- struct rtable *rt = (struct rtable *) dst;
+ struct rtable *rt = dst_rtable(dst);
struct flowi4 fl4;
ip_rt_build_flow_key(&fl4, sk, skb);
@@ -1127,7 +1109,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
- rt = (struct rtable *)odst;
+ rt = dst_rtable(odst);
if (odst->obsolete && !odst->ops->check(odst, 0)) {
rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
if (IS_ERR(rt))
@@ -1136,7 +1118,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
new = true;
}
- __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu);
+ __ip_rt_update_pmtu(dst_rtable(xfrm_dst_path(&rt->dst)), &fl4, mtu);
if (!dst_check(&rt->dst, 0)) {
if (new)
@@ -1193,7 +1175,7 @@ EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst,
u32 cookie)
{
- struct rtable *rt = (struct rtable *) dst;
+ struct rtable *rt = dst_rtable(dst);
/* All IPV4 dsts are created with ->obsolete set to the value
* DST_OBSOLETE_FORCE_CHK which forces validation calls down
@@ -1281,7 +1263,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
struct flowi4 fl4 = {
.daddr = iph->daddr,
.saddr = iph->saddr,
- .flowi4_tos = RT_TOS(iph->tos),
+ .flowi4_tos = iph->tos & IPTOS_RT_MASK,
.flowi4_oif = rt->dst.dev->ifindex,
.flowi4_iif = skb->dev->ifindex,
.flowi4_mark = skb->mark,
@@ -1499,7 +1481,6 @@ static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
struct uncached_list {
spinlock_t lock;
struct list_head head;
- struct list_head quarantine;
};
static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
@@ -1528,10 +1509,8 @@ void rt_del_uncached_list(struct rtable *rt)
static void ipv4_dst_destroy(struct dst_entry *dst)
{
- struct rtable *rt = (struct rtable *)dst;
-
ip_dst_metrics_put(dst);
- rt_del_uncached_list(rt);
+ rt_del_uncached_list(dst_rtable(dst));
}
void rt_flush_dev(struct net_device *dev)
@@ -1552,7 +1531,7 @@ void rt_flush_dev(struct net_device *dev)
rt->dst.dev = blackhole_netdev;
netdev_ref_replace(dev, blackhole_netdev,
&rt->dst.dev_tracker, GFP_ATOMIC);
- list_move(&rt->dst.rt_uncached, &ul->quarantine);
+ list_del_init(&rt->dst.rt_uncached);
}
spin_unlock_bh(&ul->lock);
}
@@ -1944,7 +1923,7 @@ static u32 fib_multipath_custom_hash_outer(const struct net *net,
hash_keys.ports.dst = keys.ports.dst;
*p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
- return flow_hash_from_keys(&hash_keys);
+ return fib_multipath_hash_from_keys(net, &hash_keys);
}
static u32 fib_multipath_custom_hash_inner(const struct net *net,
@@ -1993,7 +1972,7 @@ static u32 fib_multipath_custom_hash_inner(const struct net *net,
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
hash_keys.ports.dst = keys.ports.dst;
- return flow_hash_from_keys(&hash_keys);
+ return fib_multipath_hash_from_keys(net, &hash_keys);
}
static u32 fib_multipath_custom_hash_skb(const struct net *net,
@@ -2030,7 +2009,7 @@ static u32 fib_multipath_custom_hash_fl4(const struct net *net,
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
hash_keys.ports.dst = fl4->fl4_dport;
- return flow_hash_from_keys(&hash_keys);
+ return fib_multipath_hash_from_keys(net, &hash_keys);
}
/* if skb is set it will be used and fl4 can be NULL */
@@ -2051,7 +2030,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
hash_keys.addrs.v4addrs.src = fl4->saddr;
hash_keys.addrs.v4addrs.dst = fl4->daddr;
}
- mhash = flow_hash_from_keys(&hash_keys);
+ mhash = fib_multipath_hash_from_keys(net, &hash_keys);
break;
case 1:
/* skb is currently provided only when forwarding */
@@ -2085,7 +2064,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
hash_keys.ports.dst = fl4->fl4_dport;
hash_keys.basic.ip_proto = fl4->flowi4_proto;
}
- mhash = flow_hash_from_keys(&hash_keys);
+ mhash = fib_multipath_hash_from_keys(net, &hash_keys);
break;
case 2:
memset(&hash_keys, 0, sizeof(hash_keys));
@@ -2116,7 +2095,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
hash_keys.addrs.v4addrs.src = fl4->saddr;
hash_keys.addrs.v4addrs.dst = fl4->daddr;
}
- mhash = flow_hash_from_keys(&hash_keys);
+ mhash = fib_multipath_hash_from_keys(net, &hash_keys);
break;
case 3:
if (skb)
@@ -2639,7 +2618,7 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
struct rtable *rth;
fl4->flowi4_iif = LOOPBACK_IFINDEX;
- ip_rt_fix_tos(fl4);
+ fl4->flowi4_tos &= IPTOS_RT_MASK;
rcu_read_lock();
rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
@@ -2832,7 +2811,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
- struct rtable *ort = (struct rtable *) dst_orig;
+ struct rtable *ort = dst_rtable(dst_orig);
struct rtable *rt;
rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, DST_OBSOLETE_DEAD, 0);
@@ -2877,9 +2856,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
if (flp4->flowi4_proto) {
flp4->flowi4_oif = rt->dst.dev->ifindex;
- rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
- flowi4_to_flowi(flp4),
- sk, 0);
+ rt = dst_rtable(xfrm_lookup_route(net, &rt->dst,
+ flowi4_to_flowi(flp4),
+ sk, 0));
}
return rt;
@@ -2888,9 +2867,9 @@ EXPORT_SYMBOL_GPL(ip_route_output_flow);
/* called with rcu_read_lock held */
static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
- struct rtable *rt, u32 table_id, struct flowi4 *fl4,
- struct sk_buff *skb, u32 portid, u32 seq,
- unsigned int flags)
+ struct rtable *rt, u32 table_id, dscp_t dscp,
+ struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
+ u32 seq, unsigned int flags)
{
struct rtmsg *r;
struct nlmsghdr *nlh;
@@ -2906,7 +2885,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
r->rtm_family = AF_INET;
r->rtm_dst_len = 32;
r->rtm_src_len = 0;
- r->rtm_tos = fl4 ? fl4->flowi4_tos : 0;
+ r->rtm_tos = inet_dscp_to_dsfield(dscp);
r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT;
if (nla_put_u32(skb, RTA_TABLE, table_id))
goto nla_put_failure;
@@ -3056,7 +3035,7 @@ static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb,
goto next;
err = rt_fill_info(net, fnhe->fnhe_daddr, 0, rt,
- table_id, NULL, skb,
+ table_id, 0, NULL, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, flags);
if (err)
@@ -3352,7 +3331,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fri.tb_id = table_id;
fri.dst = res.prefix;
fri.dst_len = res.prefixlen;
- fri.dscp = inet_dsfield_to_dscp(fl4.flowi4_tos);
+ fri.dscp = res.dscp;
fri.type = rt->rt_type;
fri.offload = 0;
fri.trap = 0;
@@ -3379,8 +3358,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, RTM_NEWROUTE, &fri, 0);
} else {
- err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb,
- NETLINK_CB(in_skb).portid,
+ err = rt_fill_info(net, dst, src, rt, table_id, res.dscp, &fl4,
+ skb, NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, 0);
}
if (err < 0)
@@ -3409,7 +3388,7 @@ static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
static int ip_rt_gc_elasticity __read_mostly = 8;
static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU;
-static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
+static int ipv4_sysctl_rtcache_flush(const struct ctl_table *__ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = (struct net *)__ctl->extra1;
@@ -3510,7 +3489,6 @@ static struct ctl_table ipv4_route_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
static const char ipv4_route_flush_procname[] = "flush";
@@ -3544,7 +3522,6 @@ static struct ctl_table ipv4_route_netns_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { },
};
static __net_init int sysctl_route_net_init(struct net *net)
@@ -3562,16 +3539,14 @@ static __net_init int sysctl_route_net_init(struct net *net)
/* Don't export non-whitelisted sysctls to unprivileged users */
if (net->user_ns != &init_user_ns) {
- if (tbl[0].procname != ipv4_route_flush_procname) {
- tbl[0].procname = NULL;
+ if (tbl[0].procname != ipv4_route_flush_procname)
table_size = 0;
- }
}
/* Update the variables to point into the current struct net
* except for the first element flush
*/
- for (i = 1; i < ARRAY_SIZE(ipv4_route_netns_table) - 1; i++)
+ for (i = 1; i < table_size; i++)
tbl[i].data += (void *)net - (void *)&init_net;
}
tbl[0].extra1 = net;
@@ -3591,7 +3566,7 @@ err_dup:
static __net_exit void sysctl_route_net_exit(struct net *net)
{
- struct ctl_table *tbl;
+ const struct ctl_table *tbl;
tbl = net->ipv4.route_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv4.route_hdr);
@@ -3685,7 +3660,6 @@ int __init ip_rt_init(void)
struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
INIT_LIST_HEAD(&ul->head);
- INIT_LIST_HEAD(&ul->quarantine);
spin_lock_init(&ul->lock);
}
#ifdef CONFIG_IP_ROUTE_CLASSID
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 500f665f98cb..1948d15f1f28 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -462,7 +462,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
}
/* Try to redo what tcp_v4_send_synack did. */
- req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
+ req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? :
+ dst_metric(&rt->dst, RTAX_WINDOW);
/* limit the window selection if the user enforce a smaller rx buffer */
full_space = tcp_full_space(sk);
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
@@ -495,6 +496,6 @@ out:
out_free:
reqsk_free(req);
out_drop:
- kfree_skb_reason(skb, reason);
+ sk_skb_reason_drop(sk, skb, reason);
return NULL;
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 7e4f16a7dcc1..4af0c234d8d7 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -62,7 +62,7 @@ static void set_local_port_range(struct net *net, unsigned int low, unsigned int
}
/* Validate changes from /proc interface. */
-static int ipv4_local_port_range(struct ctl_table *table, int write,
+static int ipv4_local_port_range(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = table->data;
@@ -96,7 +96,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
}
/* Validate changes from /proc interface. */
-static int ipv4_privileged_ports(struct ctl_table *table, int write,
+static int ipv4_privileged_ports(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = container_of(table->data, struct net,
@@ -130,7 +130,8 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write,
return ret;
}
-static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high)
+static void inet_get_ping_group_range_table(const struct ctl_table *table,
+ kgid_t *low, kgid_t *high)
{
kgid_t *data = table->data;
struct net *net =
@@ -145,7 +146,8 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low
}
/* Update system visible IP port range */
-static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high)
+static void set_ping_group_range(const struct ctl_table *table,
+ kgid_t low, kgid_t high)
{
kgid_t *data = table->data;
struct net *net =
@@ -157,7 +159,7 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig
}
/* Validate changes from /proc interface. */
-static int ipv4_ping_group_range(struct ctl_table *table, int write,
+static int ipv4_ping_group_range(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct user_namespace *user_ns = current_user_ns();
@@ -192,7 +194,7 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
return ret;
}
-static int ipv4_fwd_update_priority(struct ctl_table *table, int write,
+static int ipv4_fwd_update_priority(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net;
@@ -208,7 +210,7 @@ static int ipv4_fwd_update_priority(struct ctl_table *table, int write,
return ret;
}
-static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
+static int proc_tcp_congestion_control(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = container_of(ctl->data, struct net,
@@ -228,7 +230,7 @@ static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
return ret;
}
-static int proc_tcp_available_congestion_control(struct ctl_table *ctl,
+static int proc_tcp_available_congestion_control(const struct ctl_table *ctl,
int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
@@ -244,7 +246,7 @@ static int proc_tcp_available_congestion_control(struct ctl_table *ctl,
return ret;
}
-static int proc_allowed_congestion_control(struct ctl_table *ctl,
+static int proc_allowed_congestion_control(const struct ctl_table *ctl,
int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
@@ -281,7 +283,7 @@ static int sscanf_key(char *buf, __le32 *key)
return ret;
}
-static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
+static int proc_tcp_fastopen_key(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = container_of(table->data, struct net,
@@ -352,7 +354,7 @@ bad_key:
return ret;
}
-static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
+static int proc_tfo_blackhole_detect_timeout(const struct ctl_table *table,
int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
@@ -367,7 +369,7 @@ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
return ret;
}
-static int proc_tcp_available_ulp(struct ctl_table *ctl,
+static int proc_tcp_available_ulp(const struct ctl_table *ctl,
int write, void *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -384,7 +386,7 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
return ret;
}
-static int proc_tcp_ehash_entries(struct ctl_table *table, int write,
+static int proc_tcp_ehash_entries(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = container_of(table->data, struct net,
@@ -408,7 +410,7 @@ static int proc_tcp_ehash_entries(struct ctl_table *table, int write,
return proc_dointvec(&tbl, write, buffer, lenp, ppos);
}
-static int proc_udp_hash_entries(struct ctl_table *table, int write,
+static int proc_udp_hash_entries(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = container_of(table->data, struct net,
@@ -432,7 +434,7 @@ static int proc_udp_hash_entries(struct ctl_table *table, int write,
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
+static int proc_fib_multipath_hash_policy(const struct ctl_table *table, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -447,7 +449,7 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
return ret;
}
-static int proc_fib_multipath_hash_fields(struct ctl_table *table, int write,
+static int proc_fib_multipath_hash_fields(const struct ctl_table *table, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -462,6 +464,61 @@ static int proc_fib_multipath_hash_fields(struct ctl_table *table, int write,
return ret;
}
+
+static u32 proc_fib_multipath_hash_rand_seed __ro_after_init;
+
+static void proc_fib_multipath_hash_init_rand_seed(void)
+{
+ get_random_bytes(&proc_fib_multipath_hash_rand_seed,
+ sizeof(proc_fib_multipath_hash_rand_seed));
+}
+
+static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed)
+{
+ struct sysctl_fib_multipath_hash_seed new = {
+ .user_seed = user_seed,
+ .mp_seed = (user_seed ? user_seed :
+ proc_fib_multipath_hash_rand_seed),
+ };
+
+ WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed, new);
+}
+
+static int proc_fib_multipath_hash_seed(const struct ctl_table *table, int write,
+ void *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct sysctl_fib_multipath_hash_seed *mphs;
+ struct net *net = table->data;
+ struct ctl_table tmp;
+ u32 user_seed;
+ int ret;
+
+ mphs = &net->ipv4.sysctl_fib_multipath_hash_seed;
+ user_seed = mphs->user_seed;
+
+ tmp = *table;
+ tmp.data = &user_seed;
+
+ ret = proc_douintvec_minmax(&tmp, write, buffer, lenp, ppos);
+
+ if (write && ret == 0) {
+ proc_fib_multipath_hash_set_seed(net, user_seed);
+ call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
+ }
+
+ return ret;
+}
+#else
+
+static void proc_fib_multipath_hash_init_rand_seed(void)
+{
+}
+
+static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed)
+{
+}
+
#endif
static struct ctl_table ipv4_table[] = {
@@ -575,7 +632,6 @@ static struct ctl_table ipv4_table[] = {
.extra1 = &sysctl_fib_sync_mem_min,
.extra2 = &sysctl_fib_sync_mem_max,
},
- { }
};
static struct ctl_table ipv4_net_table[] = {
@@ -1071,6 +1127,13 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = SYSCTL_ONE,
.extra2 = &fib_multipath_hash_fields_all_mask,
},
+ {
+ .procname = "fib_multipath_hash_seed",
+ .data = &init_net,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_fib_multipath_hash_seed,
+ },
#endif
{
.procname = "ip_unprivileged_port_start",
@@ -1502,11 +1565,19 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ONE,
},
- { }
+ {
+ .procname = "tcp_rto_min_us",
+ .data = &init_net.ipv4.sysctl_tcp_rto_min_us,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ },
};
static __net_init int ipv4_sysctl_init_net(struct net *net)
{
+ size_t table_size = ARRAY_SIZE(ipv4_net_table);
struct ctl_table *table;
table = ipv4_net_table;
@@ -1517,7 +1588,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
if (!table)
goto err_alloc;
- for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) {
+ for (i = 0; i < table_size; i++) {
if (table[i].data) {
/* Update the variables to point into
* the current struct net
@@ -1533,7 +1604,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
}
net->ipv4.ipv4_hdr = register_net_sysctl_sz(net, "net/ipv4", table,
- ARRAY_SIZE(ipv4_net_table));
+ table_size);
if (!net->ipv4.ipv4_hdr)
goto err_reg;
@@ -1541,6 +1612,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
if (!net->ipv4.sysctl_local_reserved_ports)
goto err_ports;
+ proc_fib_multipath_hash_set_seed(net, 0);
+
return 0;
err_ports:
@@ -1554,7 +1627,7 @@ err_alloc:
static __net_exit void ipv4_sysctl_exit_net(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
kfree(net->ipv4.sysctl_local_reserved_ports);
table = net->ipv4.ipv4_hdr->ctl_table_arg;
@@ -1575,6 +1648,8 @@ static __init int sysctl_ipv4_init(void)
if (!hdr)
return -ENOMEM;
+ proc_fib_multipath_hash_init_rand_seed();
+
if (register_pernet_subsys(&ipv4_sysctl_ops)) {
unregister_net_sysctl_table(hdr);
return -ENOMEM;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e767721b3a58..e03a342c9162 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -272,13 +272,17 @@
#include <net/inet_common.h>
#include <net/tcp.h>
#include <net/mptcp.h>
+#include <net/proto_memory.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/sock.h>
+#include <net/rstreason.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <net/busy_poll.h>
+#include <net/hotdata.h>
+#include <trace/events/tcp.h>
#include <net/rps.h>
/* Track pending CMSGs. */
@@ -290,6 +294,9 @@ enum {
DEFINE_PER_CPU(unsigned int, tcp_orphan_count);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count);
+DEFINE_PER_CPU(u32, tcp_tw_isn);
+EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn);
+
long sysctl_tcp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_mem);
@@ -414,6 +421,7 @@ void tcp_init_sock(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ int rto_min_us;
tp->out_of_order_queue = RB_ROOT;
sk->tcp_rtx_queue = RB_ROOT;
@@ -422,7 +430,8 @@ void tcp_init_sock(struct sock *sk)
INIT_LIST_HEAD(&tp->tsorted_sent_queue);
icsk->icsk_rto = TCP_TIMEOUT_INIT;
- icsk->icsk_rto_min = TCP_RTO_MIN;
+ rto_min_us = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_min_us);
+ icsk->icsk_rto_min = usecs_to_jiffies(rto_min_us);
icsk->icsk_delack_max = TCP_DELACK_MAX;
tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U);
@@ -592,7 +601,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
*/
mask |= EPOLLOUT | EPOLLWRNORM;
}
- /* This barrier is coupled with smp_wmb() in tcp_reset() */
+ /* This barrier is coupled with smp_wmb() in tcp_done_with_error() */
smp_rmb();
if (READ_ONCE(sk->sk_err) ||
!skb_queue_empty_lockless(&sk->sk_error_queue))
@@ -1159,6 +1168,9 @@ new_segment:
process_backlog++;
+#ifdef CONFIG_SKB_DECRYPTED
+ skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
+#endif
tcp_skb_entail(sk, skb);
copy = size_goal;
@@ -1184,7 +1196,7 @@ new_segment:
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i >= READ_ONCE(sysctl_max_skb_frags)) {
+ if (i >= READ_ONCE(net_hotdata.sysctl_max_skb_frags)) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -1416,8 +1428,6 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
struct sk_buff *skb;
int copied = 0, err = 0;
- /* XXX -- need to support SO_PEEK_OFF */
-
skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
if (err)
@@ -1721,7 +1731,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
space = tcp_space_from_win(sk, val);
if (space > sk->sk_rcvbuf) {
WRITE_ONCE(sk->sk_rcvbuf, space);
- tcp_sk(sk)->window_clamp = val;
+ WRITE_ONCE(tcp_sk(sk)->window_clamp, val);
}
return 0;
}
@@ -2328,6 +2338,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
int target; /* Read at least this many bytes */
long timeo;
struct sk_buff *skb, *last;
+ u32 peek_offset = 0;
u32 urg_hole = 0;
err = -ENOTCONN;
@@ -2361,7 +2372,8 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
seq = &tp->copied_seq;
if (flags & MSG_PEEK) {
- peek_seq = tp->copied_seq;
+ peek_offset = max(sk_peek_offset(sk, flags), 0);
+ peek_seq = tp->copied_seq + peek_offset;
seq = &peek_seq;
}
@@ -2464,11 +2476,11 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
}
if ((flags & MSG_PEEK) &&
- (peek_seq - copied - urg_hole != tp->copied_seq)) {
+ (peek_seq - peek_offset - copied - urg_hole != tp->copied_seq)) {
net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n",
current->comm,
task_pid_nr(current));
- peek_seq = tp->copied_seq;
+ peek_seq = tp->copied_seq + peek_offset;
}
continue;
@@ -2509,7 +2521,10 @@ found_ok_skb:
WRITE_ONCE(*seq, *seq + used);
copied += used;
len -= used;
-
+ if (flags & MSG_PEEK)
+ sk_peek_offset_fwd(sk, used);
+ else
+ sk_peek_offset_bwd(sk, used);
tcp_rcv_space_adjust(sk);
skip_copy:
@@ -2637,6 +2652,10 @@ void tcp_set_state(struct sock *sk, int state)
if (oldstate != TCP_ESTABLISHED)
TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
break;
+ case TCP_CLOSE_WAIT:
+ if (oldstate == TCP_SYN_RECV)
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
+ break;
case TCP_CLOSE:
if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
@@ -2648,7 +2667,7 @@ void tcp_set_state(struct sock *sk, int state)
inet_put_port(sk);
fallthrough;
default:
- if (oldstate == TCP_ESTABLISHED)
+ if (oldstate == TCP_ESTABLISHED || oldstate == TCP_CLOSE_WAIT)
TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
}
@@ -2710,7 +2729,7 @@ void tcp_shutdown(struct sock *sk, int how)
/* If we've already sent a FIN, or it's a closed state, skip this. */
if ((1 << sk->sk_state) &
(TCPF_ESTABLISHED | TCPF_SYN_SENT |
- TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
+ TCPF_CLOSE_WAIT)) {
/* Clear out any half completed packets. FIN if needed. */
if (tcp_close_state(sk))
tcp_send_fin(sk);
@@ -2744,7 +2763,15 @@ static bool tcp_too_many_orphans(int shift)
READ_ONCE(sysctl_tcp_max_orphans);
}
-bool tcp_check_oom(struct sock *sk, int shift)
+static bool tcp_out_of_memory(const struct sock *sk)
+{
+ if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
+ sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2))
+ return true;
+ return false;
+}
+
+bool tcp_check_oom(const struct sock *sk, int shift)
{
bool too_many_orphans, out_of_socket_memory;
@@ -2805,7 +2832,8 @@ void __tcp_close(struct sock *sk, long timeout)
/* Unread data was tossed, zap the connection. */
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
tcp_set_state(sk, TCP_CLOSE);
- tcp_send_active_reset(sk, sk->sk_allocation);
+ tcp_send_active_reset(sk, sk->sk_allocation,
+ SK_RST_REASON_NOT_SPECIFIED);
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
@@ -2819,7 +2847,7 @@ void __tcp_close(struct sock *sk, long timeout)
* machine. State transitions:
*
* TCP_ESTABLISHED -> TCP_FIN_WAIT1
- * TCP_SYN_RECV -> TCP_FIN_WAIT1 (forget it, it's impossible)
+ * TCP_SYN_RECV -> TCP_FIN_WAIT1 (it is difficult)
* TCP_CLOSE_WAIT -> TCP_LAST_ACK
*
* are legal only when FIN has been sent (i.e. in window),
@@ -2879,7 +2907,8 @@ adjudge_to_death:
struct tcp_sock *tp = tcp_sk(sk);
if (READ_ONCE(tp->linger2) < 0) {
tcp_set_state(sk, TCP_CLOSE);
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
__NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONLINGER);
} else {
@@ -2897,7 +2926,8 @@ adjudge_to_death:
if (sk->sk_state != TCP_CLOSE) {
if (tcp_check_oom(sk, 0)) {
tcp_set_state(sk, TCP_CLOSE);
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
__NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONMEMORY);
} else if (!check_net(sock_net(sk))) {
@@ -3001,7 +3031,7 @@ int tcp_disconnect(struct sock *sk, int flags)
/* The last check adjusts for discrepancy of Linux wrt. RFC
* states
*/
- tcp_send_active_reset(sk, gfp_any());
+ tcp_send_active_reset(sk, gfp_any(), SK_RST_REASON_NOT_SPECIFIED);
WRITE_ONCE(sk->sk_err, ECONNRESET);
} else if (old_state == TCP_SYN_SENT)
WRITE_ONCE(sk->sk_err, ECONNRESET);
@@ -3010,6 +3040,7 @@ int tcp_disconnect(struct sock *sk, int flags)
__skb_queue_purge(&sk->sk_receive_queue);
WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
WRITE_ONCE(tp->urg_data, 0);
+ sk_set_peek_off(sk, -1);
tcp_write_queue_purge(sk);
tcp_fastopen_active_disable_ofo_check(sk);
skb_rbtree_purge(&tp->out_of_order_queue);
@@ -3058,7 +3089,7 @@ int tcp_disconnect(struct sock *sk, int flags)
icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
__sk_dst_reset(sk);
- dst_release(xchg((__force struct dst_entry **)&sk->sk_rx_dst, NULL));
+ dst_release(unrcu_pointer(xchg(&sk->sk_rx_dst, NULL)));
tcp_saved_syn_free(tp);
tp->compressed_ack = 0;
tp->segs_in = 0;
@@ -3379,7 +3410,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
if (!val) {
if (sk->sk_state != TCP_CLOSE)
return -EINVAL;
- tp->window_clamp = 0;
+ WRITE_ONCE(tp->window_clamp, 0);
} else {
u32 new_rcv_ssthresh, old_window_clamp = tp->window_clamp;
u32 new_window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
@@ -3388,7 +3419,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
if (new_window_clamp == old_window_clamp)
return 0;
- tp->window_clamp = new_window_clamp;
+ WRITE_ONCE(tp->window_clamp, new_window_clamp);
if (new_window_clamp < old_window_clamp) {
/* need to apply the reserved mem provisioning only
* when shrinking the window clamp
@@ -4057,7 +4088,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
TCP_RTO_MAX / HZ);
break;
case TCP_WINDOW_CLAMP:
- val = tp->window_clamp;
+ val = READ_ONCE(tp->window_clamp);
break;
case TCP_INFO: {
struct tcp_info info;
@@ -4342,6 +4373,9 @@ zerocopy_rcv_out:
return err;
}
+ case TCP_IS_MPTCP:
+ val = 0;
+ break;
default:
return -ENOPROTOOPT;
}
@@ -4430,7 +4464,7 @@ int tcp_md5_hash_key(struct tcp_sigpool *hp,
EXPORT_SYMBOL(tcp_md5_hash_key);
/* Called with rcu_read_lock() */
-enum skb_drop_reason
+static enum skb_drop_reason
tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
const void *saddr, const void *daddr,
int family, int l3index, const __u8 *hash_location)
@@ -4450,7 +4484,7 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
if (!key && hash_location) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
- tcp_hash_fail("Unexpected MD5 Hash found", family, skb, "");
+ trace_tcp_hash_md5_unexpected(sk, skb);
return SKB_DROP_REASON_TCP_MD5UNEXPECTED;
}
@@ -4465,29 +4499,90 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
NULL, skb);
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
- if (family == AF_INET) {
- tcp_hash_fail("MD5 Hash failed", AF_INET, skb, "%s L3 index %d",
- genhash ? "tcp_v4_calc_md5_hash failed"
- : "", l3index);
- } else {
- if (genhash) {
- tcp_hash_fail("MD5 Hash failed",
- AF_INET6, skb, "L3 index %d",
- l3index);
- } else {
- tcp_hash_fail("MD5 Hash mismatch",
- AF_INET6, skb, "L3 index %d",
- l3index);
- }
- }
+ trace_tcp_hash_md5_mismatch(sk, skb);
return SKB_DROP_REASON_TCP_MD5FAILURE;
}
return SKB_NOT_DROPPED_YET;
}
-EXPORT_SYMBOL(tcp_inbound_md5_hash);
+#else
+static inline enum skb_drop_reason
+tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
+ const void *saddr, const void *daddr,
+ int family, int l3index, const __u8 *hash_location)
+{
+ return SKB_NOT_DROPPED_YET;
+}
#endif
+/* Called with rcu_read_lock() */
+enum skb_drop_reason
+tcp_inbound_hash(struct sock *sk, const struct request_sock *req,
+ const struct sk_buff *skb,
+ const void *saddr, const void *daddr,
+ int family, int dif, int sdif)
+{
+ const struct tcphdr *th = tcp_hdr(skb);
+ const struct tcp_ao_hdr *aoh;
+ const __u8 *md5_location;
+ int l3index;
+
+ /* Invalid option or two times meet any of auth options */
+ if (tcp_parse_auth_options(th, &md5_location, &aoh)) {
+ trace_tcp_hash_bad_header(sk, skb);
+ return SKB_DROP_REASON_TCP_AUTH_HDR;
+ }
+
+ if (req) {
+ if (tcp_rsk_used_ao(req) != !!aoh) {
+ u8 keyid, rnext, maclen;
+
+ if (aoh) {
+ keyid = aoh->keyid;
+ rnext = aoh->rnext_keyid;
+ maclen = tcp_ao_hdr_maclen(aoh);
+ } else {
+ keyid = rnext = maclen = 0;
+ }
+
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD);
+ trace_tcp_ao_handshake_failure(sk, skb, keyid, rnext, maclen);
+ return SKB_DROP_REASON_TCP_AOFAILURE;
+ }
+ }
+
+ /* sdif set, means packet ingressed via a device
+ * in an L3 domain and dif is set to the l3mdev
+ */
+ l3index = sdif ? dif : 0;
+
+ /* Fast path: unsigned segments */
+ if (likely(!md5_location && !aoh)) {
+ /* Drop if there's TCP-MD5 or TCP-AO key with any rcvid/sndid
+ * for the remote peer. On TCP-AO established connection
+ * the last key is impossible to remove, so there's
+ * always at least one current_key.
+ */
+ if (tcp_ao_required(sk, saddr, family, l3index, true)) {
+ trace_tcp_hash_ao_required(sk, skb);
+ return SKB_DROP_REASON_TCP_AONOTFOUND;
+ }
+ if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
+ trace_tcp_hash_md5_required(sk, skb);
+ return SKB_DROP_REASON_TCP_MD5NOTFOUND;
+ }
+ return SKB_NOT_DROPPED_YET;
+ }
+
+ if (aoh)
+ return tcp_inbound_ao_hash(sk, skb, family, req, l3index, aoh);
+
+ return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family,
+ l3index, md5_location);
+}
+EXPORT_SYMBOL_GPL(tcp_inbound_hash);
+
void tcp_done(struct sock *sk)
{
struct request_sock *req;
@@ -4552,13 +4647,10 @@ int tcp_abort(struct sock *sk, int err)
bh_lock_sock(sk);
if (!sock_flag(sk, SOCK_DEAD)) {
- WRITE_ONCE(sk->sk_err, err);
- /* This barrier is coupled with smp_rmb() in tcp_poll() */
- smp_wmb();
- sk_error_report(sk);
if (tcp_need_reset(sk->sk_state))
- tcp_send_active_reset(sk, GFP_ATOMIC);
- tcp_done(sk);
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
+ tcp_done_with_error(sk, err);
}
bh_unlock_sock(sk);
@@ -4648,16 +4740,16 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, lsndtime);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, mdev_us);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_wstamp_ns);
- CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_clock_cache);
- CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_mstamp);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, rtt_seq);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tsorted_sent_queue);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, highest_sack);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, ecn_flags);
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 105);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 89);
/* TXRX read-write hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, pred_flags);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, tcp_clock_cache);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, tcp_mstamp);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_nxt);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_nxt);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_una);
@@ -4670,7 +4762,11 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt);
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 76);
+
+ /* 32bit arches with 8byte alignment on u64 fields might need padding
+ * before tcp_clock_cache.
+ */
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 92 + 4);
/* RX read-write hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received);
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
index 781b67a52571..db6516092daf 100644
--- a/net/ipv4/tcp_ao.c
+++ b/net/ipv4/tcp_ao.c
@@ -16,6 +16,7 @@
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/icmp.h>
+#include <trace/events/tcp.h>
DEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_ao_needed, HZ);
@@ -266,32 +267,49 @@ static void tcp_ao_key_free_rcu(struct rcu_head *head)
kfree_sensitive(key);
}
-void tcp_ao_destroy_sock(struct sock *sk, bool twsk)
+static void tcp_ao_info_free_rcu(struct rcu_head *head)
{
- struct tcp_ao_info *ao;
+ struct tcp_ao_info *ao = container_of(head, struct tcp_ao_info, rcu);
struct tcp_ao_key *key;
struct hlist_node *n;
+ hlist_for_each_entry_safe(key, n, &ao->head, node) {
+ hlist_del(&key->node);
+ tcp_sigpool_release(key->tcp_sigpool_id);
+ kfree_sensitive(key);
+ }
+ kfree(ao);
+ static_branch_slow_dec_deferred(&tcp_ao_needed);
+}
+
+static void tcp_ao_sk_omem_free(struct sock *sk, struct tcp_ao_info *ao)
+{
+ size_t total_ao_sk_mem = 0;
+ struct tcp_ao_key *key;
+
+ hlist_for_each_entry(key, &ao->head, node)
+ total_ao_sk_mem += tcp_ao_sizeof_key(key);
+ atomic_sub(total_ao_sk_mem, &sk->sk_omem_alloc);
+}
+
+void tcp_ao_destroy_sock(struct sock *sk, bool twsk)
+{
+ struct tcp_ao_info *ao;
+
if (twsk) {
ao = rcu_dereference_protected(tcp_twsk(sk)->ao_info, 1);
- tcp_twsk(sk)->ao_info = NULL;
+ rcu_assign_pointer(tcp_twsk(sk)->ao_info, NULL);
} else {
ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, 1);
- tcp_sk(sk)->ao_info = NULL;
+ rcu_assign_pointer(tcp_sk(sk)->ao_info, NULL);
}
if (!ao || !refcount_dec_and_test(&ao->refcnt))
return;
- hlist_for_each_entry_safe(key, n, &ao->head, node) {
- hlist_del_rcu(&key->node);
- if (!twsk)
- atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc);
- call_rcu(&key->rcu, tcp_ao_key_free_rcu);
- }
-
- kfree_rcu(ao, rcu);
- static_branch_slow_dec_deferred(&tcp_ao_needed);
+ if (!twsk)
+ tcp_ao_sk_omem_free(sk, ao);
+ call_rcu(&ao->rcu, tcp_ao_info_free_rcu);
}
void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp)
@@ -884,17 +902,16 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb,
const struct tcp_ao_hdr *aoh, struct tcp_ao_key *key,
u8 *traffic_key, u8 *phash, u32 sne, int l3index)
{
- u8 maclen = aoh->length - sizeof(struct tcp_ao_hdr);
const struct tcphdr *th = tcp_hdr(skb);
+ u8 maclen = tcp_ao_hdr_maclen(aoh);
void *hash_buf = NULL;
if (maclen != tcp_ao_maclen(key)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD);
atomic64_inc(&info->counters.pkt_bad);
atomic64_inc(&key->pkt_bad);
- tcp_hash_fail("AO hash wrong length", family, skb,
- "%u != %d L3index: %d", maclen,
- tcp_ao_maclen(key), l3index);
+ trace_tcp_ao_wrong_maclen(sk, skb, aoh->keyid,
+ aoh->rnext_keyid, maclen);
return SKB_DROP_REASON_TCP_AOFAILURE;
}
@@ -909,8 +926,8 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb,
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD);
atomic64_inc(&info->counters.pkt_bad);
atomic64_inc(&key->pkt_bad);
- tcp_hash_fail("AO hash mismatch", family, skb,
- "L3index: %d", l3index);
+ trace_tcp_ao_mismatch(sk, skb, aoh->keyid,
+ aoh->rnext_keyid, maclen);
kfree(hash_buf);
return SKB_DROP_REASON_TCP_AOFAILURE;
}
@@ -927,19 +944,21 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
int l3index, const struct tcp_ao_hdr *aoh)
{
const struct tcphdr *th = tcp_hdr(skb);
+ u8 maclen = tcp_ao_hdr_maclen(aoh);
u8 *phash = (u8 *)(aoh + 1); /* hash goes just after the header */
struct tcp_ao_info *info;
enum skb_drop_reason ret;
struct tcp_ao_key *key;
__be32 sisn, disn;
u8 *traffic_key;
+ int state;
u32 sne = 0;
info = rcu_dereference(tcp_sk(sk)->ao_info);
if (!info) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND);
- tcp_hash_fail("AO key not found", family, skb,
- "keyid: %u L3index: %d", aoh->keyid, l3index);
+ trace_tcp_ao_key_not_found(sk, skb, aoh->keyid,
+ aoh->rnext_keyid, maclen);
return SKB_DROP_REASON_TCP_AOUNEXPECTED;
}
@@ -948,8 +967,9 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
disn = 0;
}
+ state = READ_ONCE(sk->sk_state);
/* Fast-path */
- if (likely((1 << sk->sk_state) & TCP_AO_ESTABLISHED)) {
+ if (likely((1 << state) & TCP_AO_ESTABLISHED)) {
enum skb_drop_reason err;
struct tcp_ao_key *current_key;
@@ -979,6 +999,9 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
current_key = READ_ONCE(info->current_key);
/* Key rotation: the peer asks us to use new key (RNext) */
if (unlikely(aoh->rnext_keyid != current_key->sndid)) {
+ trace_tcp_ao_rnext_request(sk, skb, current_key->sndid,
+ aoh->rnext_keyid,
+ tcp_ao_hdr_maclen(aoh));
/* If the key is not found we do nothing. */
key = tcp_ao_established_key(info, aoh->rnext_keyid, -1);
if (key)
@@ -988,6 +1011,9 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
return SKB_NOT_DROPPED_YET;
}
+ if (unlikely(state == TCP_CLOSE))
+ return SKB_DROP_REASON_TCP_CLOSE;
+
/* Lookup key based on peer address and keyid.
* current_key and rnext_key must not be used on tcp listen
* sockets as otherwise:
@@ -1001,7 +1027,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
if (th->syn && !th->ack)
goto verify_hash;
- if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) {
+ if ((1 << state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) {
/* Make the initial syn the likely case here */
if (unlikely(req)) {
sne = tcp_ao_compute_sne(0, tcp_rsk(req)->rcv_isn,
@@ -1018,14 +1044,14 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
/* no way to figure out initial sisn/disn - drop */
return SKB_DROP_REASON_TCP_FLAGS;
}
- } else if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
+ } else if ((1 << state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
disn = info->lisn;
if (th->syn || th->rst)
sisn = th->seq;
else
sisn = info->risn;
} else {
- WARN_ONCE(1, "TCP-AO: Unexpected sk_state %d", sk->sk_state);
+ WARN_ONCE(1, "TCP-AO: Unexpected sk_state %d", state);
return SKB_DROP_REASON_TCP_AOFAILURE;
}
verify_hash:
@@ -1041,8 +1067,8 @@ verify_hash:
key_not_found:
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND);
atomic64_inc(&info->counters.key_not_found);
- tcp_hash_fail("Requested by the peer AO key id not found",
- family, skb, "L3index: %d", l3index);
+ trace_tcp_ao_key_not_found(sk, skb, aoh->keyid,
+ aoh->rnext_keyid, maclen);
return SKB_DROP_REASON_TCP_AOKEYNOTFOUND;
}
@@ -1963,8 +1989,10 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family,
first = true;
}
- if (cmd.ao_required && tcp_ao_required_verify(sk))
- return -EKEYREJECTED;
+ if (cmd.ao_required && tcp_ao_required_verify(sk)) {
+ err = -EKEYREJECTED;
+ goto out;
+ }
/* For sockets in TCP_CLOSED it's possible set keys that aren't
* matching the future peer (address/port/VRF/etc),
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 05dc2d05bc7c..760941e55153 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -1024,7 +1024,7 @@ static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
bbr_update_gains(sk);
}
-__bpf_kfunc static void bbr_main(struct sock *sk, const struct rate_sample *rs)
+__bpf_kfunc static void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs)
{
struct bbr *bbr = inet_csk_ca(sk);
u32 bw;
@@ -1156,8 +1156,6 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
};
BTF_KFUNCS_START(tcp_bbr_check_kfunc_ids)
-#ifdef CONFIG_X86
-#ifdef CONFIG_DYNAMIC_FTRACE
BTF_ID_FLAGS(func, bbr_init)
BTF_ID_FLAGS(func, bbr_main)
BTF_ID_FLAGS(func, bbr_sndbuf_expand)
@@ -1166,8 +1164,6 @@ BTF_ID_FLAGS(func, bbr_cwnd_event)
BTF_ID_FLAGS(func, bbr_ssthresh)
BTF_ID_FLAGS(func, bbr_min_tso_segs)
BTF_ID_FLAGS(func, bbr_set_state)
-#endif
-#endif
BTF_KFUNCS_END(tcp_bbr_check_kfunc_ids)
static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = {
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 28ffcfbeef14..0306d257fa64 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -46,8 +46,7 @@ void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
}
/* Must be called with rcu lock held */
-static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net,
- const char *name)
+static struct tcp_congestion_ops *tcp_ca_find_autoload(const char *name)
{
struct tcp_congestion_ops *ca = tcp_ca_find(name);
@@ -178,7 +177,7 @@ int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_cong
return ret;
}
-u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca)
+u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
{
const struct tcp_congestion_ops *ca;
u32 key = TCP_CA_UNSPEC;
@@ -186,7 +185,7 @@ u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca)
might_sleep();
rcu_read_lock();
- ca = tcp_ca_find_autoload(net, name);
+ ca = tcp_ca_find_autoload(name);
if (ca) {
key = ca->key;
*ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN;
@@ -203,9 +202,10 @@ char *tcp_ca_get_name_by_key(u32 key, char *buffer)
rcu_read_lock();
ca = tcp_ca_find_key(key);
- if (ca)
- ret = strncpy(buffer, ca->name,
- TCP_CA_NAME_MAX);
+ if (ca) {
+ strscpy(buffer, ca->name, TCP_CA_NAME_MAX);
+ ret = buffer;
+ }
rcu_read_unlock();
return ret;
@@ -283,7 +283,7 @@ int tcp_set_default_congestion_control(struct net *net, const char *name)
int ret;
rcu_read_lock();
- ca = tcp_ca_find_autoload(net, name);
+ ca = tcp_ca_find_autoload(name);
if (!ca) {
ret = -ENOENT;
} else if (!bpf_try_module_get(ca, ca->owner)) {
@@ -338,7 +338,7 @@ void tcp_get_default_congestion_control(struct net *net, char *name)
rcu_read_lock();
ca = rcu_dereference(net->ipv4.tcp_congestion_control);
- strncpy(name, ca->name, TCP_CA_NAME_MAX);
+ strscpy(name, ca->name, TCP_CA_NAME_MAX);
rcu_read_unlock();
}
@@ -421,7 +421,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
if (!load)
ca = tcp_ca_find(name);
else
- ca = tcp_ca_find_autoload(sock_net(sk), name);
+ ca = tcp_ca_find_autoload(name);
/* No change asking for existing value */
if (ca == icsk->icsk_ca_ops) {
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 44869ea089e3..5dbed91c6178 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -486,16 +486,12 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
};
BTF_KFUNCS_START(tcp_cubic_check_kfunc_ids)
-#ifdef CONFIG_X86
-#ifdef CONFIG_DYNAMIC_FTRACE
BTF_ID_FLAGS(func, cubictcp_init)
BTF_ID_FLAGS(func, cubictcp_recalc_ssthresh)
BTF_ID_FLAGS(func, cubictcp_cong_avoid)
BTF_ID_FLAGS(func, cubictcp_state)
BTF_ID_FLAGS(func, cubictcp_cwnd_event)
BTF_ID_FLAGS(func, cubictcp_acked)
-#endif
-#endif
BTF_KFUNCS_END(tcp_cubic_check_kfunc_ids)
static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = {
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index e33fbe4933e4..8a45a4aea933 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -58,7 +58,18 @@ struct dctcp {
};
static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */
-module_param(dctcp_shift_g, uint, 0644);
+
+static int dctcp_shift_g_set(const char *val, const struct kernel_param *kp)
+{
+ return param_set_uint_minmax(val, kp, 0, 10);
+}
+
+static const struct kernel_param_ops dctcp_shift_g_ops = {
+ .set = dctcp_shift_g_set,
+ .get = param_get_uint,
+};
+
+module_param_cb(dctcp_shift_g, &dctcp_shift_g_ops, &dctcp_shift_g, 0644);
MODULE_PARM_DESC(dctcp_shift_g, "parameter g for updating dctcp_alpha");
static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA;
@@ -261,16 +272,12 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = {
};
BTF_KFUNCS_START(tcp_dctcp_check_kfunc_ids)
-#ifdef CONFIG_X86
-#ifdef CONFIG_DYNAMIC_FTRACE
BTF_ID_FLAGS(func, dctcp_init)
BTF_ID_FLAGS(func, dctcp_update_alpha)
BTF_ID_FLAGS(func, dctcp_cwnd_event)
BTF_ID_FLAGS(func, dctcp_ssthresh)
BTF_ID_FLAGS(func, dctcp_cwnd_undo)
BTF_ID_FLAGS(func, dctcp_state)
-#endif
-#endif
BTF_KFUNCS_END(tcp_dctcp_check_kfunc_ids)
static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = {
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 8ed54e7334a9..0f523cbfe329 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -49,7 +49,7 @@ void tcp_fastopen_ctx_destroy(struct net *net)
{
struct tcp_fastopen_context *ctxt;
- ctxt = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, NULL);
+ ctxt = unrcu_pointer(xchg(&net->ipv4.tcp_fastopen_ctx, NULL));
if (ctxt)
call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
@@ -80,9 +80,10 @@ int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
if (sk) {
q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
- octx = xchg((__force struct tcp_fastopen_context **)&q->ctx, ctx);
+ octx = unrcu_pointer(xchg(&q->ctx, RCU_INITIALIZER(ctx)));
} else {
- octx = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, ctx);
+ octx = unrcu_pointer(xchg(&net->ipv4.tcp_fastopen_ctx,
+ RCU_INITIALIZER(ctx)));
}
if (octx)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5d874817a78d..e37488d3453f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -72,6 +72,7 @@
#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/tcp.h>
+#include <net/proto_memory.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
@@ -237,9 +238,14 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
*/
if (unlikely(len != icsk->icsk_ack.rcv_mss)) {
u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE;
+ u8 old_ratio = tcp_sk(sk)->scaling_ratio;
do_div(val, skb->truesize);
tcp_sk(sk)->scaling_ratio = val ? val : 1;
+
+ if (old_ratio != tcp_sk(sk)->scaling_ratio)
+ WRITE_ONCE(tcp_sk(sk)->window_clamp,
+ tcp_win_from_space(sk, sk->sk_rcvbuf));
}
icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
tcp_sk(sk)->advmss);
@@ -563,19 +569,20 @@ static void tcp_init_buffer_space(struct sock *sk)
maxwin = tcp_full_space(sk);
if (tp->window_clamp >= maxwin) {
- tp->window_clamp = maxwin;
+ WRITE_ONCE(tp->window_clamp, maxwin);
if (tcp_app_win && maxwin > 4 * tp->advmss)
- tp->window_clamp = max(maxwin -
- (maxwin >> tcp_app_win),
- 4 * tp->advmss);
+ WRITE_ONCE(tp->window_clamp,
+ max(maxwin - (maxwin >> tcp_app_win),
+ 4 * tp->advmss));
}
/* Force reservation of one segment. */
if (tcp_app_win &&
tp->window_clamp > 2 * tp->advmss &&
tp->window_clamp + tp->advmss > maxwin)
- tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
+ WRITE_ONCE(tp->window_clamp,
+ max(2 * tp->advmss, maxwin - tp->advmss));
tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
tp->snd_cwnd_stamp = tcp_jiffies32;
@@ -773,7 +780,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
/* Make the window clamp follow along. */
- tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
+ WRITE_ONCE(tp->window_clamp,
+ tcp_win_from_space(sk, rcvbuf));
}
}
tp->rcvq_space.space = copied;
@@ -911,7 +919,7 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
tp->rtt_seq = tp->snd_nxt;
tp->mdev_max_us = tcp_rto_min_us(sk);
- tcp_bpf_rtt(sk);
+ tcp_bpf_rtt(sk, mrtt_us, srtt);
}
} else {
/* no previous measure. */
@@ -921,7 +929,7 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
tp->mdev_max_us = tp->rttvar_us;
tp->rtt_seq = tp->snd_nxt;
- tcp_bpf_rtt(sk);
+ tcp_bpf_rtt(sk, mrtt_us, srtt);
}
tp->srtt_us = max(1U, srtt);
}
@@ -2126,8 +2134,16 @@ void tcp_clear_retrans(struct tcp_sock *tp)
static inline void tcp_init_undo(struct tcp_sock *tp)
{
tp->undo_marker = tp->snd_una;
+
/* Retransmission still in flight may cause DSACKs later. */
- tp->undo_retrans = tp->retrans_out ? : -1;
+ /* First, account for regular retransmits in flight: */
+ tp->undo_retrans = tp->retrans_out;
+ /* Next, account for TLP retransmits in flight: */
+ if (tp->tlp_high_seq && tp->tlp_retrans)
+ tp->undo_retrans++;
+ /* Finally, avoid 0, because undo_retrans==0 means "can undo now": */
+ if (!tp->undo_retrans)
+ tp->undo_retrans = -1;
}
static bool tcp_is_rack(const struct sock *sk)
@@ -2206,6 +2222,7 @@ void tcp_enter_loss(struct sock *sk)
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
+ tp->tlp_high_seq = 0;
tcp_ecn_queue_cwr(tp);
/* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
@@ -2779,13 +2796,37 @@ static void tcp_mtup_probe_success(struct sock *sk)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
}
+/* Sometimes we deduce that packets have been dropped due to reasons other than
+ * congestion, like path MTU reductions or failed client TFO attempts. In these
+ * cases we call this function to retransmit as many packets as cwnd allows,
+ * without reducing cwnd. Given that retransmits will set retrans_stamp to a
+ * non-zero value (and may do so in a later calling context due to TSQ), we
+ * also enter CA_Loss so that we track when all retransmitted packets are ACKed
+ * and clear retrans_stamp when that happens (to ensure later recurring RTOs
+ * are using the correct retrans_stamp and don't declare ETIMEDOUT
+ * prematurely).
+ */
+static void tcp_non_congestion_loss_retransmit(struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (icsk->icsk_ca_state != TCP_CA_Loss) {
+ tp->high_seq = tp->snd_nxt;
+ tp->snd_ssthresh = tcp_current_ssthresh(sk);
+ tp->prior_ssthresh = 0;
+ tp->undo_marker = 0;
+ tcp_set_ca_state(sk, TCP_CA_Loss);
+ }
+ tcp_xmit_retransmit_queue(sk);
+}
+
/* Do a simple retransmit without using the backoff mechanisms in
* tcp_timer. This is used for path mtu discovery.
* The socket is already locked here.
*/
void tcp_simple_retransmit(struct sock *sk)
{
- const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
int mss;
@@ -2825,14 +2866,7 @@ void tcp_simple_retransmit(struct sock *sk)
* in network, but units changed and effective
* cwnd/ssthresh really reduced now.
*/
- if (icsk->icsk_ca_state != TCP_CA_Loss) {
- tp->high_seq = tp->snd_nxt;
- tp->snd_ssthresh = tcp_current_ssthresh(sk);
- tp->prior_ssthresh = 0;
- tp->undo_marker = 0;
- tcp_set_ca_state(sk, TCP_CA_Loss);
- }
- tcp_xmit_retransmit_queue(sk);
+ tcp_non_congestion_loss_retransmit(sk);
}
EXPORT_SYMBOL(tcp_simple_retransmit);
@@ -3057,7 +3091,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
return;
if (tcp_try_undo_dsack(sk))
- tcp_try_keep_open(sk);
+ tcp_try_to_open(sk, flag);
tcp_identify_packet_loss(sk, ack_flag);
if (icsk->icsk_ca_state != TCP_CA_Recovery) {
@@ -3539,7 +3573,7 @@ static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
const struct inet_connection_sock *icsk = inet_csk(sk);
if (icsk->icsk_ca_ops->cong_control) {
- icsk->icsk_ca_ops->cong_control(sk, rs);
+ icsk->icsk_ca_ops->cong_control(sk, ack, flag, rs);
return;
}
@@ -3575,8 +3609,10 @@ static void tcp_snd_sne_update(struct tcp_sock *tp, u32 ack)
ao = rcu_dereference_protected(tp->ao_info,
lockdep_sock_is_held((struct sock *)tp));
- if (ao && ack < tp->snd_una)
+ if (ao && ack < tp->snd_una) {
ao->snd_sne++;
+ trace_tcp_ao_snd_sne_update((struct sock *)tp, ao->snd_sne);
+ }
#endif
}
@@ -3601,8 +3637,10 @@ static void tcp_rcv_sne_update(struct tcp_sock *tp, u32 seq)
ao = rcu_dereference_protected(tp->ao_info,
lockdep_sock_is_held((struct sock *)tp));
- if (ao && seq < tp->rcv_nxt)
+ if (ao && seq < tp->rcv_nxt) {
ao->rcv_sne++;
+ trace_tcp_ao_rcv_sne_update((struct sock *)tp, ao->rcv_sne);
+ }
#endif
}
@@ -4204,6 +4242,13 @@ void tcp_parse_options(const struct net *net,
*/
break;
#endif
+#ifdef CONFIG_TCP_AO
+ case TCPOPT_AO:
+ /* TCP AO has already been checked
+ * (see tcp_inbound_ao_hash()).
+ */
+ break;
+#endif
case TCPOPT_FASTOPEN:
tcp_parse_fastopen_option(
opsize - TCPOLEN_FASTOPEN_BASE,
@@ -4433,9 +4478,26 @@ static enum skb_drop_reason tcp_sequence(const struct tcp_sock *tp,
return SKB_NOT_DROPPED_YET;
}
+
+void tcp_done_with_error(struct sock *sk, int err)
+{
+ /* This barrier is coupled with smp_rmb() in tcp_poll() */
+ WRITE_ONCE(sk->sk_err, err);
+ smp_wmb();
+
+ tcp_write_queue_purge(sk);
+ tcp_done(sk);
+
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk_error_report(sk);
+}
+EXPORT_SYMBOL(tcp_done_with_error);
+
/* When we get a reset we do this. */
void tcp_reset(struct sock *sk, struct sk_buff *skb)
{
+ int err;
+
trace_tcp_receive_reset(sk);
/* mptcp can't tell us to ignore reset pkts,
@@ -4447,24 +4509,17 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb)
/* We want the right error as BSD sees it (and indeed as we do). */
switch (sk->sk_state) {
case TCP_SYN_SENT:
- WRITE_ONCE(sk->sk_err, ECONNREFUSED);
+ err = ECONNREFUSED;
break;
case TCP_CLOSE_WAIT:
- WRITE_ONCE(sk->sk_err, EPIPE);
+ err = EPIPE;
break;
case TCP_CLOSE:
return;
default:
- WRITE_ONCE(sk->sk_err, ECONNRESET);
+ err = ECONNRESET;
}
- /* This barrier is coupled with smp_rmb() in tcp_poll() */
- smp_wmb();
-
- tcp_write_queue_purge(sk);
- tcp_done(sk);
-
- if (!sock_flag(sk, SOCK_DEAD))
- sk_error_report(sk);
+ tcp_done_with_error(sk, err);
}
/*
@@ -4800,13 +4855,8 @@ static bool tcp_try_coalesce(struct sock *sk,
if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
return false;
- if (!mptcp_skb_can_collapse(to, from))
- return false;
-
-#ifdef CONFIG_TLS_DEVICE
- if (from->decrypted != to->decrypted)
+ if (!tcp_skb_can_collapse_rx(to, from))
return false;
-#endif
if (!skb_try_coalesce(to, from, fragstolen, &delta))
return false;
@@ -4848,7 +4898,7 @@ static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
enum skb_drop_reason reason)
{
sk_drops_add(sk, skb);
- kfree_skb_reason(skb, reason);
+ sk_skb_reason_drop(sk, skb, reason);
}
/* This one checks to see if we can put data from the
@@ -5174,6 +5224,16 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
*/
if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
if (tcp_receive_window(tp) == 0) {
+ /* Some stacks are known to send bare FIN packets
+ * in a loop even if we send RWIN 0 in our ACK.
+ * Accepting this FIN does not hurt memory pressure
+ * because the FIN flag will simply be merged to the
+ * receive queue tail skb in most cases.
+ */
+ if (!skb->len &&
+ (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
+ goto queue_and_out;
+
reason = SKB_DROP_REASON_TCP_ZEROWINDOW;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
goto out_of_window;
@@ -5188,7 +5248,7 @@ queue_and_out:
inet_csk_schedule_ack(sk);
sk->sk_data_ready(sk);
- if (skb_queue_len(&sk->sk_receive_queue)) {
+ if (skb_queue_len(&sk->sk_receive_queue) && skb->len) {
reason = SKB_DROP_REASON_PROTO_MEM;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
goto drop;
@@ -5351,7 +5411,7 @@ restart:
break;
}
- if (n && n != tail && mptcp_skb_can_collapse(skb, n) &&
+ if (n && n != tail && tcp_skb_can_collapse_rx(skb, n) &&
TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
end_of_skbs = false;
break;
@@ -5375,9 +5435,7 @@ restart:
break;
memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
-#ifdef CONFIG_TLS_DEVICE
- nskb->decrypted = skb->decrypted;
-#endif
+ skb_copy_decrypted(nskb, skb);
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
if (list)
__skb_queue_before(list, skb, nskb);
@@ -5404,13 +5462,9 @@ restart:
skb = tcp_collapse_one(sk, skb, list, root);
if (!skb ||
skb == tail ||
- !mptcp_skb_can_collapse(nskb, skb) ||
+ !tcp_skb_can_collapse_rx(nskb, skb) ||
(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
goto end;
-#ifdef CONFIG_TLS_DEVICE
- if (skb->decrypted != nskb->decrypted)
- goto end;
-#endif
}
}
}
@@ -5949,6 +6003,11 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
* RFC 5961 4.2 : Send a challenge ack
*/
if (th->syn) {
+ if (sk->sk_state == TCP_SYN_RECV && sk->sk_socket && th->ack &&
+ TCP_SKB_CB(skb)->seq + 1 == TCP_SKB_CB(skb)->end_seq &&
+ TCP_SKB_CB(skb)->seq + 1 == tp->rcv_nxt &&
+ TCP_SKB_CB(skb)->ack_seq == tp->snd_nxt)
+ goto pass;
syn_challenge:
if (syn_inerr)
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
@@ -5958,6 +6017,7 @@ syn_challenge:
goto discard;
}
+pass:
bpf_skops_parse_hdr(sk, skb);
return true;
@@ -6288,7 +6348,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
tp->fastopen_client_fail = TFO_DATA_NOT_ACKED;
skb_rbtree_walk_from(data)
tcp_mark_skb_lost(sk, data);
- tcp_xmit_retransmit_queue(sk);
+ tcp_non_congestion_loss_retransmit(sk);
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPFASTOPENACTIVEFAIL);
return true;
@@ -6426,7 +6486,8 @@ consume:
if (!tp->rx_opt.wscale_ok) {
tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
- tp->window_clamp = min(tp->window_clamp, 65535U);
+ WRITE_ONCE(tp->window_clamp,
+ min(tp->window_clamp, 65535U));
}
if (tp->rx_opt.saw_tstamp) {
@@ -6761,6 +6822,8 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tcp_initialize_rcv_mss(sk);
tcp_fast_path_on(tp);
+ if (sk->sk_shutdown & SEND_SHUTDOWN)
+ tcp_shutdown(sk, SEND_SHUTDOWN);
break;
case TCP_FIN_WAIT1: {
@@ -6971,35 +7034,10 @@ static void tcp_openreq_init(struct request_sock *req,
#endif
}
-struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
- struct sock *sk_listener,
- bool attach_listener)
-{
- struct request_sock *req = reqsk_alloc(ops, sk_listener,
- attach_listener);
-
- if (req) {
- struct inet_request_sock *ireq = inet_rsk(req);
-
- ireq->ireq_opt = NULL;
-#if IS_ENABLED(CONFIG_IPV6)
- ireq->pktopts = NULL;
-#endif
- atomic64_set(&ireq->ir_cookie, 0);
- ireq->ireq_state = TCP_NEW_SYN_RECV;
- write_pnet(&ireq->ireq_net, sock_net(sk_listener));
- ireq->ireq_family = sk_listener->sk_family;
- req->timeout = TCP_TIMEOUT_INIT;
- }
-
- return req;
-}
-EXPORT_SYMBOL(inet_reqsk_alloc);
-
/*
* Return true if a syncookie should be sent
*/
-static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
+static bool tcp_syn_flood_action(struct sock *sk, const char *proto)
{
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
const char *msg = "Dropping request";
@@ -7100,7 +7138,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct sock *sk, struct sk_buff *skb)
{
struct tcp_fastopen_cookie foc = { .len = -1 };
- __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
struct tcp_options_received tmp_opt;
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
@@ -7110,21 +7147,28 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct dst_entry *dst;
struct flowi fl;
u8 syncookies;
+ u32 isn;
#ifdef CONFIG_TCP_AO
const struct tcp_ao_hdr *aoh;
#endif
- syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
+ isn = __this_cpu_read(tcp_tw_isn);
+ if (isn) {
+ /* TW buckets are converted to open requests without
+ * limitations, they conserve resources and peer is
+ * evidently real one.
+ */
+ __this_cpu_write(tcp_tw_isn, 0);
+ } else {
+ syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
- /* TW buckets are converted to open requests without
- * limitations, they conserve resources and peer is
- * evidently real one.
- */
- if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) {
- want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
- if (!want_cookie)
- goto drop;
+ if (syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) {
+ want_cookie = tcp_syn_flood_action(sk,
+ rsk_ops->slab_name);
+ if (!want_cookie)
+ goto drop;
+ }
}
if (sk_acceptq_is_full(sk)) {
@@ -7163,7 +7207,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
/* Note: tcp_v6_init_req() might override ir_iif for link locals */
inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
- dst = af_ops->route_req(sk, skb, &fl, req);
+ dst = af_ops->route_req(sk, skb, &fl, req, isn);
if (!dst)
goto drop_and_free;
@@ -7240,7 +7284,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_rsk(req)->tfo_listener = false;
if (!want_cookie) {
req->timeout = tcp_timeout_init((struct sock *)req);
- inet_csk_reqsk_queue_hash_add(sk, req, req->timeout);
+ if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req,
+ req->timeout))) {
+ reqsk_free(req);
+ return 0;
+ }
+
}
af_ops->send_synack(sk, dst, &fl, req, &foc,
!want_cookie ? TCP_SYNACK_NORMAL :
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a22ee5838751..fd17f25ff288 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -70,6 +70,7 @@
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>
+#include <net/rstreason.h>
#include <linux/inet.h>
#include <linux/ipv6.h>
@@ -92,7 +93,9 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);
-static DEFINE_PER_CPU(struct sock *, ipv4_tcp_sk);
+static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
static u32 tcp_v4_init_seq(const struct sk_buff *skb)
{
@@ -113,6 +116,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
const struct inet_timewait_sock *tw = inet_twsk(sktw);
const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
struct tcp_sock *tp = tcp_sk(sk);
+ int ts_recent_stamp;
if (reuse == 2) {
/* Still does not detect *everything* that goes through
@@ -151,9 +155,16 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
If TW bucket has been already destroyed we fall back to VJ's scheme
and use initial timestamp retrieved from peer table.
*/
- if (tcptw->tw_ts_recent_stamp &&
+ ts_recent_stamp = READ_ONCE(tcptw->tw_ts_recent_stamp);
+ if (ts_recent_stamp &&
(!twp || (reuse && time_after32(ktime_get_seconds(),
- tcptw->tw_ts_recent_stamp)))) {
+ ts_recent_stamp)))) {
+ /* inet_twsk_hashdance_schedule() sets sk_refcnt after putting twsk
+ * and releasing the bucket lock.
+ */
+ if (unlikely(!refcount_inc_not_zero(&sktw->sk_refcnt)))
+ return 0;
+
/* In case of repair and re-using TIME-WAIT sockets we still
* want to be sure that it is safe as above but honor the
* sequence numbers and time stamps set as part of the repair
@@ -171,10 +182,10 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
if (!seq)
seq = 1;
WRITE_ONCE(tp->write_seq, seq);
- tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
- tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
+ tp->rx_opt.ts_recent = READ_ONCE(tcptw->tw_ts_recent);
+ tp->rx_opt.ts_recent_stamp = ts_recent_stamp;
}
- sock_hold(sktw);
+
return 1;
}
@@ -604,15 +615,10 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th);
- if (!sock_owned_by_user(sk)) {
- WRITE_ONCE(sk->sk_err, err);
-
- sk_error_report(sk);
-
- tcp_done(sk);
- } else {
+ if (!sock_owned_by_user(sk))
+ tcp_done_with_error(sk, err);
+ else
WRITE_ONCE(sk->sk_err_soft, err);
- }
goto out;
}
@@ -723,7 +729,8 @@ out:
* Exception: precedence violation. We do not implement it in any case.
*/
-static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
+static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb,
+ enum sk_rst_reason reason)
{
const struct tcphdr *th = tcp_hdr(skb);
struct {
@@ -866,11 +873,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
* routing might fail in this case. No choice here, if we choose to force
* input interface, we will misroute in case of asymmetric route.
*/
- if (sk) {
+ if (sk)
arg.bound_dev_if = sk->sk_bound_dev_if;
- if (sk_fullsock(sk))
- trace_tcp_send_reset(sk, skb);
- }
+
+ trace_tcp_send_reset(sk, skb, reason);
BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
offsetof(struct inet_timewait_sock, tw_bound_dev_if));
@@ -878,7 +884,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
arg.tos = ip_hdr(skb)->tos;
arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
local_bh_disable();
- ctl_sk = this_cpu_read(ipv4_tcp_sk);
+ local_lock_nested_bh(&ipv4_tcp_sk.bh_lock);
+ ctl_sk = this_cpu_read(ipv4_tcp_sk.sock);
+
sock_net_set(ctl_sk, net);
if (sk) {
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
@@ -903,6 +911,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
sock_net_set(ctl_sk, &init_net);
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
+ local_unlock_nested_bh(&ipv4_tcp_sk.bh_lock);
local_bh_enable();
#ifdef CONFIG_TCP_MD5SIG
@@ -998,7 +1007,8 @@ static void tcp_v4_send_ack(const struct sock *sk,
arg.tos = tos;
arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
local_bh_disable();
- ctl_sk = this_cpu_read(ipv4_tcp_sk);
+ local_lock_nested_bh(&ipv4_tcp_sk.bh_lock);
+ ctl_sk = this_cpu_read(ipv4_tcp_sk.sock);
sock_net_set(ctl_sk, net);
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_mark : READ_ONCE(sk->sk_mark);
@@ -1013,6 +1023,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
sock_net_set(ctl_sk, &init_net);
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
+ local_unlock_nested_bh(&ipv4_tcp_sk.bh_lock);
local_bh_enable();
}
@@ -1050,19 +1061,17 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
#else
if (0) {
#endif
-#ifdef CONFIG_TCP_MD5SIG
- } else if (static_branch_unlikely(&tcp_md5_needed.key)) {
+ } else if (static_branch_tcp_md5()) {
key.md5_key = tcp_twsk_md5_key(tcptw);
if (key.md5_key)
key.type = TCP_KEY_MD5;
-#endif
}
tcp_v4_send_ack(sk, skb,
tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_tw_tsval(tcptw),
- tcptw->tw_ts_recent,
+ READ_ONCE(tcptw->tw_ts_recent),
tw->tw_bound_dev_if, &key,
tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
tw->tw_tos,
@@ -1124,8 +1133,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
#else
if (0) {
#endif
-#ifdef CONFIG_TCP_MD5SIG
- } else if (static_branch_unlikely(&tcp_md5_needed.key)) {
+ } else if (static_branch_tcp_md5()) {
const union tcp_md5_addr *addr;
int l3index;
@@ -1134,17 +1142,11 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
key.md5_key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
if (key.md5_key)
key.type = TCP_KEY_MD5;
-#endif
}
- /* RFC 7323 2.3
- * The window field (SEG.WND) of every outgoing segment, with the
- * exception of <SYN> segments, MUST be right-shifted by
- * Rcv.Wind.Shift bits:
- */
tcp_v4_send_ack(sk, skb, seq,
tcp_rsk(req)->rcv_nxt,
- req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
+ tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
tcp_rsk_tsval(tcp_rsk(req)),
READ_ONCE(req->ts_recent),
0, &key,
@@ -1667,7 +1669,8 @@ static void tcp_v4_init_req(struct request_sock *req,
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
- struct request_sock *req)
+ struct request_sock *req,
+ u32 tw_isn)
{
tcp_v4_init_req(req, sk, skb);
@@ -1934,9 +1937,9 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
reset:
- tcp_v4_send_reset(rsk, skb);
+ tcp_v4_send_reset(rsk, skb, sk_rst_convert_drop_reason(reason));
discard:
- kfree_skb_reason(skb, reason);
+ sk_skb_reason_drop(sk, skb, reason);
/* Be careful here. If this function gets more complicated and
* gcc suffers from register pressure on the x86, sk (in %ebx)
* might be destroyed here. This current version compiles correctly,
@@ -1995,7 +1998,7 @@ int tcp_v4_early_demux(struct sk_buff *skb)
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
enum skb_drop_reason *reason)
{
- u32 limit, tail_gso_size, tail_gso_segs;
+ u32 tail_gso_size, tail_gso_segs;
struct skb_shared_info *shinfo;
const struct tcphdr *th;
struct tcphdr *thtail;
@@ -2004,6 +2007,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
bool fragstolen;
u32 gso_segs;
u32 gso_size;
+ u64 limit;
int delta;
/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
@@ -2045,10 +2049,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) ||
((TCP_SKB_CB(tail)->tcp_flags ^
TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
-#ifdef CONFIG_TLS_DEVICE
- tail->decrypted != skb->decrypted ||
-#endif
- !mptcp_skb_can_collapse(tail, skb) ||
+ !tcp_skb_can_collapse_rx(tail, skb) ||
thtail->doff != th->doff ||
memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
goto no_coalesce;
@@ -2101,7 +2102,13 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
__skb_push(skb, hdrlen);
no_coalesce:
- limit = (u32)READ_ONCE(sk->sk_rcvbuf) + (u32)(READ_ONCE(sk->sk_sndbuf) >> 1);
+ /* sk->sk_backlog.len is reset only at the end of __release_sock().
+ * Both sk->sk_backlog.len and sk->sk_rmem_alloc could reach
+ * sk_rcvbuf in normal conditions.
+ */
+ limit = ((u64)READ_ONCE(sk->sk_rcvbuf)) << 1;
+
+ limit += ((u32)READ_ONCE(sk->sk_sndbuf)) >> 1;
/* Only socket owner can try to collapse/prune rx queues
* to reduce memory overhead, so add a little headroom here.
@@ -2109,6 +2116,8 @@ no_coalesce:
*/
limit += 64 * 1024;
+ limit = min_t(u64, limit, UINT_MAX);
+
if (unlikely(sk_add_backlog(sk, skb, limit))) {
bh_unlock_sock(sk);
*reason = SKB_DROP_REASON_SOCKET_BACKLOG;
@@ -2148,7 +2157,6 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
skb->len - th->doff * 4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
- TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->has_rxtstamp =
@@ -2167,9 +2175,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
int dif = inet_iif(skb);
const struct iphdr *iph;
const struct tcphdr *th;
+ struct sock *sk = NULL;
bool refcounted;
- struct sock *sk;
int ret;
+ u32 isn;
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (skb->pkt_type != PACKET_HOST)
@@ -2207,7 +2216,6 @@ lookup:
if (!sk)
goto no_tcp_socket;
-process:
if (sk->sk_state == TCP_TIME_WAIT)
goto do_time_wait;
@@ -2279,7 +2287,10 @@ process:
} else {
drop_reason = tcp_child_process(sk, nsk, skb);
if (drop_reason) {
- tcp_v4_send_reset(nsk, skb);
+ enum sk_rst_reason rst_reason;
+
+ rst_reason = sk_rst_convert_drop_reason(drop_reason);
+ tcp_v4_send_reset(nsk, skb, rst_reason);
goto discard_and_relse;
}
sock_put(sk);
@@ -2287,6 +2298,7 @@ process:
}
}
+process:
if (static_branch_unlikely(&ip4_min_ttl)) {
/* min_ttl can be changed concurrently from do_ip_setsockopt() */
if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) {
@@ -2357,13 +2369,13 @@ csum_error:
bad_packet:
__TCP_INC_STATS(net, TCP_MIB_INERRS);
} else {
- tcp_v4_send_reset(NULL, skb);
+ tcp_v4_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
}
discard_it:
SKB_DR_OR(drop_reason, NOT_SPECIFIED);
/* Discard frame. */
- kfree_skb_reason(skb, drop_reason);
+ sk_skb_reason_drop(sk, skb, drop_reason);
return 0;
discard_and_relse:
@@ -2385,7 +2397,7 @@ do_time_wait:
inet_twsk_put(inet_twsk(sk));
goto csum_error;
}
- switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
+ switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(net,
net->ipv4.tcp_death_row.hashinfo,
@@ -2399,6 +2411,7 @@ do_time_wait:
sk = sk2;
tcp_v4_restore_cb(skb);
refcounted = false;
+ __this_cpu_write(tcp_tw_isn, isn);
goto process;
}
}
@@ -2408,7 +2421,7 @@ do_time_wait:
tcp_v4_timewait_ack(sk, skb);
break;
case TCP_TW_RST:
- tcp_v4_send_reset(sk, skb);
+ tcp_v4_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
inet_twsk_deschedule_put(inet_twsk(sk));
goto discard_it;
case TCP_TW_SUCCESS:;
@@ -2418,7 +2431,6 @@ do_time_wait:
static struct timewait_sock_ops tcp_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp_timewait_sock),
- .twsk_unique = tcp_twsk_unique,
.twsk_destructor= tcp_twsk_destructor,
};
@@ -3493,6 +3505,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_shrink_window = 0;
net->ipv4.sysctl_tcp_pingpong_thresh = 1;
+ net->ipv4.sysctl_tcp_rto_min_us = jiffies_to_usecs(TCP_RTO_MIN);
return 0;
}
@@ -3501,7 +3514,7 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
struct net *net;
- tcp_twsk_purge(net_exit_list, AF_INET);
+ tcp_twsk_purge(net_exit_list);
list_for_each_entry(net, net_exit_list, exit_list) {
inet_pernet_hashinfo_free(net->ipv4.tcp_death_row.hashinfo);
@@ -3607,7 +3620,9 @@ void __init tcp_v4_init(void)
*/
inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
- per_cpu(ipv4_tcp_sk, cpu) = sk;
+ sk->sk_clockid = CLOCK_MONOTONIC;
+
+ per_cpu(ipv4_tcp_sk.sock, cpu) = sk;
}
if (register_pernet_subsys(&tcp_sk_ops))
panic("Failed to create the TCP control socket.\n");
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index c2a925538542..b01eb6d94413 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -619,6 +619,7 @@ static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] =
[TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, },
[TCP_METRICS_ATTR_ADDR_IPV6] = { .type = NLA_BINARY,
.len = sizeof(struct in6_addr), },
+ [TCP_METRICS_ATTR_SADDR_IPV4] = { .type = NLA_U32, },
/* Following attributes are not received for GET/DEL,
* we keep them for reference
*/
@@ -766,6 +767,7 @@ static int tcp_metrics_nl_dump(struct sk_buff *skb,
unsigned int max_rows = 1U << tcp_metrics_hash_log;
unsigned int row, s_row = cb->args[0];
int s_col = cb->args[1], col = s_col;
+ int res = 0;
for (row = s_row; row < max_rows; row++, s_col = 0) {
struct tcp_metrics_block *tm;
@@ -778,7 +780,8 @@ static int tcp_metrics_nl_dump(struct sk_buff *skb,
continue;
if (col < s_col)
continue;
- if (tcp_metrics_dump_info(skb, cb, tm) < 0) {
+ res = tcp_metrics_dump_info(skb, cb, tm);
+ if (res < 0) {
rcu_read_unlock();
goto done;
}
@@ -789,7 +792,7 @@ static int tcp_metrics_nl_dump(struct sk_buff *skb,
done:
cb->args[0] = row;
cb->args[1] = col;
- return skb->len;
+ return res;
}
static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
@@ -986,6 +989,7 @@ static struct genl_family tcp_metrics_nl_family __ro_after_init = {
.maxattr = TCP_METRICS_ATTR_MAX,
.policy = tcp_metrics_nl_policy,
.netnsok = true,
+ .parallel_ops = true,
.module = THIS_MODULE,
.small_ops = tcp_metrics_nl_ops,
.n_small_ops = ARRAY_SIZE(tcp_metrics_nl_ops),
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f0761f060a83..a19a9dbd3409 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -22,6 +22,7 @@
#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/busy_poll.h>
+#include <net/rstreason.h>
static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
{
@@ -95,21 +96,23 @@ static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq)
*/
enum tcp_tw_status
tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
- const struct tcphdr *th)
+ const struct tcphdr *th, u32 *tw_isn)
{
struct tcp_options_received tmp_opt;
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
bool paws_reject = false;
+ int ts_recent_stamp;
tmp_opt.saw_tstamp = 0;
- if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
+ ts_recent_stamp = READ_ONCE(tcptw->tw_ts_recent_stamp);
+ if (th->doff > (sizeof(*th) >> 2) && ts_recent_stamp) {
tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL);
if (tmp_opt.saw_tstamp) {
if (tmp_opt.rcv_tsecr)
tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset;
- tmp_opt.ts_recent = tcptw->tw_ts_recent;
- tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
+ tmp_opt.ts_recent = READ_ONCE(tcptw->tw_ts_recent);
+ tmp_opt.ts_recent_stamp = ts_recent_stamp;
paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
}
}
@@ -151,8 +154,10 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
twsk_rcv_nxt_update(tcptw, TCP_SKB_CB(skb)->end_seq);
if (tmp_opt.saw_tstamp) {
- tcptw->tw_ts_recent_stamp = ktime_get_seconds();
- tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
+ WRITE_ONCE(tcptw->tw_ts_recent_stamp,
+ ktime_get_seconds());
+ WRITE_ONCE(tcptw->tw_ts_recent,
+ tmp_opt.rcv_tsval);
}
inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
@@ -196,8 +201,10 @@ kill:
}
if (tmp_opt.saw_tstamp) {
- tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
- tcptw->tw_ts_recent_stamp = ktime_get_seconds();
+ WRITE_ONCE(tcptw->tw_ts_recent,
+ tmp_opt.rcv_tsval);
+ WRITE_ONCE(tcptw->tw_ts_recent_stamp,
+ ktime_get_seconds());
}
inet_twsk_put(tw);
@@ -224,11 +231,11 @@ kill:
if (th->syn && !th->rst && !th->ack && !paws_reject &&
(after(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt) ||
(tmp_opt.saw_tstamp &&
- (s32)(tcptw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) {
+ (s32)(READ_ONCE(tcptw->tw_ts_recent) - tmp_opt.rcv_tsval) < 0))) {
u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
if (isn == 0)
isn++;
- TCP_SKB_CB(skb)->tcp_tw_isn = isn;
+ *tw_isn = isn;
return TCP_TW_SYN;
}
@@ -338,17 +345,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
if (state == TCP_TIME_WAIT)
timeo = TCP_TIMEWAIT_LEN;
- /* tw_timer is pinned, so we need to make sure BH are disabled
- * in following section, otherwise timer handler could run before
- * we complete the initialization.
- */
- local_bh_disable();
- inet_twsk_schedule(tw, timeo);
/* Linkage updates.
* Note that access to tw after this point is illegal.
*/
- inet_twsk_hashdance(tw, sk, net->ipv4.tcp_death_row.hashinfo);
- local_bh_enable();
+ inet_twsk_hashdance_schedule(tw, sk, net->ipv4.tcp_death_row.hashinfo, timeo);
} else {
/* Sorry, if we're out of memory, just CLOSE this
* socket up. We've got bigger problems than
@@ -388,7 +388,7 @@ void tcp_twsk_destructor(struct sock *sk)
}
EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
-void tcp_twsk_purge(struct list_head *net_exit_list, int family)
+void tcp_twsk_purge(struct list_head *net_exit_list)
{
bool purged_once = false;
struct net *net;
@@ -396,14 +396,13 @@ void tcp_twsk_purge(struct list_head *net_exit_list, int family)
list_for_each_entry(net, net_exit_list, exit_list) {
if (net->ipv4.tcp_death_row.hashinfo->pernet) {
/* Even if tw_refcount == 1, we must clean up kernel reqsk */
- inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo, family);
+ inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo);
} else if (!purged_once) {
- inet_twsk_purge(&tcp_hashinfo, family);
+ inet_twsk_purge(&tcp_hashinfo);
purged_once = true;
}
}
}
-EXPORT_SYMBOL_GPL(tcp_twsk_purge);
/* Warning : This function is called without sk_listener being locked.
* Be sure to read socket fields once, as their value could change under us.
@@ -515,9 +514,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
const struct tcp_sock *oldtp;
struct tcp_sock *newtp;
u32 seq;
-#ifdef CONFIG_TCP_AO
- struct tcp_ao_key *ao_key;
-#endif
if (!newsk)
return NULL;
@@ -608,10 +604,14 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
#endif
#ifdef CONFIG_TCP_AO
newtp->ao_info = NULL;
- ao_key = treq->af_specific->ao_lookup(sk, req,
- tcp_rsk(req)->ao_keyid, -1);
- if (ao_key)
- newtp->tcp_header_len += tcp_ao_len_aligned(ao_key);
+
+ if (tcp_rsk_used_ao(req)) {
+ struct tcp_ao_key *ao_key;
+
+ ao_key = treq->af_specific->ao_lookup(sk, req, tcp_rsk(req)->ao_keyid, -1);
+ if (ao_key)
+ newtp->tcp_header_len += tcp_ao_len_aligned(ao_key);
+ }
#endif
if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
@@ -783,8 +783,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
/* RFC793: "first check sequence number". */
- if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
- tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rsk_rcv_wnd)) {
+ if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq,
+ TCP_SKB_CB(skb)->end_seq,
+ tcp_rsk(req)->rcv_nxt,
+ tcp_rsk(req)->rcv_nxt +
+ tcp_synack_window(req))) {
/* Out of window: send ACK and drop. */
if (!(flg & TCP_FLAG_RST) &&
!tcp_oow_rate_limited(sock_net(sk), skb,
@@ -879,7 +882,7 @@ embryonic_reset:
* avoid becoming vulnerable to outside attack aiming at
* resetting legit local connections.
*/
- req->rsk_ops->send_reset(sk, skb);
+ req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_INVALID_SYN);
} else if (fastopen) { /* received a valid RST pkt */
reqsk_fastopen_remove(sk, req, true);
tcp_reset(sk, skb);
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index ebe4722bb020..e4ad3311e148 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -28,6 +28,70 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
}
}
+static void __tcpv4_gso_segment_csum(struct sk_buff *seg,
+ __be32 *oldip, __be32 newip,
+ __be16 *oldport, __be16 newport)
+{
+ struct tcphdr *th;
+ struct iphdr *iph;
+
+ if (*oldip == newip && *oldport == newport)
+ return;
+
+ th = tcp_hdr(seg);
+ iph = ip_hdr(seg);
+
+ inet_proto_csum_replace4(&th->check, seg, *oldip, newip, true);
+ inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false);
+ *oldport = newport;
+
+ csum_replace4(&iph->check, *oldip, newip);
+ *oldip = newip;
+}
+
+static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs)
+{
+ const struct tcphdr *th;
+ const struct iphdr *iph;
+ struct sk_buff *seg;
+ struct tcphdr *th2;
+ struct iphdr *iph2;
+
+ seg = segs;
+ th = tcp_hdr(seg);
+ iph = ip_hdr(seg);
+ th2 = tcp_hdr(seg->next);
+ iph2 = ip_hdr(seg->next);
+
+ if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) &&
+ iph->daddr == iph2->daddr && iph->saddr == iph2->saddr)
+ return segs;
+
+ while ((seg = seg->next)) {
+ th2 = tcp_hdr(seg);
+ iph2 = ip_hdr(seg);
+
+ __tcpv4_gso_segment_csum(seg,
+ &iph2->saddr, iph->saddr,
+ &th2->source, th->source);
+ __tcpv4_gso_segment_csum(seg,
+ &iph2->daddr, iph->daddr,
+ &th2->dest, th->dest);
+ }
+
+ return segs;
+}
+
+static struct sk_buff *__tcp4_gso_segment_list(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+ if (IS_ERR(skb))
+ return skb;
+
+ return __tcpv4_gso_segment_list_csum(skb);
+}
+
static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -37,6 +101,9 @@ static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
return ERR_PTR(-EINVAL);
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
+ return __tcp4_gso_segment_list(skb, features);
+
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
const struct iphdr *iph = ip_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
@@ -73,6 +140,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
if (thlen < sizeof(*th))
goto out;
+ if (unlikely(skb_checksum_start(skb) != skb_transport_header(skb)))
+ goto out;
+
if (!pskb_may_pull(skb, thlen))
goto out;
@@ -178,63 +248,76 @@ out:
return segs;
}
-struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
+struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th)
{
- struct sk_buff *pp = NULL;
+ struct tcphdr *th2;
struct sk_buff *p;
+
+ list_for_each_entry(p, head, list) {
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ th2 = tcp_hdr(p);
+ if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+
+ return p;
+ }
+
+ return NULL;
+}
+
+struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb)
+{
+ unsigned int thlen, hlen, off;
struct tcphdr *th;
- struct tcphdr *th2;
- unsigned int len;
- unsigned int thlen;
- __be32 flags;
- unsigned int mss = 1;
- unsigned int hlen;
- unsigned int off;
- int flush = 1;
- int i;
off = skb_gro_offset(skb);
hlen = off + sizeof(*th);
th = skb_gro_header(skb, hlen, off);
if (unlikely(!th))
- goto out;
+ return NULL;
thlen = th->doff * 4;
if (thlen < sizeof(*th))
- goto out;
+ return NULL;
hlen = off + thlen;
if (!skb_gro_may_pull(skb, hlen)) {
th = skb_gro_header_slow(skb, hlen, off);
if (unlikely(!th))
- goto out;
+ return NULL;
}
skb_gro_pull(skb, thlen);
- len = skb_gro_len(skb);
- flags = tcp_flag_word(th);
-
- list_for_each_entry(p, head, list) {
- if (!NAPI_GRO_CB(p)->same_flow)
- continue;
+ return th;
+}
- th2 = tcp_hdr(p);
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
+ struct tcphdr *th)
+{
+ unsigned int thlen = th->doff * 4;
+ struct sk_buff *pp = NULL;
+ struct sk_buff *p;
+ struct tcphdr *th2;
+ unsigned int len;
+ __be32 flags;
+ unsigned int mss = 1;
+ int flush = 1;
+ int i;
- if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
- NAPI_GRO_CB(p)->same_flow = 0;
- continue;
- }
+ len = skb_gro_len(skb);
+ flags = tcp_flag_word(th);
- goto found;
- }
- p = NULL;
- goto out_check_final;
+ p = tcp_gro_lookup(head, th);
+ if (!p)
+ goto out_check_final;
-found:
- /* Include the IP ID check below from the inner most IP hdr */
- flush = NAPI_GRO_CB(p)->flush;
- flush |= (__force int)(flags & TCP_FLAG_CWR);
+ th2 = tcp_hdr(p);
+ flush = (__force int)(flags & TCP_FLAG_CWR);
flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
flush |= (__force int)(th->ack_seq ^ th2->ack_seq);
@@ -242,16 +325,7 @@ found:
flush |= *(u32 *)((u8 *)th + i) ^
*(u32 *)((u8 *)th2 + i);
- /* When we receive our second frame we can made a decision on if we
- * continue this flow as an atomic flow with a fixed ID or if we use
- * an incrementing ID.
- */
- if (NAPI_GRO_CB(p)->flush_id != 1 ||
- NAPI_GRO_CB(p)->count != 1 ||
- !NAPI_GRO_CB(p)->is_atomic)
- flush |= NAPI_GRO_CB(p)->flush_id;
- else
- NAPI_GRO_CB(p)->is_atomic = false;
+ flush |= gro_receive_network_flush(th, th2, p);
mss = skb_shinfo(p)->gso_size;
@@ -265,9 +339,19 @@ found:
flush |= (len - 1) >= mss;
flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
-#ifdef CONFIG_TLS_DEVICE
- flush |= p->decrypted ^ skb->decrypted;
-#endif
+ flush |= skb_cmp_decrypted(p, skb);
+
+ if (unlikely(NAPI_GRO_CB(p)->is_flist)) {
+ flush |= (__force int)(flags ^ tcp_flag_word(th2));
+ flush |= skb->ip_summed != p->ip_summed;
+ flush |= skb->csum_level != p->csum_level;
+ flush |= NAPI_GRO_CB(p)->count >= 64;
+
+ if (flush || skb_gro_receive_list(p, skb))
+ mss = 1;
+
+ goto out_check_final;
+ }
if (flush || skb_gro_receive(p, skb)) {
mss = 1;
@@ -290,7 +374,6 @@ out_check_final:
if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
pp = p;
-out:
NAPI_GRO_CB(skb)->flush |= (flush != 0);
return pp;
@@ -316,30 +399,80 @@ void tcp_gro_complete(struct sk_buff *skb)
}
EXPORT_SYMBOL(tcp_gro_complete);
+static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
+ struct tcphdr *th)
+{
+ const struct iphdr *iph;
+ struct sk_buff *p;
+ struct sock *sk;
+ struct net *net;
+ int iif, sdif;
+
+ if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST)))
+ return;
+
+ p = tcp_gro_lookup(head, th);
+ if (p) {
+ NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
+ return;
+ }
+
+ inet_get_iif_sdif(skb, &iif, &sdif);
+ iph = skb_gro_network_header(skb);
+ net = dev_net(skb->dev);
+ sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
+ iph->saddr, th->source,
+ iph->daddr, ntohs(th->dest),
+ iif, sdif);
+ NAPI_GRO_CB(skb)->is_flist = !sk;
+ if (sk)
+ sock_put(sk);
+}
+
INDIRECT_CALLABLE_SCOPE
struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
{
+ struct tcphdr *th;
+
/* Don't bother verifying checksum if we're going to flush anyway. */
if (!NAPI_GRO_CB(skb)->flush &&
skb_gro_checksum_validate(skb, IPPROTO_TCP,
- inet_gro_compute_pseudo)) {
- NAPI_GRO_CB(skb)->flush = 1;
- return NULL;
- }
+ inet_gro_compute_pseudo))
+ goto flush;
+
+ th = tcp_gro_pull_header(skb);
+ if (!th)
+ goto flush;
- return tcp_gro_receive(head, skb);
+ tcp4_check_fraglist_gro(head, skb, th);
+
+ return tcp_gro_receive(head, skb, th);
+
+flush:
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
}
INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
{
- const struct iphdr *iph = ip_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct iphdr *iph = (struct iphdr *)(skb->data + offset);
struct tcphdr *th = tcp_hdr(skb);
+ if (unlikely(NAPI_GRO_CB(skb)->is_flist)) {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV4;
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+ __skb_incr_checksum_unnecessary(skb);
+
+ return 0;
+ }
+
th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
iph->daddr, 0);
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4 |
- (NAPI_GRO_CB(skb)->is_atomic * SKB_GSO_TCP_FIXEDID);
+ (NAPI_GRO_CB(skb)->ip_fixedid * SKB_GSO_TCP_FIXEDID);
tcp_gro_complete(skb);
return 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e3167ad96567..16c48df8df4c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -39,11 +39,13 @@
#include <net/tcp.h>
#include <net/mptcp.h>
+#include <net/proto_memory.h>
#include <linux/compiler.h>
#include <linux/gfp.h>
#include <linux/module.h>
#include <linux/static_key.h>
+#include <linux/skbuff_ref.h>
#include <trace/events/tcp.h>
@@ -203,16 +205,17 @@ static inline void tcp_event_ack_sent(struct sock *sk, u32 rcv_nxt)
* This MUST be enforced by all callers.
*/
void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
- __u32 *rcv_wnd, __u32 *window_clamp,
+ __u32 *rcv_wnd, __u32 *__window_clamp,
int wscale_ok, __u8 *rcv_wscale,
__u32 init_rcv_wnd)
{
unsigned int space = (__space < 0 ? 0 : __space);
+ u32 window_clamp = READ_ONCE(*__window_clamp);
/* If no clamp set the clamp to the max possible scaled window */
- if (*window_clamp == 0)
- (*window_clamp) = (U16_MAX << TCP_MAX_WSCALE);
- space = min(*window_clamp, space);
+ if (window_clamp == 0)
+ window_clamp = (U16_MAX << TCP_MAX_WSCALE);
+ space = min(window_clamp, space);
/* Quantize space offering to a multiple of mss if possible. */
if (space > mss)
@@ -229,7 +232,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows))
(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
else
- (*rcv_wnd) = min_t(u32, space, U16_MAX);
+ (*rcv_wnd) = space;
if (init_rcv_wnd)
*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
@@ -239,12 +242,13 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
/* Set window scaling on max possible window */
space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
space = max_t(u32, space, READ_ONCE(sysctl_rmem_max));
- space = min_t(u32, space, *window_clamp);
+ space = min_t(u32, space, window_clamp);
*rcv_wscale = clamp_t(int, ilog2(space) - 15,
0, TCP_MAX_WSCALE);
}
/* Set the clamp no higher than max representable value */
- (*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
+ WRITE_ONCE(*__window_clamp,
+ min_t(__u32, U16_MAX << (*rcv_wscale), window_clamp));
}
EXPORT_SYMBOL(tcp_select_initial_window);
@@ -1297,7 +1301,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
tp = tcp_sk(sk);
prior_wstamp = tp->tcp_wstamp_ns;
tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
- skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
+ skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC);
if (clone_it) {
oskb = skb;
@@ -1499,18 +1503,22 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
}
/* Initialize TSO segments for a packet. */
-static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
+static int tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
{
+ int tso_segs;
+
if (skb->len <= mss_now) {
/* Avoid the costly divide in the normal
* non-TSO case.
*/
- tcp_skb_pcount_set(skb, 1);
TCP_SKB_CB(skb)->tcp_gso_size = 0;
- } else {
- tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now));
- TCP_SKB_CB(skb)->tcp_gso_size = mss_now;
+ tcp_skb_pcount_set(skb, 1);
+ return 1;
}
+ TCP_SKB_CB(skb)->tcp_gso_size = mss_now;
+ tso_segs = DIV_ROUND_UP(skb->len, mss_now);
+ tcp_skb_pcount_set(skb, tso_segs);
+ return tso_segs;
}
/* Pcount in the middle of the write queue got changed, we need to do various
@@ -1647,7 +1655,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
skb_split(skb, buff, len);
- skb_set_delivery_time(buff, skb->tstamp, true);
+ skb_set_delivery_time(buff, skb->tstamp, SKB_CLOCK_MONOTONIC);
tcp_fragment_tstamp(skb, buff);
old_factor = tcp_skb_pcount(skb);
@@ -2070,16 +2078,10 @@ static unsigned int tcp_mss_split_point(const struct sock *sk,
/* Can at least one segment of SKB be sent right now, according to the
* congestion window rules? If so, return how many segments are allowed.
*/
-static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
- const struct sk_buff *skb)
+static u32 tcp_cwnd_test(const struct tcp_sock *tp)
{
u32 in_flight, cwnd, halfcwnd;
- /* Don't be strict about the congestion window for the final FIN. */
- if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
- tcp_skb_pcount(skb) == 1)
- return 1;
-
in_flight = tcp_packets_in_flight(tp);
cwnd = tcp_snd_cwnd(tp);
if (in_flight >= cwnd)
@@ -2100,10 +2102,9 @@ static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now)
{
int tso_segs = tcp_skb_pcount(skb);
- if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
- tcp_set_skb_tso_segs(skb, mss_now);
- tso_segs = tcp_skb_pcount(skb);
- }
+ if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now))
+ return tcp_set_skb_tso_segs(skb, mss_now);
+
return tso_segs;
}
@@ -2403,6 +2404,21 @@ commit:
return 0;
}
+/* tcp_mtu_probe() and tcp_grow_skb() can both eat an skb (src) if
+ * all its payload was moved to another one (dst).
+ * Make sure to transfer tcp_flags, eor, and tstamp.
+ */
+static void tcp_eat_one_skb(struct sock *sk,
+ struct sk_buff *dst,
+ struct sk_buff *src)
+{
+ TCP_SKB_CB(dst)->tcp_flags |= TCP_SKB_CB(src)->tcp_flags;
+ TCP_SKB_CB(dst)->eor = TCP_SKB_CB(src)->eor;
+ tcp_skb_collapse_tstamp(dst, src);
+ tcp_unlink_write_queue(src, sk);
+ tcp_wmem_free_skb(sk, src);
+}
+
/* Create a new MTU probe if we are ready.
* MTU probe is regularly attempting to increase the path MTU by
* deliberately sending larger packets. This discovers routing
@@ -2508,16 +2524,7 @@ static int tcp_mtu_probe(struct sock *sk)
copy = min_t(int, skb->len, probe_size - len);
if (skb->len <= copy) {
- /* We've eaten all the data from this skb.
- * Throw it away. */
- TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
- /* If this is the last SKB we copy and eor is set
- * we need to propagate it to the new skb.
- */
- TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
- tcp_skb_collapse_tstamp(nskb, skb);
- tcp_unlink_write_queue(skb, sk);
- tcp_wmem_free_skb(sk, skb);
+ tcp_eat_one_skb(sk, nskb, skb);
} else {
TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
~(TCPHDR_FIN|TCPHDR_PSH);
@@ -2683,6 +2690,35 @@ void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
tcp_chrono_set(tp, TCP_CHRONO_BUSY);
}
+/* First skb in the write queue is smaller than ideal packet size.
+ * Check if we can move payload from the second skb in the queue.
+ */
+static void tcp_grow_skb(struct sock *sk, struct sk_buff *skb, int amount)
+{
+ struct sk_buff *next_skb = skb->next;
+ unsigned int nlen;
+
+ if (tcp_skb_is_last(sk, skb))
+ return;
+
+ if (!tcp_skb_can_collapse(skb, next_skb))
+ return;
+
+ nlen = min_t(u32, amount, next_skb->len);
+ if (!nlen || !skb_shift(skb, next_skb, nlen))
+ return;
+
+ TCP_SKB_CB(skb)->end_seq += nlen;
+ TCP_SKB_CB(next_skb)->seq += nlen;
+
+ if (!next_skb->len) {
+ /* In case FIN is set, we need to update end_seq */
+ TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
+
+ tcp_eat_one_skb(sk, skb, next_skb);
+ }
+}
+
/* This routine writes packets to the network. It advances the
* send_head. This happens as incoming acks open up the remote
* window for us.
@@ -2703,10 +2739,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
unsigned int tso_segs, sent_pkts;
- int cwnd_quota;
+ u32 cwnd_quota, max_segs;
int result;
bool is_cwnd_limited = false, is_rwnd_limited = false;
- u32 max_segs;
sent_pkts = 0;
@@ -2724,11 +2759,12 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
max_segs = tcp_tso_segs(sk, mss_now);
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
+ int missing_bytes;
if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
/* "skb_mstamp_ns" is used as a start point for the retransmit timer */
tp->tcp_wstamp_ns = tp->tcp_clock_cache;
- skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
+ skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC);
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
tcp_init_tso_segs(skb, mss_now);
goto repair; /* Skip network transmission */
@@ -2737,10 +2773,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (tcp_pacing_check(sk))
break;
- tso_segs = tcp_init_tso_segs(skb, mss_now);
- BUG_ON(!tso_segs);
-
- cwnd_quota = tcp_cwnd_test(tp, skb);
+ cwnd_quota = tcp_cwnd_test(tp);
if (!cwnd_quota) {
if (push_one == 2)
/* Force out a loss probe pkt. */
@@ -2748,6 +2781,12 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
else
break;
}
+ cwnd_quota = min(cwnd_quota, max_segs);
+ missing_bytes = cwnd_quota * mss_now - skb->len;
+ if (missing_bytes > 0)
+ tcp_grow_skb(sk, skb, missing_bytes);
+
+ tso_segs = tcp_set_skb_tso_segs(skb, mss_now);
if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) {
is_rwnd_limited = true;
@@ -2769,9 +2808,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
limit = mss_now;
if (tso_segs > 1 && !tcp_urg_mode(tp))
limit = tcp_mss_split_point(sk, skb, mss_now,
- min_t(unsigned int,
- cwnd_quota,
- max_segs),
+ cwnd_quota,
nonagle);
if (skb->len > limit &&
@@ -3387,11 +3424,6 @@ start:
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
- /* To avoid taking spuriously low RTT samples based on a timestamp
- * for a transmit that never happened, always mark EVER_RETRANS
- */
- TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
-
if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG))
tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB,
TCP_SKB_CB(skb)->seq, segs, err);
@@ -3401,6 +3433,12 @@ start:
} else if (err != -EBUSY) {
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs);
}
+
+ /* To avoid taking spuriously low RTT samples based on a timestamp
+ * for a transmit that never happened, always mark EVER_RETRANS
+ */
+ TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+
return err;
}
@@ -3563,7 +3601,9 @@ void tcp_send_fin(struct sock *sk)
return;
}
} else {
- skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
+ skb = alloc_skb_fclone(MAX_TCP_HEADER,
+ sk_gfp_mask(sk, GFP_ATOMIC |
+ __GFP_NOWARN));
if (unlikely(!skb))
return;
@@ -3583,7 +3623,8 @@ void tcp_send_fin(struct sock *sk)
* was unread data in the receive queue. This behavior is recommended
* by RFC 2525, section 2.17. -DaveM
*/
-void tcp_send_active_reset(struct sock *sk, gfp_t priority)
+void tcp_send_active_reset(struct sock *sk, gfp_t priority,
+ enum sk_rst_reason reason)
{
struct sk_buff *skb;
@@ -3608,7 +3649,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
/* skb of trace_tcp_send_reset() keeps the skb that caused RST,
* skb here is different to the troublesome skb, so use NULL
*/
- trace_tcp_send_reset(sk, NULL);
+ trace_tcp_send_reset(sk, NULL, SK_RST_REASON_NOT_SPECIFIED);
}
/* Send a crossed SYN-ACK during socket establishment.
@@ -3711,11 +3752,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
#ifdef CONFIG_SYN_COOKIES
if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok))
skb_set_delivery_time(skb, cookie_init_timestamp(req, now),
- true);
+ SKB_CLOCK_MONOTONIC);
else
#endif
{
- skb_set_delivery_time(skb, now, true);
+ skb_set_delivery_time(skb, now, SKB_CLOCK_MONOTONIC);
if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */
tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb);
}
@@ -3727,6 +3768,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
#ifdef CONFIG_TCP_AO
struct tcp_ao_key *ao_key = NULL;
u8 keyid = tcp_rsk(req)->ao_keyid;
+ u8 rnext = tcp_rsk(req)->ao_rcv_next;
ao_key = tcp_sk(sk)->af_specific->ao_lookup(sk, req_to_sk(req),
keyid, -1);
@@ -3736,6 +3778,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
* ao_keyid (RFC5925 RNextKeyID), so let's keep it simple here.
*/
if (unlikely(!ao_key)) {
+ trace_tcp_ao_synack_no_key(sk, keyid, rnext);
rcu_read_unlock();
kfree_skb(skb);
net_warn_ratelimited("TCP-AO: the keyid %u from SYN packet is not present - not sending SYNACK\n",
@@ -3802,7 +3845,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
bpf_skops_write_hdr_opt((struct sock *)sk, skb, req, syn_skb,
synack_type, &opts);
- skb_set_delivery_time(skb, now, true);
+ skb_set_delivery_time(skb, now, SKB_CLOCK_MONOTONIC);
tcp_add_tx_delay(skb, tp);
return skb;
@@ -3855,7 +3898,7 @@ static void tcp_connect_init(struct sock *sk)
tcp_ca_dst_init(sk, dst);
if (!tp->window_clamp)
- tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
+ WRITE_ONCE(tp->window_clamp, dst_metric(dst, RTAX_WINDOW));
tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
tcp_initialize_rcv_mss(sk);
@@ -3863,7 +3906,7 @@ static void tcp_connect_init(struct sock *sk)
/* limit the window selection if the user enforce a smaller rx buffer */
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
(tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
- tp->window_clamp = tcp_full_space(sk);
+ WRITE_ONCE(tp->window_clamp, tcp_full_space(sk));
rcv_wnd = tcp_rwnd_init_bpf(sk);
if (rcv_wnd == 0)
@@ -3986,7 +4029,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
- skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, true);
+ skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, SKB_CLOCK_MONOTONIC);
/* Now full SYN+DATA was cloned and sent (or not),
* remove the SYN from the original skb (syn_data)
@@ -4122,16 +4165,9 @@ EXPORT_SYMBOL(tcp_connect);
u32 tcp_delack_max(const struct sock *sk)
{
- const struct dst_entry *dst = __sk_dst_get(sk);
- u32 delack_max = inet_csk(sk)->icsk_delack_max;
+ u32 delack_from_rto_min = max(tcp_rto_min(sk), 2) - 1;
- if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) {
- u32 rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
- u32 delack_from_rto_min = max_t(int, 1, rto_min - 1);
-
- delack_max = min_t(u32, delack_max, delack_from_rto_min);
- }
- return delack_max;
+ return min(inet_csk(sk)->icsk_delack_max, delack_from_rto_min);
}
/* Send out a delayed ack, the caller does the policy checking
diff --git a/net/ipv4/tcp_sigpool.c b/net/ipv4/tcp_sigpool.c
index 8512cb09ebc0..d8a4f192873a 100644
--- a/net/ipv4/tcp_sigpool.c
+++ b/net/ipv4/tcp_sigpool.c
@@ -10,7 +10,14 @@
#include <net/tcp.h>
static size_t __scratch_size;
-static DEFINE_PER_CPU(void __rcu *, sigpool_scratch);
+struct sigpool_scratch {
+ local_lock_t bh_lock;
+ void __rcu *pad;
+};
+
+static DEFINE_PER_CPU(struct sigpool_scratch, sigpool_scratch) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
struct sigpool_entry {
struct crypto_ahash *hash;
@@ -72,7 +79,7 @@ static int sigpool_reserve_scratch(size_t size)
break;
}
- old_scratch = rcu_replace_pointer(per_cpu(sigpool_scratch, cpu),
+ old_scratch = rcu_replace_pointer(per_cpu(sigpool_scratch.pad, cpu),
scratch, lockdep_is_held(&cpool_mutex));
if (!cpu_online(cpu) || !old_scratch) {
kfree(old_scratch);
@@ -93,7 +100,7 @@ static void sigpool_scratch_free(void)
int cpu;
for_each_possible_cpu(cpu)
- kfree(rcu_replace_pointer(per_cpu(sigpool_scratch, cpu),
+ kfree(rcu_replace_pointer(per_cpu(sigpool_scratch.pad, cpu),
NULL, lockdep_is_held(&cpool_mutex)));
__scratch_size = 0;
}
@@ -277,7 +284,8 @@ int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c) __cond_acquires(RC
/* Pairs with tcp_sigpool_reserve_scratch(), scratch area is
* valid (allocated) until tcp_sigpool_end().
*/
- c->scratch = rcu_dereference_bh(*this_cpu_ptr(&sigpool_scratch));
+ local_lock_nested_bh(&sigpool_scratch.bh_lock);
+ c->scratch = rcu_dereference_bh(*this_cpu_ptr(&sigpool_scratch.pad));
return 0;
}
EXPORT_SYMBOL_GPL(tcp_sigpool_start);
@@ -286,6 +294,7 @@ void tcp_sigpool_end(struct tcp_sigpool *c) __releases(RCU_BH)
{
struct crypto_ahash *hash = crypto_ahash_reqtfm(c->req);
+ local_unlock_nested_bh(&sigpool_scratch.bh_lock);
rcu_read_unlock_bh();
ahash_request_free(c->req);
crypto_free_ahash(hash);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index d1ad20ce1c8c..4d40615dc8fc 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -22,10 +22,11 @@
#include <linux/module.h>
#include <linux/gfp.h>
#include <net/tcp.h>
+#include <net/rstreason.h>
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
- struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
u32 elapsed, user_timeout;
s32 remaining;
@@ -47,7 +48,7 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
{
- struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
u32 remaining, user_timeout;
s32 elapsed;
@@ -73,11 +74,7 @@ u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
static void tcp_write_err(struct sock *sk)
{
- WRITE_ONCE(sk->sk_err, READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT);
- sk_error_report(sk);
-
- tcp_write_queue_purge(sk);
- tcp_done(sk);
+ tcp_done_with_error(sk, READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
}
@@ -127,7 +124,8 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
(!tp->snd_wnd && !tp->packets_out))
do_reset = true;
if (do_reset)
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
tcp_done(sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
return 1;
@@ -481,11 +479,26 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
const struct sk_buff *skb,
u32 rtx_delta)
{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
const struct tcp_sock *tp = tcp_sk(sk);
- const int timeout = TCP_RTO_MAX * 2;
- u32 rcv_delta;
+ int timeout = TCP_RTO_MAX * 2;
+ s32 rcv_delta;
- rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp;
+ if (user_timeout) {
+ /* If user application specified a TCP_USER_TIMEOUT,
+ * it does not want win 0 packets to 'reset the timer'
+ * while retransmits are not making progress.
+ */
+ if (rtx_delta > user_timeout)
+ return true;
+ timeout = min_t(u32, timeout, msecs_to_jiffies(user_timeout));
+ }
+ /* Note: timer interrupt might have been delayed by at least one jiffy,
+ * and tp->rcv_tstamp might very well have been written recently.
+ * rcv_delta can thus be negative.
+ */
+ rcv_delta = icsk->icsk_timeout - tp->rcv_tstamp;
if (rcv_delta <= timeout)
return false;
@@ -530,8 +543,6 @@ void tcp_retransmit_timer(struct sock *sk)
if (WARN_ON_ONCE(!skb))
return;
- tp->tlp_high_seq = 0;
-
if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
!((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
/* Receiver dastardly shrinks window. Our retransmits
@@ -768,7 +779,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
goto out;
}
}
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_NOT_SPECIFIED);
goto death;
}
@@ -795,7 +806,8 @@ static void tcp_keepalive_timer (struct timer_list *t)
icsk->icsk_probes_out > 0) ||
(user_timeout == 0 &&
icsk->icsk_probes_out >= keepalive_probes(tp))) {
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
tcp_write_err(sk);
goto out;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b32cf2eeeb41..49c622e743e8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -326,6 +326,8 @@ found:
goto fail_unlock;
}
+ sock_set_flag(sk, SOCK_RCU_FREE);
+
sk_add_node_rcu(sk, &hslot->head);
hslot->count++;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -342,7 +344,7 @@ found:
hslot2->count++;
spin_unlock(&hslot2->lock);
}
- sock_set_flag(sk, SOCK_RCU_FREE);
+
error = 0;
fail_unlock:
spin_unlock_bh(&hslot->lock);
@@ -427,15 +429,21 @@ static struct sock *udp4_lib_lookup2(struct net *net,
{
struct sock *sk, *result;
int score, badness;
+ bool need_rescore;
result = NULL;
badness = 0;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
- score = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, sdif);
+ need_rescore = false;
+rescore:
+ score = compute_score(need_rescore ? result : sk, net, saddr,
+ sport, daddr, hnum, dif, sdif);
if (score > badness) {
badness = score;
+ if (need_rescore)
+ continue;
+
if (sk->sk_state == TCP_ESTABLISHED) {
result = sk;
continue;
@@ -456,9 +464,14 @@ static struct sock *udp4_lib_lookup2(struct net *net,
if (IS_ERR(result))
continue;
- badness = compute_score(result, net, saddr, sport,
- daddr, hnum, dif, sdif);
-
+ /* compute_score is too long of a function to be
+ * inlined, and calling it again here yields
+ * measureable overhead for some
+ * workloads. Work around it by jumping
+ * backwards to rescore 'result'.
+ */
+ need_rescore = true;
+ goto rescore;
}
}
return result;
@@ -927,8 +940,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
kfree_skb(skb);
return -EINVAL;
}
- if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
- dst_xfrm(skb_dst(skb))) {
+ if (is_udplite || dst_xfrm(skb_dst(skb))) {
kfree_skb(skb);
return -EIO;
}
@@ -1207,7 +1219,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
if (connected)
- rt = (struct rtable *)sk_dst_check(sk, 0);
+ rt = dst_rtable(sk_dst_check(sk, 0));
if (!rt) {
struct net *net = sock_net(sk);
@@ -1501,13 +1513,15 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
struct sk_buff_head *list = &sk->sk_receive_queue;
int rmem, err = -ENOMEM;
spinlock_t *busy = NULL;
- int size;
+ bool becomes_readable;
+ int size, rcvbuf;
- /* try to avoid the costly atomic add/sub pair when the receive
- * queue is full; always allow at least a packet
+ /* Immediately drop when the receive queue is full.
+ * Always allow at least one packet.
*/
rmem = atomic_read(&sk->sk_rmem_alloc);
- if (rmem > sk->sk_rcvbuf)
+ rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+ if (rmem > rcvbuf)
goto drop;
/* Under mem pressure, it might be helpful to help udp_recvmsg()
@@ -1516,7 +1530,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
* - Less cache line misses at copyout() time
* - Less work at consume_skb() (less alien page frag freeing)
*/
- if (rmem > (sk->sk_rcvbuf >> 1)) {
+ if (rmem > (rcvbuf >> 1)) {
skb_condense(skb);
busy = busylock_acquire(sk);
@@ -1524,12 +1538,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
size = skb->truesize;
udp_set_dev_scratch(skb);
- /* we drop only if the receive buf is full and the receive
- * queue contains some other skb
- */
- rmem = atomic_add_return(size, &sk->sk_rmem_alloc);
- if (rmem > (size + (unsigned int)sk->sk_rcvbuf))
- goto uncharge_drop;
+ atomic_add(size, &sk->sk_rmem_alloc);
spin_lock(&list->lock);
err = udp_rmem_schedule(sk, size);
@@ -1545,12 +1554,19 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
*/
sock_skb_set_dropcount(sk, skb);
+ becomes_readable = skb_queue_empty(list);
__skb_queue_tail(list, skb);
spin_unlock(&list->lock);
- if (!sock_flag(sk, SOCK_DEAD))
- INDIRECT_CALL_1(sk->sk_data_ready, sock_def_readable, sk);
-
+ if (!sock_flag(sk, SOCK_DEAD)) {
+ if (becomes_readable ||
+ sk->sk_data_ready != sock_def_readable ||
+ READ_ONCE(sk->sk_peek_off) >= 0)
+ INDIRECT_CALL_1(sk->sk_data_ready,
+ sock_def_readable, sk);
+ else
+ sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
+ }
busylock_release(busy);
return 0;
@@ -2058,8 +2074,8 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
drop_reason = SKB_DROP_REASON_PROTO_MEM;
}
UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
- kfree_skb_reason(skb, drop_reason);
- trace_udp_fail_queue_rcv_skb(rc, sk);
+ trace_udp_fail_queue_rcv_skb(rc, sk, skb);
+ sk_skb_reason_drop(sk, skb, drop_reason);
return -1;
}
@@ -2181,7 +2197,7 @@ csum_error:
drop:
__UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
atomic_inc(&sk->sk_drops);
- kfree_skb_reason(skb, drop_reason);
+ sk_skb_reason_drop(sk, skb, drop_reason);
return -1;
}
@@ -2215,7 +2231,7 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
struct dst_entry *old;
if (dst_hold_safe(dst)) {
- old = xchg((__force struct dst_entry **)&sk->sk_rx_dst, dst);
+ old = unrcu_pointer(xchg(&sk->sk_rx_dst, RCU_INITIALIZER(dst)));
dst_release(old);
return old != dst;
}
@@ -2368,7 +2384,7 @@ static int udp_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb,
int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
int proto)
{
- struct sock *sk;
+ struct sock *sk = NULL;
struct udphdr *uh;
unsigned short ulen;
struct rtable *rt = skb_rtable(skb);
@@ -2445,7 +2461,7 @@ no_sk:
* Hmm. We got an UDP packet to a port to which we
* don't wanna listen. Ignore it.
*/
- kfree_skb_reason(skb, drop_reason);
+ sk_skb_reason_drop(sk, skb, drop_reason);
return 0;
short_packet:
@@ -2470,7 +2486,7 @@ csum_error:
__UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
drop:
__UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
- kfree_skb_reason(skb, drop_reason);
+ sk_skb_reason_drop(sk, skb, drop_reason);
return 0;
}
@@ -2697,8 +2713,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
#ifdef CONFIG_XFRM
case UDP_ENCAP_ESPINUDP:
set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, sk);
- fallthrough;
- case UDP_ENCAP_ESPINUDP_NON_IKE:
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6)
WRITE_ONCE(up->encap_rcv,
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 8721fe5beca2..b254a5dadfcf 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -278,6 +278,16 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
if (gso_skb->len <= sizeof(*uh) + mss)
return ERR_PTR(-EINVAL);
+ if (unlikely(skb_checksum_start(gso_skb) !=
+ skb_transport_header(gso_skb)))
+ return ERR_PTR(-EINVAL);
+
+ /* We don't know if egress device can segment and checksum the packet
+ * when IPv6 extension headers are present. Fall back to software GSO.
+ */
+ if (gso_skb->ip_summed != CHECKSUM_PARTIAL)
+ features &= ~(NETIF_F_GSO_UDP_L4 | NETIF_F_CSUM_MASK);
+
if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) {
/* Packet is from an untrusted source, reset gso_segs. */
skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh),
@@ -357,6 +367,14 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
else
uh->check = gso_make_checksum(seg, ~check) ? : CSUM_MANGLED_0;
+ /* On the TX path, CHECKSUM_NONE and CHECKSUM_UNNECESSARY have the same
+ * meaning. However, check for bad offloads in the GSO stack expects the
+ * latter, if the checksum was calculated in software. To vouch for the
+ * segment skbs we actually need to set it on the gso_skb.
+ */
+ if (gso_skb->ip_summed == CHECKSUM_NONE)
+ gso_skb->ip_summed = CHECKSUM_UNNECESSARY;
+
/* update refcount for the packet */
if (copy_dtor) {
int delta = sum_truesize - gso_skb->truesize;
@@ -433,33 +451,6 @@ out:
return segs;
}
-static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
-{
- if (unlikely(p->len + skb->len >= 65536))
- return -E2BIG;
-
- if (NAPI_GRO_CB(p)->last == p)
- skb_shinfo(p)->frag_list = skb;
- else
- NAPI_GRO_CB(p)->last->next = skb;
-
- skb_pull(skb, skb_gro_offset(skb));
-
- NAPI_GRO_CB(p)->last = skb;
- NAPI_GRO_CB(p)->count++;
- p->data_len += skb->len;
-
- /* sk ownership - if any - completely transferred to the aggregated packet */
- skb->destructor = NULL;
- skb->sk = NULL;
- p->truesize += skb->truesize;
- p->len += skb->len;
-
- NAPI_GRO_CB(skb)->same_flow = 1;
-
- return 0;
-}
-
#define UDP_GRO_CNT_MAX 64
static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
@@ -505,14 +496,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
return p;
}
- flush = NAPI_GRO_CB(p)->flush;
-
- if (NAPI_GRO_CB(p)->flush_id != 1 ||
- NAPI_GRO_CB(p)->count != 1 ||
- !NAPI_GRO_CB(p)->is_atomic)
- flush |= NAPI_GRO_CB(p)->flush_id;
- else
- NAPI_GRO_CB(p)->is_atomic = false;
+ flush = gro_receive_network_flush(uh, uh2, p);
/* Terminate the flow on len mismatch or if it grow "too much".
* Under small packet flood GRO count could elsewhere grow a lot
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index 860aff5f8599..e4e0fa869fa4 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -183,7 +183,8 @@ void udp_tunnel_sock_release(struct socket *sock)
EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family,
- __be16 flags, __be64 tunnel_id, int md_size)
+ const unsigned long *flags,
+ __be64 tunnel_id, int md_size)
{
struct metadata_dst *tun_dst;
struct ip_tunnel_info *info;
@@ -199,7 +200,7 @@ struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family,
info->key.tp_src = udp_hdr(skb)->source;
info->key.tp_dst = udp_hdr(skb)->dest;
if (udp_hdr(skb)->check)
- info->key.tun_flags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
return tun_dst;
}
EXPORT_SYMBOL_GPL(udp_tun_rx_dst);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index dae35101d189..a620618cc568 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -63,7 +63,11 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
ip_send_check(iph);
if (xo && (xo->flags & XFRM_GRO)) {
- skb_mac_header_rebuild(skb);
+ /* The full l2 header needs to be preserved so that re-injecting the packet at l2
+ * works correctly in the presence of vlan tags.
+ */
+ skb_mac_header_rebuild_full(skb, xo->orig_mac_len);
+ skb_reset_network_header(skb);
skb_reset_transport_header(skb);
return 0;
}
@@ -113,19 +117,6 @@ static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull
/* Must be an IKE packet.. pass it through */
return 1;
break;
- case UDP_ENCAP_ESPINUDP_NON_IKE:
- /* Check if this is a keepalive packet. If so, eat it. */
- if (len == 1 && udpdata[0] == 0xff) {
- return -EINVAL;
- } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
- udpdata32[0] == 0 && udpdata32[1] == 0) {
-
- /* ESP Packet with Non-IKE marker */
- len = sizeof(struct udphdr) + 2 * sizeof(u32);
- } else
- /* Must be an IKE packet.. pass it through */
- return 1;
- break;
}
/* At this point we are sure that this is an ESPinUDP packet,
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index c33bca2c3841..0294fef577fa 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -69,7 +69,7 @@ static int xfrm4_get_saddr(struct net *net, int oif,
static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
const struct flowi *fl)
{
- struct rtable *rt = (struct rtable *)xdst->route;
+ struct rtable *rt = dst_rtable(xdst->route);
const struct flowi4 *fl4 = &fl->u.ip4;
xdst->u.rt.rt_iif = fl4->flowi4_iif;
@@ -152,7 +152,6 @@ static struct ctl_table xfrm4_policy_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
static __net_init int xfrm4_net_sysctl_init(struct net *net)
@@ -186,7 +185,7 @@ err_alloc:
static __net_exit void xfrm4_net_sysctl_exit(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
if (!net->ipv4.xfrm4_hdr)
return;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 779aa6ecdd49..f70d8757af1a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -63,6 +63,7 @@
#include <linux/string.h>
#include <linux/hash.h>
+#include <net/ip_tunnels.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/snmp.h>
@@ -862,7 +863,7 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
}
}
-static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
+static int addrconf_fixup_forwarding(const struct ctl_table *table, int *p, int newf)
{
struct net *net;
int old;
@@ -930,7 +931,7 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf)
}
}
-static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf)
+static int addrconf_fixup_linkdown(const struct ctl_table *table, int *p, int newf)
{
struct net *net;
int old;
@@ -1872,7 +1873,8 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
master, &dst,
scores, hiscore_idx);
- if (scores[hiscore_idx].ifa)
+ if (scores[hiscore_idx].ifa &&
+ scores[hiscore_idx].scopedist >= 0)
goto out;
}
@@ -2918,7 +2920,7 @@ put:
static int addrconf_set_sit_dstaddr(struct net *net, struct net_device *dev,
struct in6_ifreq *ireq)
{
- struct ip_tunnel_parm p = { };
+ struct ip_tunnel_parm_kern p = { };
int err;
if (!(ipv6_addr_type(&ireq->ifr6_addr) & IPV6_ADDR_COMPATv4))
@@ -6307,7 +6309,7 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
#ifdef CONFIG_SYSCTL
-static int addrconf_sysctl_forward(struct ctl_table *ctl, int write,
+static int addrconf_sysctl_forward(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
@@ -6332,7 +6334,7 @@ static int addrconf_sysctl_forward(struct ctl_table *ctl, int write,
return ret;
}
-static int addrconf_sysctl_mtu(struct ctl_table *ctl, int write,
+static int addrconf_sysctl_mtu(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct inet6_dev *idev = ctl->extra1;
@@ -6377,7 +6379,7 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
}
}
-static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf)
+static int addrconf_disable_ipv6(const struct ctl_table *table, int *p, int newf)
{
struct net *net = (struct net *)table->extra2;
int old;
@@ -6403,7 +6405,7 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf)
return 0;
}
-static int addrconf_sysctl_disable(struct ctl_table *ctl, int write,
+static int addrconf_sysctl_disable(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
@@ -6428,7 +6430,7 @@ static int addrconf_sysctl_disable(struct ctl_table *ctl, int write,
return ret;
}
-static int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write,
+static int addrconf_sysctl_proxy_ndp(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
@@ -6469,7 +6471,7 @@ static int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write,
return ret;
}
-static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
+static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -6532,7 +6534,7 @@ out:
return ret;
}
-static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write,
+static int addrconf_sysctl_stable_secret(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -6600,7 +6602,7 @@ out:
}
static
-int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl,
+int addrconf_sysctl_ignore_routes_with_linkdown(const struct ctl_table *ctl,
int write, void *buffer,
size_t *lenp,
loff_t *ppos)
@@ -6668,7 +6670,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
}
static
-int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val)
+int addrconf_disable_policy(const struct ctl_table *ctl, int *valp, int val)
{
struct net *net = (struct net *)ctl->extra2;
struct inet6_dev *idev;
@@ -6700,7 +6702,7 @@ int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val)
return 0;
}
-static int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
+static int addrconf_sysctl_disable_policy(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
@@ -7183,14 +7185,12 @@ static const struct ctl_table addrconf_sysctl[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
- {
- /* sentinel */
- }
};
static int __addrconf_sysctl_register(struct net *net, char *dev_name,
struct inet6_dev *idev, struct ipv6_devconf *p)
{
+ size_t table_size = ARRAY_SIZE(addrconf_sysctl);
int i, ifindex;
struct ctl_table *table;
char path[sizeof("net/ipv6/conf/") + IFNAMSIZ];
@@ -7199,7 +7199,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
if (!table)
goto out;
- for (i = 0; table[i].data; i++) {
+ for (i = 0; i < table_size; i++) {
table[i].data += (char *)p - (char *)&ipv6_devconf;
/* If one of these is already set, then it is not safe to
* overwrite either of them: this makes proc_dointvec_minmax
@@ -7214,7 +7214,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name);
p->sysctl_header = register_net_sysctl_sz(net, path, table,
- ARRAY_SIZE(addrconf_sysctl));
+ table_size);
if (!p->sysctl_header)
goto free;
@@ -7237,7 +7237,7 @@ out:
static void __addrconf_sysctl_unregister(struct net *net,
struct ipv6_devconf *p, int ifindex)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
if (!p->sysctl_header)
return;
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 17ac45aa7194..acd70b5992a7 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -234,7 +234,8 @@ static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp,
hlist_add_head_rcu(&newp->list, &net->ipv6.ip6addrlbl_table.head);
out:
if (!ret)
- net->ipv6.ip6addrlbl_table.seq++;
+ WRITE_ONCE(net->ipv6.ip6addrlbl_table.seq,
+ net->ipv6.ip6addrlbl_table.seq + 1);
return ret;
}
@@ -445,7 +446,7 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
};
static int ip6addrlbl_fill(struct sk_buff *skb,
- struct ip6addrlbl_entry *p,
+ const struct ip6addrlbl_entry *p,
u32 lseq,
u32 portid, u32 seq, int event,
unsigned int flags)
@@ -498,7 +499,8 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct net *net = sock_net(skb->sk);
struct ip6addrlbl_entry *p;
int idx = 0, s_idx = cb->args[0];
- int err;
+ int err = 0;
+ u32 lseq;
if (cb->strict_check) {
err = ip6addrlbl_valid_dump_req(nlh, cb->extack);
@@ -507,10 +509,11 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
}
rcu_read_lock();
+ lseq = READ_ONCE(net->ipv6.ip6addrlbl_table.seq);
hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
if (idx >= s_idx) {
err = ip6addrlbl_fill(skb, p,
- net->ipv6.ip6addrlbl_table.seq,
+ lseq,
NETLINK_CB(cb->skb).portid,
nlh->nlmsg_seq,
RTM_NEWADDRLABEL,
@@ -522,7 +525,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
}
rcu_read_unlock();
cb->args[0] = idx;
- return skb->len;
+ return err;
}
static inline int ip6addrlbl_msgsize(void)
@@ -614,7 +617,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
rcu_read_lock();
p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
- lseq = net->ipv6.ip6addrlbl_table.seq;
+ lseq = READ_ONCE(net->ipv6.ip6addrlbl_table.seq);
if (p)
err = ip6addrlbl_fill(skb, p, lseq,
NETLINK_CB(in_skb).portid,
@@ -647,6 +650,7 @@ int __init ipv6_addr_label_rtnl_register(void)
return ret;
ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDRLABEL,
ip6addrlbl_get,
- ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED);
+ ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED |
+ RTNL_FLAG_DUMP_UNLOCKED);
return ret;
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 8041dc181bd4..90d2c7e3f5e9 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -509,7 +509,7 @@ void inet6_cleanup_sock(struct sock *sk)
/* Free tx options */
- opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL);
+ opt = unrcu_pointer(xchg(&np->opt, NULL));
if (opt) {
atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
txopt_put(opt);
@@ -1060,6 +1060,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
.nd_tbl = &nd_tbl,
.ipv6_fragment = ip6_fragment,
.ipv6_dev_find = ipv6_dev_find,
+ .ip6_xmit = ip6_xmit,
};
static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 0f2506e35359..0627c4c18d1a 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -252,9 +252,8 @@ static void aca_free_rcu(struct rcu_head *h)
static void aca_put(struct ifacaddr6 *ac)
{
- if (refcount_dec_and_test(&ac->aca_refcnt)) {
- call_rcu(&ac->rcu, aca_free_rcu);
- }
+ if (refcount_dec_and_test(&ac->aca_refcnt))
+ call_rcu_hurry(&ac->rcu, aca_free_rcu);
}
static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 7371886d4f9f..3920e8aa1031 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -36,6 +36,7 @@
#include <net/tcp.h>
#include <net/espintcp.h>
#include <net/inet6_hashtables.h>
+#include <linux/skbuff_ref.h>
#include <linux/highmem.h>
@@ -131,7 +132,7 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
*/
if (req->src != req->dst)
for (sg = sg_next(req->src); sg; sg = sg_next(sg))
- skb_page_unref(skb, sg_page(sg), false);
+ skb_page_unref(sg_page(sg), skb->pp_recycle);
}
#ifdef CONFIG_INET6_ESPINTCP
@@ -255,8 +256,7 @@ static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
#else
static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
{
- kfree_skb(skb);
-
+ WARN_ON(1);
return -EOPNOTSUPP;
}
#endif
@@ -383,7 +383,6 @@ static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb,
__be16 dport)
{
struct udphdr *uh;
- __be32 *udpdata32;
unsigned int len;
len = skb->len + esp->tailen - skb_transport_offset(skb);
@@ -398,12 +397,6 @@ static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb,
*skb_mac_header(skb) = IPPROTO_UDP;
- if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) {
- udpdata32 = (__be32 *)(uh + 1);
- udpdata32[0] = udpdata32[1] = 0;
- return (struct ip_esp_hdr *)(udpdata32 + 2);
- }
-
return (struct ip_esp_hdr *)(uh + 1);
}
@@ -459,7 +452,6 @@ static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb,
switch (encap_type) {
default:
case UDP_ENCAP_ESPINUDP:
- case UDP_ENCAP_ESPINUDP_NON_IKE:
esph = esp6_output_udp_encap(skb, encap_type, esp, sport, dport);
break;
case TCP_ENCAP_ESPINTCP:
@@ -822,7 +814,6 @@ int esp6_input_done2(struct sk_buff *skb, int err)
source = th->source;
break;
case UDP_ENCAP_ESPINUDP:
- case UDP_ENCAP_ESPINUDP_NON_IKE:
source = uh->source;
break;
default:
@@ -1232,9 +1223,6 @@ static int esp6_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack)
case UDP_ENCAP_ESPINUDP:
x->props.header_len += sizeof(struct udphdr);
break;
- case UDP_ENCAP_ESPINUDP_NON_IKE:
- x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32);
- break;
#ifdef CONFIG_INET6_ESPINTCP
case TCP_ENCAP_ESPINTCP:
/* only the length field, TCP encap is done by
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 527b7caddbc6..919ebfabbe4e 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -83,6 +83,13 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
(xfrm_address_t *)&ipv6_hdr(skb)->daddr,
spi, IPPROTO_ESP, AF_INET6);
+
+ if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) {
+ /* non-offload path will record the error and audit log */
+ xfrm_state_put(x);
+ x = NULL;
+ }
+
if (!x)
goto out_reset;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 52c04f0ac498..9e254de7462f 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -233,8 +233,12 @@ static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
rt = pol_lookup_func(lookup,
net, table, flp6, arg->lookup_data, flags);
if (rt != net->ipv6.ip6_null_entry) {
+ struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
+
+ if (!idev)
+ goto again;
err = fib6_rule_saddr(net, rule, flags, flp6,
- ip6_dst_idev(&rt->dst)->dev);
+ idev->dev);
if (err == -EAGAIN)
goto again;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 1635da07285f..7b31674644ef 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -212,7 +212,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
res = true;
} else {
- struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
int tmo = net->ipv6.sysctl.icmpv6_time;
struct inet_peer *peer;
@@ -241,7 +241,7 @@ static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
dst = ip6_route_output(net, sk, fl6);
if (!dst->error) {
- struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
struct in6_addr prefsrc;
rt6_get_prefsrc(rt, &prefsrc);
@@ -616,7 +616,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if (ip6_append_data(sk, icmpv6_getfrag, &msg,
len + sizeof(struct icmp6hdr),
sizeof(struct icmp6hdr),
- &ipc6, &fl6, (struct rt6_info *)dst,
+ &ipc6, &fl6, dst_rt6_info(dst),
MSG_DONTWAIT)) {
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
@@ -803,7 +803,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
if (ip6_append_data(sk, icmpv6_getfrag, &msg,
skb->len + sizeof(struct icmp6hdr),
sizeof(struct icmp6hdr), &ipc6, &fl6,
- (struct rt6_info *)dst, MSG_DONTWAIT)) {
+ dst_rt6_info(dst), MSG_DONTWAIT)) {
__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
@@ -1206,7 +1206,6 @@ static struct ctl_table ipv6_icmp_table_template[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
- { },
};
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 8c1ce78956ba..ff7e734e335b 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -38,7 +38,7 @@ static inline struct ila_params *ila_params_lwtunnel(
static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
- struct rt6_info *rt = (struct rt6_info *)orig_dst;
+ struct rt6_info *rt = dst_rt6_info(orig_dst);
struct ila_lwt *ilwt = ila_lwt_lwtunnel(orig_dst->lwtstate);
struct dst_entry *dst;
int err = -EINVAL;
@@ -58,7 +58,9 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
return orig_dst->lwtstate->orig_output(net, sk, skb);
}
+ local_bh_disable();
dst = dst_cache_get(&ilwt->dst_cache);
+ local_bh_enable();
if (unlikely(!dst)) {
struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct flowi6 fl6;
@@ -70,7 +72,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = orig_dst->dev->ifindex;
fl6.flowi6_iif = LOOPBACK_IFINDEX;
- fl6.daddr = *rt6_nexthop((struct rt6_info *)orig_dst,
+ fl6.daddr = *rt6_nexthop(dst_rt6_info(orig_dst),
&ip6h->daddr);
dst = ip6_route_output(net, NULL, &fl6);
@@ -86,8 +88,11 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
goto drop;
}
- if (ilwt->connected)
+ if (ilwt->connected) {
+ local_bh_disable();
dst_cache_set_ip6(&ilwt->dst_cache, dst, &fl6.saddr);
+ local_bh_enable();
+ }
}
skb_dst_set(skb, dst);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 2e81383b663b..6db71bb1cd30 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -21,6 +21,7 @@
#include <net/secure_seq.h>
#include <net/ip.h>
#include <net/sock_reuseport.h>
+#include <net/tcp.h>
u32 inet6_ehashfn(const struct net *net,
const struct in6_addr *laddr, const u16 lport,
@@ -289,7 +290,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
- if (twsk_unique(sk, sk2, twp))
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ tcp_twsk_unique(sk, sk2, twp))
break;
}
goto not_unique;
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index 7563f8c6aa87..bf7120ecea1e 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -351,9 +351,9 @@ do_encap:
goto drop;
if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
- preempt_disable();
+ local_bh_disable();
dst = dst_cache_get(&ilwt->cache);
- preempt_enable();
+ local_bh_enable();
if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -373,9 +373,9 @@ do_encap:
goto drop;
}
- preempt_disable();
+ local_bh_disable();
dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
- preempt_enable();
+ local_bh_enable();
}
skb_dst_drop(skb);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index c1f62352a481..eb111d20615c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -623,23 +623,22 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
struct rt6_rtnl_dump_arg arg = {
.filter.dump_exceptions = true,
.filter.dump_routes = true,
- .filter.rtnl_held = true,
+ .filter.rtnl_held = false,
};
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
- unsigned int h, s_h;
unsigned int e = 0, s_e;
+ struct hlist_head *head;
struct fib6_walker *w;
struct fib6_table *tb;
- struct hlist_head *head;
- int res = 0;
+ unsigned int h, s_h;
+ int err = 0;
+ rcu_read_lock();
if (cb->strict_check) {
- int err;
-
err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb);
if (err < 0)
- return err;
+ goto unlock;
} else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
struct rtmsg *rtm = nlmsg_data(nlh);
@@ -654,8 +653,10 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
* 1. allocate and initialize walker.
*/
w = kzalloc(sizeof(*w), GFP_ATOMIC);
- if (!w)
- return -ENOMEM;
+ if (!w) {
+ err = -ENOMEM;
+ goto unlock;
+ }
w->func = fib6_dump_node;
cb->args[2] = (long)w;
@@ -675,46 +676,46 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
tb = fib6_get_table(net, arg.filter.table_id);
if (!tb) {
if (rtnl_msg_family(cb->nlh) != PF_INET6)
- goto out;
+ goto unlock;
NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
- return -ENOENT;
+ err = -ENOENT;
+ goto unlock;
}
if (!cb->args[0]) {
- res = fib6_dump_table(tb, skb, cb);
- if (!res)
+ err = fib6_dump_table(tb, skb, cb);
+ if (!err)
cb->args[0] = 1;
}
- goto out;
+ goto unlock;
}
s_h = cb->args[0];
s_e = cb->args[1];
- rcu_read_lock();
for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
e = 0;
head = &net->ipv6.fib_table_hash[h];
hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
if (e < s_e)
goto next;
- res = fib6_dump_table(tb, skb, cb);
- if (res != 0)
- goto out_unlock;
+ err = fib6_dump_table(tb, skb, cb);
+ if (err != 0)
+ goto out;
next:
e++;
}
}
-out_unlock:
- rcu_read_unlock();
+out:
cb->args[1] = e;
cb->args[0] = h;
-out:
- res = res < 0 ? res : skb->len;
- if (res <= 0)
+
+unlock:
+ rcu_read_unlock();
+ if (err <= 0)
fib6_dump_end(cb);
- return res;
+ return err;
}
void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val)
@@ -965,6 +966,7 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
if (!fib6_nh->rt6i_pcpu)
return;
+ rcu_read_lock();
/* release the reference to this fib entry from
* all of its cached pcpu routes
*/
@@ -973,7 +975,9 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
struct rt6_info *pcpu_rt;
ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
- pcpu_rt = *ppcpu_rt;
+
+ /* Paired with xchg() in rt6_get_pcpu_route() */
+ pcpu_rt = READ_ONCE(*ppcpu_rt);
/* only dropping the 'from' reference if the cached route
* is using 'match'. The cached pcpu_rt->from only changes
@@ -983,10 +987,11 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) {
struct fib6_info *from;
- from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
+ from = unrcu_pointer(xchg(&pcpu_rt->from, NULL));
fib6_info_release(from);
}
}
+ rcu_read_unlock();
}
struct fib6_nh_pcpu_arg {
@@ -2509,7 +2514,8 @@ int __init fib6_init(void)
goto out_kmem_cache_create;
ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
- inet6_dump_fib, 0);
+ inet6_dump_fib, RTNL_FLAG_DUMP_UNLOCKED |
+ RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
if (ret)
goto out_unregister_subsys;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index c89aef524df9..3942bd2ade78 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -496,11 +496,11 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
tpi->proto);
if (tunnel) {
if (tunnel->parms.collect_md) {
+ IP_TUNNEL_DECLARE_FLAGS(flags);
struct metadata_dst *tun_dst;
__be64 tun_id;
- __be16 flags;
- flags = tpi->flags;
+ ip_tunnel_flags_copy(flags, tpi->flags);
tun_id = key32_to_tunnel_id(tpi->key);
tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, 0);
@@ -551,14 +551,14 @@ static int ip6erspan_rcv(struct sk_buff *skb,
if (tunnel->parms.collect_md) {
struct erspan_metadata *pkt_md, *md;
+ IP_TUNNEL_DECLARE_FLAGS(flags);
struct metadata_dst *tun_dst;
struct ip_tunnel_info *info;
unsigned char *gh;
__be64 tun_id;
- __be16 flags;
- tpi->flags |= TUNNEL_KEY;
- flags = tpi->flags;
+ __set_bit(IP_TUNNEL_KEY_BIT, tpi->flags);
+ ip_tunnel_flags_copy(flags, tpi->flags);
tun_id = key32_to_tunnel_id(tpi->key);
tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id,
@@ -580,7 +580,8 @@ static int ip6erspan_rcv(struct sk_buff *skb,
md2 = &md->u.md2;
memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
ERSPAN_V2_MDSIZE);
- info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
+ info->key.tun_flags);
info->options_len = sizeof(*md);
ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
@@ -748,8 +749,8 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
__u32 *pmtu, __be16 proto)
{
struct ip6_tnl *tunnel = netdev_priv(dev);
+ IP_TUNNEL_DECLARE_FLAGS(flags);
__be16 protocol;
- __be16 flags;
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -781,8 +782,11 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
fl6->fl6_gre_key = tunnel_id_to_key32(key->tun_id);
dsfield = key->tos;
- flags = key->tun_flags &
- (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
+ ip_tunnel_flags_zero(flags);
+ __set_bit(IP_TUNNEL_CSUM_BIT, flags);
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ __set_bit(IP_TUNNEL_SEQ_BIT, flags);
+ ip_tunnel_flags_and(flags, flags, key->tun_flags);
tun_hlen = gre_calc_hlen(flags);
if (skb_cow_head(skb, dev->needed_headroom ?: tun_hlen + tunnel->encap_hlen))
@@ -791,19 +795,21 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
gre_build_header(skb, tun_hlen,
flags, protocol,
tunnel_id_to_key32(tun_info->key.tun_id),
- (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
- : 0);
+ test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
+ htonl(atomic_fetch_inc(&tunnel->o_seqno)) :
+ 0);
} else {
if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
return -ENOMEM;
- flags = tunnel->parms.o_flags;
+ ip_tunnel_flags_copy(flags, tunnel->parms.o_flags);
gre_build_header(skb, tunnel->tun_hlen, flags,
protocol, tunnel->parms.o_key,
- (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
- : 0);
+ test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
+ htonl(atomic_fetch_inc(&tunnel->o_seqno)) :
+ 0);
}
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
@@ -825,7 +831,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
prepare_ip6gre_xmit_ipv4(skb, dev, &fl6,
&dsfield, &encap_limit);
- err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
+ err = gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ t->parms.o_flags));
if (err)
return -1;
@@ -859,7 +866,8 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit))
return -1;
- if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)))
+ if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ t->parms.o_flags)))
return -1;
err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit,
@@ -886,7 +894,8 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
prepare_ip6gre_xmit_other(skb, dev, &fl6, &dsfield, &encap_limit))
return -1;
- err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
+ err = gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ t->parms.o_flags));
if (err)
return err;
err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, skb->protocol);
@@ -939,6 +948,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
struct ip_tunnel_info *tun_info = NULL;
struct ip6_tnl *t = netdev_priv(dev);
struct dst_entry *dst = skb_dst(skb);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
bool truncate = false;
int encap_limit = -1;
__u8 dsfield = false;
@@ -982,7 +992,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen))
goto tx_err;
- t->parms.o_flags &= ~TUNNEL_KEY;
+ __clear_bit(IP_TUNNEL_KEY_BIT, t->parms.o_flags);
IPCB(skb)->flags = 0;
/* For collect_md mode, derive fl6 from the tunnel key,
@@ -1007,7 +1017,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
fl6.fl6_gre_key = tunnel_id_to_key32(key->tun_id);
dsfield = key->tos;
- if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
+ if (!test_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
+ tun_info->key.tun_flags))
goto tx_err;
if (tun_info->options_len < sizeof(*md))
goto tx_err;
@@ -1068,7 +1079,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
}
/* Push GRE header. */
- gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno)));
+ __set_bit(IP_TUNNEL_SEQ_BIT, flags);
+ gre_build_header(skb, 8, flags, proto, 0,
+ htonl(atomic_fetch_inc(&t->o_seqno)));
/* TooBig packet may have updated dst->dev's mtu */
if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
@@ -1211,8 +1224,8 @@ static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t,
t->parms.proto = p->proto;
t->parms.i_key = p->i_key;
t->parms.o_key = p->o_key;
- t->parms.i_flags = p->i_flags;
- t->parms.o_flags = p->o_flags;
+ ip_tunnel_flags_copy(t->parms.i_flags, p->i_flags);
+ ip_tunnel_flags_copy(t->parms.o_flags, p->o_flags);
t->parms.fwmark = p->fwmark;
t->parms.erspan_ver = p->erspan_ver;
t->parms.index = p->index;
@@ -1241,8 +1254,8 @@ static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p,
p->link = u->link;
p->i_key = u->i_key;
p->o_key = u->o_key;
- p->i_flags = gre_flags_to_tnl_flags(u->i_flags);
- p->o_flags = gre_flags_to_tnl_flags(u->o_flags);
+ gre_flags_to_tnl_flags(p->i_flags, u->i_flags);
+ gre_flags_to_tnl_flags(p->o_flags, u->o_flags);
memcpy(p->name, u->name, sizeof(u->name));
}
@@ -1394,7 +1407,7 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
ipv6h->daddr = t->parms.raddr;
p = (__be16 *)(ipv6h + 1);
- p[0] = t->parms.o_flags;
+ p[0] = ip_tunnel_flags_to_be16(t->parms.o_flags);
p[1] = htons(type);
/*
@@ -1421,7 +1434,6 @@ static const struct net_device_ops ip6gre_netdev_ops = {
.ndo_start_xmit = ip6gre_tunnel_xmit,
.ndo_siocdevprivate = ip6gre_tunnel_siocdevprivate,
.ndo_change_mtu = ip6_tnl_change_mtu,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
@@ -1431,7 +1443,6 @@ static void ip6gre_dev_free(struct net_device *dev)
gro_cells_destroy(&t->gro_cells);
dst_cache_destroy(&t->dst_cache);
- free_percpu(dev->tstats);
}
static void ip6gre_tunnel_setup(struct net_device *dev)
@@ -1440,6 +1451,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
dev->needs_free_netdev = true;
dev->priv_destructor = ip6gre_dev_free;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev->type = ARPHRD_IP6GRE;
dev->flags |= IFF_NOARP;
@@ -1458,19 +1470,17 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
static void ip6gre_tnl_init_features(struct net_device *dev)
{
struct ip6_tnl *nt = netdev_priv(dev);
- __be16 flags;
dev->features |= GRE6_FEATURES | NETIF_F_LLTX;
dev->hw_features |= GRE6_FEATURES;
- flags = nt->parms.o_flags;
-
/* TCP offload with GRE SEQ is not supported, nor can we support 2
* levels of outer headers requiring an update.
*/
- if (flags & TUNNEL_SEQ)
+ if (test_bit(IP_TUNNEL_SEQ_BIT, nt->parms.o_flags))
return;
- if (flags & TUNNEL_CSUM && nt->encap.type != TUNNEL_ENCAP_NONE)
+ if (test_bit(IP_TUNNEL_CSUM_BIT, nt->parms.o_flags) &&
+ nt->encap.type != TUNNEL_ENCAP_NONE)
return;
dev->features |= NETIF_F_GSO_SOFTWARE;
@@ -1489,13 +1499,9 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
if (ret)
- goto cleanup_alloc_pcpu_stats;
+ return ret;
ret = gro_cells_init(&tunnel->gro_cells, dev);
if (ret)
@@ -1519,9 +1525,6 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
cleanup_dst_cache_init:
dst_cache_destroy(&tunnel->dst_cache);
-cleanup_alloc_pcpu_stats:
- free_percpu(dev->tstats);
- dev->tstats = NULL;
return ret;
}
@@ -1795,12 +1798,12 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
if (data[IFLA_GRE_IFLAGS])
- parms->i_flags = gre_flags_to_tnl_flags(
- nla_get_be16(data[IFLA_GRE_IFLAGS]));
+ gre_flags_to_tnl_flags(parms->i_flags,
+ nla_get_be16(data[IFLA_GRE_IFLAGS]));
if (data[IFLA_GRE_OFLAGS])
- parms->o_flags = gre_flags_to_tnl_flags(
- nla_get_be16(data[IFLA_GRE_OFLAGS]));
+ gre_flags_to_tnl_flags(parms->o_flags,
+ nla_get_be16(data[IFLA_GRE_OFLAGS]));
if (data[IFLA_GRE_IKEY])
parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
@@ -1853,7 +1856,6 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = ip6_tnl_change_mtu,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
@@ -1882,13 +1884,9 @@ static int ip6erspan_tap_init(struct net_device *dev)
tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
if (ret)
- goto cleanup_alloc_pcpu_stats;
+ return ret;
ret = gro_cells_init(&tunnel->gro_cells, dev);
if (ret)
@@ -1910,9 +1908,6 @@ static int ip6erspan_tap_init(struct net_device *dev)
cleanup_dst_cache_init:
dst_cache_destroy(&tunnel->dst_cache);
-cleanup_alloc_pcpu_stats:
- free_percpu(dev->tstats);
- dev->tstats = NULL;
return ret;
}
@@ -1923,7 +1918,6 @@ static const struct net_device_ops ip6erspan_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = ip6_tnl_change_mtu,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
@@ -1937,6 +1931,7 @@ static void ip6gre_tap_setup(struct net_device *dev)
dev->needs_free_netdev = true;
dev->priv_destructor = ip6gre_dev_free;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
netif_keep_dst(dev);
@@ -2147,11 +2142,13 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct __ip6_tnl_parm *p = &t->parms;
- __be16 o_flags = p->o_flags;
+ IP_TUNNEL_DECLARE_FLAGS(o_flags);
+
+ ip_tunnel_flags_copy(o_flags, p->o_flags);
if (p->erspan_ver == 1 || p->erspan_ver == 2) {
if (!p->collect_md)
- o_flags |= TUNNEL_KEY;
+ __set_bit(IP_TUNNEL_KEY_BIT, o_flags);
if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver))
goto nla_put_failure;
@@ -2237,6 +2234,7 @@ static void ip6erspan_tap_setup(struct net_device *dev)
dev->needs_free_netdev = true;
dev->priv_destructor = ip6gre_dev_free;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
netif_keep_dst(dev);
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index c8b909a9904f..9822163428b0 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -67,7 +67,7 @@ static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto)
off += len;
}
- skb_gro_pull(skb, off - skb_network_offset(skb));
+ skb_gro_pull(skb, off - skb_gro_receive_network_offset(skb));
return proto;
}
@@ -236,8 +236,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
if (unlikely(!iph))
goto out;
- skb_set_network_header(skb, off);
- NAPI_GRO_CB(skb)->inner_network_offset = off;
+ NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = off;
flush += ntohs(iph->payload_len) != skb->len - hlen;
@@ -260,7 +259,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
NAPI_GRO_CB(skb)->proto = proto;
flush--;
- nlen = skb_network_header_len(skb);
+ nlen = skb_gro_offset(skb) - off;
list_for_each_entry(p, head, list) {
const struct ipv6hdr *iph2;
@@ -291,19 +290,8 @@ not_same_flow:
nlen - sizeof(struct ipv6hdr)))
goto not_same_flow;
}
- /* flush if Traffic Class fields are different */
- NAPI_GRO_CB(p)->flush |= !!((first_word & htonl(0x0FF00000)) |
- (__force __be32)(iph->hop_limit ^ iph2->hop_limit));
- NAPI_GRO_CB(p)->flush |= flush;
-
- /* If the previous IP ID value was based on an atomic
- * datagram we can overwrite the value and ignore it.
- */
- if (NAPI_GRO_CB(skb)->is_atomic)
- NAPI_GRO_CB(p)->flush_id = 0;
}
- NAPI_GRO_CB(skb)->is_atomic = true;
NAPI_GRO_CB(skb)->flush |= flush;
skb_gro_postpull_rcsum(skb, iph, nlen);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index b9dd3a66e423..ab504d31f0cd 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -120,7 +120,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
rcu_read_lock();
- nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
+ nexthop = rt6_nexthop(dst_rt6_info(dst), daddr);
neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
if (unlikely(IS_ERR_OR_NULL(neigh))) {
@@ -234,7 +234,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
- if (unlikely(READ_ONCE(idev->cnf.disable_ipv6))) {
+ if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED);
return 0;
@@ -599,7 +599,7 @@ int ip6_forward(struct sk_buff *skb)
* send a redirect.
*/
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
if (rt->rt6i_flags & RTF_GATEWAY)
target = &rt->rt6i_gateway;
else
@@ -856,10 +856,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *))
{
struct sk_buff *frag;
- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+ struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
inet6_sk(skb->sk) : NULL;
- bool mono_delivery_time = skb->mono_delivery_time;
+ u8 tstamp_type = skb->tstamp_type;
struct ip6_frag_state state;
unsigned int mtu, hlen, nexthdr_offset;
ktime_t tstamp = skb->tstamp;
@@ -955,7 +955,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (iter.frag)
ip6_fraglist_prepare(skb, &iter);
- skb_set_delivery_time(skb, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb, tstamp, tstamp_type);
err = output(net, sk, skb);
if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
@@ -1016,7 +1016,7 @@ slow_path:
/*
* Put this fragment into the sending queue.
*/
- skb_set_delivery_time(frag, tstamp, mono_delivery_time);
+ skb_set_delivery_time(frag, tstamp, tstamp_type);
err = output(net, sk, frag);
if (err)
goto fail;
@@ -1063,7 +1063,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
return NULL;
}
- rt = (struct rt6_info *)dst;
+ rt = dst_rt6_info(dst);
/* Yes, checking route validity in not connected
* case is not very simple. Take into account,
* that we do not support routing by source, TOS,
@@ -1118,12 +1118,13 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
struct rt6_info *rt;
*dst = ip6_route_output(net, sk, fl6);
- rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
+ rt = (*dst)->error ? NULL : dst_rt6_info(*dst);
rcu_read_lock();
from = rt ? rcu_dereference(rt->from) : NULL;
err = ip6_route_get_saddr(net, from, &fl6->daddr,
sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0,
+ fl6->flowi6_l3mdev,
&fl6->saddr);
rcu_read_unlock();
@@ -1159,7 +1160,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
* dst entry and replace it instead with the
* dst entry of the nexthop router
*/
- rt = (struct rt6_info *) *dst;
+ rt = dst_rt6_info(*dst);
rcu_read_lock();
n = __ipv6_neigh_lookup_noref(rt->dst.dev,
rt6_nexthop(rt, &fl6->daddr));
@@ -1423,7 +1424,7 @@ static int __ip6_append_data(struct sock *sk,
int offset = 0;
bool zc = false;
u32 tskey = 0;
- struct rt6_info *rt = (struct rt6_info *)cork->dst;
+ struct rt6_info *rt = dst_rt6_info(cork->dst);
bool paged, hold_tskey, extra_uref = false;
struct ipv6_txoptions *opt = v6_cork->opt;
int csummode = CHECKSUM_NONE;
@@ -1877,7 +1878,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
struct net *net = sock_net(sk);
struct ipv6hdr *hdr;
struct ipv6_txoptions *opt = v6_cork->opt;
- struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
+ struct rt6_info *rt = dst_rt6_info(cork->base.dst);
struct flowi6 *fl6 = &cork->fl.u.ip6;
unsigned char proto = fl6->flowi6_proto;
@@ -1924,7 +1925,10 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = cork->base.mark;
- skb->tstamp = cork->base.transmit_time;
+ if (sk_is_tcp(sk))
+ skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC);
+ else
+ skb_set_delivery_type_by_clockid(skb, cork->base.transmit_time, sk->sk_clockid);
ip6_cork_steal_dst(skb, cork);
IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
@@ -1933,7 +1937,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
u8 icmp6_type;
if (sk->sk_socket->type == SOCK_RAW &&
- !inet_test_bit(HDRINCL, sk))
+ !(fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH))
icmp6_type = fl6->fl6_icmp_type;
else
icmp6_type = icmp6_hdr(skb)->icmp6_type;
@@ -1949,7 +1953,7 @@ out:
int ip6_send_skb(struct sk_buff *skb)
{
struct net *net = sock_net(skb->sk);
- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+ struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
int err;
err = ip6_local_out(net, skb->sk, skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index e9cc315832cb..9dee0c127955 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -798,17 +798,15 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
const struct ipv6hdr *ipv6h;
int nh, err;
- if ((!(tpi->flags & TUNNEL_CSUM) &&
- (tunnel->parms.i_flags & TUNNEL_CSUM)) ||
- ((tpi->flags & TUNNEL_CSUM) &&
- !(tunnel->parms.i_flags & TUNNEL_CSUM))) {
+ if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.i_flags) !=
+ test_bit(IP_TUNNEL_CSUM_BIT, tpi->flags)) {
DEV_STATS_INC(tunnel->dev, rx_crc_errors);
DEV_STATS_INC(tunnel->dev, rx_errors);
goto drop;
}
- if (tunnel->parms.i_flags & TUNNEL_SEQ) {
- if (!(tpi->flags & TUNNEL_SEQ) ||
+ if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.i_flags)) {
+ if (!test_bit(IP_TUNNEL_SEQ_BIT, tpi->flags) ||
(tunnel->i_seqno &&
(s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
@@ -946,7 +944,9 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
if (t->parms.collect_md) {
- tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
+
+ tun_dst = ipv6_tun_rx_dst(skb, flags, 0, 0);
if (!tun_dst)
goto drop;
}
@@ -1746,7 +1746,7 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
if (new_mtu > IP_MAX_MTU - dev->hard_header_len)
return -EINVAL;
}
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
EXPORT_SYMBOL(ip6_tnl_change_mtu);
@@ -2146,7 +2146,7 @@ struct net *ip6_tnl_get_link_net(const struct net_device *dev)
{
struct ip6_tnl *tunnel = netdev_priv(dev);
- return tunnel->net;
+ return READ_ONCE(tunnel->net);
}
EXPORT_SYMBOL(ip6_tnl_get_link_net);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 7f4f976aa24a..590737c27537 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -174,11 +174,6 @@ vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t)
}
}
-static void vti6_dev_free(struct net_device *dev)
-{
- free_percpu(dev->tstats);
-}
-
static int vti6_tnl_create2(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
@@ -671,7 +666,8 @@ static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu)
dev->flags &= ~IFF_POINTOPOINT;
if (keep_mtu && dev->mtu) {
- dev->mtu = clamp(dev->mtu, dev->min_mtu, dev->max_mtu);
+ WRITE_ONCE(dev->mtu,
+ clamp(dev->mtu, dev->min_mtu, dev->max_mtu));
return;
}
@@ -892,7 +888,6 @@ static const struct net_device_ops vti6_netdev_ops = {
.ndo_uninit = vti6_dev_uninit,
.ndo_start_xmit = vti6_tnl_xmit,
.ndo_siocdevprivate = vti6_siocdevprivate,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
@@ -908,8 +903,8 @@ static void vti6_dev_setup(struct net_device *dev)
dev->netdev_ops = &vti6_netdev_ops;
dev->header_ops = &ip_tunnel_header_ops;
dev->needs_free_netdev = true;
- dev->priv_destructor = vti6_dev_free;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev->type = ARPHRD_TUNNEL6;
dev->min_mtu = IPV4_MIN_MTU;
dev->max_mtu = IP_MAX_MTU - sizeof(struct ipv6hdr);
@@ -931,9 +926,6 @@ static inline int vti6_dev_init_gen(struct net_device *dev)
t->dev = dev;
t->net = dev_net(dev);
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
netdev_hold(dev, &t->dev_tracker, GFP_KERNEL);
netdev_lockdep_set_classes(dev);
return 0;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index cb0ee81a068a..dd342e6ecf3f 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2273,7 +2273,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
int err;
struct mr_table *mrt;
struct mfc6_cache *cache;
- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+ struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index d4c28ec1bc51..cd342d5015c6 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -111,8 +111,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
}
- opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt,
- opt);
+ opt = unrcu_pointer(xchg(&inet6_sk(sk)->opt, RCU_INITIALIZER(opt)));
sk_dst_reset(sk);
return opt;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index ae134634c323..b8eec1b6cc2c 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -227,6 +227,7 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
return NULL;
memset(ndopts, 0, sizeof(*ndopts));
while (opt_len) {
+ bool unknown = false;
int l;
if (opt_len < sizeof(struct nd_opt_hdr))
return NULL;
@@ -262,22 +263,23 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
break;
#endif
default:
- if (ndisc_is_useropt(dev, nd_opt)) {
- ndopts->nd_useropts_end = nd_opt;
- if (!ndopts->nd_useropts)
- ndopts->nd_useropts = nd_opt;
- } else {
- /*
- * Unknown options must be silently ignored,
- * to accommodate future extension to the
- * protocol.
- */
- ND_PRINTK(2, notice,
- "%s: ignored unsupported option; type=%d, len=%d\n",
- __func__,
- nd_opt->nd_opt_type,
- nd_opt->nd_opt_len);
- }
+ unknown = true;
+ }
+ if (ndisc_is_useropt(dev, nd_opt)) {
+ ndopts->nd_useropts_end = nd_opt;
+ if (!ndopts->nd_useropts)
+ ndopts->nd_useropts = nd_opt;
+ } else if (unknown) {
+ /*
+ * Unknown options must be silently ignored,
+ * to accommodate future extension to the
+ * protocol.
+ */
+ ND_PRINTK(2, notice,
+ "%s: ignored unsupported option; type=%d, len=%d\n",
+ __func__,
+ nd_opt->nd_opt_type,
+ nd_opt->nd_opt_len);
}
next_opt:
opt_len -= l;
@@ -1722,7 +1724,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
if (IS_ERR(dst))
return;
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
if (rt->rt6i_flags & RTF_GATEWAY) {
ND_PRINTK(2, warn,
@@ -1936,7 +1938,7 @@ static struct notifier_block ndisc_netdev_notifier = {
};
#ifdef CONFIG_SYSCTL
-static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
+static void ndisc_warn_deprecated_sysctl(const struct ctl_table *ctl,
const char *func, const char *dev_name)
{
static char warncomm[TASK_COMM_LEN];
@@ -1951,7 +1953,7 @@ static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
}
}
-int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void *buffer,
+int ndisc_ifinfo_sysctl_change(const struct ctl_table *ctl, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
struct net_device *dev = ctl->extra1;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 53d255838e6a..581ce055bf52 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -36,6 +36,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
.flowi6_uid = sock_net_uid(net, sk),
.daddr = iph->daddr,
.saddr = iph->saddr,
+ .flowlabel = ip6_flowinfo(iph),
};
int err;
@@ -126,7 +127,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct sk_buff *))
{
int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
- bool mono_delivery_time = skb->mono_delivery_time;
+ u8 tstamp_type = skb->tstamp_type;
ktime_t tstamp = skb->tstamp;
struct ip6_frag_state state;
u8 *prevhdr, nexthdr = 0;
@@ -192,7 +193,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (iter.frag)
ip6_fraglist_prepare(skb, &iter);
- skb_set_delivery_time(skb, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb, tstamp, tstamp_type);
err = output(net, sk, data, skb);
if (err || !iter.frag)
break;
@@ -225,7 +226,7 @@ slow_path:
goto blackhole;
}
- skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb2, tstamp, tstamp_type);
err = output(net, sk, data, skb2);
if (err)
goto blackhole;
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index df785ebda0ca..e8992693e14a 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -43,7 +43,7 @@ static int ip6table_filter_table_init(struct net *net)
return -ENOMEM;
/* Entry 1 is the FORWARD hook */
((struct ip6t_standard *)repl->entries)[1].target.verdict =
- forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
+ forward ? -NF_ACCEPT - 1 : NF_DROP - 1;
err = ip6t_register_table(net, &packet_filter, repl, filter_ops);
kfree(repl);
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 52cf104e3478..e119d4f090cc 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -147,23 +147,27 @@ static struct pernet_operations ip6table_nat_net_ops = {
static int __init ip6table_nat_init(void)
{
- int ret = xt_register_template(&nf_nat_ipv6_table,
- ip6table_nat_table_init);
+ int ret;
+ /* net->gen->ptr[ip6table_nat_net_id] must be allocated
+ * before calling ip6t_nat_register_lookups().
+ */
+ ret = register_pernet_subsys(&ip6table_nat_net_ops);
if (ret < 0)
return ret;
- ret = register_pernet_subsys(&ip6table_nat_net_ops);
+ ret = xt_register_template(&nf_nat_ipv6_table,
+ ip6table_nat_table_init);
if (ret)
- xt_unregister_template(&nf_nat_ipv6_table);
+ unregister_pernet_subsys(&ip6table_nat_net_ops);
return ret;
}
static void __exit ip6table_nat_exit(void)
{
- unregister_pernet_subsys(&ip6table_nat_net_ops);
xt_unregister_template(&nf_nat_ipv6_table);
+ unregister_pernet_subsys(&ip6table_nat_net_ops);
}
module_init(ip6table_nat_init);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index d0dcbaca1994..4120e67a8ce6 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -62,7 +62,6 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
- { }
};
static int nf_ct_frag6_sysctl_register(struct net *net)
@@ -105,7 +104,7 @@ err_alloc:
static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
{
struct nft_ct_frag6_pernet *nf_frag = nf_frag_pernet(net);
- struct ctl_table *table;
+ const struct ctl_table *table;
table = nf_frag->nf_frag_frags_hdr->ctl_table_arg;
unregister_net_sysctl_table(nf_frag->nf_frag_frags_hdr);
@@ -155,6 +154,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
};
struct inet_frag_queue *q;
+ if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
+ IPV6_ADDR_LINKLOCAL)))
+ key.iif = 0;
+
q = inet_frag_find(nf_frag->fqdir, &key);
if (!q)
return NULL;
@@ -264,7 +267,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->iif = dev->ifindex;
fq->q.stamp = skb->tstamp;
- fq->q.mono_delivery_time = skb->mono_delivery_time;
+ fq->q.tstamp_type = skb->tstamp_type;
fq->q.meat += skb->len;
fq->ecn |= ecn;
if (payload_len > fq->q.max_size)
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index ef2059c88955..88b3fcacd4f9 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -154,7 +154,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false);
if (IS_ERR(dst))
return PTR_ERR(dst);
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 6d1d9221649d..752327b10dde 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -27,7 +27,7 @@
#include <net/ipv6.h>
#define MAX4(a, b, c, d) \
- max_t(u32, max_t(u32, a, b), max_t(u32, c, d))
+ MAX_T(u32, MAX_T(u32, a, b), MAX_T(u32, c, d))
#define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \
IPSTATS_MIB_MAX, ICMP_MIB_MAX)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 0d896ca7b589..608fa9d05b55 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -362,14 +362,14 @@ static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb)
if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) &&
skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM);
+ sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_SKB_CSUM);
return NET_RX_DROP;
}
/* Charge it to the socket. */
skb_dst_drop(skb);
if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
- kfree_skb_reason(skb, reason);
+ sk_skb_reason_drop(sk, skb, reason);
return NET_RX_DROP;
}
@@ -390,7 +390,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY);
+ sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_XFRM_POLICY);
return NET_RX_DROP;
}
nf_reset_ct(skb);
@@ -415,7 +415,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
if (inet_test_bit(HDRINCL, sk)) {
if (skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM);
+ sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_SKB_CSUM);
return NET_RX_DROP;
}
}
@@ -598,7 +598,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
struct ipv6hdr *iph;
struct sk_buff *skb;
int err;
- struct rt6_info *rt = (struct rt6_info *)*dstp;
+ struct rt6_info *rt = dst_rt6_info(*dstp);
int hlen = LL_RESERVED_SPACE(rt->dst.dev);
int tlen = rt->dst.dev->needed_tailroom;
@@ -621,7 +621,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb->protocol = htons(ETH_P_IPV6);
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark;
- skb->tstamp = sockc->transmit_time;
+ skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid);
skb_put(skb, length);
skb_reset_network_header(skb);
@@ -917,7 +917,7 @@ back_from_confirm:
ipc6.opt = opt;
lock_sock(sk);
err = ip6_append_data(sk, raw6_getfrag, &rfv,
- len, 0, &ipc6, &fl6, (struct rt6_info *)dst,
+ len, 0, &ipc6, &fl6, dst_rt6_info(dst),
msg->msg_flags);
if (err)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index acb4f119e11f..a48be617a8ab 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -198,7 +198,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->iif = dev->ifindex;
fq->q.stamp = skb->tstamp;
- fq->q.mono_delivery_time = skb->mono_delivery_time;
+ fq->q.tstamp_type = skb->tstamp_type;
fq->q.meat += skb->len;
fq->ecn |= ecn;
add_frag_mem_limit(fq->q.fqdir, skb->truesize);
@@ -369,7 +369,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
* the source of the fragment, with the Pointer field set to zero.
*/
nexthdr = hdr->nexthdr;
- if (ipv6frag_thdr_truncated(skb, skb_transport_offset(skb), &nexthdr)) {
+ if (ipv6frag_thdr_truncated(skb, skb_network_offset(skb) + sizeof(struct ipv6hdr), &nexthdr)) {
__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0);
@@ -436,7 +436,6 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
/* secret interval has been deprecated */
@@ -449,7 +448,6 @@ static struct ctl_table ip6_frags_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
@@ -487,7 +485,7 @@ err_alloc:
static void __net_exit ip6_frags_ns_sysctl_unregister(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
table = net->ipv6.sysctl.frags_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1f4b935a0e57..219701caba1e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -87,7 +87,8 @@ struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
INDIRECT_CALLABLE_SCOPE
unsigned int ip6_mtu(const struct dst_entry *dst);
-static struct dst_entry *ip6_negative_advice(struct dst_entry *);
+static void ip6_negative_advice(struct sock *sk,
+ struct dst_entry *dst);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
struct net_device *dev);
@@ -130,7 +131,6 @@ static struct fib6_info *rt6_get_route_info(struct net *net,
struct uncached_list {
spinlock_t lock;
struct list_head head;
- struct list_head quarantine;
};
static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
@@ -188,8 +188,7 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev)
handled = true;
}
if (handled)
- list_move(&rt->dst.rt_uncached,
- &ul->quarantine);
+ list_del_init(&rt->dst.rt_uncached);
}
spin_unlock_bh(&ul->lock);
}
@@ -226,7 +225,7 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
struct sk_buff *skb,
const void *daddr)
{
- const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
+ const struct rt6_info *rt = dst_rt6_info(dst);
return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
dst->dev, skb, daddr);
@@ -234,8 +233,8 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
+ const struct rt6_info *rt = dst_rt6_info(dst);
struct net_device *dev = dst->dev;
- struct rt6_info *rt = (struct rt6_info *)dst;
daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr);
if (!daddr)
@@ -354,7 +353,7 @@ EXPORT_SYMBOL(ip6_dst_alloc);
static void ip6_dst_destroy(struct dst_entry *dst)
{
- struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
struct fib6_info *from;
struct inet6_dev *idev;
@@ -367,13 +366,13 @@ static void ip6_dst_destroy(struct dst_entry *dst)
in6_dev_put(idev);
}
- from = xchg((__force struct fib6_info **)&rt->from, NULL);
+ from = unrcu_pointer(xchg(&rt->from, NULL));
fib6_info_release(from);
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
- struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
struct inet6_dev *idev = rt->rt6i_idev;
if (idev && idev->dev != blackhole_netdev) {
@@ -637,6 +636,8 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
rcu_read_lock();
last_probe = READ_ONCE(fib6_nh->last_probe);
idev = __in6_dev_get(dev);
+ if (!idev)
+ goto out;
neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
if (neigh) {
if (READ_ONCE(neigh->nud_state) & NUD_VALID)
@@ -1288,7 +1289,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
if (dst->error == 0)
- return (struct rt6_info *) dst;
+ return dst_rt6_info(dst);
dst_release(dst);
@@ -1408,6 +1409,7 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
struct rt6_info *prev, **p;
p = this_cpu_ptr(res->nh->rt6i_pcpu);
+ /* Paired with READ_ONCE() in __fib6_drop_pcpu_from() */
prev = xchg(p, NULL);
if (prev) {
dst_dev_put(&prev->dst);
@@ -1436,7 +1438,7 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net,
if (res->f6i->fib6_destroying) {
struct fib6_info *from;
- from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
+ from = unrcu_pointer(xchg(&pcpu_rt->from, NULL));
fib6_info_release(from);
}
@@ -1465,7 +1467,7 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
/* purge completely the exception to allow releasing the held resources:
* some [sk] cache may keep the dst around for unlimited time
*/
- from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
+ from = unrcu_pointer(xchg(&rt6_ex->rt6i->from, NULL));
fib6_info_release(from);
dst_dev_put(&rt6_ex->rt6i->dst);
@@ -2372,7 +2374,7 @@ static u32 rt6_multipath_custom_hash_outer(const struct net *net,
hash_keys.ports.dst = keys.ports.dst;
*p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
- return flow_hash_from_keys(&hash_keys);
+ return fib_multipath_hash_from_keys(net, &hash_keys);
}
static u32 rt6_multipath_custom_hash_inner(const struct net *net,
@@ -2421,7 +2423,7 @@ static u32 rt6_multipath_custom_hash_inner(const struct net *net,
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
hash_keys.ports.dst = keys.ports.dst;
- return flow_hash_from_keys(&hash_keys);
+ return fib_multipath_hash_from_keys(net, &hash_keys);
}
static u32 rt6_multipath_custom_hash_skb(const struct net *net,
@@ -2460,7 +2462,7 @@ static u32 rt6_multipath_custom_hash_fl6(const struct net *net,
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
hash_keys.ports.dst = fl6->fl6_dport;
- return flow_hash_from_keys(&hash_keys);
+ return fib_multipath_hash_from_keys(net, &hash_keys);
}
/* if skb is set it will be used and fl6 can be NULL */
@@ -2482,7 +2484,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
- mhash = flow_hash_from_keys(&hash_keys);
+ mhash = fib_multipath_hash_from_keys(net, &hash_keys);
break;
case 1:
if (skb) {
@@ -2514,7 +2516,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
hash_keys.ports.dst = fl6->fl6_dport;
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
- mhash = flow_hash_from_keys(&hash_keys);
+ mhash = fib_multipath_hash_from_keys(net, &hash_keys);
break;
case 2:
memset(&hash_keys, 0, sizeof(hash_keys));
@@ -2551,7 +2553,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
- mhash = flow_hash_from_keys(&hash_keys);
+ mhash = fib_multipath_hash_from_keys(net, &hash_keys);
break;
case 3:
if (skb)
@@ -2647,7 +2649,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net,
rcu_read_lock();
dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
- rt6 = (struct rt6_info *)dst;
+ rt6 = dst_rt6_info(dst);
/* For dst cached in uncached_list, refcnt is already taken. */
if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) {
dst = &net->ipv6.ip6_null_entry->dst;
@@ -2661,7 +2663,7 @@ EXPORT_SYMBOL_GPL(ip6_route_output_flags);
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
- struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
+ struct rt6_info *rt, *ort = dst_rt6_info(dst_orig);
struct net_device *loopback_dev = net->loopback_dev;
struct dst_entry *new = NULL;
@@ -2744,7 +2746,7 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
struct fib6_info *from;
struct rt6_info *rt;
- rt = container_of(dst, struct rt6_info, dst);
+ rt = dst_rt6_info(dst);
if (rt->sernum)
return rt6_is_valid(rt) ? dst : NULL;
@@ -2770,24 +2772,24 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
}
EXPORT_INDIRECT_CALLABLE(ip6_dst_check);
-static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
+static void ip6_negative_advice(struct sock *sk,
+ struct dst_entry *dst)
{
- struct rt6_info *rt = (struct rt6_info *) dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
- if (rt) {
- if (rt->rt6i_flags & RTF_CACHE) {
- rcu_read_lock();
- if (rt6_check_expired(rt)) {
- rt6_remove_exception_rt(rt);
- dst = NULL;
- }
- rcu_read_unlock();
- } else {
- dst_release(dst);
- dst = NULL;
+ if (rt->rt6i_flags & RTF_CACHE) {
+ rcu_read_lock();
+ if (rt6_check_expired(rt)) {
+ /* counteract the dst_release() in sk_dst_reset() */
+ dst_hold(dst);
+ sk_dst_reset(sk);
+
+ rt6_remove_exception_rt(rt);
}
+ rcu_read_unlock();
+ return;
}
- return dst;
+ sk_dst_reset(sk);
}
static void ip6_link_failure(struct sk_buff *skb)
@@ -2796,7 +2798,7 @@ static void ip6_link_failure(struct sk_buff *skb)
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
- rt = (struct rt6_info *) skb_dst(skb);
+ rt = dst_rt6_info(skb_dst(skb));
if (rt) {
rcu_read_lock();
if (rt->rt6i_flags & RTF_CACHE) {
@@ -2852,7 +2854,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
bool confirm_neigh)
{
const struct in6_addr *daddr, *saddr;
- struct rt6_info *rt6 = (struct rt6_info *)dst;
+ struct rt6_info *rt6 = dst_rt6_info(dst);
/* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU)
* IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it.
@@ -3601,7 +3603,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
if (!dev)
goto out;
- if (idev->cnf.disable_ipv6) {
+ if (!idev || idev->cnf.disable_ipv6) {
NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
err = -EACCES;
goto out;
@@ -3760,7 +3762,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
if (!rt)
goto out;
- rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
+ rt->fib6_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len,
extack);
if (IS_ERR(rt->fib6_metrics)) {
err = PTR_ERR(rt->fib6_metrics);
@@ -4174,7 +4176,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
}
}
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
if (rt->rt6i_flags & RTF_REJECT) {
net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
return;
@@ -4445,7 +4447,7 @@ static void rtmsg_to_fib6_config(struct net *net,
.fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
: RT6_TABLE_MAIN,
.fc_ifindex = rtmsg->rtmsg_ifindex,
- .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
+ .fc_metric = rtmsg->rtmsg_metric,
.fc_expires = rtmsg->rtmsg_info,
.fc_dst_len = rtmsg->rtmsg_dst_len,
.fc_src_len = rtmsg->rtmsg_src_len,
@@ -4475,6 +4477,9 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
rtnl_lock();
switch (cmd) {
case SIOCADDRT:
+ /* Only do the default setting of fc_metric in route adding */
+ if (cfg.fc_metric == 0)
+ cfg.fc_metric = IP6_RT_PRIO_USER;
err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
break;
case SIOCDELRT:
@@ -5608,7 +5613,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
int iif, int type, u32 portid, u32 seq,
unsigned int flags)
{
- struct rt6_info *rt6 = (struct rt6_info *)dst;
+ struct rt6_info *rt6 = dst_rt6_info(dst);
struct rt6key *rt6_dst, *rt6_src;
u32 *pmetrics, table, rt6_flags;
unsigned char nh_flags = 0;
@@ -5682,7 +5687,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
goto nla_put_failure;
} else if (dest) {
struct in6_addr saddr_buf;
- if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
+ if (ip6_route_get_saddr(net, rt, dest, 0, 0, &saddr_buf) == 0 &&
nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
goto nla_put_failure;
}
@@ -6111,7 +6116,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
}
- rt = container_of(dst, struct rt6_info, dst);
+ rt = dst_rt6_info(dst);
if (rt->dst.error) {
err = rt->dst.error;
ip6_rt_put(rt);
@@ -6329,7 +6334,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v)
#ifdef CONFIG_SYSCTL
-static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
+static int ipv6_sysctl_rtcache_flush(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net;
@@ -6338,12 +6343,12 @@ static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
if (!write)
return -EINVAL;
- net = (struct net *)ctl->extra1;
- delay = net->ipv6.sysctl.flush_delay;
ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (ret)
return ret;
+ net = (struct net *)ctl->extra1;
+ delay = net->ipv6.sysctl.flush_delay;
fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
return 0;
}
@@ -6428,7 +6433,6 @@ static struct ctl_table ipv6_route_table_template[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
- { }
};
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
@@ -6452,10 +6456,6 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
- table[1].procname = NULL;
}
return table;
@@ -6756,7 +6756,6 @@ int __init ip6_route_init(void)
struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
INIT_LIST_HEAD(&ul->head);
- INIT_LIST_HEAD(&ul->quarantine);
spin_lock_init(&ul->lock);
}
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index a013b92cbb86..2c83b7586422 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -212,9 +212,9 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (unlikely(err))
goto drop;
- preempt_disable();
+ local_bh_disable();
dst = dst_cache_get(&rlwt->cache);
- preempt_enable();
+ local_bh_enable();
if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -234,9 +234,9 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
goto drop;
}
- preempt_disable();
+ local_bh_disable();
dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr);
- preempt_enable();
+ local_bh_enable();
}
skb_dst_drop(skb);
@@ -268,23 +268,21 @@ static int rpl_input(struct sk_buff *skb)
return err;
}
- preempt_disable();
+ local_bh_disable();
dst = dst_cache_get(&rlwt->cache);
- preempt_enable();
if (!dst) {
ip6_route_input(skb);
dst = skb_dst(skb);
if (!dst->error) {
- preempt_disable();
dst_cache_set_ip6(&rlwt->cache, dst,
&ipv6_hdr(skb)->saddr);
- preempt_enable();
}
} else {
skb_dst_drop(skb);
skb_dst_set(skb, dst);
}
+ local_bh_enable();
err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
if (unlikely(err))
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 35508abd76f4..180da19c148c 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -21,9 +21,7 @@
#include <net/genetlink.h>
#include <linux/seg6.h>
#include <linux/seg6_genl.h>
-#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
-#endif
bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced)
{
@@ -437,13 +435,11 @@ static int __net_init seg6_net_init(struct net *net)
net->ipv6.seg6_data = sdata;
-#ifdef CONFIG_IPV6_SEG6_HMAC
if (seg6_hmac_net_init(net)) {
kfree(rcu_dereference_raw(sdata->tun_src));
kfree(sdata);
return -ENOMEM;
}
-#endif
return 0;
}
@@ -452,9 +448,7 @@ static void __net_exit seg6_net_exit(struct net *net)
{
struct seg6_pernet_data *sdata = seg6_pernet(net);
-#ifdef CONFIG_IPV6_SEG6_HMAC
seg6_hmac_net_exit(net);
-#endif
kfree(rcu_dereference_raw(sdata->tun_src));
kfree(sdata);
@@ -520,39 +514,28 @@ int __init seg6_init(void)
if (err)
goto out_unregister_pernet;
-#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
err = seg6_iptunnel_init();
if (err)
goto out_unregister_genl;
err = seg6_local_init();
- if (err) {
- seg6_iptunnel_exit();
- goto out_unregister_genl;
- }
-#endif
+ if (err)
+ goto out_unregister_iptun;
-#ifdef CONFIG_IPV6_SEG6_HMAC
err = seg6_hmac_init();
if (err)
- goto out_unregister_iptun;
-#endif
+ goto out_unregister_seg6;
pr_info("Segment Routing with IPv6\n");
out:
return err;
-#ifdef CONFIG_IPV6_SEG6_HMAC
-out_unregister_iptun:
-#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
+out_unregister_seg6:
seg6_local_exit();
+out_unregister_iptun:
seg6_iptunnel_exit();
-#endif
-#endif
-#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
out_unregister_genl:
genl_unregister_family(&seg6_genl_family);
-#endif
out_unregister_pernet:
unregister_pernet_subsys(&ip6_segments_ops);
goto out;
@@ -560,12 +543,9 @@ out_unregister_pernet:
void seg6_exit(void)
{
-#ifdef CONFIG_IPV6_SEG6_HMAC
seg6_hmac_exit();
-#endif
-#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
+ seg6_local_exit();
seg6_iptunnel_exit();
-#endif
- unregister_pernet_subsys(&ip6_segments_ops);
genl_unregister_family(&seg6_genl_family);
+ unregister_pernet_subsys(&ip6_segments_ops);
}
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index 861e0366f549..bbf5b84a70fc 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -356,6 +356,7 @@ static int seg6_hmac_init_algo(void)
struct crypto_shash *tfm;
struct shash_desc *shash;
int i, alg_count, cpu;
+ int ret = -ENOMEM;
alg_count = ARRAY_SIZE(hmac_algos);
@@ -366,12 +367,14 @@ static int seg6_hmac_init_algo(void)
algo = &hmac_algos[i];
algo->tfms = alloc_percpu(struct crypto_shash *);
if (!algo->tfms)
- return -ENOMEM;
+ goto error_out;
for_each_possible_cpu(cpu) {
tfm = crypto_alloc_shash(algo->name, 0, 0);
- if (IS_ERR(tfm))
- return PTR_ERR(tfm);
+ if (IS_ERR(tfm)) {
+ ret = PTR_ERR(tfm);
+ goto error_out;
+ }
p_tfm = per_cpu_ptr(algo->tfms, cpu);
*p_tfm = tfm;
}
@@ -383,18 +386,22 @@ static int seg6_hmac_init_algo(void)
algo->shashs = alloc_percpu(struct shash_desc *);
if (!algo->shashs)
- return -ENOMEM;
+ goto error_out;
for_each_possible_cpu(cpu) {
shash = kzalloc_node(shsize, GFP_KERNEL,
cpu_to_node(cpu));
if (!shash)
- return -ENOMEM;
+ goto error_out;
*per_cpu_ptr(algo->shashs, cpu) = shash;
}
}
return 0;
+
+error_out:
+ seg6_hmac_exit();
+ return ret;
}
int __init seg6_hmac_init(void)
@@ -412,22 +419,29 @@ int __net_init seg6_hmac_net_init(struct net *net)
void seg6_hmac_exit(void)
{
struct seg6_hmac_algo *algo = NULL;
+ struct crypto_shash *tfm;
+ struct shash_desc *shash;
int i, alg_count, cpu;
alg_count = ARRAY_SIZE(hmac_algos);
for (i = 0; i < alg_count; i++) {
algo = &hmac_algos[i];
- for_each_possible_cpu(cpu) {
- struct crypto_shash *tfm;
- struct shash_desc *shash;
- shash = *per_cpu_ptr(algo->shashs, cpu);
- kfree(shash);
- tfm = *per_cpu_ptr(algo->tfms, cpu);
- crypto_free_shash(tfm);
+ if (algo->shashs) {
+ for_each_possible_cpu(cpu) {
+ shash = *per_cpu_ptr(algo->shashs, cpu);
+ kfree(shash);
+ }
+ free_percpu(algo->shashs);
+ }
+
+ if (algo->tfms) {
+ for_each_possible_cpu(cpu) {
+ tfm = *per_cpu_ptr(algo->tfms, cpu);
+ crypto_free_shash(tfm);
+ }
+ free_percpu(algo->tfms);
}
- free_percpu(algo->tfms);
- free_percpu(algo->shashs);
}
}
EXPORT_SYMBOL(seg6_hmac_exit);
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 03b877ff4558..098632adc9b5 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -459,34 +459,30 @@ static int seg6_input_core(struct net *net, struct sock *sk,
int err;
err = seg6_do_srh(skb);
- if (unlikely(err)) {
- kfree_skb(skb);
- return err;
- }
+ if (unlikely(err))
+ goto drop;
slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
- preempt_disable();
+ local_bh_disable();
dst = dst_cache_get(&slwt->cache);
- preempt_enable();
if (!dst) {
ip6_route_input(skb);
dst = skb_dst(skb);
if (!dst->error) {
- preempt_disable();
dst_cache_set_ip6(&slwt->cache, dst,
&ipv6_hdr(skb)->saddr);
- preempt_enable();
}
} else {
skb_dst_drop(skb);
skb_dst_set(skb, dst);
}
+ local_bh_enable();
err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
if (unlikely(err))
- return err;
+ goto drop;
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
@@ -494,6 +490,9 @@ static int seg6_input_core(struct net *net, struct sock *sk,
skb_dst(skb)->dev, seg6_input_finish);
return seg6_input_finish(dev_net(skb->dev), NULL, skb);
+drop:
+ kfree_skb(skb);
+ return err;
}
static int seg6_input_nf(struct sk_buff *skb)
@@ -535,9 +534,9 @@ static int seg6_output_core(struct net *net, struct sock *sk,
slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
- preempt_disable();
+ local_bh_disable();
dst = dst_cache_get(&slwt->cache);
- preempt_enable();
+ local_bh_enable();
if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -557,9 +556,9 @@ static int seg6_output_core(struct net *net, struct sock *sk,
goto drop;
}
- preempt_disable();
+ local_bh_disable();
dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
- preempt_enable();
+ local_bh_enable();
}
skb_dst_drop(skb);
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 24e2b4b494cb..c74705ead984 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -941,8 +941,8 @@ static int input_action_end_dx6(struct sk_buff *skb,
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
- dev_net(skb->dev), NULL, skb, NULL,
- skb_dst(skb)->dev, input_action_end_dx6_finish);
+ dev_net(skb->dev), NULL, skb, skb->dev,
+ NULL, input_action_end_dx6_finish);
return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
drop:
@@ -991,8 +991,8 @@ static int input_action_end_dx4(struct sk_buff *skb,
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
- dev_net(skb->dev), NULL, skb, NULL,
- skb_dst(skb)->dev, input_action_end_dx4_finish);
+ dev_net(skb->dev), NULL, skb, skb->dev,
+ NULL, input_action_end_dx4_finish);
return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
drop:
@@ -1380,7 +1380,9 @@ drop:
return err;
}
-DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
+DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
{
@@ -1388,6 +1390,7 @@ bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
this_cpu_ptr(&seg6_bpf_srh_states);
struct ipv6_sr_hdr *srh = srh_state->srh;
+ lockdep_assert_held(&srh_state->bh_lock);
if (unlikely(srh == NULL))
return false;
@@ -1408,8 +1411,7 @@ bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
static int input_action_end_bpf(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
- struct seg6_bpf_srh_state *srh_state =
- this_cpu_ptr(&seg6_bpf_srh_states);
+ struct seg6_bpf_srh_state *srh_state;
struct ipv6_sr_hdr *srh;
int ret;
@@ -1420,10 +1422,14 @@ static int input_action_end_bpf(struct sk_buff *skb,
}
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
- /* preempt_disable is needed to protect the per-CPU buffer srh_state,
- * which is also accessed by the bpf_lwt_seg6_* helpers
+ /* The access to the per-CPU buffer srh_state is protected by running
+ * always in softirq context (with disabled BH). On PREEMPT_RT the
+ * required locking is provided by the following local_lock_nested_bh()
+ * statement. It is also accessed by the bpf_lwt_seg6_* helpers via
+ * bpf_prog_run_save_cb().
*/
- preempt_disable();
+ local_lock_nested_bh(&seg6_bpf_srh_states.bh_lock);
+ srh_state = this_cpu_ptr(&seg6_bpf_srh_states);
srh_state->srh = srh;
srh_state->hdrlen = srh->hdrlen << 3;
srh_state->valid = true;
@@ -1446,15 +1452,15 @@ static int input_action_end_bpf(struct sk_buff *skb,
if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
goto drop;
+ local_unlock_nested_bh(&seg6_bpf_srh_states.bh_lock);
- preempt_enable();
if (ret != BPF_REDIRECT)
seg6_lookup_nexthop(skb, NULL, 0);
return dst_input(skb);
drop:
- preempt_enable();
+ local_unlock_nested_bh(&seg6_bpf_srh_states.bh_lock);
kfree_skb(skb);
return -EINVAL;
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 655c9b1a19b8..83b195f09561 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -132,8 +132,8 @@ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
return NULL;
}
-static struct ip_tunnel __rcu **__ipip6_bucket(struct sit_net *sitn,
- struct ip_tunnel_parm *parms)
+static struct ip_tunnel __rcu **
+__ipip6_bucket(struct sit_net *sitn, struct ip_tunnel_parm_kern *parms)
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
@@ -207,7 +207,7 @@ static int ipip6_tunnel_create(struct net_device *dev)
__dev_addr_set(dev, &t->parms.iph.saddr, 4);
memcpy(dev->broadcast, &t->parms.iph.daddr, 4);
- if ((__force u16)t->parms.i_flags & SIT_ISATAP)
+ if (test_bit(IP_TUNNEL_SIT_ISATAP_BIT, t->parms.i_flags))
dev->priv_flags |= IFF_ISATAP;
dev->rtnl_link_ops = &sit_link_ops;
@@ -226,7 +226,8 @@ out:
}
static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
- struct ip_tunnel_parm *parms, int create)
+ struct ip_tunnel_parm_kern *parms,
+ int create)
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
@@ -1135,7 +1136,8 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
dev->needed_headroom = t_hlen + hlen;
}
-static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p,
+static void ipip6_tunnel_update(struct ip_tunnel *t,
+ struct ip_tunnel_parm_kern *p,
__u32 fwmark)
{
struct net *net = t->net;
@@ -1196,11 +1198,11 @@ static int
ipip6_tunnel_get6rd(struct net_device *dev, struct ip_tunnel_parm __user *data)
{
struct ip_tunnel *t = netdev_priv(dev);
+ struct ip_tunnel_parm_kern p;
struct ip_tunnel_6rd ip6rd;
- struct ip_tunnel_parm p;
if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
- if (copy_from_user(&p, data, sizeof(p)))
+ if (!ip_tunnel_parm_from_user(&p, data))
return -EFAULT;
t = ipip6_tunnel_locate(t->net, &p, 0);
}
@@ -1251,7 +1253,7 @@ static bool ipip6_valid_ip_proto(u8 ipproto)
}
static int
-__ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm *p)
+__ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm_kern *p)
{
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
@@ -1268,7 +1270,7 @@ __ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm *p)
}
static int
-ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm *p)
+ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm_kern *p)
{
struct ip_tunnel *t = netdev_priv(dev);
@@ -1281,7 +1283,7 @@ ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm *p)
}
static int
-ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm *p)
+ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm_kern *p)
{
struct ip_tunnel *t = netdev_priv(dev);
int err;
@@ -1297,7 +1299,7 @@ ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm *p)
}
static int
-ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm *p)
+ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm_kern *p)
{
struct ip_tunnel *t = netdev_priv(dev);
int err;
@@ -1328,7 +1330,7 @@ ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm *p)
}
static int
-ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm *p)
+ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm_kern *p)
{
struct ip_tunnel *t = netdev_priv(dev);
@@ -1348,7 +1350,8 @@ ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm *p)
}
static int
-ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
+ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p,
+ int cmd)
{
switch (cmd) {
case SIOCGETTUNNEL:
@@ -1490,7 +1493,7 @@ static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[],
}
static void ipip6_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms,
+ struct ip_tunnel_parm_kern *parms,
__u32 *fwmark)
{
memset(parms, 0, sizeof(*parms));
@@ -1599,8 +1602,8 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ struct ip_tunnel_parm_kern p;
struct net *net = t->net;
struct sit_net *sitn = net_generic(net, sit_net_id);
#ifdef CONFIG_IPV6_SIT_6RD
@@ -1687,7 +1690,7 @@ static size_t ipip6_get_size(const struct net_device *dev)
static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct ip_tunnel_parm *parm = &tunnel->parms;
+ struct ip_tunnel_parm_kern *parm = &tunnel->parms;
if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
@@ -1697,7 +1700,8 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
!!(parm->iph.frag_off & htons(IP_DF))) ||
nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
- nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags) ||
+ nla_put_be16(skb, IFLA_IPTUN_FLAGS,
+ ip_tunnel_flags_to_be16(parm->i_flags)) ||
nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
goto nla_put_failure;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 6d8286c299c9..9d83eadd308b 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -246,7 +246,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
}
}
- req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
+ req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? :dst_metric(dst, RTAX_WINDOW);
/* limit the window selection if the user enforce a smaller rx buffer */
full_space = tcp_full_space(sk);
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
@@ -275,6 +275,6 @@ out:
out_free:
reqsk_free(req);
out_drop:
- kfree_skb_reason(skb, reason);
+ sk_skb_reason_drop(sk, skb, reason);
return NULL;
}
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 888676163e90..d2cd33e2698d 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -30,7 +30,7 @@ static u32 rt6_multipath_hash_fields_all_mask =
static u32 ioam6_id_max = IOAM6_DEFAULT_ID;
static u64 ioam6_id_wide_max = IOAM6_DEFAULT_ID_WIDE;
-static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
+static int proc_rt6_multipath_hash_policy(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net;
@@ -46,7 +46,7 @@ static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
}
static int
-proc_rt6_multipath_hash_fields(struct ctl_table *table, int write, void *buffer,
+proc_rt6_multipath_hash_fields(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
struct net *net;
@@ -213,7 +213,6 @@ static struct ctl_table ipv6_table_template[] = {
.proc_handler = proc_doulongvec_minmax,
.extra2 = &ioam6_id_wide_max,
},
- { }
};
static struct ctl_table ipv6_rotable[] = {
@@ -248,11 +247,11 @@ static struct ctl_table ipv6_rotable[] = {
.proc_handler = proc_dointvec,
},
#endif /* CONFIG_NETLABEL */
- { }
};
static int __net_init ipv6_sysctl_net_init(struct net *net)
{
+ size_t table_size = ARRAY_SIZE(ipv6_table_template);
struct ctl_table *ipv6_table;
struct ctl_table *ipv6_route_table;
struct ctl_table *ipv6_icmp_table;
@@ -264,7 +263,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
if (!ipv6_table)
goto out;
/* Update the variables to point into the current struct net */
- for (i = 0; i < ARRAY_SIZE(ipv6_table_template) - 1; i++)
+ for (i = 0; i < table_size; i++)
ipv6_table[i].data += (void *)net - (void *)&init_net;
ipv6_route_table = ipv6_route_sysctl_init(net);
@@ -276,8 +275,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
goto out_ipv6_route_table;
net->ipv6.sysctl.hdr = register_net_sysctl_sz(net, "net/ipv6",
- ipv6_table,
- ARRAY_SIZE(ipv6_table_template));
+ ipv6_table, table_size);
if (!net->ipv6.sysctl.hdr)
goto out_ipv6_icmp_table;
@@ -313,9 +311,9 @@ out_ipv6_table:
static void __net_exit ipv6_sysctl_net_exit(struct net *net)
{
- struct ctl_table *ipv6_table;
- struct ctl_table *ipv6_route_table;
- struct ctl_table *ipv6_icmp_table;
+ const struct ctl_table *ipv6_table;
+ const struct ctl_table *ipv6_route_table;
+ const struct ctl_table *ipv6_icmp_table;
ipv6_table = net->ipv6.sysctl.hdr->ctl_table_arg;
ipv6_route_table = net->ipv6.sysctl.route_hdr->ctl_table_arg;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3f4cba49e9ee..200fea92f12f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -60,6 +60,7 @@
#include <net/secure_seq.h>
#include <net/hotdata.h>
#include <net/busy_poll.h>
+#include <net/rstreason.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -69,7 +70,8 @@
#include <trace/events/tcp.h>
-static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
+static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
+ enum sk_rst_reason reason);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
@@ -95,11 +97,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
struct dst_entry *dst = skb_dst(skb);
if (dst && dst_hold_safe(dst)) {
- const struct rt6_info *rt = (const struct rt6_info *)dst;
-
rcu_assign_pointer(sk->sk_rx_dst, dst);
sk->sk_rx_dst_ifindex = skb->skb_iif;
- sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
+ sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
}
}
@@ -490,14 +490,10 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
- if (!sock_owned_by_user(sk)) {
- WRITE_ONCE(sk->sk_err, err);
- sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
-
- tcp_done(sk);
- } else {
+ if (!sock_owned_by_user(sk))
+ tcp_done_with_error(sk, err);
+ else
WRITE_ONCE(sk->sk_err_soft, err);
- }
goto out;
case TCP_LISTEN:
break;
@@ -793,7 +789,8 @@ clear_hash_nostart:
static void tcp_v6_init_req(struct request_sock *req,
const struct sock *sk_listener,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ u32 tw_isn)
{
bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
struct inet_request_sock *ireq = inet_rsk(req);
@@ -807,7 +804,7 @@ static void tcp_v6_init_req(struct request_sock *req,
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = tcp_v6_iif(skb);
- if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
+ if (!tw_isn &&
(ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
np->rxopt.bits.rxinfo ||
np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
@@ -820,9 +817,10 @@ static void tcp_v6_init_req(struct request_sock *req,
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
- struct request_sock *req)
+ struct request_sock *req,
+ u32 tw_isn)
{
- tcp_v6_init_req(req, sk, skb);
+ tcp_v6_init_req(req, sk, skb, tw_isn);
if (security_inet_conn_request(sk, skb, req))
return NULL;
@@ -973,7 +971,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
mark = inet_twsk(sk)->tw_mark;
else
mark = READ_ONCE(sk->sk_mark);
- skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
+ skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC);
}
if (txhash) {
/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
@@ -1006,7 +1004,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
kfree_skb(buff);
}
-static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
+static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
+ enum sk_rst_reason reason)
{
const struct tcphdr *th = tcp_hdr(skb);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
@@ -1113,7 +1112,6 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
if (sk) {
oif = sk->sk_bound_dev_if;
if (sk_fullsock(sk)) {
- trace_tcp_send_reset(sk, skb);
if (inet6_test_bit(REPFLOW, sk))
label = ip6_flowlabel(ipv6h);
priority = READ_ONCE(sk->sk_priority);
@@ -1129,6 +1127,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
label = ip6_flowlabel(ipv6h);
}
+ trace_tcp_send_reset(sk, skb, reason);
+
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
ipv6_get_dsfield(ipv6h), label, priority, txhash,
&key);
@@ -1196,9 +1196,9 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_tw_tsval(tcptw),
- tcptw->tw_ts_recent, tw->tw_bound_dev_if, &key,
- tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
- tw->tw_txhash);
+ READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
+ &key, tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel),
+ tw->tw_priority, tw->tw_txhash);
#ifdef CONFIG_TCP_AO
out:
@@ -1268,15 +1268,10 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
*/
- /* RFC 7323 2.3
- * The window field (SEG.WND) of every outgoing segment, with the
- * exception of <SYN> segments, MUST be right-shifted by
- * Rcv.Wind.Shift bits:
- */
tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
tcp_rsk(req)->rcv_nxt,
- req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
+ tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
tcp_rsk_tsval(tcp_rsk(req)),
READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
&key, ipv6_get_dsfield(ipv6_hdr(skb)), 0,
@@ -1440,7 +1435,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
*/
newsk->sk_gso_type = SKB_GSO_TCPV6;
- ip6_dst_store(newsk, dst, NULL, NULL);
inet6_sk_rx_dst_set(newsk, skb);
inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
@@ -1451,6 +1445,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+ ip6_dst_store(newsk, dst, NULL, NULL);
+
newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
newnp->saddr = ireq->ir_v6_loc_addr;
newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
@@ -1674,11 +1670,11 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
reset:
- tcp_v6_send_reset(sk, skb);
+ tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason));
discard:
if (opt_skb)
__kfree_skb(opt_skb);
- kfree_skb_reason(skb, reason);
+ sk_skb_reason_drop(sk, skb, reason);
return 0;
csum_err:
reason = SKB_DROP_REASON_TCP_CSUM;
@@ -1738,7 +1734,6 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
skb->len - th->doff*4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
- TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->has_rxtstamp =
@@ -1752,9 +1747,10 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
int dif = inet6_iif(skb);
const struct tcphdr *th;
const struct ipv6hdr *hdr;
+ struct sock *sk = NULL;
bool refcounted;
- struct sock *sk;
int ret;
+ u32 isn;
struct net *net = dev_net(skb->dev);
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
@@ -1791,7 +1787,6 @@ lookup:
if (!sk)
goto no_tcp_socket;
-process:
if (sk->sk_state == TCP_TIME_WAIT)
goto do_time_wait;
@@ -1860,7 +1855,10 @@ process:
} else {
drop_reason = tcp_child_process(sk, nsk, skb);
if (drop_reason) {
- tcp_v6_send_reset(nsk, skb);
+ enum sk_rst_reason rst_reason;
+
+ rst_reason = sk_rst_convert_drop_reason(drop_reason);
+ tcp_v6_send_reset(nsk, skb, rst_reason);
goto discard_and_relse;
}
sock_put(sk);
@@ -1868,6 +1866,7 @@ process:
}
}
+process:
if (static_branch_unlikely(&ip6_min_hopcount)) {
/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
@@ -1936,12 +1935,12 @@ csum_error:
bad_packet:
__TCP_INC_STATS(net, TCP_MIB_INERRS);
} else {
- tcp_v6_send_reset(NULL, skb);
+ tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
}
discard_it:
SKB_DR_OR(drop_reason, NOT_SPECIFIED);
- kfree_skb_reason(skb, drop_reason);
+ sk_skb_reason_drop(sk, skb, drop_reason);
return 0;
discard_and_relse:
@@ -1964,7 +1963,7 @@ do_time_wait:
goto csum_error;
}
- switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
+ switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
case TCP_TW_SYN:
{
struct sock *sk2;
@@ -1982,6 +1981,7 @@ do_time_wait:
sk = sk2;
tcp_v6_restore_cb(skb);
refcounted = false;
+ __this_cpu_write(tcp_tw_isn, isn);
goto process;
}
}
@@ -1991,7 +1991,7 @@ do_time_wait:
tcp_v6_timewait_ack(sk, skb);
break;
case TCP_TW_RST:
- tcp_v6_send_reset(sk, skb);
+ tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
inet_twsk_deschedule_put(inet_twsk(sk));
goto discard_it;
case TCP_TW_SUCCESS:
@@ -2041,7 +2041,6 @@ void tcp_v6_early_demux(struct sk_buff *skb)
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
- .twsk_unique = tcp_twsk_unique,
.twsk_destructor = tcp_twsk_destructor,
};
@@ -2380,8 +2379,14 @@ static struct inet_protosw tcpv6_protosw = {
static int __net_init tcpv6_net_init(struct net *net)
{
- return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
- SOCK_RAW, IPPROTO_TCP, net);
+ int res;
+
+ res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
+ SOCK_RAW, IPPROTO_TCP, net);
+ if (!res)
+ net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC;
+
+ return res;
}
static void __net_exit tcpv6_net_exit(struct net *net)
@@ -2389,15 +2394,9 @@ static void __net_exit tcpv6_net_exit(struct net *net)
inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}
-static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
-{
- tcp_twsk_purge(net_exit_list, AF_INET6);
-}
-
static struct pernet_operations tcpv6_net_ops = {
.init = tcpv6_net_init,
.exit = tcpv6_net_exit,
- .exit_batch = tcpv6_net_exit_batch,
};
int __init tcpv6_init(void)
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 4b07d1e6c952..23971903e66d 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -7,31 +7,84 @@
*/
#include <linux/indirect_call_wrapper.h>
#include <linux/skbuff.h>
+#include <net/inet6_hashtables.h>
#include <net/gro.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/ip6_checksum.h>
#include "ip6_offload.h"
+static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
+ struct tcphdr *th)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ const struct ipv6hdr *hdr;
+ struct sk_buff *p;
+ struct sock *sk;
+ struct net *net;
+ int iif, sdif;
+
+ if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST)))
+ return;
+
+ p = tcp_gro_lookup(head, th);
+ if (p) {
+ NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
+ return;
+ }
+
+ inet6_get_iif_sdif(skb, &iif, &sdif);
+ hdr = skb_gro_network_header(skb);
+ net = dev_net(skb->dev);
+ sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
+ &hdr->saddr, th->source,
+ &hdr->daddr, ntohs(th->dest),
+ iif, sdif);
+ NAPI_GRO_CB(skb)->is_flist = !sk;
+ if (sk)
+ sock_put(sk);
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+}
+
INDIRECT_CALLABLE_SCOPE
struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)
{
+ struct tcphdr *th;
+
/* Don't bother verifying checksum if we're going to flush anyway. */
if (!NAPI_GRO_CB(skb)->flush &&
skb_gro_checksum_validate(skb, IPPROTO_TCP,
- ip6_gro_compute_pseudo)) {
- NAPI_GRO_CB(skb)->flush = 1;
- return NULL;
- }
+ ip6_gro_compute_pseudo))
+ goto flush;
- return tcp_gro_receive(head, skb);
+ th = tcp_gro_pull_header(skb);
+ if (!th)
+ goto flush;
+
+ tcp6_check_fraglist_gro(head, skb, th);
+
+ return tcp_gro_receive(head, skb, th);
+
+flush:
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
}
INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset);
struct tcphdr *th = tcp_hdr(skb);
+ if (unlikely(NAPI_GRO_CB(skb)->is_flist)) {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV6;
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+ __skb_incr_checksum_unnecessary(skb);
+
+ return 0;
+ }
+
th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
&iph->daddr, 0);
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
@@ -40,6 +93,61 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
return 0;
}
+static void __tcpv6_gso_segment_csum(struct sk_buff *seg,
+ __be16 *oldport, __be16 newport)
+{
+ struct tcphdr *th;
+
+ if (*oldport == newport)
+ return;
+
+ th = tcp_hdr(seg);
+ inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false);
+ *oldport = newport;
+}
+
+static struct sk_buff *__tcpv6_gso_segment_list_csum(struct sk_buff *segs)
+{
+ const struct tcphdr *th;
+ const struct ipv6hdr *iph;
+ struct sk_buff *seg;
+ struct tcphdr *th2;
+ struct ipv6hdr *iph2;
+
+ seg = segs;
+ th = tcp_hdr(seg);
+ iph = ipv6_hdr(seg);
+ th2 = tcp_hdr(seg->next);
+ iph2 = ipv6_hdr(seg->next);
+
+ if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) &&
+ ipv6_addr_equal(&iph->saddr, &iph2->saddr) &&
+ ipv6_addr_equal(&iph->daddr, &iph2->daddr))
+ return segs;
+
+ while ((seg = seg->next)) {
+ th2 = tcp_hdr(seg);
+ iph2 = ipv6_hdr(seg);
+
+ iph2->saddr = iph->saddr;
+ iph2->daddr = iph->daddr;
+ __tcpv6_gso_segment_csum(seg, &th2->source, th->source);
+ __tcpv6_gso_segment_csum(seg, &th2->dest, th->dest);
+ }
+
+ return segs;
+}
+
+static struct sk_buff *__tcp6_gso_segment_list(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+ if (IS_ERR(skb))
+ return skb;
+
+ return __tcpv6_gso_segment_list_csum(skb);
+}
+
static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -51,6 +159,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(*th)))
return ERR_PTR(-EINVAL);
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
+ return __tcp6_gso_segment_list(skb, features);
+
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 8f7aa8bac1e7..6602a2e9cdb5 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -34,6 +34,7 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/indirect_call_wrapper.h>
+#include <trace/events/udp.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
@@ -45,7 +46,6 @@
#include <net/tcp_states.h>
#include <net/ip6_checksum.h>
#include <net/ip6_tunnel.h>
-#include <trace/events/udp.h>
#include <net/xfrm.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
@@ -168,15 +168,21 @@ static struct sock *udp6_lib_lookup2(struct net *net,
{
struct sock *sk, *result;
int score, badness;
+ bool need_rescore;
result = NULL;
badness = -1;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
- score = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, sdif);
+ need_rescore = false;
+rescore:
+ score = compute_score(need_rescore ? result : sk, net, saddr,
+ sport, daddr, hnum, dif, sdif);
if (score > badness) {
badness = score;
+ if (need_rescore)
+ continue;
+
if (sk->sk_state == TCP_ESTABLISHED) {
result = sk;
continue;
@@ -197,8 +203,14 @@ static struct sock *udp6_lib_lookup2(struct net *net,
if (IS_ERR(result))
continue;
- badness = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, sdif);
+ /* compute_score is too long of a function to be
+ * inlined, and calling it again here yields
+ * measureable overhead for some
+ * workloads. Work around it by jumping
+ * backwards to rescore 'result'.
+ */
+ need_rescore = true;
+ goto rescore;
}
}
return result;
@@ -659,8 +671,8 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
drop_reason = SKB_DROP_REASON_PROTO_MEM;
}
UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
- kfree_skb_reason(skb, drop_reason);
- trace_udp_fail_queue_rcv_skb(rc, sk);
+ trace_udp_fail_queue_rcv_skb(rc, sk, skb);
+ sk_skb_reason_drop(sk, skb, drop_reason);
return -1;
}
@@ -763,7 +775,7 @@ csum_error:
drop:
__UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
atomic_inc(&sk->sk_drops);
- kfree_skb_reason(skb, drop_reason);
+ sk_skb_reason_drop(sk, skb, drop_reason);
return -1;
}
@@ -898,11 +910,8 @@ start_lookup:
static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
{
- if (udp_sk_rx_dst_set(sk, dst)) {
- const struct rt6_info *rt = (const struct rt6_info *)dst;
-
- sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
- }
+ if (udp_sk_rx_dst_set(sk, dst))
+ sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
}
/* wrapper for udp_queue_rcv_skb tacking care of csum conversion and
@@ -930,8 +939,8 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
const struct in6_addr *saddr, *daddr;
struct net *net = dev_net(skb->dev);
+ struct sock *sk = NULL;
struct udphdr *uh;
- struct sock *sk;
bool refcounted;
u32 ulen = 0;
@@ -1023,7 +1032,7 @@ no_sk:
__UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
- kfree_skb_reason(skb, reason);
+ sk_skb_reason_drop(sk, skb, reason);
return 0;
short_packet:
@@ -1044,7 +1053,7 @@ csum_error:
__UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
discard:
__UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
- kfree_skb_reason(skb, reason);
+ sk_skb_reason_drop(sk, skb, reason);
return 0;
}
@@ -1247,8 +1256,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
kfree_skb(skb);
return -EINVAL;
}
- if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
- dst_xfrm(skb_dst(skb))) {
+ if (is_udplite || dst_xfrm(skb_dst(skb))) {
kfree_skb(skb);
return -EIO;
}
@@ -1573,7 +1581,7 @@ back_from_confirm:
skb = ip6_make_skb(sk, getfrag, msg, ulen,
sizeof(struct udphdr), &ipc6,
- (struct rt6_info *)dst,
+ dst_rt6_info(dst),
msg->msg_flags, &cork);
err = PTR_ERR(skb);
if (!IS_ERR_OR_NULL(skb))
@@ -1600,7 +1608,7 @@ do_append_data:
ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
up->len += ulen;
err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
- &ipc6, fl6, (struct rt6_info *)dst,
+ &ipc6, fl6, dst_rt6_info(dst),
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
if (err)
udp_v6_flush_pending_frames(sk);
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index a17d783dc7c0..4abc5e9d6322 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -58,7 +58,11 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
skb_postpush_rcsum(skb, skb_network_header(skb), nhlen);
if (xo && (xo->flags & XFRM_GRO)) {
- skb_mac_header_rebuild(skb);
+ /* The full l2 header needs to be preserved so that re-injecting the packet at l2
+ * works correctly in the presence of vlan tags.
+ */
+ skb_mac_header_rebuild_full(skb, xo->orig_mac_len);
+ skb_reset_network_header(skb);
skb_reset_transport_header(skb);
return 0;
}
@@ -109,19 +113,6 @@ static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull
/* Must be an IKE packet.. pass it through */
return 1;
break;
- case UDP_ENCAP_ESPINUDP_NON_IKE:
- /* Check if this is a keepalive packet. If so, eat it. */
- if (len == 1 && udpdata[0] == 0xff) {
- return -EINVAL;
- } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
- udpdata32[0] == 0 && udpdata32[1] == 0) {
-
- /* ESP Packet with Non-IKE marker */
- len = sizeof(struct udphdr) + 2 * sizeof(u32);
- } else
- /* Must be an IKE packet.. pass it through */
- return 1;
- break;
}
/* At this point we are sure that this is an ESPinUDP packet,
@@ -279,6 +270,13 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
if (!x)
continue;
+ if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR);
+ xfrm_state_put(x);
+ x = NULL;
+ continue;
+ }
+
spin_lock(&x->lock);
if ((!i || (x->props.flags & XFRM_STATE_WILDRECV)) &&
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 42fb6996b077..b1d81c4270ab 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -56,12 +56,18 @@ static int xfrm6_get_saddr(struct net *net, int oif,
{
struct dst_entry *dst;
struct net_device *dev;
+ struct inet6_dev *idev;
dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr, mark);
if (IS_ERR(dst))
return -EHOSTUNREACH;
- dev = ip6_dst_idev(dst)->dev;
+ idev = ip6_dst_idev(dst);
+ if (!idev) {
+ dst_release(dst);
+ return -EHOSTUNREACH;
+ }
+ dev = idev->dev;
ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6);
dst_release(dst);
return 0;
@@ -70,7 +76,7 @@ static int xfrm6_get_saddr(struct net *net, int oif,
static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
const struct flowi *fl)
{
- struct rt6_info *rt = (struct rt6_info *)xdst->route;
+ struct rt6_info *rt = dst_rt6_info(xdst->route);
xdst->u.dst.dev = dev;
netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC);
@@ -184,7 +190,6 @@ static struct ctl_table xfrm6_policy_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
static int __net_init xfrm6_net_sysctl_init(struct net *net)
@@ -218,7 +223,7 @@ err_alloc:
static void __net_exit xfrm6_net_sysctl_exit(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
if (!net->ipv6.sysctl.xfrm6_hdr)
return;
@@ -285,8 +290,14 @@ int __init xfrm6_init(void)
ret = register_pernet_subsys(&xfrm6_net_ops);
if (ret)
goto out_protocol;
+
+ ret = xfrm_nat_keepalive_init(AF_INET6);
+ if (ret)
+ goto out_nat_keepalive;
out:
return ret;
+out_nat_keepalive:
+ unregister_pernet_subsys(&xfrm6_net_ops);
out_protocol:
xfrm6_protocol_fini();
out_state:
@@ -298,6 +309,7 @@ out_policy:
void xfrm6_fini(void)
{
+ xfrm_nat_keepalive_fini(AF_INET6);
unregister_pernet_subsys(&xfrm6_net_ops);
xfrm6_protocol_fini();
xfrm6_policy_fini();
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 7c8c3adcac6e..c00323fa9eb6 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -184,7 +184,7 @@ static void iucv_sock_wake_msglim(struct sock *sk)
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
wake_up_interruptible_all(&wq->wait);
- sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
rcu_read_unlock();
}
@@ -335,8 +335,8 @@ static void iucv_sever_path(struct sock *sk, int with_user_data)
struct iucv_sock *iucv = iucv_sk(sk);
struct iucv_path *path = iucv->path;
- if (iucv->path) {
- iucv->path = NULL;
+ /* Whoever resets the path pointer, must sever and free it. */
+ if (xchg(&iucv->path, NULL)) {
if (with_user_data) {
low_nmcpy(user_data, iucv->src_name);
high_nmcpy(user_data, iucv->dst_name);
@@ -795,7 +795,7 @@ done:
/* Accept a pending connection */
static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
DECLARE_WAITQUEUE(wait, current);
struct sock *sk = sock->sk, *nsk;
@@ -809,7 +809,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
goto done;
}
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
/* Wait for an incoming connection */
add_wait_queue_exclusive(sk_sleep(sk), &wait);
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index a4ab615ca3e3..1e42e13ad24e 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -62,7 +62,7 @@
#define IUCV_IPNORPY 0x10
#define IUCV_IPALL 0x80
-static int iucv_bus_match(struct device *dev, struct device_driver *drv)
+static int iucv_bus_match(struct device *dev, const struct device_driver *drv)
{
return 0;
}
@@ -73,8 +73,42 @@ const struct bus_type iucv_bus = {
};
EXPORT_SYMBOL(iucv_bus);
-struct device *iucv_root;
-EXPORT_SYMBOL(iucv_root);
+static struct device *iucv_root;
+
+static void iucv_release_device(struct device *device)
+{
+ kfree(device);
+}
+
+struct device *iucv_alloc_device(const struct attribute_group **attrs,
+ struct device_driver *driver,
+ void *priv, const char *fmt, ...)
+{
+ struct device *dev;
+ va_list vargs;
+ int rc;
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ goto out_error;
+ va_start(vargs, fmt);
+ rc = dev_set_name(dev, fmt, vargs);
+ va_end(vargs);
+ if (rc)
+ goto out_error;
+ dev->bus = &iucv_bus;
+ dev->parent = iucv_root;
+ dev->driver = driver;
+ dev->groups = attrs;
+ dev->release = iucv_release_device;
+ dev_set_drvdata(dev, priv);
+ return dev;
+
+out_error:
+ kfree(dev);
+ return NULL;
+}
+EXPORT_SYMBOL(iucv_alloc_device);
static int iucv_available;
@@ -520,7 +554,7 @@ static void iucv_setmask_mp(void)
*/
static void iucv_setmask_up(void)
{
- cpumask_t cpumask;
+ static cpumask_t cpumask;
int cpu;
/* Disable all cpu but the first in cpu_irq_cpumask. */
@@ -628,23 +662,33 @@ static int iucv_cpu_online(unsigned int cpu)
static int iucv_cpu_down_prep(unsigned int cpu)
{
- cpumask_t cpumask;
+ cpumask_var_t cpumask;
+ int ret = 0;
if (!iucv_path_table)
return 0;
- cpumask_copy(&cpumask, &iucv_buffer_cpumask);
- cpumask_clear_cpu(cpu, &cpumask);
- if (cpumask_empty(&cpumask))
+ if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
+ return -ENOMEM;
+
+ cpumask_copy(cpumask, &iucv_buffer_cpumask);
+ cpumask_clear_cpu(cpu, cpumask);
+ if (cpumask_empty(cpumask)) {
/* Can't offline last IUCV enabled cpu. */
- return -EINVAL;
+ ret = -EINVAL;
+ goto __free_cpumask;
+ }
iucv_retrieve_cpu(NULL);
if (!cpumask_empty(&iucv_irq_cpumask))
- return 0;
+ goto __free_cpumask;
+
smp_call_function_single(cpumask_first(&iucv_buffer_cpumask),
iucv_allow_cpu, NULL, 1);
- return 0;
+
+__free_cpumask:
+ free_cpumask_var(cpumask);
+ return ret;
}
/**
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 8d21ff25f160..2e86f520f799 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -39,7 +39,6 @@
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/l2tp.h>
-#include <linux/hash.h>
#include <linux/sort.h>
#include <linux/file.h>
#include <linux/nsproxy.h>
@@ -61,7 +60,6 @@
#include <linux/atomic.h>
#include "l2tp_core.h"
-#include "trace.h"
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -88,6 +86,11 @@
/* Default trace flags */
#define L2TP_DEFAULT_DEBUG_FLAGS 0
+#define L2TP_DEPTH_NESTING 2
+#if L2TP_DEPTH_NESTING == SINGLE_DEPTH_NESTING
+#error "L2TP requires its own lockdep subclass"
+#endif
+
/* Private data stored for received packets in the skb.
*/
struct l2tp_skb_cb {
@@ -107,11 +110,23 @@ struct l2tp_net {
/* Lock for write access to l2tp_tunnel_idr */
spinlock_t l2tp_tunnel_idr_lock;
struct idr l2tp_tunnel_idr;
- struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2];
- /* Lock for write access to l2tp_session_hlist */
- spinlock_t l2tp_session_hlist_lock;
+ /* Lock for write access to l2tp_v[23]_session_idr/htable */
+ spinlock_t l2tp_session_idr_lock;
+ struct idr l2tp_v2_session_idr;
+ struct idr l2tp_v3_session_idr;
+ struct hlist_head l2tp_v3_session_htable[16];
};
+static inline u32 l2tp_v2_session_key(u16 tunnel_id, u16 session_id)
+{
+ return ((u32)tunnel_id) << 16 | session_id;
+}
+
+static inline unsigned long l2tp_v3_session_hashkey(struct sock *sk, u32 session_id)
+{
+ return ((unsigned long)sk) + session_id;
+}
+
#if IS_ENABLED(CONFIG_IPV6)
static bool l2tp_sk_is_v6(struct sock *sk)
{
@@ -125,29 +140,6 @@ static inline struct l2tp_net *l2tp_pernet(const struct net *net)
return net_generic(net, l2tp_net_id);
}
-/* Session hash global list for L2TPv3.
- * The session_id SHOULD be random according to RFC3931, but several
- * L2TP implementations use incrementing session_ids. So we do a real
- * hash on the session_id, rather than a simple bitmask.
- */
-static inline struct hlist_head *
-l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id)
-{
- return &pn->l2tp_session_hlist[hash_32(session_id, L2TP_HASH_BITS_2)];
-}
-
-/* Session hash list.
- * The session_id SHOULD be random according to RFC2661, but several
- * L2TP implementations (Cisco and Microsoft) use incrementing
- * session_ids. So we do a real hash on the session_id, rather than a
- * simple bitmask.
- */
-static inline struct hlist_head *
-l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
-{
- return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)];
-}
-
static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
{
trace_free_tunnel(tunnel);
@@ -240,66 +232,82 @@ struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth)
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_get_nth);
-struct l2tp_session *l2tp_tunnel_get_session(struct l2tp_tunnel *tunnel,
- u32 session_id)
+struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, u32 session_id)
{
- struct hlist_head *session_list;
+ const struct l2tp_net *pn = l2tp_pernet(net);
struct l2tp_session *session;
- session_list = l2tp_session_id_hash(tunnel, session_id);
-
rcu_read_lock_bh();
- hlist_for_each_entry_rcu(session, session_list, hlist)
- if (session->session_id == session_id) {
- l2tp_session_inc_refcount(session);
- rcu_read_unlock_bh();
+ session = idr_find(&pn->l2tp_v3_session_idr, session_id);
+ if (session && !hash_hashed(&session->hlist) &&
+ refcount_inc_not_zero(&session->ref_count)) {
+ rcu_read_unlock_bh();
+ return session;
+ }
- return session;
+ /* If we get here and session is non-NULL, the session_id
+ * collides with one in another tunnel. If sk is non-NULL,
+ * find the session matching sk.
+ */
+ if (session && sk) {
+ unsigned long key = l2tp_v3_session_hashkey(sk, session->session_id);
+
+ hash_for_each_possible_rcu(pn->l2tp_v3_session_htable, session,
+ hlist, key) {
+ if (session->tunnel->sock == sk &&
+ refcount_inc_not_zero(&session->ref_count)) {
+ rcu_read_unlock_bh();
+ return session;
+ }
}
+ }
rcu_read_unlock_bh();
return NULL;
}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_get_session);
+EXPORT_SYMBOL_GPL(l2tp_v3_session_get);
-struct l2tp_session *l2tp_session_get(const struct net *net, u32 session_id)
+struct l2tp_session *l2tp_v2_session_get(const struct net *net, u16 tunnel_id, u16 session_id)
{
- struct hlist_head *session_list;
+ u32 session_key = l2tp_v2_session_key(tunnel_id, session_id);
+ const struct l2tp_net *pn = l2tp_pernet(net);
struct l2tp_session *session;
- session_list = l2tp_session_id_hash_2(l2tp_pernet(net), session_id);
-
rcu_read_lock_bh();
- hlist_for_each_entry_rcu(session, session_list, global_hlist)
- if (session->session_id == session_id) {
- l2tp_session_inc_refcount(session);
- rcu_read_unlock_bh();
-
- return session;
- }
+ session = idr_find(&pn->l2tp_v2_session_idr, session_key);
+ if (session && refcount_inc_not_zero(&session->ref_count)) {
+ rcu_read_unlock_bh();
+ return session;
+ }
rcu_read_unlock_bh();
return NULL;
}
+EXPORT_SYMBOL_GPL(l2tp_v2_session_get);
+
+struct l2tp_session *l2tp_session_get(const struct net *net, struct sock *sk, int pver,
+ u32 tunnel_id, u32 session_id)
+{
+ if (pver == L2TP_HDR_VER_2)
+ return l2tp_v2_session_get(net, tunnel_id, session_id);
+ else
+ return l2tp_v3_session_get(net, sk, session_id);
+}
EXPORT_SYMBOL_GPL(l2tp_session_get);
struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth)
{
- int hash;
struct l2tp_session *session;
int count = 0;
rcu_read_lock_bh();
- for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
- hlist_for_each_entry_rcu(session, &tunnel->session_hlist[hash], hlist) {
- if (++count > nth) {
- l2tp_session_inc_refcount(session);
- rcu_read_unlock_bh();
- return session;
- }
+ list_for_each_entry_rcu(session, &tunnel->session_list, list) {
+ if (++count > nth) {
+ l2tp_session_inc_refcount(session);
+ rcu_read_unlock_bh();
+ return session;
}
}
-
rcu_read_unlock_bh();
return NULL;
@@ -313,86 +321,182 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
const char *ifname)
{
struct l2tp_net *pn = l2tp_pernet(net);
- int hash;
+ unsigned long tunnel_id, tmp;
struct l2tp_session *session;
+ struct l2tp_tunnel *tunnel;
rcu_read_lock_bh();
- for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) {
- hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) {
- if (!strcmp(session->ifname, ifname)) {
- l2tp_session_inc_refcount(session);
- rcu_read_unlock_bh();
-
- return session;
+ idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
+ if (tunnel) {
+ list_for_each_entry_rcu(session, &tunnel->session_list, list) {
+ if (!strcmp(session->ifname, ifname)) {
+ l2tp_session_inc_refcount(session);
+ rcu_read_unlock_bh();
+
+ return session;
+ }
}
}
}
-
rcu_read_unlock_bh();
return NULL;
}
EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname);
+static void l2tp_session_coll_list_add(struct l2tp_session_coll_list *clist,
+ struct l2tp_session *session)
+{
+ l2tp_session_inc_refcount(session);
+ WARN_ON_ONCE(session->coll_list);
+ session->coll_list = clist;
+ spin_lock(&clist->lock);
+ list_add(&session->clist, &clist->list);
+ spin_unlock(&clist->lock);
+}
+
+static int l2tp_session_collision_add(struct l2tp_net *pn,
+ struct l2tp_session *session1,
+ struct l2tp_session *session2)
+{
+ struct l2tp_session_coll_list *clist;
+
+ lockdep_assert_held(&pn->l2tp_session_idr_lock);
+
+ if (!session2)
+ return -EEXIST;
+
+ /* If existing session is in IP-encap tunnel, refuse new session */
+ if (session2->tunnel->encap == L2TP_ENCAPTYPE_IP)
+ return -EEXIST;
+
+ clist = session2->coll_list;
+ if (!clist) {
+ /* First collision. Allocate list to manage the collided sessions
+ * and add the existing session to the list.
+ */
+ clist = kmalloc(sizeof(*clist), GFP_ATOMIC);
+ if (!clist)
+ return -ENOMEM;
+
+ spin_lock_init(&clist->lock);
+ INIT_LIST_HEAD(&clist->list);
+ refcount_set(&clist->ref_count, 1);
+ l2tp_session_coll_list_add(clist, session2);
+ }
+
+ /* If existing session isn't already in the session hlist, add it. */
+ if (!hash_hashed(&session2->hlist))
+ hash_add(pn->l2tp_v3_session_htable, &session2->hlist,
+ session2->hlist_key);
+
+ /* Add new session to the hlist and collision list */
+ hash_add(pn->l2tp_v3_session_htable, &session1->hlist,
+ session1->hlist_key);
+ refcount_inc(&clist->ref_count);
+ l2tp_session_coll_list_add(clist, session1);
+
+ return 0;
+}
+
+static void l2tp_session_collision_del(struct l2tp_net *pn,
+ struct l2tp_session *session)
+{
+ struct l2tp_session_coll_list *clist = session->coll_list;
+ unsigned long session_key = session->session_id;
+ struct l2tp_session *session2;
+
+ lockdep_assert_held(&pn->l2tp_session_idr_lock);
+
+ hash_del(&session->hlist);
+
+ if (clist) {
+ /* Remove session from its collision list. If there
+ * are other sessions with the same ID, replace this
+ * session's IDR entry with that session, otherwise
+ * remove the IDR entry. If this is the last session,
+ * the collision list data is freed.
+ */
+ spin_lock(&clist->lock);
+ list_del_init(&session->clist);
+ session2 = list_first_entry_or_null(&clist->list, struct l2tp_session, clist);
+ if (session2) {
+ void *old = idr_replace(&pn->l2tp_v3_session_idr, session2, session_key);
+
+ WARN_ON_ONCE(IS_ERR_VALUE(old));
+ } else {
+ void *removed = idr_remove(&pn->l2tp_v3_session_idr, session_key);
+
+ WARN_ON_ONCE(removed != session);
+ }
+ session->coll_list = NULL;
+ spin_unlock(&clist->lock);
+ if (refcount_dec_and_test(&clist->ref_count))
+ kfree(clist);
+ l2tp_session_dec_refcount(session);
+ }
+}
+
int l2tp_session_register(struct l2tp_session *session,
struct l2tp_tunnel *tunnel)
{
- struct l2tp_session *session_walk;
- struct hlist_head *g_head;
- struct hlist_head *head;
- struct l2tp_net *pn;
+ struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
+ struct l2tp_session *other_session = NULL;
+ u32 session_key;
int err;
- head = l2tp_session_id_hash(tunnel, session->session_id);
+ spin_lock_bh(&tunnel->list_lock);
+ spin_lock_bh(&pn->l2tp_session_idr_lock);
- spin_lock_bh(&tunnel->hlist_lock);
if (!tunnel->acpt_newsess) {
err = -ENODEV;
- goto err_tlock;
+ goto out;
}
- hlist_for_each_entry(session_walk, head, hlist)
- if (session_walk->session_id == session->session_id) {
- err = -EEXIST;
- goto err_tlock;
- }
-
if (tunnel->version == L2TP_HDR_VER_3) {
- pn = l2tp_pernet(tunnel->l2tp_net);
- g_head = l2tp_session_id_hash_2(pn, session->session_id);
-
- spin_lock_bh(&pn->l2tp_session_hlist_lock);
-
+ session_key = session->session_id;
+ err = idr_alloc_u32(&pn->l2tp_v3_session_idr, NULL,
+ &session_key, session_key, GFP_ATOMIC);
/* IP encap expects session IDs to be globally unique, while
- * UDP encap doesn't.
+ * UDP encap doesn't. This isn't per the RFC, which says that
+ * sessions are identified only by the session ID, but is to
+ * support existing userspace which depends on it.
*/
- hlist_for_each_entry(session_walk, g_head, global_hlist)
- if (session_walk->session_id == session->session_id &&
- (session_walk->tunnel->encap == L2TP_ENCAPTYPE_IP ||
- tunnel->encap == L2TP_ENCAPTYPE_IP)) {
- err = -EEXIST;
- goto err_tlock_pnlock;
- }
-
- l2tp_tunnel_inc_refcount(tunnel);
- hlist_add_head_rcu(&session->global_hlist, g_head);
-
- spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+ if (err == -ENOSPC && tunnel->encap == L2TP_ENCAPTYPE_UDP) {
+ other_session = idr_find(&pn->l2tp_v3_session_idr,
+ session_key);
+ err = l2tp_session_collision_add(pn, session,
+ other_session);
+ }
} else {
- l2tp_tunnel_inc_refcount(tunnel);
+ session_key = l2tp_v2_session_key(tunnel->tunnel_id,
+ session->session_id);
+ err = idr_alloc_u32(&pn->l2tp_v2_session_idr, NULL,
+ &session_key, session_key, GFP_ATOMIC);
}
- hlist_add_head_rcu(&session->hlist, head);
- spin_unlock_bh(&tunnel->hlist_lock);
+ if (err) {
+ if (err == -ENOSPC)
+ err = -EEXIST;
+ goto out;
+ }
- trace_register_session(session);
+ l2tp_tunnel_inc_refcount(tunnel);
+ list_add(&session->list, &tunnel->session_list);
- return 0;
+ if (tunnel->version == L2TP_HDR_VER_3) {
+ if (!other_session)
+ idr_replace(&pn->l2tp_v3_session_idr, session, session_key);
+ } else {
+ idr_replace(&pn->l2tp_v2_session_idr, session, session_key);
+ }
-err_tlock_pnlock:
- spin_unlock_bh(&pn->l2tp_session_hlist_lock);
-err_tlock:
- spin_unlock_bh(&tunnel->hlist_lock);
+out:
+ spin_unlock_bh(&pn->l2tp_session_idr_lock);
+ spin_unlock_bh(&tunnel->list_lock);
+
+ if (!err)
+ trace_register_session(session);
return err;
}
@@ -785,18 +889,14 @@ static void l2tp_session_queue_purge(struct l2tp_session *session)
}
}
-/* Internal UDP receive frame. Do the real work of receiving an L2TP data frame
- * here. The skb is not on a list when we get here.
- * Returns 0 if the packet was a data packet and was successfully passed on.
- * Returns 1 if the packet was not a good data packet and could not be
- * forwarded. All such packets are passed up to userspace to deal with.
- */
-static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
+/* UDP encapsulation receive handler. See net/ipv4/udp.c for details. */
+int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct l2tp_session *session = NULL;
+ struct l2tp_tunnel *tunnel = NULL;
+ struct net *net = sock_net(sk);
unsigned char *ptr, *optr;
u16 hdrflags;
- u32 tunnel_id, session_id;
u16 version;
int length;
@@ -806,11 +906,8 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
__skb_pull(skb, sizeof(struct udphdr));
/* Short packet? */
- if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) {
- pr_debug_ratelimited("%s: recv short packet (len=%d)\n",
- tunnel->name, skb->len);
- goto invalid;
- }
+ if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX))
+ goto pass;
/* Point to L2TP header */
optr = skb->data;
@@ -819,13 +916,8 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
/* Get L2TP header flags */
hdrflags = ntohs(*(__be16 *)ptr);
- /* Check protocol version */
+ /* Get protocol version */
version = hdrflags & L2TP_HDR_VER_MASK;
- if (version != tunnel->version) {
- pr_debug_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n",
- tunnel->name, version, tunnel->version);
- goto invalid;
- }
/* Get length of L2TP packet */
length = skb->len;
@@ -837,7 +929,9 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
/* Skip flags */
ptr += 2;
- if (tunnel->version == L2TP_HDR_VER_2) {
+ if (version == L2TP_HDR_VER_2) {
+ u16 tunnel_id, session_id;
+
/* If length is present, skip it */
if (hdrflags & L2TP_HDRFLAG_L)
ptr += 2;
@@ -847,26 +941,33 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
ptr += 2;
session_id = ntohs(*(__be16 *)ptr);
ptr += 2;
+
+ session = l2tp_v2_session_get(net, tunnel_id, session_id);
} else {
+ u32 session_id;
+
ptr += 2; /* skip reserved bits */
- tunnel_id = tunnel->tunnel_id;
session_id = ntohl(*(__be32 *)ptr);
ptr += 4;
+
+ session = l2tp_v3_session_get(net, sk, session_id);
}
- /* Find the session context */
- session = l2tp_tunnel_get_session(tunnel, session_id);
if (!session || !session->recv_skb) {
if (session)
l2tp_session_dec_refcount(session);
/* Not found? Pass to userspace to deal with */
- pr_debug_ratelimited("%s: no session found (%u/%u). Passing up.\n",
- tunnel->name, tunnel_id, session_id);
goto pass;
}
- if (tunnel->version == L2TP_HDR_VER_3 &&
+ tunnel = session->tunnel;
+
+ /* Check protocol version */
+ if (version != tunnel->version)
+ goto invalid;
+
+ if (version == L2TP_HDR_VER_3 &&
l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) {
l2tp_session_dec_refcount(session);
goto invalid;
@@ -886,38 +987,25 @@ pass:
return 1;
}
+EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv);
-/* UDP encapsulation receive handler. See net/ipv4/udp.c.
- * Return codes:
- * 0 : success.
- * <0: error
- * >0: skb should be passed up to userspace as UDP.
- */
-int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
+/* UDP encapsulation receive error handler. See net/ipv4/udp.c for details. */
+static void l2tp_udp_encap_err_recv(struct sock *sk, struct sk_buff *skb, int err,
+ __be16 port, u32 info, u8 *payload)
{
- struct l2tp_tunnel *tunnel;
+ sk->sk_err = err;
+ sk_error_report(sk);
- /* Note that this is called from the encap_rcv hook inside an
- * RCU-protected region, but without the socket being locked.
- * Hence we use rcu_dereference_sk_user_data to access the
- * tunnel data structure rather the usual l2tp_sk_to_tunnel
- * accessor function.
- */
- tunnel = rcu_dereference_sk_user_data(sk);
- if (!tunnel)
- goto pass_up;
- if (WARN_ON(tunnel->magic != L2TP_TUNNEL_MAGIC))
- goto pass_up;
-
- if (l2tp_udp_recv_core(tunnel, skb))
- goto pass_up;
-
- return 0;
-
-pass_up:
- return 1;
+ if (ip_hdr(skb)->version == IPVERSION) {
+ if (inet_test_bit(RECVERR, sk))
+ return ip_icmp_error(sk, skb, err, port, info, payload);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else {
+ if (inet6_test_bit(RECVERR6, sk))
+ return ipv6_icmp_error(sk, skb, err, port, info, payload);
+#endif
+ }
}
-EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv);
/************************************************************************
* Transmit handling
@@ -1041,7 +1129,13 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns
IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED);
nf_reset_ct(skb);
- bh_lock_sock_nested(sk);
+ /* L2TP uses its own lockdep subclass to avoid lockdep splats caused by
+ * nested socket calls on the same lockdep socket class. This can
+ * happen when data from a user socket is routed over l2tp, which uses
+ * another userspace socket.
+ */
+ spin_lock_nested(&sk->sk_lock.slock, L2TP_DEPTH_NESTING);
+
if (sock_owned_by_user(sk)) {
kfree_skb(skb);
ret = NET_XMIT_DROP;
@@ -1093,7 +1187,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns
ret = l2tp_xmit_queue(tunnel, skb, &inet->cork.fl);
out_unlock:
- bh_unlock_sock(sk);
+ spin_unlock(&sk->sk_lock.slock);
return ret;
}
@@ -1162,26 +1256,38 @@ end:
return;
}
-/* Remove an l2tp session from l2tp_core's hash lists. */
+/* Remove an l2tp session from l2tp_core's lists. */
static void l2tp_session_unhash(struct l2tp_session *session)
{
struct l2tp_tunnel *tunnel = session->tunnel;
- /* Remove the session from core hashes */
if (tunnel) {
- /* Remove from the per-tunnel hash */
- spin_lock_bh(&tunnel->hlist_lock);
- hlist_del_init_rcu(&session->hlist);
- spin_unlock_bh(&tunnel->hlist_lock);
-
- /* For L2TPv3 we have a per-net hash: remove from there, too */
- if (tunnel->version != L2TP_HDR_VER_2) {
- struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
-
- spin_lock_bh(&pn->l2tp_session_hlist_lock);
- hlist_del_init_rcu(&session->global_hlist);
- spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+ struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
+ struct l2tp_session *removed = session;
+
+ spin_lock_bh(&tunnel->list_lock);
+ spin_lock_bh(&pn->l2tp_session_idr_lock);
+
+ /* Remove from the per-tunnel list */
+ list_del_init(&session->list);
+
+ /* Remove from per-net IDR */
+ if (tunnel->version == L2TP_HDR_VER_3) {
+ if (hash_hashed(&session->hlist))
+ l2tp_session_collision_del(pn, session);
+ else
+ removed = idr_remove(&pn->l2tp_v3_session_idr,
+ session->session_id);
+ } else {
+ u32 session_key = l2tp_v2_session_key(tunnel->tunnel_id,
+ session->session_id);
+ removed = idr_remove(&pn->l2tp_v2_session_idr,
+ session_key);
}
+ WARN_ON_ONCE(removed && removed != session);
+
+ spin_unlock_bh(&pn->l2tp_session_idr_lock);
+ spin_unlock_bh(&tunnel->list_lock);
synchronize_rcu();
}
@@ -1192,28 +1298,22 @@ static void l2tp_session_unhash(struct l2tp_session *session)
static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
{
struct l2tp_session *session;
- int hash;
- spin_lock_bh(&tunnel->hlist_lock);
+ spin_lock_bh(&tunnel->list_lock);
tunnel->acpt_newsess = false;
- for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
-again:
- hlist_for_each_entry_rcu(session, &tunnel->session_hlist[hash], hlist) {
- hlist_del_init_rcu(&session->hlist);
-
- spin_unlock_bh(&tunnel->hlist_lock);
- l2tp_session_delete(session);
- spin_lock_bh(&tunnel->hlist_lock);
-
- /* Now restart from the beginning of this hash
- * chain. We always remove a session from the
- * list so we are guaranteed to make forward
- * progress.
- */
- goto again;
- }
+ for (;;) {
+ session = list_first_entry_or_null(&tunnel->session_list,
+ struct l2tp_session, list);
+ if (!session)
+ break;
+ l2tp_session_inc_refcount(session);
+ list_del_init(&session->list);
+ spin_unlock_bh(&tunnel->list_lock);
+ l2tp_session_delete(session);
+ spin_lock_bh(&tunnel->list_lock);
+ l2tp_session_dec_refcount(session);
}
- spin_unlock_bh(&tunnel->hlist_lock);
+ spin_unlock_bh(&tunnel->list_lock);
}
/* Tunnel socket destroy hook for UDP encapsulation */
@@ -1407,8 +1507,9 @@ int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
tunnel->magic = L2TP_TUNNEL_MAGIC;
sprintf(&tunnel->name[0], "tunl %u", tunnel_id);
- spin_lock_init(&tunnel->hlist_lock);
+ spin_lock_init(&tunnel->list_lock);
tunnel->acpt_newsess = true;
+ INIT_LIST_HEAD(&tunnel->session_list);
tunnel->encap = encap;
@@ -1418,8 +1519,6 @@ int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
/* Init delete workqueue struct */
INIT_WORK(&tunnel->del_work, l2tp_tunnel_del_work);
- INIT_LIST_HEAD(&tunnel->list);
-
err = 0;
err:
if (tunnelp)
@@ -1493,6 +1592,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
.sk_user_data = tunnel,
.encap_type = UDP_ENCAP_L2TPINUDP,
.encap_rcv = l2tp_udp_encap_recv,
+ .encap_err_rcv = l2tp_udp_encap_err_recv,
.encap_destroy = l2tp_udp_encap_destroy,
};
@@ -1606,8 +1706,10 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
skb_queue_head_init(&session->reorder_q);
+ session->hlist_key = l2tp_v3_session_hashkey(tunnel->sock, session->session_id);
INIT_HLIST_NODE(&session->hlist);
- INIT_HLIST_NODE(&session->global_hlist);
+ INIT_LIST_HEAD(&session->clist);
+ INIT_LIST_HEAD(&session->list);
if (cfg) {
session->pwtype = cfg->pw_type;
@@ -1640,15 +1742,13 @@ EXPORT_SYMBOL_GPL(l2tp_session_create);
static __net_init int l2tp_init_net(struct net *net)
{
struct l2tp_net *pn = net_generic(net, l2tp_net_id);
- int hash;
idr_init(&pn->l2tp_tunnel_idr);
spin_lock_init(&pn->l2tp_tunnel_idr_lock);
- for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
- INIT_HLIST_HEAD(&pn->l2tp_session_hlist[hash]);
-
- spin_lock_init(&pn->l2tp_session_hlist_lock);
+ idr_init(&pn->l2tp_v2_session_idr);
+ idr_init(&pn->l2tp_v3_session_idr);
+ spin_lock_init(&pn->l2tp_session_idr_lock);
return 0;
}
@@ -1658,7 +1758,6 @@ static __net_exit void l2tp_exit_net(struct net *net)
struct l2tp_net *pn = l2tp_pernet(net);
struct l2tp_tunnel *tunnel = NULL;
unsigned long tunnel_id, tmp;
- int hash;
rcu_read_lock_bh();
idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
@@ -1671,8 +1770,8 @@ static __net_exit void l2tp_exit_net(struct net *net)
flush_workqueue(l2tp_wq);
rcu_barrier();
- for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
- WARN_ON_ONCE(!hlist_empty(&pn->l2tp_session_hlist[hash]));
+ idr_destroy(&pn->l2tp_v2_session_idr);
+ idr_destroy(&pn->l2tp_v3_session_idr);
idr_destroy(&pn->l2tp_tunnel_idr);
}
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 91ebf0a3f499..8ac81bc1bc6f 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -19,14 +19,6 @@
#define L2TP_TUNNEL_MAGIC 0x42114DDA
#define L2TP_SESSION_MAGIC 0x0C04EB7D
-/* Per tunnel session hash table size */
-#define L2TP_HASH_BITS 4
-#define L2TP_HASH_SIZE BIT(L2TP_HASH_BITS)
-
-/* System-wide session hash table size */
-#define L2TP_HASH_BITS_2 8
-#define L2TP_HASH_SIZE_2 BIT(L2TP_HASH_BITS_2)
-
struct sk_buff;
struct l2tp_stats {
@@ -61,10 +53,15 @@ struct l2tp_session_cfg {
char *ifname;
};
+struct l2tp_session_coll_list {
+ spinlock_t lock; /* for access to list */
+ struct list_head list;
+ refcount_t ref_count;
+};
+
/* Represents a session (pseudowire) instance.
* Tracks runtime state including cookies, dataplane packet sequencing, and IO statistics.
- * Is linked into a per-tunnel session hashlist; and in the case of an L2TPv3 session into
- * an additional per-net ("global") hashlist.
+ * Is linked into a per-tunnel session list and a per-net ("global") IDR tree.
*/
#define L2TP_SESSION_NAME_MAX 32
struct l2tp_session {
@@ -88,8 +85,12 @@ struct l2tp_session {
u32 nr_oos; /* NR of last OOS packet */
int nr_oos_count; /* for OOS recovery */
int nr_oos_count_max;
- struct hlist_node hlist; /* hash list node */
+ struct list_head list; /* per-tunnel list node */
refcount_t ref_count;
+ struct hlist_node hlist; /* per-net session hlist */
+ unsigned long hlist_key; /* key for session hlist */
+ struct l2tp_session_coll_list *coll_list; /* session collision list */
+ struct list_head clist; /* for coll_list */
char name[L2TP_SESSION_NAME_MAX]; /* for logging */
char ifname[IFNAMSIZ];
@@ -102,7 +103,6 @@ struct l2tp_session {
int reorder_skip; /* set if skip to next nr */
enum l2tp_pwtype pwtype;
struct l2tp_stats stats;
- struct hlist_node global_hlist; /* global hash list node */
/* Session receive handler for data packets.
* Each pseudowire implementation should implement this callback in order to
@@ -114,7 +114,7 @@ struct l2tp_session {
/* Session close handler.
* Each pseudowire implementation may implement this callback in order to carry
* out pseudowire-specific shutdown actions.
- * The callback is called by core after unhashing the session and purging its
+ * The callback is called by core after unlisting the session and purging its
* reorder queue.
*/
void (*session_close)(struct l2tp_session *session);
@@ -150,7 +150,7 @@ struct l2tp_tunnel_cfg {
/* Represents a tunnel instance.
* Tracks runtime state including IO statistics.
* Holds the tunnel socket (either passed from userspace or directly created by the kernel).
- * Maintains a hashlist of sessions belonging to the tunnel instance.
+ * Maintains a list of sessions belonging to the tunnel instance.
* Is linked into a per-net list of tunnels.
*/
#define L2TP_TUNNEL_NAME_MAX 20
@@ -160,12 +160,11 @@ struct l2tp_tunnel {
unsigned long dead;
struct rcu_head rcu;
- spinlock_t hlist_lock; /* write-protection for session_hlist */
+ spinlock_t list_lock; /* write-protection for session_list */
bool acpt_newsess; /* indicates whether this tunnel accepts
- * new sessions. Protected by hlist_lock.
+ * new sessions. Protected by list_lock.
*/
- struct hlist_head session_hlist[L2TP_HASH_SIZE];
- /* hashed list of sessions, hashed by id */
+ struct list_head session_list; /* list of sessions */
u32 tunnel_id;
u32 peer_tunnel_id;
int version; /* 2=>L2TPv2, 3=>L2TPv3 */
@@ -174,7 +173,6 @@ struct l2tp_tunnel {
enum l2tp_encap_type encap;
struct l2tp_stats stats;
- struct list_head list; /* list node on per-namespace list of tunnels */
struct net *l2tp_net; /* the net we belong to */
refcount_t ref_count;
@@ -224,10 +222,11 @@ void l2tp_session_dec_refcount(struct l2tp_session *session);
*/
struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id);
struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth);
-struct l2tp_session *l2tp_tunnel_get_session(struct l2tp_tunnel *tunnel,
- u32 session_id);
-struct l2tp_session *l2tp_session_get(const struct net *net, u32 session_id);
+struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, u32 session_id);
+struct l2tp_session *l2tp_v2_session_get(const struct net *net, u16 tunnel_id, u16 session_id);
+struct l2tp_session *l2tp_session_get(const struct net *net, struct sock *sk, int pver,
+ u32 tunnel_id, u32 session_id);
struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth);
struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
const char *ifname);
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 4595b56d175d..8755ae521154 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -123,17 +123,14 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
struct l2tp_tunnel *tunnel = v;
struct l2tp_session *session;
int session_count = 0;
- int hash;
rcu_read_lock_bh();
- for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
- hlist_for_each_entry_rcu(session, &tunnel->session_hlist[hash], hlist) {
- /* Session ID of zero is a dummy/reserved value used by pppol2tp */
- if (session->session_id == 0)
- continue;
+ list_for_each_entry_rcu(session, &tunnel->session_list, list) {
+ /* Session ID of zero is a dummy/reserved value used by pppol2tp */
+ if (session->session_id == 0)
+ continue;
- session_count++;
- }
+ session_count++;
}
rcu_read_unlock_bh();
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 970af3983d11..e48aa177d74c 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -140,7 +140,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
}
/* Ok, this is a data packet. Lookup the session. */
- session = l2tp_session_get(net, session_id);
+ session = l2tp_v3_session_get(net, NULL, session_id);
if (!session)
goto discard;
@@ -459,7 +459,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl4 = &inet->cork.fl.u.ip4;
if (connected)
- rt = (struct rtable *)__sk_dst_check(sk, 0);
+ rt = dst_rtable(__sk_dst_check(sk, 0));
rcu_read_lock();
if (!rt) {
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 7bf14cf9ffaa..d217ff1f229e 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -150,7 +150,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
}
/* Ok, this is a data packet. Lookup the session. */
- session = l2tp_session_get(net, session_id);
+ session = l2tp_v3_session_get(net, NULL, session_id);
if (!session)
goto discard;
@@ -630,7 +630,7 @@ back_from_confirm:
ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0);
err = ip6_append_data(sk, ip_generic_getfrag, msg,
ulen, transhdrlen, &ipc6,
- &fl6, (struct rt6_info *)dst,
+ &fl6, dst_rt6_info(dst),
msg->msg_flags);
if (err)
ip6_flush_pending_frames(sk);
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index a901fd14fe3b..d105030520f9 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -61,7 +61,8 @@ static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info)
session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
tunnel = l2tp_tunnel_get(net, tunnel_id);
if (tunnel) {
- session = l2tp_tunnel_get_session(tunnel, session_id);
+ session = l2tp_session_get(net, tunnel->sock, tunnel->version,
+ tunnel_id, session_id);
l2tp_tunnel_dec_refcount(tunnel);
}
}
@@ -635,7 +636,8 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
&cfg);
if (ret >= 0) {
- session = l2tp_tunnel_get_session(tunnel, session_id);
+ session = l2tp_session_get(net, tunnel->sock, tunnel->version,
+ tunnel_id, session_id);
if (session) {
ret = l2tp_session_notify(&l2tp_nl_family, info, session,
L2TP_CMD_SESSION_CREATE);
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 6146e4e67bbb..3596290047b2 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -753,7 +753,8 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
if (tunnel->peer_tunnel_id == 0)
tunnel->peer_tunnel_id = info.peer_tunnel_id;
- session = l2tp_tunnel_get_session(tunnel, info.session_id);
+ session = l2tp_session_get(sock_net(sk), tunnel->sock, tunnel->version,
+ info.tunnel_id, info.session_id);
if (session) {
drop_refcnt = true;
@@ -1045,7 +1046,8 @@ static int pppol2tp_tunnel_copy_stats(struct pppol2tp_ioc_stats *stats,
/* If session_id is set, search the corresponding session in the
* context of this tunnel and record the session's statistics.
*/
- session = l2tp_tunnel_get_session(tunnel, stats->session_id);
+ session = l2tp_session_get(tunnel->l2tp_net, tunnel->sock, tunnel->version,
+ tunnel->tunnel_id, stats->session_id);
if (!session)
return -EBADR;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index fde1140d899e..4eb52add7103 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -688,14 +688,13 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb)
* llc_ui_accept - accept a new incoming connection.
* @sock: Socket which connections arrive on.
* @newsock: Socket to move incoming connection to.
- * @flags: User specified operational flags.
- * @kern: If the socket is kernel internal
+ * @arg: User specified arguments
*
* Accept a new incoming connection.
* Returns 0 upon success, negative otherwise.
*/
-static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int llc_ui_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sock *sk = sock->sk, *newsk;
struct llc_sock *llc, *newllc;
diff --git a/net/llc/llc_c_st.c b/net/llc/llc_c_st.c
index 2467573b5f84..1c267db304df 100644
--- a/net/llc/llc_c_st.c
+++ b/net/llc/llc_c_st.c
@@ -42,7 +42,7 @@ static const llc_conn_action_t llc_common_actions_1[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_common_state_trans_1 = {
+static const struct llc_conn_state_trans llc_common_state_trans_1 = {
.ev = llc_conn_ev_disc_req,
.next_state = LLC_CONN_STATE_D_CONN,
.ev_qualifiers = NONE,
@@ -59,7 +59,7 @@ static const llc_conn_action_t llc_common_actions_2[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_common_state_trans_2 = {
+static const struct llc_conn_state_trans llc_common_state_trans_2 = {
.ev = llc_conn_ev_rst_req,
.next_state = LLC_CONN_STATE_RESET,
.ev_qualifiers = NONE,
@@ -79,7 +79,7 @@ static const llc_conn_action_t llc_common_actions_3[] = {
[8] = NULL,
};
-static struct llc_conn_state_trans llc_common_state_trans_3 = {
+static const struct llc_conn_state_trans llc_common_state_trans_3 = {
.ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = NONE,
@@ -95,7 +95,7 @@ static const llc_conn_action_t llc_common_actions_4[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_common_state_trans_4 = {
+static const struct llc_conn_state_trans llc_common_state_trans_4 = {
.ev = llc_conn_ev_rx_disc_cmd_pbit_set_x,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = NONE,
@@ -114,7 +114,7 @@ static const llc_conn_action_t llc_common_actions_5[] = {
[7] = NULL,
};
-static struct llc_conn_state_trans llc_common_state_trans_5 = {
+static const struct llc_conn_state_trans llc_common_state_trans_5 = {
.ev = llc_conn_ev_rx_frmr_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_RESET,
.ev_qualifiers = NONE,
@@ -129,7 +129,7 @@ static const llc_conn_action_t llc_common_actions_6[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_common_state_trans_6 = {
+static const struct llc_conn_state_trans llc_common_state_trans_6 = {
.ev = llc_conn_ev_rx_dm_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = NONE,
@@ -145,7 +145,7 @@ static const llc_conn_action_t llc_common_actions_7a[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_common_state_trans_7a = {
+static const struct llc_conn_state_trans llc_common_state_trans_7a = {
.ev = llc_conn_ev_rx_zzz_cmd_pbit_set_x_inval_nr,
.next_state = LLC_CONN_STATE_ERROR,
.ev_qualifiers = NONE,
@@ -161,7 +161,7 @@ static const llc_conn_action_t llc_common_actions_7b[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_common_state_trans_7b = {
+static const struct llc_conn_state_trans llc_common_state_trans_7b = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_x_inval_ns,
.next_state = LLC_CONN_STATE_ERROR,
.ev_qualifiers = NONE,
@@ -177,7 +177,7 @@ static const llc_conn_action_t llc_common_actions_8a[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_common_state_trans_8a = {
+static const struct llc_conn_state_trans llc_common_state_trans_8a = {
.ev = llc_conn_ev_rx_zzz_rsp_fbit_set_x_inval_nr,
.next_state = LLC_CONN_STATE_ERROR,
.ev_qualifiers = NONE,
@@ -193,7 +193,7 @@ static const llc_conn_action_t llc_common_actions_8b[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_common_state_trans_8b = {
+static const struct llc_conn_state_trans llc_common_state_trans_8b = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_x_inval_ns,
.next_state = LLC_CONN_STATE_ERROR,
.ev_qualifiers = NONE,
@@ -209,7 +209,7 @@ static const llc_conn_action_t llc_common_actions_8c[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_common_state_trans_8c = {
+static const struct llc_conn_state_trans llc_common_state_trans_8c = {
.ev = llc_conn_ev_rx_bad_pdu,
.next_state = LLC_CONN_STATE_ERROR,
.ev_qualifiers = NONE,
@@ -225,7 +225,7 @@ static const llc_conn_action_t llc_common_actions_9[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_common_state_trans_9 = {
+static const struct llc_conn_state_trans llc_common_state_trans_9 = {
.ev = llc_conn_ev_rx_ua_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_ERROR,
.ev_qualifiers = NONE,
@@ -247,7 +247,7 @@ static const llc_conn_action_t llc_common_actions_10[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_common_state_trans_10 = {
+static const struct llc_conn_state_trans llc_common_state_trans_10 = {
.ev = llc_conn_ev_rx_xxx_rsp_fbit_set_1,
.next_state = LLC_CONN_STATE_ERROR,
.ev_qualifiers = llc_common_ev_qfyrs_10,
@@ -270,7 +270,7 @@ static const llc_conn_action_t llc_common_actions_11a[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_common_state_trans_11a = {
+static const struct llc_conn_state_trans llc_common_state_trans_11a = {
.ev = llc_conn_ev_p_tmr_exp,
.next_state = LLC_CONN_STATE_RESET,
.ev_qualifiers = llc_common_ev_qfyrs_11a,
@@ -292,7 +292,7 @@ static const llc_conn_action_t llc_common_actions_11b[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_common_state_trans_11b = {
+static const struct llc_conn_state_trans llc_common_state_trans_11b = {
.ev = llc_conn_ev_ack_tmr_exp,
.next_state = LLC_CONN_STATE_RESET,
.ev_qualifiers = llc_common_ev_qfyrs_11b,
@@ -314,7 +314,7 @@ static const llc_conn_action_t llc_common_actions_11c[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_common_state_trans_11c = {
+static const struct llc_conn_state_trans llc_common_state_trans_11c = {
.ev = llc_conn_ev_rej_tmr_exp,
.next_state = LLC_CONN_STATE_RESET,
.ev_qualifiers = llc_common_ev_qfyrs_11c,
@@ -336,7 +336,7 @@ static const llc_conn_action_t llc_common_actions_11d[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_common_state_trans_11d = {
+static const struct llc_conn_state_trans llc_common_state_trans_11d = {
.ev = llc_conn_ev_busy_tmr_exp,
.next_state = LLC_CONN_STATE_RESET,
.ev_qualifiers = llc_common_ev_qfyrs_11d,
@@ -347,7 +347,7 @@ static struct llc_conn_state_trans llc_common_state_trans_11d = {
* Common dummy state transition; must be last entry for all state
* transition groups - it'll be on .bss, so will be zeroed.
*/
-static struct llc_conn_state_trans llc_common_state_trans_end;
+static const struct llc_conn_state_trans llc_common_state_trans_end;
/* LLC_CONN_STATE_ADM transitions */
/* State transitions for LLC_CONN_EV_CONN_REQ event */
@@ -359,7 +359,7 @@ static const llc_conn_action_t llc_adm_actions_1[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_adm_state_trans_1 = {
+static const struct llc_conn_state_trans llc_adm_state_trans_1 = {
.ev = llc_conn_ev_conn_req,
.next_state = LLC_CONN_STATE_SETUP,
.ev_qualifiers = NONE,
@@ -378,7 +378,7 @@ static const llc_conn_action_t llc_adm_actions_2[] = {
[7] = NULL,
};
-static struct llc_conn_state_trans llc_adm_state_trans_2 = {
+static const struct llc_conn_state_trans llc_adm_state_trans_2 = {
.ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = NONE,
@@ -392,7 +392,7 @@ static const llc_conn_action_t llc_adm_actions_3[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_adm_state_trans_3 = {
+static const struct llc_conn_state_trans llc_adm_state_trans_3 = {
.ev = llc_conn_ev_rx_disc_cmd_pbit_set_x,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = NONE,
@@ -406,7 +406,7 @@ static const llc_conn_action_t llc_adm_actions_4[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_adm_state_trans_4 = {
+static const struct llc_conn_state_trans llc_adm_state_trans_4 = {
.ev = llc_conn_ev_rx_xxx_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = NONE,
@@ -419,7 +419,7 @@ static const llc_conn_action_t llc_adm_actions_5[] = {
[1] = NULL,
};
-static struct llc_conn_state_trans llc_adm_state_trans_5 = {
+static const struct llc_conn_state_trans llc_adm_state_trans_5 = {
.ev = llc_conn_ev_rx_any_frame,
.next_state = LLC_CONN_OUT_OF_SVC,
.ev_qualifiers = NONE,
@@ -430,7 +430,7 @@ static struct llc_conn_state_trans llc_adm_state_trans_5 = {
* Array of pointers;
* one to each transition
*/
-static struct llc_conn_state_trans *llc_adm_state_transitions[] = {
+static const struct llc_conn_state_trans *llc_adm_state_transitions[] = {
[0] = &llc_adm_state_trans_1, /* Request */
[1] = &llc_common_state_trans_end,
[2] = &llc_common_state_trans_end, /* local_busy */
@@ -453,7 +453,7 @@ static const llc_conn_action_t llc_setup_actions_1[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_setup_state_trans_1 = {
+static const struct llc_conn_state_trans llc_setup_state_trans_1 = {
.ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x,
.next_state = LLC_CONN_STATE_SETUP,
.ev_qualifiers = NONE,
@@ -477,7 +477,7 @@ static const llc_conn_action_t llc_setup_actions_2[] = {
[6] = NULL,
};
-static struct llc_conn_state_trans llc_setup_state_trans_2 = {
+static const struct llc_conn_state_trans llc_setup_state_trans_2 = {
.ev = llc_conn_ev_rx_ua_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_setup_ev_qfyrs_2,
@@ -498,7 +498,7 @@ static const llc_conn_action_t llc_setup_actions_3[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_setup_state_trans_3 = {
+static const struct llc_conn_state_trans llc_setup_state_trans_3 = {
.ev = llc_conn_ev_ack_tmr_exp,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_setup_ev_qfyrs_3,
@@ -519,7 +519,7 @@ static const llc_conn_action_t llc_setup_actions_4[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_setup_state_trans_4 = {
+static const struct llc_conn_state_trans llc_setup_state_trans_4 = {
.ev = llc_conn_ev_rx_disc_cmd_pbit_set_x,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = llc_setup_ev_qfyrs_4,
@@ -539,7 +539,7 @@ static const llc_conn_action_t llc_setup_actions_5[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_setup_state_trans_5 = {
+static const struct llc_conn_state_trans llc_setup_state_trans_5 = {
.ev = llc_conn_ev_rx_dm_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = llc_setup_ev_qfyrs_5,
@@ -560,7 +560,7 @@ static const llc_conn_action_t llc_setup_actions_7[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_setup_state_trans_7 = {
+static const struct llc_conn_state_trans llc_setup_state_trans_7 = {
.ev = llc_conn_ev_ack_tmr_exp,
.next_state = LLC_CONN_STATE_SETUP,
.ev_qualifiers = llc_setup_ev_qfyrs_7,
@@ -581,7 +581,7 @@ static const llc_conn_action_t llc_setup_actions_8[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_setup_state_trans_8 = {
+static const struct llc_conn_state_trans llc_setup_state_trans_8 = {
.ev = llc_conn_ev_ack_tmr_exp,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = llc_setup_ev_qfyrs_8,
@@ -592,7 +592,7 @@ static struct llc_conn_state_trans llc_setup_state_trans_8 = {
* Array of pointers;
* one to each transition
*/
-static struct llc_conn_state_trans *llc_setup_state_transitions[] = {
+static const struct llc_conn_state_trans *llc_setup_state_transitions[] = {
[0] = &llc_common_state_trans_end, /* Request */
[1] = &llc_common_state_trans_end, /* local busy */
[2] = &llc_common_state_trans_end, /* init_pf_cycle */
@@ -622,7 +622,7 @@ static const llc_conn_action_t llc_normal_actions_1[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_1 = {
+static const struct llc_conn_state_trans llc_normal_state_trans_1 = {
.ev = llc_conn_ev_data_req,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_normal_ev_qfyrs_1,
@@ -643,7 +643,7 @@ static const llc_conn_action_t llc_normal_actions_2[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_2 = {
+static const struct llc_conn_state_trans llc_normal_state_trans_2 = {
.ev = llc_conn_ev_data_req,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_normal_ev_qfyrs_2,
@@ -660,7 +660,7 @@ static const llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_2_1[] = {
/* just one member, NULL, .bss zeroes it */
static const llc_conn_action_t llc_normal_actions_2_1[1];
-static struct llc_conn_state_trans llc_normal_state_trans_2_1 = {
+static const struct llc_conn_state_trans llc_normal_state_trans_2_1 = {
.ev = llc_conn_ev_data_req,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_normal_ev_qfyrs_2_1,
@@ -680,7 +680,7 @@ static const llc_conn_action_t llc_normal_actions_3[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_3 = {
+static const struct llc_conn_state_trans llc_normal_state_trans_3 = {
.ev = llc_conn_ev_local_busy_detected,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_normal_ev_qfyrs_3,
@@ -700,7 +700,7 @@ static const llc_conn_action_t llc_normal_actions_4[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_4 = {
+static const struct llc_conn_state_trans llc_normal_state_trans_4 = {
.ev = llc_conn_ev_local_busy_detected,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_normal_ev_qfyrs_4,
@@ -723,7 +723,7 @@ static const llc_conn_action_t llc_normal_actions_5a[] = {
[6] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_5a = {
+static const struct llc_conn_state_trans llc_normal_state_trans_5a = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_normal_ev_qfyrs_5a,
@@ -746,7 +746,7 @@ static const llc_conn_action_t llc_normal_actions_5b[] = {
[6] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_5b = {
+static const struct llc_conn_state_trans llc_normal_state_trans_5b = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_normal_ev_qfyrs_5b,
@@ -769,7 +769,7 @@ static const llc_conn_action_t llc_normal_actions_5c[] = {
[6] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_5c = {
+static const struct llc_conn_state_trans llc_normal_state_trans_5c = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_normal_ev_qfyrs_5c,
@@ -790,7 +790,7 @@ static const llc_conn_action_t llc_normal_actions_6a[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_6a = {
+static const struct llc_conn_state_trans llc_normal_state_trans_6a = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_normal_ev_qfyrs_6a,
@@ -811,7 +811,7 @@ static const llc_conn_action_t llc_normal_actions_6b[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_6b = {
+static const struct llc_conn_state_trans llc_normal_state_trans_6b = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_normal_ev_qfyrs_6b,
@@ -827,7 +827,7 @@ static const llc_conn_action_t llc_normal_actions_7[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_7 = {
+static const struct llc_conn_state_trans llc_normal_state_trans_7 = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = NONE,
@@ -850,7 +850,7 @@ static const llc_conn_action_t llc_normal_actions_8[] = {
[6] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_8a = {
+static const struct llc_conn_state_trans llc_normal_state_trans_8a = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_normal_ev_qfyrs_8a,
@@ -863,7 +863,7 @@ static const llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_8b[] = {
[1] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_8b = {
+static const struct llc_conn_state_trans llc_normal_state_trans_8b = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_normal_ev_qfyrs_8b,
@@ -884,7 +884,7 @@ static const llc_conn_action_t llc_normal_actions_9a[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_9a = {
+static const struct llc_conn_state_trans llc_normal_state_trans_9a = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_normal_ev_qfyrs_9a,
@@ -905,7 +905,7 @@ static const llc_conn_action_t llc_normal_actions_9b[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_9b = {
+static const struct llc_conn_state_trans llc_normal_state_trans_9b = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_normal_ev_qfyrs_9b,
@@ -922,7 +922,7 @@ static const llc_conn_action_t llc_normal_actions_10[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_10 = {
+static const struct llc_conn_state_trans llc_normal_state_trans_10 = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = NONE,
@@ -937,7 +937,7 @@ static const llc_conn_action_t llc_normal_actions_11a[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_11a = {
+static const struct llc_conn_state_trans llc_normal_state_trans_11a = {
.ev = llc_conn_ev_rx_rr_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = NONE,
@@ -952,7 +952,7 @@ static const llc_conn_action_t llc_normal_actions_11b[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_11b = {
+static const struct llc_conn_state_trans llc_normal_state_trans_11b = {
.ev = llc_conn_ev_rx_rr_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = NONE,
@@ -973,7 +973,7 @@ static const llc_conn_action_t llc_normal_actions_11c[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_11c = {
+static const struct llc_conn_state_trans llc_normal_state_trans_11c = {
.ev = llc_conn_ev_rx_rr_rsp_fbit_set_1,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_normal_ev_qfyrs_11c,
@@ -990,7 +990,7 @@ static const llc_conn_action_t llc_normal_actions_12[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_12 = {
+static const struct llc_conn_state_trans llc_normal_state_trans_12 = {
.ev = llc_conn_ev_rx_rr_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = NONE,
@@ -1005,7 +1005,7 @@ static const llc_conn_action_t llc_normal_actions_13a[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_13a = {
+static const struct llc_conn_state_trans llc_normal_state_trans_13a = {
.ev = llc_conn_ev_rx_rnr_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = NONE,
@@ -1020,7 +1020,7 @@ static const llc_conn_action_t llc_normal_actions_13b[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_13b = {
+static const struct llc_conn_state_trans llc_normal_state_trans_13b = {
.ev = llc_conn_ev_rx_rnr_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = NONE,
@@ -1040,7 +1040,7 @@ static const llc_conn_action_t llc_normal_actions_13c[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_13c = {
+static const struct llc_conn_state_trans llc_normal_state_trans_13c = {
.ev = llc_conn_ev_rx_rnr_rsp_fbit_set_1,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_normal_ev_qfyrs_13c,
@@ -1057,7 +1057,7 @@ static const llc_conn_action_t llc_normal_actions_14[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_14 = {
+static const struct llc_conn_state_trans llc_normal_state_trans_14 = {
.ev = llc_conn_ev_rx_rnr_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = NONE,
@@ -1080,7 +1080,7 @@ static const llc_conn_action_t llc_normal_actions_15a[] = {
[6] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_15a = {
+static const struct llc_conn_state_trans llc_normal_state_trans_15a = {
.ev = llc_conn_ev_rx_rej_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_normal_ev_qfyrs_15a,
@@ -1103,7 +1103,7 @@ static const llc_conn_action_t llc_normal_actions_15b[] = {
[6] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_15b = {
+static const struct llc_conn_state_trans llc_normal_state_trans_15b = {
.ev = llc_conn_ev_rx_rej_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_normal_ev_qfyrs_15b,
@@ -1125,7 +1125,7 @@ static const llc_conn_action_t llc_normal_actions_16a[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_16a = {
+static const struct llc_conn_state_trans llc_normal_state_trans_16a = {
.ev = llc_conn_ev_rx_rej_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_normal_ev_qfyrs_16a,
@@ -1147,7 +1147,7 @@ static const llc_conn_action_t llc_normal_actions_16b[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_16b = {
+static const struct llc_conn_state_trans llc_normal_state_trans_16b = {
.ev = llc_conn_ev_rx_rej_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_normal_ev_qfyrs_16b,
@@ -1164,7 +1164,7 @@ static const llc_conn_action_t llc_normal_actions_17[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_17 = {
+static const struct llc_conn_state_trans llc_normal_state_trans_17 = {
.ev = llc_conn_ev_rx_rej_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = NONE,
@@ -1183,7 +1183,7 @@ static const llc_conn_action_t llc_normal_actions_18[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_18 = {
+static const struct llc_conn_state_trans llc_normal_state_trans_18 = {
.ev = llc_conn_ev_init_p_f_cycle,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_normal_ev_qfyrs_18,
@@ -1205,7 +1205,7 @@ static const llc_conn_action_t llc_normal_actions_19[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_19 = {
+static const struct llc_conn_state_trans llc_normal_state_trans_19 = {
.ev = llc_conn_ev_p_tmr_exp,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = llc_normal_ev_qfyrs_19,
@@ -1228,7 +1228,7 @@ static const llc_conn_action_t llc_normal_actions_20a[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_20a = {
+static const struct llc_conn_state_trans llc_normal_state_trans_20a = {
.ev = llc_conn_ev_ack_tmr_exp,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = llc_normal_ev_qfyrs_20a,
@@ -1251,7 +1251,7 @@ static const llc_conn_action_t llc_normal_actions_20b[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_20b = {
+static const struct llc_conn_state_trans llc_normal_state_trans_20b = {
.ev = llc_conn_ev_busy_tmr_exp,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = llc_normal_ev_qfyrs_20b,
@@ -1270,7 +1270,7 @@ static const llc_conn_action_t llc_normal_actions_21[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_normal_state_trans_21 = {
+static const struct llc_conn_state_trans llc_normal_state_trans_21 = {
.ev = llc_conn_ev_tx_buffer_full,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_normal_ev_qfyrs_21,
@@ -1281,7 +1281,7 @@ static struct llc_conn_state_trans llc_normal_state_trans_21 = {
* Array of pointers;
* one to each transition
*/
-static struct llc_conn_state_trans *llc_normal_state_transitions[] = {
+static const struct llc_conn_state_trans *llc_normal_state_transitions[] = {
[0] = &llc_normal_state_trans_1, /* Requests */
[1] = &llc_normal_state_trans_2,
[2] = &llc_normal_state_trans_2_1,
@@ -1354,7 +1354,7 @@ static const llc_conn_action_t llc_busy_actions_1[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_1 = {
+static const struct llc_conn_state_trans llc_busy_state_trans_1 = {
.ev = llc_conn_ev_data_req,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_1,
@@ -1374,7 +1374,7 @@ static const llc_conn_action_t llc_busy_actions_2[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_2 = {
+static const struct llc_conn_state_trans llc_busy_state_trans_2 = {
.ev = llc_conn_ev_data_req,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_2,
@@ -1391,7 +1391,7 @@ static const llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_2_1[] = {
/* just one member, NULL, .bss zeroes it */
static const llc_conn_action_t llc_busy_actions_2_1[1];
-static struct llc_conn_state_trans llc_busy_state_trans_2_1 = {
+static const struct llc_conn_state_trans llc_busy_state_trans_2_1 = {
.ev = llc_conn_ev_data_req,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_2_1,
@@ -1411,7 +1411,7 @@ static const llc_conn_action_t llc_busy_actions_3[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_3 = {
+static const struct llc_conn_state_trans llc_busy_state_trans_3 = {
.ev = llc_conn_ev_local_busy_cleared,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_busy_ev_qfyrs_3,
@@ -1431,7 +1431,7 @@ static const llc_conn_action_t llc_busy_actions_4[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_4 = {
+static const struct llc_conn_state_trans llc_busy_state_trans_4 = {
.ev = llc_conn_ev_local_busy_cleared,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_busy_ev_qfyrs_4,
@@ -1450,7 +1450,7 @@ static const llc_conn_action_t llc_busy_actions_5[] = {
[1] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_5 = {
+static const struct llc_conn_state_trans llc_busy_state_trans_5 = {
.ev = llc_conn_ev_local_busy_cleared,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_busy_ev_qfyrs_5,
@@ -1469,7 +1469,7 @@ static const llc_conn_action_t llc_busy_actions_6[] = {
[1] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_6 = {
+static const struct llc_conn_state_trans llc_busy_state_trans_6 = {
.ev = llc_conn_ev_local_busy_cleared,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_busy_ev_qfyrs_6,
@@ -1488,7 +1488,7 @@ static const llc_conn_action_t llc_busy_actions_7[] = {
[1] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_7 = {
+static const struct llc_conn_state_trans llc_busy_state_trans_7 = {
.ev = llc_conn_ev_local_busy_cleared,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_busy_ev_qfyrs_7,
@@ -1507,7 +1507,7 @@ static const llc_conn_action_t llc_busy_actions_8[] = {
[1] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_8 = {
+static const struct llc_conn_state_trans llc_busy_state_trans_8 = {
.ev = llc_conn_ev_local_busy_cleared,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_busy_ev_qfyrs_8,
@@ -1529,7 +1529,7 @@ static const llc_conn_action_t llc_busy_actions_9a[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_9a = {
+static const struct llc_conn_state_trans llc_busy_state_trans_9a = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_x_unexpd_ns,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_9a,
@@ -1551,7 +1551,7 @@ static const llc_conn_action_t llc_busy_actions_9b[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_9b = {
+static const struct llc_conn_state_trans llc_busy_state_trans_9b = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_9b,
@@ -1571,7 +1571,7 @@ static const llc_conn_action_t llc_busy_actions_10a[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_10a = {
+static const struct llc_conn_state_trans llc_busy_state_trans_10a = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_10a,
@@ -1591,7 +1591,7 @@ static const llc_conn_action_t llc_busy_actions_10b[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_10b = {
+static const struct llc_conn_state_trans llc_busy_state_trans_10b = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_10b,
@@ -1606,7 +1606,7 @@ static const llc_conn_action_t llc_busy_actions_11[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_11 = {
+static const struct llc_conn_state_trans llc_busy_state_trans_11 = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = NONE,
@@ -1624,7 +1624,7 @@ static const llc_conn_action_t llc_busy_actions_12[] = {
[6] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_12 = {
+static const struct llc_conn_state_trans llc_busy_state_trans_12 = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = NONE,
@@ -1649,7 +1649,7 @@ static const llc_conn_action_t llc_busy_actions_13a[] = {
[8] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_13a = {
+static const struct llc_conn_state_trans llc_busy_state_trans_13a = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_13a,
@@ -1674,7 +1674,7 @@ static const llc_conn_action_t llc_busy_actions_13b[] = {
[8] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_13b = {
+static const struct llc_conn_state_trans llc_busy_state_trans_13b = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_13b,
@@ -1697,7 +1697,7 @@ static const llc_conn_action_t llc_busy_actions_14a[] = {
[6] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_14a = {
+static const struct llc_conn_state_trans llc_busy_state_trans_14a = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_14a,
@@ -1720,7 +1720,7 @@ static const llc_conn_action_t llc_busy_actions_14b[] = {
[6] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_14b = {
+static const struct llc_conn_state_trans llc_busy_state_trans_14b = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_14b,
@@ -1735,7 +1735,7 @@ static const llc_conn_action_t llc_busy_actions_15a[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_15a = {
+static const struct llc_conn_state_trans llc_busy_state_trans_15a = {
.ev = llc_conn_ev_rx_rr_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = NONE,
@@ -1750,7 +1750,7 @@ static const llc_conn_action_t llc_busy_actions_15b[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_15b = {
+static const struct llc_conn_state_trans llc_busy_state_trans_15b = {
.ev = llc_conn_ev_rx_rr_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = NONE,
@@ -1770,7 +1770,7 @@ static const llc_conn_action_t llc_busy_actions_15c[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_15c = {
+static const struct llc_conn_state_trans llc_busy_state_trans_15c = {
.ev = llc_conn_ev_rx_rr_rsp_fbit_set_1,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_15c,
@@ -1785,7 +1785,7 @@ static const llc_conn_action_t llc_busy_actions_16[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_16 = {
+static const struct llc_conn_state_trans llc_busy_state_trans_16 = {
.ev = llc_conn_ev_rx_rr_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = NONE,
@@ -1800,7 +1800,7 @@ static const llc_conn_action_t llc_busy_actions_17a[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_17a = {
+static const struct llc_conn_state_trans llc_busy_state_trans_17a = {
.ev = llc_conn_ev_rx_rnr_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = NONE,
@@ -1815,7 +1815,7 @@ static const llc_conn_action_t llc_busy_actions_17b[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_17b = {
+static const struct llc_conn_state_trans llc_busy_state_trans_17b = {
.ev = llc_conn_ev_rx_rnr_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = NONE,
@@ -1835,7 +1835,7 @@ static const llc_conn_action_t llc_busy_actions_17c[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_17c = {
+static const struct llc_conn_state_trans llc_busy_state_trans_17c = {
.ev = llc_conn_ev_rx_rnr_rsp_fbit_set_1,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_17c,
@@ -1850,7 +1850,7 @@ static const llc_conn_action_t llc_busy_actions_18[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_18 = {
+static const struct llc_conn_state_trans llc_busy_state_trans_18 = {
.ev = llc_conn_ev_rx_rnr_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = NONE,
@@ -1872,7 +1872,7 @@ static const llc_conn_action_t llc_busy_actions_19a[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_19a = {
+static const struct llc_conn_state_trans llc_busy_state_trans_19a = {
.ev = llc_conn_ev_rx_rej_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_19a,
@@ -1894,7 +1894,7 @@ static const llc_conn_action_t llc_busy_actions_19b[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_19b = {
+static const struct llc_conn_state_trans llc_busy_state_trans_19b = {
.ev = llc_conn_ev_rx_rej_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_19b,
@@ -1915,7 +1915,7 @@ static const llc_conn_action_t llc_busy_actions_20a[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_20a = {
+static const struct llc_conn_state_trans llc_busy_state_trans_20a = {
.ev = llc_conn_ev_rx_rej_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_20a,
@@ -1936,7 +1936,7 @@ static const llc_conn_action_t llc_busy_actions_20b[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_20b = {
+static const struct llc_conn_state_trans llc_busy_state_trans_20b = {
.ev = llc_conn_ev_rx_rej_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_20b,
@@ -1953,7 +1953,7 @@ static const llc_conn_action_t llc_busy_actions_21[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_21 = {
+static const struct llc_conn_state_trans llc_busy_state_trans_21 = {
.ev = llc_conn_ev_rx_rej_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = NONE,
@@ -1972,7 +1972,7 @@ static const llc_conn_action_t llc_busy_actions_22[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_22 = {
+static const struct llc_conn_state_trans llc_busy_state_trans_22 = {
.ev = llc_conn_ev_init_p_f_cycle,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_22,
@@ -1993,7 +1993,7 @@ static const llc_conn_action_t llc_busy_actions_23[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_23 = {
+static const struct llc_conn_state_trans llc_busy_state_trans_23 = {
.ev = llc_conn_ev_p_tmr_exp,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_23,
@@ -2015,7 +2015,7 @@ static const llc_conn_action_t llc_busy_actions_24a[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_24a = {
+static const struct llc_conn_state_trans llc_busy_state_trans_24a = {
.ev = llc_conn_ev_ack_tmr_exp,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_24a,
@@ -2037,7 +2037,7 @@ static const llc_conn_action_t llc_busy_actions_24b[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_24b = {
+static const struct llc_conn_state_trans llc_busy_state_trans_24b = {
.ev = llc_conn_ev_busy_tmr_exp,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_24b,
@@ -2060,7 +2060,7 @@ static const llc_conn_action_t llc_busy_actions_25[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_25 = {
+static const struct llc_conn_state_trans llc_busy_state_trans_25 = {
.ev = llc_conn_ev_rej_tmr_exp,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_25,
@@ -2079,7 +2079,7 @@ static const llc_conn_action_t llc_busy_actions_26[] = {
[1] = NULL,
};
-static struct llc_conn_state_trans llc_busy_state_trans_26 = {
+static const struct llc_conn_state_trans llc_busy_state_trans_26 = {
.ev = llc_conn_ev_rej_tmr_exp,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_busy_ev_qfyrs_26,
@@ -2090,7 +2090,7 @@ static struct llc_conn_state_trans llc_busy_state_trans_26 = {
* Array of pointers;
* one to each transition
*/
-static struct llc_conn_state_trans *llc_busy_state_transitions[] = {
+static const struct llc_conn_state_trans *llc_busy_state_transitions[] = {
[0] = &llc_common_state_trans_1, /* Request */
[1] = &llc_common_state_trans_2,
[2] = &llc_busy_state_trans_1,
@@ -2166,7 +2166,7 @@ static const llc_conn_action_t llc_reject_actions_1[] = {
[1] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_1 = {
+static const struct llc_conn_state_trans llc_reject_state_trans_1 = {
.ev = llc_conn_ev_data_req,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_reject_ev_qfyrs_1,
@@ -2185,7 +2185,7 @@ static const llc_conn_action_t llc_reject_actions_2[] = {
[1] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_2 = {
+static const struct llc_conn_state_trans llc_reject_state_trans_2 = {
.ev = llc_conn_ev_data_req,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_reject_ev_qfyrs_2,
@@ -2202,7 +2202,7 @@ static const llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_2_1[] = {
/* just one member, NULL, .bss zeroes it */
static const llc_conn_action_t llc_reject_actions_2_1[1];
-static struct llc_conn_state_trans llc_reject_state_trans_2_1 = {
+static const struct llc_conn_state_trans llc_reject_state_trans_2_1 = {
.ev = llc_conn_ev_data_req,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_reject_ev_qfyrs_2_1,
@@ -2222,7 +2222,7 @@ static const llc_conn_action_t llc_reject_actions_3[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_3 = {
+static const struct llc_conn_state_trans llc_reject_state_trans_3 = {
.ev = llc_conn_ev_local_busy_detected,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_reject_ev_qfyrs_3,
@@ -2241,7 +2241,7 @@ static const llc_conn_action_t llc_reject_actions_4[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_4 = {
+static const struct llc_conn_state_trans llc_reject_state_trans_4 = {
.ev = llc_conn_ev_local_busy_detected,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = llc_reject_ev_qfyrs_4,
@@ -2256,7 +2256,7 @@ static const llc_conn_action_t llc_reject_actions_5a[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_5a = {
+static const struct llc_conn_state_trans llc_reject_state_trans_5a = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = NONE,
@@ -2271,7 +2271,7 @@ static const llc_conn_action_t llc_reject_actions_5b[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_5b = {
+static const struct llc_conn_state_trans llc_reject_state_trans_5b = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = NONE,
@@ -2291,7 +2291,7 @@ static const llc_conn_action_t llc_reject_actions_5c[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_5c = {
+static const struct llc_conn_state_trans llc_reject_state_trans_5c = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_reject_ev_qfyrs_5c,
@@ -2305,7 +2305,7 @@ static const llc_conn_action_t llc_reject_actions_6[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_6 = {
+static const struct llc_conn_state_trans llc_reject_state_trans_6 = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = NONE,
@@ -2330,7 +2330,7 @@ static const llc_conn_action_t llc_reject_actions_7a[] = {
};
-static struct llc_conn_state_trans llc_reject_state_trans_7a = {
+static const struct llc_conn_state_trans llc_reject_state_trans_7a = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_reject_ev_qfyrs_7a,
@@ -2354,7 +2354,7 @@ static const llc_conn_action_t llc_reject_actions_7b[] = {
[7] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_7b = {
+static const struct llc_conn_state_trans llc_reject_state_trans_7b = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_reject_ev_qfyrs_7b,
@@ -2376,7 +2376,7 @@ static const llc_conn_action_t llc_reject_actions_8a[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_8a = {
+static const struct llc_conn_state_trans llc_reject_state_trans_8a = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_reject_ev_qfyrs_8a,
@@ -2398,7 +2398,7 @@ static const llc_conn_action_t llc_reject_actions_8b[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_8b = {
+static const struct llc_conn_state_trans llc_reject_state_trans_8b = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_reject_ev_qfyrs_8b,
@@ -2415,7 +2415,7 @@ static const llc_conn_action_t llc_reject_actions_9[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_9 = {
+static const struct llc_conn_state_trans llc_reject_state_trans_9 = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = NONE,
@@ -2430,7 +2430,7 @@ static const llc_conn_action_t llc_reject_actions_10a[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_10a = {
+static const struct llc_conn_state_trans llc_reject_state_trans_10a = {
.ev = llc_conn_ev_rx_rr_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = NONE,
@@ -2445,7 +2445,7 @@ static const llc_conn_action_t llc_reject_actions_10b[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_10b = {
+static const struct llc_conn_state_trans llc_reject_state_trans_10b = {
.ev = llc_conn_ev_rx_rr_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = NONE,
@@ -2465,7 +2465,7 @@ static const llc_conn_action_t llc_reject_actions_10c[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_10c = {
+static const struct llc_conn_state_trans llc_reject_state_trans_10c = {
.ev = llc_conn_ev_rx_rr_rsp_fbit_set_1,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_reject_ev_qfyrs_10c,
@@ -2480,7 +2480,7 @@ static const llc_conn_action_t llc_reject_actions_11[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_11 = {
+static const struct llc_conn_state_trans llc_reject_state_trans_11 = {
.ev = llc_conn_ev_rx_rr_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = NONE,
@@ -2495,7 +2495,7 @@ static const llc_conn_action_t llc_reject_actions_12a[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_12a = {
+static const struct llc_conn_state_trans llc_reject_state_trans_12a = {
.ev = llc_conn_ev_rx_rnr_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = NONE,
@@ -2510,7 +2510,7 @@ static const llc_conn_action_t llc_reject_actions_12b[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_12b = {
+static const struct llc_conn_state_trans llc_reject_state_trans_12b = {
.ev = llc_conn_ev_rx_rnr_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = NONE,
@@ -2530,7 +2530,7 @@ static const llc_conn_action_t llc_reject_actions_12c[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_12c = {
+static const struct llc_conn_state_trans llc_reject_state_trans_12c = {
.ev = llc_conn_ev_rx_rnr_rsp_fbit_set_1,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_reject_ev_qfyrs_12c,
@@ -2545,7 +2545,7 @@ static const llc_conn_action_t llc_reject_actions_13[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_13 = {
+static const struct llc_conn_state_trans llc_reject_state_trans_13 = {
.ev = llc_conn_ev_rx_rnr_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = NONE,
@@ -2567,7 +2567,7 @@ static const llc_conn_action_t llc_reject_actions_14a[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_14a = {
+static const struct llc_conn_state_trans llc_reject_state_trans_14a = {
.ev = llc_conn_ev_rx_rej_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_reject_ev_qfyrs_14a,
@@ -2589,7 +2589,7 @@ static const llc_conn_action_t llc_reject_actions_14b[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_14b = {
+static const struct llc_conn_state_trans llc_reject_state_trans_14b = {
.ev = llc_conn_ev_rx_rej_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_reject_ev_qfyrs_14b,
@@ -2610,7 +2610,7 @@ static const llc_conn_action_t llc_reject_actions_15a[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_15a = {
+static const struct llc_conn_state_trans llc_reject_state_trans_15a = {
.ev = llc_conn_ev_rx_rej_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_reject_ev_qfyrs_15a,
@@ -2631,7 +2631,7 @@ static const llc_conn_action_t llc_reject_actions_15b[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_15b = {
+static const struct llc_conn_state_trans llc_reject_state_trans_15b = {
.ev = llc_conn_ev_rx_rej_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_reject_ev_qfyrs_15b,
@@ -2647,7 +2647,7 @@ static const llc_conn_action_t llc_reject_actions_16[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_16 = {
+static const struct llc_conn_state_trans llc_reject_state_trans_16 = {
.ev = llc_conn_ev_rx_rej_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = NONE,
@@ -2666,7 +2666,7 @@ static const llc_conn_action_t llc_reject_actions_17[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_17 = {
+static const struct llc_conn_state_trans llc_reject_state_trans_17 = {
.ev = llc_conn_ev_init_p_f_cycle,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_reject_ev_qfyrs_17,
@@ -2688,7 +2688,7 @@ static const llc_conn_action_t llc_reject_actions_18[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_18 = {
+static const struct llc_conn_state_trans llc_reject_state_trans_18 = {
.ev = llc_conn_ev_rej_tmr_exp,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = llc_reject_ev_qfyrs_18,
@@ -2710,7 +2710,7 @@ static const llc_conn_action_t llc_reject_actions_19[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_19 = {
+static const struct llc_conn_state_trans llc_reject_state_trans_19 = {
.ev = llc_conn_ev_p_tmr_exp,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = llc_reject_ev_qfyrs_19,
@@ -2733,7 +2733,7 @@ static const llc_conn_action_t llc_reject_actions_20a[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_20a = {
+static const struct llc_conn_state_trans llc_reject_state_trans_20a = {
.ev = llc_conn_ev_ack_tmr_exp,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = llc_reject_ev_qfyrs_20a,
@@ -2756,7 +2756,7 @@ static const llc_conn_action_t llc_reject_actions_20b[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_reject_state_trans_20b = {
+static const struct llc_conn_state_trans llc_reject_state_trans_20b = {
.ev = llc_conn_ev_busy_tmr_exp,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = llc_reject_ev_qfyrs_20b,
@@ -2767,7 +2767,7 @@ static struct llc_conn_state_trans llc_reject_state_trans_20b = {
* Array of pointers;
* one to each transition
*/
-static struct llc_conn_state_trans *llc_reject_state_transitions[] = {
+static const struct llc_conn_state_trans *llc_reject_state_transitions[] = {
[0] = &llc_common_state_trans_1, /* Request */
[1] = &llc_common_state_trans_2,
[2] = &llc_common_state_trans_end,
@@ -2834,7 +2834,7 @@ static const llc_conn_ev_qfyr_t llc_await_ev_qfyrs_1_0[] = {
/* just one member, NULL, .bss zeroes it */
static const llc_conn_action_t llc_await_actions_1_0[1];
-static struct llc_conn_state_trans llc_await_state_trans_1_0 = {
+static const struct llc_conn_state_trans llc_await_state_trans_1_0 = {
.ev = llc_conn_ev_data_req,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = llc_await_ev_qfyrs_1_0,
@@ -2848,7 +2848,7 @@ static const llc_conn_action_t llc_await_actions_1[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_1 = {
+static const struct llc_conn_state_trans llc_await_state_trans_1 = {
.ev = llc_conn_ev_local_busy_detected,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = NONE,
@@ -2867,7 +2867,7 @@ static const llc_conn_action_t llc_await_actions_2[] = {
[7] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_2 = {
+static const struct llc_conn_state_trans llc_await_state_trans_2 = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = NONE,
@@ -2883,7 +2883,7 @@ static const llc_conn_action_t llc_await_actions_3a[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_3a = {
+static const struct llc_conn_state_trans llc_await_state_trans_3a = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = NONE,
@@ -2899,7 +2899,7 @@ static const llc_conn_action_t llc_await_actions_3b[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_3b = {
+static const struct llc_conn_state_trans llc_await_state_trans_3b = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = NONE,
@@ -2916,7 +2916,7 @@ static const llc_conn_action_t llc_await_actions_4[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_4 = {
+static const struct llc_conn_state_trans llc_await_state_trans_4 = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = NONE,
@@ -2935,7 +2935,7 @@ static const llc_conn_action_t llc_await_actions_5[] = {
[7] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_5 = {
+static const struct llc_conn_state_trans llc_await_state_trans_5 = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_1,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = NONE,
@@ -2952,7 +2952,7 @@ static const llc_conn_action_t llc_await_actions_6a[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_6a = {
+static const struct llc_conn_state_trans llc_await_state_trans_6a = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = NONE,
@@ -2969,7 +2969,7 @@ static const llc_conn_action_t llc_await_actions_6b[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_6b = {
+static const struct llc_conn_state_trans llc_await_state_trans_6b = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = NONE,
@@ -2986,7 +2986,7 @@ static const llc_conn_action_t llc_await_actions_7[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_7 = {
+static const struct llc_conn_state_trans llc_await_state_trans_7 = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = NONE,
@@ -3003,7 +3003,7 @@ static const llc_conn_action_t llc_await_actions_8a[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_8a = {
+static const struct llc_conn_state_trans llc_await_state_trans_8a = {
.ev = llc_conn_ev_rx_rr_rsp_fbit_set_1,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = NONE,
@@ -3020,7 +3020,7 @@ static const llc_conn_action_t llc_await_actions_8b[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_8b = {
+static const struct llc_conn_state_trans llc_await_state_trans_8b = {
.ev = llc_conn_ev_rx_rej_rsp_fbit_set_1,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = NONE,
@@ -3035,7 +3035,7 @@ static const llc_conn_action_t llc_await_actions_9a[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_9a = {
+static const struct llc_conn_state_trans llc_await_state_trans_9a = {
.ev = llc_conn_ev_rx_rr_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = NONE,
@@ -3050,7 +3050,7 @@ static const llc_conn_action_t llc_await_actions_9b[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_9b = {
+static const struct llc_conn_state_trans llc_await_state_trans_9b = {
.ev = llc_conn_ev_rx_rr_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = NONE,
@@ -3065,7 +3065,7 @@ static const llc_conn_action_t llc_await_actions_9c[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_9c = {
+static const struct llc_conn_state_trans llc_await_state_trans_9c = {
.ev = llc_conn_ev_rx_rej_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = NONE,
@@ -3080,7 +3080,7 @@ static const llc_conn_action_t llc_await_actions_9d[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_9d = {
+static const struct llc_conn_state_trans llc_await_state_trans_9d = {
.ev = llc_conn_ev_rx_rej_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = NONE,
@@ -3096,7 +3096,7 @@ static const llc_conn_action_t llc_await_actions_10a[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_10a = {
+static const struct llc_conn_state_trans llc_await_state_trans_10a = {
.ev = llc_conn_ev_rx_rr_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = NONE,
@@ -3112,7 +3112,7 @@ static const llc_conn_action_t llc_await_actions_10b[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_10b = {
+static const struct llc_conn_state_trans llc_await_state_trans_10b = {
.ev = llc_conn_ev_rx_rej_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = NONE,
@@ -3128,7 +3128,7 @@ static const llc_conn_action_t llc_await_actions_11[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_11 = {
+static const struct llc_conn_state_trans llc_await_state_trans_11 = {
.ev = llc_conn_ev_rx_rnr_rsp_fbit_set_1,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = NONE,
@@ -3143,7 +3143,7 @@ static const llc_conn_action_t llc_await_actions_12a[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_12a = {
+static const struct llc_conn_state_trans llc_await_state_trans_12a = {
.ev = llc_conn_ev_rx_rnr_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = NONE,
@@ -3158,7 +3158,7 @@ static const llc_conn_action_t llc_await_actions_12b[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_12b = {
+static const struct llc_conn_state_trans llc_await_state_trans_12b = {
.ev = llc_conn_ev_rx_rnr_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = NONE,
@@ -3174,7 +3174,7 @@ static const llc_conn_action_t llc_await_actions_13[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_13 = {
+static const struct llc_conn_state_trans llc_await_state_trans_13 = {
.ev = llc_conn_ev_rx_rnr_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = NONE,
@@ -3194,7 +3194,7 @@ static const llc_conn_action_t llc_await_actions_14[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_state_trans_14 = {
+static const struct llc_conn_state_trans llc_await_state_trans_14 = {
.ev = llc_conn_ev_p_tmr_exp,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = llc_await_ev_qfyrs_14,
@@ -3205,7 +3205,7 @@ static struct llc_conn_state_trans llc_await_state_trans_14 = {
* Array of pointers;
* one to each transition
*/
-static struct llc_conn_state_trans *llc_await_state_transitions[] = {
+static const struct llc_conn_state_trans *llc_await_state_transitions[] = {
[0] = &llc_common_state_trans_1, /* Request */
[1] = &llc_common_state_trans_2,
[2] = &llc_await_state_trans_1_0,
@@ -3263,7 +3263,7 @@ static const llc_conn_ev_qfyr_t llc_await_busy_ev_qfyrs_1_0[] = {
/* just one member, NULL, .bss zeroes it */
static const llc_conn_action_t llc_await_busy_actions_1_0[1];
-static struct llc_conn_state_trans llc_await_busy_state_trans_1_0 = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_1_0 = {
.ev = llc_conn_ev_data_req,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = llc_await_busy_ev_qfyrs_1_0,
@@ -3282,7 +3282,7 @@ static const llc_conn_action_t llc_await_busy_actions_1[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_1 = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_1 = {
.ev = llc_conn_ev_local_busy_cleared,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = llc_await_busy_ev_qfyrs_1,
@@ -3300,7 +3300,7 @@ static const llc_conn_action_t llc_await_busy_actions_2[] = {
[1] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_2 = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_2 = {
.ev = llc_conn_ev_local_busy_cleared,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = llc_await_busy_ev_qfyrs_2,
@@ -3318,7 +3318,7 @@ static const llc_conn_action_t llc_await_busy_actions_3[] = {
[1] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_3 = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_3 = {
.ev = llc_conn_ev_local_busy_cleared,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = llc_await_busy_ev_qfyrs_3,
@@ -3337,7 +3337,7 @@ static const llc_conn_action_t llc_await_busy_actions_4[] = {
[7] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_4 = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_4 = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = NONE,
@@ -3353,7 +3353,7 @@ static const llc_conn_action_t llc_await_busy_actions_5a[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_5a = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_5a = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = NONE,
@@ -3369,7 +3369,7 @@ static const llc_conn_action_t llc_await_busy_actions_5b[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_5b = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_5b = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = NONE,
@@ -3385,7 +3385,7 @@ static const llc_conn_action_t llc_await_busy_actions_6[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_6 = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_6 = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = NONE,
@@ -3406,7 +3406,7 @@ static const llc_conn_action_t llc_await_busy_actions_7[] = {
[9] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_7 = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_7 = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_1,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = NONE,
@@ -3424,7 +3424,7 @@ static const llc_conn_action_t llc_await_busy_actions_8a[] = {
[6] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_8a = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_8a = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = NONE,
@@ -3442,7 +3442,7 @@ static const llc_conn_action_t llc_await_busy_actions_8b[] = {
[6] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_8b = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_8b = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = NONE,
@@ -3460,7 +3460,7 @@ static const llc_conn_action_t llc_await_busy_actions_9[] = {
[6] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_9 = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_9 = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = NONE,
@@ -3477,7 +3477,7 @@ static const llc_conn_action_t llc_await_busy_actions_10a[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_10a = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_10a = {
.ev = llc_conn_ev_rx_rr_rsp_fbit_set_1,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = NONE,
@@ -3494,7 +3494,7 @@ static const llc_conn_action_t llc_await_busy_actions_10b[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_10b = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_10b = {
.ev = llc_conn_ev_rx_rej_rsp_fbit_set_1,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = NONE,
@@ -3509,7 +3509,7 @@ static const llc_conn_action_t llc_await_busy_actions_11a[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_11a = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_11a = {
.ev = llc_conn_ev_rx_rr_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = NONE,
@@ -3524,7 +3524,7 @@ static const llc_conn_action_t llc_await_busy_actions_11b[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_11b = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_11b = {
.ev = llc_conn_ev_rx_rr_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = NONE,
@@ -3539,7 +3539,7 @@ static const llc_conn_action_t llc_await_busy_actions_11c[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_11c = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_11c = {
.ev = llc_conn_ev_rx_rej_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = NONE,
@@ -3554,7 +3554,7 @@ static const llc_conn_action_t llc_await_busy_actions_11d[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_11d = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_11d = {
.ev = llc_conn_ev_rx_rej_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = NONE,
@@ -3570,7 +3570,7 @@ static const llc_conn_action_t llc_await_busy_actions_12a[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_12a = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_12a = {
.ev = llc_conn_ev_rx_rr_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = NONE,
@@ -3586,7 +3586,7 @@ static const llc_conn_action_t llc_await_busy_actions_12b[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_12b = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_12b = {
.ev = llc_conn_ev_rx_rej_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = NONE,
@@ -3602,7 +3602,7 @@ static const llc_conn_action_t llc_await_busy_actions_13[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_13 = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_13 = {
.ev = llc_conn_ev_rx_rnr_rsp_fbit_set_1,
.next_state = LLC_CONN_STATE_BUSY,
.ev_qualifiers = NONE,
@@ -3617,7 +3617,7 @@ static const llc_conn_action_t llc_await_busy_actions_14a[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_14a = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_14a = {
.ev = llc_conn_ev_rx_rnr_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = NONE,
@@ -3632,7 +3632,7 @@ static const llc_conn_action_t llc_await_busy_actions_14b[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_14b = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_14b = {
.ev = llc_conn_ev_rx_rnr_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = NONE,
@@ -3648,7 +3648,7 @@ static const llc_conn_action_t llc_await_busy_actions_15[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_15 = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_15 = {
.ev = llc_conn_ev_rx_rnr_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = NONE,
@@ -3668,7 +3668,7 @@ static const llc_conn_action_t llc_await_busy_actions_16[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_busy_state_trans_16 = {
+static const struct llc_conn_state_trans llc_await_busy_state_trans_16 = {
.ev = llc_conn_ev_p_tmr_exp,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = llc_await_busy_ev_qfyrs_16,
@@ -3679,7 +3679,7 @@ static struct llc_conn_state_trans llc_await_busy_state_trans_16 = {
* Array of pointers;
* one to each transition
*/
-static struct llc_conn_state_trans *llc_await_busy_state_transitions[] = {
+static const struct llc_conn_state_trans *llc_await_busy_state_transitions[] = {
[0] = &llc_common_state_trans_1, /* Request */
[1] = &llc_common_state_trans_2,
[2] = &llc_await_busy_state_trans_1_0,
@@ -3739,7 +3739,7 @@ static const llc_conn_ev_qfyr_t llc_await_reject_ev_qfyrs_1_0[] = {
/* just one member, NULL, .bss zeroes it */
static const llc_conn_action_t llc_await_reject_actions_1_0[1];
-static struct llc_conn_state_trans llc_await_reject_state_trans_1_0 = {
+static const struct llc_conn_state_trans llc_await_reject_state_trans_1_0 = {
.ev = llc_conn_ev_data_req,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = llc_await_reject_ev_qfyrs_1_0,
@@ -3753,7 +3753,7 @@ static const llc_conn_action_t llc_await_rejct_actions_1[] = {
[2] = NULL
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_1 = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_1 = {
.ev = llc_conn_ev_local_busy_detected,
.next_state = LLC_CONN_STATE_AWAIT_BUSY,
.ev_qualifiers = NONE,
@@ -3767,7 +3767,7 @@ static const llc_conn_action_t llc_await_rejct_actions_2a[] = {
[2] = NULL
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_2a = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_2a = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = NONE,
@@ -3781,7 +3781,7 @@ static const llc_conn_action_t llc_await_rejct_actions_2b[] = {
[2] = NULL
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_2b = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_2b = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = NONE,
@@ -3796,7 +3796,7 @@ static const llc_conn_action_t llc_await_rejct_actions_3[] = {
[3] = NULL
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_3 = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_3 = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = NONE,
@@ -3816,7 +3816,7 @@ static const llc_conn_action_t llc_await_rejct_actions_4[] = {
[8] = NULL,
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_4 = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_4 = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_1,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = NONE,
@@ -3834,7 +3834,7 @@ static const llc_conn_action_t llc_await_rejct_actions_5a[] = {
[6] = NULL,
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_5a = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_5a = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = NONE,
@@ -3852,7 +3852,7 @@ static const llc_conn_action_t llc_await_rejct_actions_5b[] = {
[6] = NULL,
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_5b = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_5b = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = NONE,
@@ -3870,7 +3870,7 @@ static const llc_conn_action_t llc_await_rejct_actions_6[] = {
[6] = NULL,
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_6 = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_6 = {
.ev = llc_conn_ev_rx_i_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_AWAIT,
.ev_qualifiers = NONE,
@@ -3887,7 +3887,7 @@ static const llc_conn_action_t llc_await_rejct_actions_7a[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_7a = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_7a = {
.ev = llc_conn_ev_rx_rr_rsp_fbit_set_1,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = NONE,
@@ -3904,7 +3904,7 @@ static const llc_conn_action_t llc_await_rejct_actions_7b[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_7b = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_7b = {
.ev = llc_conn_ev_rx_rej_rsp_fbit_set_1,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = NONE,
@@ -3921,7 +3921,7 @@ static const llc_conn_action_t llc_await_rejct_actions_7c[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_7c = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_7c = {
.ev = llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = NONE,
@@ -3936,7 +3936,7 @@ static const llc_conn_action_t llc_await_rejct_actions_8a[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_8a = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_8a = {
.ev = llc_conn_ev_rx_rr_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = NONE,
@@ -3951,7 +3951,7 @@ static const llc_conn_action_t llc_await_rejct_actions_8b[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_8b = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_8b = {
.ev = llc_conn_ev_rx_rr_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = NONE,
@@ -3966,7 +3966,7 @@ static const llc_conn_action_t llc_await_rejct_actions_8c[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_8c = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_8c = {
.ev = llc_conn_ev_rx_rej_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = NONE,
@@ -3981,7 +3981,7 @@ static const llc_conn_action_t llc_await_rejct_actions_8d[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_8d = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_8d = {
.ev = llc_conn_ev_rx_rej_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = NONE,
@@ -3997,7 +3997,7 @@ static const llc_conn_action_t llc_await_rejct_actions_9a[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_9a = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_9a = {
.ev = llc_conn_ev_rx_rr_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = NONE,
@@ -4013,7 +4013,7 @@ static const llc_conn_action_t llc_await_rejct_actions_9b[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_9b = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_9b = {
.ev = llc_conn_ev_rx_rej_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = NONE,
@@ -4029,7 +4029,7 @@ static const llc_conn_action_t llc_await_rejct_actions_10[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_10 = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_10 = {
.ev = llc_conn_ev_rx_rnr_rsp_fbit_set_1,
.next_state = LLC_CONN_STATE_REJ,
.ev_qualifiers = NONE,
@@ -4044,7 +4044,7 @@ static const llc_conn_action_t llc_await_rejct_actions_11a[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_11a = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_11a = {
.ev = llc_conn_ev_rx_rnr_cmd_pbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = NONE,
@@ -4059,7 +4059,7 @@ static const llc_conn_action_t llc_await_rejct_actions_11b[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_11b = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_11b = {
.ev = llc_conn_ev_rx_rnr_rsp_fbit_set_0,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = NONE,
@@ -4075,7 +4075,7 @@ static const llc_conn_action_t llc_await_rejct_actions_12[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_12 = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_12 = {
.ev = llc_conn_ev_rx_rnr_cmd_pbit_set_1,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = NONE,
@@ -4095,7 +4095,7 @@ static const llc_conn_action_t llc_await_rejct_actions_13[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_await_rejct_state_trans_13 = {
+static const struct llc_conn_state_trans llc_await_rejct_state_trans_13 = {
.ev = llc_conn_ev_p_tmr_exp,
.next_state = LLC_CONN_STATE_AWAIT_REJ,
.ev_qualifiers = llc_await_rejct_ev_qfyrs_13,
@@ -4106,7 +4106,7 @@ static struct llc_conn_state_trans llc_await_rejct_state_trans_13 = {
* Array of pointers;
* one to each transition
*/
-static struct llc_conn_state_trans *llc_await_rejct_state_transitions[] = {
+static const struct llc_conn_state_trans *llc_await_rejct_state_transitions[] = {
[0] = &llc_await_reject_state_trans_1_0,
[1] = &llc_common_state_trans_1, /* requests */
[2] = &llc_common_state_trans_2,
@@ -4171,7 +4171,7 @@ static const llc_conn_action_t llc_d_conn_actions_1[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_d_conn_state_trans_1 = {
+static const struct llc_conn_state_trans llc_d_conn_state_trans_1 = {
.ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = llc_d_conn_ev_qfyrs_1,
@@ -4194,7 +4194,7 @@ static const llc_conn_action_t llc_d_conn_actions_1_1[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_d_conn_state_trans_1_1 = {
+static const struct llc_conn_state_trans llc_d_conn_state_trans_1_1 = {
.ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = llc_d_conn_ev_qfyrs_1_1,
@@ -4218,7 +4218,7 @@ static const llc_conn_action_t llc_d_conn_actions_2[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_d_conn_state_trans_2 = {
+static const struct llc_conn_state_trans llc_d_conn_state_trans_2 = {
.ev = llc_conn_ev_rx_ua_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = llc_d_conn_ev_qfyrs_2,
@@ -4241,7 +4241,7 @@ static const llc_conn_action_t llc_d_conn_actions_2_1[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_d_conn_state_trans_2_1 = {
+static const struct llc_conn_state_trans llc_d_conn_state_trans_2_1 = {
.ev = llc_conn_ev_rx_ua_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = llc_d_conn_ev_qfyrs_2_1,
@@ -4254,7 +4254,7 @@ static const llc_conn_action_t llc_d_conn_actions_3[] = {
[1] = NULL,
};
-static struct llc_conn_state_trans llc_d_conn_state_trans_3 = {
+static const struct llc_conn_state_trans llc_d_conn_state_trans_3 = {
.ev = llc_conn_ev_rx_disc_cmd_pbit_set_x,
.next_state = LLC_CONN_STATE_D_CONN,
.ev_qualifiers = NONE,
@@ -4277,7 +4277,7 @@ static const llc_conn_action_t llc_d_conn_actions_4[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_d_conn_state_trans_4 = {
+static const struct llc_conn_state_trans llc_d_conn_state_trans_4 = {
.ev = llc_conn_ev_rx_dm_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = llc_d_conn_ev_qfyrs_4,
@@ -4299,7 +4299,7 @@ static const llc_conn_action_t llc_d_conn_actions_4_1[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_d_conn_state_trans_4_1 = {
+static const struct llc_conn_state_trans llc_d_conn_state_trans_4_1 = {
.ev = llc_conn_ev_rx_dm_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = llc_d_conn_ev_qfyrs_4_1,
@@ -4318,7 +4318,7 @@ static const llc_conn_ev_qfyr_t llc_d_conn_ev_qfyrs_5[] = {
/* just one member, NULL, .bss zeroes it */
static const llc_conn_action_t llc_d_conn_actions_5[1];
-static struct llc_conn_state_trans llc_d_conn_state_trans_5 = {
+static const struct llc_conn_state_trans llc_d_conn_state_trans_5 = {
.ev = llc_conn_ev_data_req,
.next_state = LLC_CONN_STATE_D_CONN,
.ev_qualifiers = llc_d_conn_ev_qfyrs_5,
@@ -4338,7 +4338,7 @@ static const llc_conn_action_t llc_d_conn_actions_6[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_d_conn_state_trans_6 = {
+static const struct llc_conn_state_trans llc_d_conn_state_trans_6 = {
.ev = llc_conn_ev_ack_tmr_exp,
.next_state = LLC_CONN_STATE_D_CONN,
.ev_qualifiers = llc_d_conn_ev_qfyrs_6,
@@ -4359,7 +4359,7 @@ static const llc_conn_action_t llc_d_conn_actions_7[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_d_conn_state_trans_7 = {
+static const struct llc_conn_state_trans llc_d_conn_state_trans_7 = {
.ev = llc_conn_ev_ack_tmr_exp,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = llc_d_conn_ev_qfyrs_7,
@@ -4379,7 +4379,7 @@ static const llc_conn_action_t llc_d_conn_actions_8[] = {
[1] = NULL,
};
-static struct llc_conn_state_trans llc_d_conn_state_trans_8 = {
+static const struct llc_conn_state_trans llc_d_conn_state_trans_8 = {
.ev = llc_conn_ev_ack_tmr_exp,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = llc_d_conn_ev_qfyrs_8,
@@ -4390,7 +4390,7 @@ static struct llc_conn_state_trans llc_d_conn_state_trans_8 = {
* Array of pointers;
* one to each transition
*/
-static struct llc_conn_state_trans *llc_d_conn_state_transitions[] = {
+static const struct llc_conn_state_trans *llc_d_conn_state_transitions[] = {
[0] = &llc_d_conn_state_trans_5, /* Request */
[1] = &llc_common_state_trans_end,
[2] = &llc_common_state_trans_end, /* Local busy */
@@ -4419,7 +4419,7 @@ static const llc_conn_action_t llc_rst_actions_1[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_rst_state_trans_1 = {
+static const struct llc_conn_state_trans llc_rst_state_trans_1 = {
.ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x,
.next_state = LLC_CONN_STATE_RESET,
.ev_qualifiers = NONE,
@@ -4447,7 +4447,7 @@ static const llc_conn_action_t llc_rst_actions_2[] = {
[7] = NULL,
};
-static struct llc_conn_state_trans llc_rst_state_trans_2 = {
+static const struct llc_conn_state_trans llc_rst_state_trans_2 = {
.ev = llc_conn_ev_rx_ua_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_rst_ev_qfyrs_2,
@@ -4475,7 +4475,7 @@ static const llc_conn_action_t llc_rst_actions_2_1[] = {
[7] = NULL,
};
-static struct llc_conn_state_trans llc_rst_state_trans_2_1 = {
+static const struct llc_conn_state_trans llc_rst_state_trans_2_1 = {
.ev = llc_conn_ev_rx_ua_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_rst_ev_qfyrs_2_1,
@@ -4495,7 +4495,7 @@ static const llc_conn_action_t llc_rst_actions_3[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_rst_state_trans_3 = {
+static const struct llc_conn_state_trans llc_rst_state_trans_3 = {
.ev = llc_conn_ev_ack_tmr_exp,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = llc_rst_ev_qfyrs_3,
@@ -4518,7 +4518,7 @@ static const llc_conn_action_t llc_rst_actions_4[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_rst_state_trans_4 = {
+static const struct llc_conn_state_trans llc_rst_state_trans_4 = {
.ev = llc_conn_ev_rx_disc_cmd_pbit_set_x,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = llc_rst_ev_qfyrs_4,
@@ -4541,7 +4541,7 @@ static const llc_conn_action_t llc_rst_actions_4_1[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_rst_state_trans_4_1 = {
+static const struct llc_conn_state_trans llc_rst_state_trans_4_1 = {
.ev = llc_conn_ev_rx_disc_cmd_pbit_set_x,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = llc_rst_ev_qfyrs_4_1,
@@ -4564,7 +4564,7 @@ static const llc_conn_action_t llc_rst_actions_5[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_rst_state_trans_5 = {
+static const struct llc_conn_state_trans llc_rst_state_trans_5 = {
.ev = llc_conn_ev_rx_dm_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = llc_rst_ev_qfyrs_5,
@@ -4586,7 +4586,7 @@ static const llc_conn_action_t llc_rst_actions_5_1[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_rst_state_trans_5_1 = {
+static const struct llc_conn_state_trans llc_rst_state_trans_5_1 = {
.ev = llc_conn_ev_rx_dm_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = llc_rst_ev_qfyrs_5_1,
@@ -4602,7 +4602,7 @@ static const llc_conn_ev_qfyr_t llc_rst_ev_qfyrs_6[] = {
/* just one member, NULL, .bss zeroes it */
static const llc_conn_action_t llc_rst_actions_6[1];
-static struct llc_conn_state_trans llc_rst_state_trans_6 = {
+static const struct llc_conn_state_trans llc_rst_state_trans_6 = {
.ev = llc_conn_ev_data_req,
.next_state = LLC_CONN_STATE_RESET,
.ev_qualifiers = llc_rst_ev_qfyrs_6,
@@ -4623,7 +4623,7 @@ static const llc_conn_action_t llc_rst_actions_7[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_rst_state_trans_7 = {
+static const struct llc_conn_state_trans llc_rst_state_trans_7 = {
.ev = llc_conn_ev_ack_tmr_exp,
.next_state = LLC_CONN_STATE_RESET,
.ev_qualifiers = llc_rst_ev_qfyrs_7,
@@ -4644,7 +4644,7 @@ static const llc_conn_action_t llc_rst_actions_8[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_rst_state_trans_8 = {
+static const struct llc_conn_state_trans llc_rst_state_trans_8 = {
.ev = llc_conn_ev_ack_tmr_exp,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = llc_rst_ev_qfyrs_8,
@@ -4665,7 +4665,7 @@ static const llc_conn_action_t llc_rst_actions_8_1[] = {
[2] = NULL,
};
-static struct llc_conn_state_trans llc_rst_state_trans_8_1 = {
+static const struct llc_conn_state_trans llc_rst_state_trans_8_1 = {
.ev = llc_conn_ev_ack_tmr_exp,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = llc_rst_ev_qfyrs_8_1,
@@ -4676,7 +4676,7 @@ static struct llc_conn_state_trans llc_rst_state_trans_8_1 = {
* Array of pointers;
* one to each transition
*/
-static struct llc_conn_state_trans *llc_rst_state_transitions[] = {
+static const struct llc_conn_state_trans *llc_rst_state_transitions[] = {
[0] = &llc_rst_state_trans_6, /* Request */
[1] = &llc_common_state_trans_end,
[2] = &llc_common_state_trans_end, /* Local busy */
@@ -4710,7 +4710,7 @@ static const llc_conn_action_t llc_error_actions_1[] = {
[8] = NULL,
};
-static struct llc_conn_state_trans llc_error_state_trans_1 = {
+static const struct llc_conn_state_trans llc_error_state_trans_1 = {
.ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x,
.next_state = LLC_CONN_STATE_NORMAL,
.ev_qualifiers = NONE,
@@ -4726,7 +4726,7 @@ static const llc_conn_action_t llc_error_actions_2[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_error_state_trans_2 = {
+static const struct llc_conn_state_trans llc_error_state_trans_2 = {
.ev = llc_conn_ev_rx_disc_cmd_pbit_set_x,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = NONE,
@@ -4741,7 +4741,7 @@ static const llc_conn_action_t llc_error_actions_3[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_error_state_trans_3 = {
+static const struct llc_conn_state_trans llc_error_state_trans_3 = {
.ev = llc_conn_ev_rx_dm_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = NONE,
@@ -4757,7 +4757,7 @@ static const llc_conn_action_t llc_error_actions_4[] = {
[4] = NULL,
};
-static struct llc_conn_state_trans llc_error_state_trans_4 = {
+static const struct llc_conn_state_trans llc_error_state_trans_4 = {
.ev = llc_conn_ev_rx_frmr_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_RESET,
.ev_qualifiers = NONE,
@@ -4770,7 +4770,7 @@ static const llc_conn_action_t llc_error_actions_5[] = {
[1] = NULL,
};
-static struct llc_conn_state_trans llc_error_state_trans_5 = {
+static const struct llc_conn_state_trans llc_error_state_trans_5 = {
.ev = llc_conn_ev_rx_xxx_cmd_pbit_set_x,
.next_state = LLC_CONN_STATE_ERROR,
.ev_qualifiers = NONE,
@@ -4778,7 +4778,7 @@ static struct llc_conn_state_trans llc_error_state_trans_5 = {
};
/* State transitions for LLC_CONN_EV_RX_XXX_RSP_Fbit_SET_X event */
-static struct llc_conn_state_trans llc_error_state_trans_6 = {
+static const struct llc_conn_state_trans llc_error_state_trans_6 = {
.ev = llc_conn_ev_rx_xxx_rsp_fbit_set_x,
.next_state = LLC_CONN_STATE_ERROR,
.ev_qualifiers = NONE,
@@ -4798,7 +4798,7 @@ static const llc_conn_action_t llc_error_actions_7[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_error_state_trans_7 = {
+static const struct llc_conn_state_trans llc_error_state_trans_7 = {
.ev = llc_conn_ev_ack_tmr_exp,
.next_state = LLC_CONN_STATE_ERROR,
.ev_qualifiers = llc_error_ev_qfyrs_7,
@@ -4820,7 +4820,7 @@ static const llc_conn_action_t llc_error_actions_8[] = {
[5] = NULL,
};
-static struct llc_conn_state_trans llc_error_state_trans_8 = {
+static const struct llc_conn_state_trans llc_error_state_trans_8 = {
.ev = llc_conn_ev_ack_tmr_exp,
.next_state = LLC_CONN_STATE_RESET,
.ev_qualifiers = llc_error_ev_qfyrs_8,
@@ -4836,7 +4836,7 @@ static const llc_conn_ev_qfyr_t llc_error_ev_qfyrs_9[] = {
/* just one member, NULL, .bss zeroes it */
static const llc_conn_action_t llc_error_actions_9[1];
-static struct llc_conn_state_trans llc_error_state_trans_9 = {
+static const struct llc_conn_state_trans llc_error_state_trans_9 = {
.ev = llc_conn_ev_data_req,
.next_state = LLC_CONN_STATE_ERROR,
.ev_qualifiers = llc_error_ev_qfyrs_9,
@@ -4847,7 +4847,7 @@ static struct llc_conn_state_trans llc_error_state_trans_9 = {
* Array of pointers;
* one to each transition
*/
-static struct llc_conn_state_trans *llc_error_state_transitions[] = {
+static const struct llc_conn_state_trans *llc_error_state_transitions[] = {
[0] = &llc_error_state_trans_9, /* Request */
[1] = &llc_common_state_trans_end,
[2] = &llc_common_state_trans_end, /* Local busy */
@@ -4873,7 +4873,7 @@ static const llc_conn_action_t llc_temp_actions_1[] = {
[3] = NULL,
};
-static struct llc_conn_state_trans llc_temp_state_trans_1 = {
+static const struct llc_conn_state_trans llc_temp_state_trans_1 = {
.ev = llc_conn_ev_disc_req,
.next_state = LLC_CONN_STATE_ADM,
.ev_qualifiers = NONE,
@@ -4884,7 +4884,7 @@ static struct llc_conn_state_trans llc_temp_state_trans_1 = {
* Array of pointers;
* one to each transition
*/
-static struct llc_conn_state_trans *llc_temp_state_transitions[] = {
+static const struct llc_conn_state_trans *llc_temp_state_transitions[] = {
[0] = &llc_temp_state_trans_1, /* requests */
[1] = &llc_common_state_trans_end,
[2] = &llc_common_state_trans_end, /* local busy */
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 0a3f5e0bec00..afc6974eafda 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -34,10 +34,10 @@ static int llc_find_offset(int state, int ev_type);
static void llc_conn_send_pdus(struct sock *sk);
static int llc_conn_service(struct sock *sk, struct sk_buff *skb);
static int llc_exec_conn_trans_actions(struct sock *sk,
- struct llc_conn_state_trans *trans,
+ const struct llc_conn_state_trans *trans,
struct sk_buff *ev);
-static struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk,
- struct sk_buff *skb);
+static const struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk,
+ struct sk_buff *skb);
/* Offset table on connection states transition diagram */
static int llc_offset_table[NBR_CONN_STATES][NBR_CONN_EV];
@@ -356,9 +356,9 @@ static void llc_conn_send_pdus(struct sock *sk)
*/
static int llc_conn_service(struct sock *sk, struct sk_buff *skb)
{
- int rc = 1;
+ const struct llc_conn_state_trans *trans;
struct llc_sock *llc = llc_sk(sk);
- struct llc_conn_state_trans *trans;
+ int rc = 1;
if (llc->state > NBR_CONN_STATES)
goto out;
@@ -384,10 +384,10 @@ out:
* This function finds transition that matches with happened event.
* Returns pointer to found transition on success, %NULL otherwise.
*/
-static struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk,
- struct sk_buff *skb)
+static const struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk,
+ struct sk_buff *skb)
{
- struct llc_conn_state_trans **next_trans;
+ const struct llc_conn_state_trans **next_trans;
const llc_conn_ev_qfyr_t *next_qualifier;
struct llc_conn_state_ev *ev = llc_conn_ev(skb);
struct llc_sock *llc = llc_sk(sk);
@@ -432,7 +432,7 @@ static struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk,
* success, 1 to indicate failure of at least one action.
*/
static int llc_exec_conn_trans_actions(struct sock *sk,
- struct llc_conn_state_trans *trans,
+ const struct llc_conn_state_trans *trans,
struct sk_buff *skb)
{
int rc = 0;
@@ -635,8 +635,8 @@ u8 llc_data_accept_state(u8 state)
*/
static u16 __init llc_find_next_offset(struct llc_conn_state *state, u16 offset)
{
+ const struct llc_conn_state_trans **next_trans;
u16 cnt = 0;
- struct llc_conn_state_trans **next_trans;
for (next_trans = state->transitions + offset;
(*next_trans)->ev; next_trans++)
diff --git a/net/llc/llc_s_st.c b/net/llc/llc_s_st.c
index 308c616883a4..acccc827c562 100644
--- a/net/llc/llc_s_st.c
+++ b/net/llc/llc_s_st.c
@@ -24,7 +24,7 @@
* last entry for this state
* all members are zeros, .bss zeroes it
*/
-static struct llc_sap_state_trans llc_sap_state_trans_end;
+static const struct llc_sap_state_trans llc_sap_state_trans_end;
/* state LLC_SAP_STATE_INACTIVE transition for
* LLC_SAP_EV_ACTIVATION_REQ event
@@ -34,14 +34,14 @@ static const llc_sap_action_t llc_sap_inactive_state_actions_1[] = {
[1] = NULL,
};
-static struct llc_sap_state_trans llc_sap_inactive_state_trans_1 = {
+static const struct llc_sap_state_trans llc_sap_inactive_state_trans_1 = {
.ev = llc_sap_ev_activation_req,
.next_state = LLC_SAP_STATE_ACTIVE,
.ev_actions = llc_sap_inactive_state_actions_1,
};
/* array of pointers; one to each transition */
-static struct llc_sap_state_trans *llc_sap_inactive_state_transitions[] = {
+static const struct llc_sap_state_trans *llc_sap_inactive_state_transitions[] = {
[0] = &llc_sap_inactive_state_trans_1,
[1] = &llc_sap_state_trans_end,
};
@@ -52,7 +52,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_1[] = {
[1] = NULL,
};
-static struct llc_sap_state_trans llc_sap_active_state_trans_1 = {
+static const struct llc_sap_state_trans llc_sap_active_state_trans_1 = {
.ev = llc_sap_ev_rx_ui,
.next_state = LLC_SAP_STATE_ACTIVE,
.ev_actions = llc_sap_active_state_actions_1,
@@ -64,7 +64,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_2[] = {
[1] = NULL,
};
-static struct llc_sap_state_trans llc_sap_active_state_trans_2 = {
+static const struct llc_sap_state_trans llc_sap_active_state_trans_2 = {
.ev = llc_sap_ev_unitdata_req,
.next_state = LLC_SAP_STATE_ACTIVE,
.ev_actions = llc_sap_active_state_actions_2,
@@ -76,7 +76,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_3[] = {
[1] = NULL,
};
-static struct llc_sap_state_trans llc_sap_active_state_trans_3 = {
+static const struct llc_sap_state_trans llc_sap_active_state_trans_3 = {
.ev = llc_sap_ev_xid_req,
.next_state = LLC_SAP_STATE_ACTIVE,
.ev_actions = llc_sap_active_state_actions_3,
@@ -88,7 +88,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_4[] = {
[1] = NULL,
};
-static struct llc_sap_state_trans llc_sap_active_state_trans_4 = {
+static const struct llc_sap_state_trans llc_sap_active_state_trans_4 = {
.ev = llc_sap_ev_rx_xid_c,
.next_state = LLC_SAP_STATE_ACTIVE,
.ev_actions = llc_sap_active_state_actions_4,
@@ -100,7 +100,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_5[] = {
[1] = NULL,
};
-static struct llc_sap_state_trans llc_sap_active_state_trans_5 = {
+static const struct llc_sap_state_trans llc_sap_active_state_trans_5 = {
.ev = llc_sap_ev_rx_xid_r,
.next_state = LLC_SAP_STATE_ACTIVE,
.ev_actions = llc_sap_active_state_actions_5,
@@ -112,7 +112,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_6[] = {
[1] = NULL,
};
-static struct llc_sap_state_trans llc_sap_active_state_trans_6 = {
+static const struct llc_sap_state_trans llc_sap_active_state_trans_6 = {
.ev = llc_sap_ev_test_req,
.next_state = LLC_SAP_STATE_ACTIVE,
.ev_actions = llc_sap_active_state_actions_6,
@@ -124,7 +124,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_7[] = {
[1] = NULL,
};
-static struct llc_sap_state_trans llc_sap_active_state_trans_7 = {
+static const struct llc_sap_state_trans llc_sap_active_state_trans_7 = {
.ev = llc_sap_ev_rx_test_c,
.next_state = LLC_SAP_STATE_ACTIVE,
.ev_actions = llc_sap_active_state_actions_7
@@ -136,7 +136,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_8[] = {
[1] = NULL,
};
-static struct llc_sap_state_trans llc_sap_active_state_trans_8 = {
+static const struct llc_sap_state_trans llc_sap_active_state_trans_8 = {
.ev = llc_sap_ev_rx_test_r,
.next_state = LLC_SAP_STATE_ACTIVE,
.ev_actions = llc_sap_active_state_actions_8,
@@ -150,14 +150,14 @@ static const llc_sap_action_t llc_sap_active_state_actions_9[] = {
[1] = NULL,
};
-static struct llc_sap_state_trans llc_sap_active_state_trans_9 = {
+static const struct llc_sap_state_trans llc_sap_active_state_trans_9 = {
.ev = llc_sap_ev_deactivation_req,
.next_state = LLC_SAP_STATE_INACTIVE,
.ev_actions = llc_sap_active_state_actions_9
};
/* array of pointers; one to each transition */
-static struct llc_sap_state_trans *llc_sap_active_state_transitions[] = {
+static const struct llc_sap_state_trans *llc_sap_active_state_transitions[] = {
[0] = &llc_sap_active_state_trans_2,
[1] = &llc_sap_active_state_trans_1,
[2] = &llc_sap_active_state_trans_3,
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 116c0e479183..6cd03c2ae7d5 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -114,12 +114,12 @@ void llc_sap_rtn_pdu(struct llc_sap *sap, struct sk_buff *skb)
* Returns the pointer to found transition on success or %NULL for
* failure.
*/
-static struct llc_sap_state_trans *llc_find_sap_trans(struct llc_sap *sap,
- struct sk_buff *skb)
+static const struct llc_sap_state_trans *llc_find_sap_trans(struct llc_sap *sap,
+ struct sk_buff *skb)
{
int i = 0;
- struct llc_sap_state_trans *rc = NULL;
- struct llc_sap_state_trans **next_trans;
+ const struct llc_sap_state_trans *rc = NULL;
+ const struct llc_sap_state_trans **next_trans;
struct llc_sap_state *curr_state = &llc_sap_state_table[sap->state - 1];
/*
* Search thru events for this state until list exhausted or until
@@ -143,7 +143,7 @@ static struct llc_sap_state_trans *llc_find_sap_trans(struct llc_sap *sap,
* Returns 0 for success and 1 for failure of at least one action.
*/
static int llc_exec_sap_trans_actions(struct llc_sap *sap,
- struct llc_sap_state_trans *trans,
+ const struct llc_sap_state_trans *trans,
struct sk_buff *skb)
{
int rc = 0;
@@ -166,8 +166,8 @@ static int llc_exec_sap_trans_actions(struct llc_sap *sap,
*/
static int llc_sap_next_state(struct llc_sap *sap, struct sk_buff *skb)
{
+ const struct llc_sap_state_trans *trans;
int rc = 1;
- struct llc_sap_state_trans *trans;
if (sap->state > LLC_NR_SAP_STATES)
goto out;
diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c
index 8443a6d841b0..72e101135f8c 100644
--- a/net/llc/sysctl_net_llc.c
+++ b/net/llc/sysctl_net_llc.c
@@ -44,11 +44,6 @@ static struct ctl_table llc2_timeout_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { },
-};
-
-static struct ctl_table llc_station_table[] = {
- { },
};
static struct ctl_table_header *llc2_timeout_header;
@@ -56,8 +51,9 @@ static struct ctl_table_header *llc_station_header;
int __init llc_sysctl_init(void)
{
+ struct ctl_table empty[1] = {};
llc2_timeout_header = register_net_sysctl(&init_net, "net/llc/llc2/timeout", llc2_timeout_table);
- llc_station_header = register_net_sysctl(&init_net, "net/llc/station", llc_station_table);
+ llc_station_header = register_net_sysctl_sz(&init_net, "net/llc/station", empty, 0);
if (!llc2_timeout_header || !llc_station_header) {
llc_sysctl_exit();
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 21d55dc539f6..677bbbac9f16 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -616,7 +616,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
return -EINVAL;
if (!pubsta->deflink.ht_cap.ht_supported &&
- sta->sdata->vif.bss_conf.chanreq.oper.chan->band != NL80211_BAND_6GHZ)
+ !pubsta->deflink.vht_cap.vht_supported &&
+ !pubsta->deflink.he_cap.has_he &&
+ !pubsta->deflink.eht_cap.has_eht)
return -EINVAL;
if (WARN_ON_ONCE(!local->ops->ampdu_action))
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index f67c1d021812..b02b84ce2130 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -114,7 +114,7 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata,
/* apply all changes now - no failures allowed */
- if (monitor_sdata)
+ if (monitor_sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
ieee80211_set_mu_mimo_follow(monitor_sdata, params);
if (params->flags) {
@@ -263,7 +263,7 @@ static int ieee80211_start_p2p_device(struct wiphy *wiphy,
lockdep_assert_wiphy(sdata->local->hw.wiphy);
- ret = ieee80211_check_combinations(sdata, NULL, 0, 0);
+ ret = ieee80211_check_combinations(sdata, NULL, 0, 0, -1);
if (ret < 0)
return ret;
@@ -285,7 +285,7 @@ static int ieee80211_start_nan(struct wiphy *wiphy,
lockdep_assert_wiphy(sdata->local->hw.wiphy);
- ret = ieee80211_check_combinations(sdata, NULL, 0, 0);
+ ret = ieee80211_check_combinations(sdata, NULL, 0, 0, -1);
if (ret < 0)
return ret;
@@ -742,9 +742,6 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
break;
}
- params.key = key->conf.key;
- params.key_len = key->conf.keylen;
-
callback(cookie, &params);
err = 0;
@@ -1379,6 +1376,11 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
(IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_80MHZ |
IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_160MHZ |
IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_320MHZ);
+ link_conf->eht_80mhz_full_bw_ul_mumimo =
+ params->eht_cap->fixed.phy_cap_info[7] &
+ (IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_80MHZ |
+ IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_160MHZ |
+ IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_320MHZ);
} else {
link_conf->eht_su_beamformer = false;
link_conf->eht_su_beamformee = false;
@@ -1486,7 +1488,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
if (old)
kfree_rcu(old, rcu_head);
RCU_INIT_POINTER(link->u.ap.beacon, NULL);
- sdata->u.ap.active = false;
+
+ if (ieee80211_num_beaconing_links(sdata) == 0)
+ sdata->u.ap.active = false;
+
goto error;
}
@@ -1607,11 +1612,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
/* abort any running channel switch or color change */
link_conf->csa_active = false;
link_conf->color_change_active = false;
- if (sdata->csa_blocked_tx) {
- ieee80211_wake_vif_queues(local, sdata,
- IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->csa_blocked_tx = false;
- }
+ ieee80211_vif_unblock_queues_csa(sdata);
ieee80211_free_next_beacon(link);
@@ -1619,11 +1620,12 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
netif_carrier_off(vlan->dev);
- if (ieee80211_num_beaconing_links(sdata) <= 1)
+ if (ieee80211_num_beaconing_links(sdata) <= 1) {
netif_carrier_off(dev);
+ sdata->u.ap.active = false;
+ }
/* remove beacon and probe response */
- sdata->u.ap.active = false;
RCU_INIT_POINTER(link->u.ap.beacon, NULL);
RCU_INIT_POINTER(link->u.ap.probe_resp, NULL);
RCU_INIT_POINTER(link->u.ap.fils_discovery, NULL);
@@ -1662,7 +1664,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
if (sdata->wdev.cac_started) {
chandef = link_conf->chanreq.oper;
- wiphy_delayed_work_cancel(wiphy, &link->dfs_cac_timer_work);
+ wiphy_delayed_work_cancel(wiphy, &sdata->dfs_cac_timer_work);
cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_ABORTED,
GFP_KERNEL);
@@ -1805,11 +1807,17 @@ static void sta_apply_mesh_params(struct ieee80211_local *local,
#endif
}
+enum sta_link_apply_mode {
+ STA_LINK_MODE_NEW,
+ STA_LINK_MODE_STA_MODIFY,
+ STA_LINK_MODE_LINK_MODIFY,
+};
+
static int sta_link_apply_parameters(struct ieee80211_local *local,
- struct sta_info *sta, bool new_link,
+ struct sta_info *sta,
+ enum sta_link_apply_mode mode,
struct link_station_parameters *params)
{
- int ret = 0;
struct ieee80211_supported_band *sband;
struct ieee80211_sub_if_data *sdata = sta->sdata;
u32 link_id = params->link_id < 0 ? 0 : params->link_id;
@@ -1818,18 +1826,29 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
struct link_sta_info *link_sta =
rcu_dereference_protected(sta->link[link_id],
lockdep_is_held(&local->hw.wiphy->mtx));
-
- /*
- * If there are no changes, then accept a link that exist,
- * unless it's a new link.
- */
- if (params->link_id >= 0 && !new_link &&
- !params->link_mac && !params->txpwr_set &&
- !params->supported_rates_len &&
- !params->ht_capa && !params->vht_capa &&
- !params->he_capa && !params->eht_capa &&
- !params->opmode_notif_used)
- return 0;
+ bool changes = params->link_mac ||
+ params->txpwr_set ||
+ params->supported_rates_len ||
+ params->ht_capa ||
+ params->vht_capa ||
+ params->he_capa ||
+ params->eht_capa ||
+ params->opmode_notif_used;
+
+ switch (mode) {
+ case STA_LINK_MODE_NEW:
+ if (!params->link_mac)
+ return -EINVAL;
+ break;
+ case STA_LINK_MODE_LINK_MODIFY:
+ break;
+ case STA_LINK_MODE_STA_MODIFY:
+ if (params->link_id >= 0)
+ break;
+ if (!changes)
+ return 0;
+ break;
+ }
if (!link || !link_sta)
return -EINVAL;
@@ -1839,18 +1858,18 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
return -EINVAL;
if (params->link_mac) {
- if (new_link) {
+ if (mode == STA_LINK_MODE_NEW) {
memcpy(link_sta->addr, params->link_mac, ETH_ALEN);
memcpy(link_sta->pub->addr, params->link_mac, ETH_ALEN);
} else if (!ether_addr_equal(link_sta->addr,
params->link_mac)) {
return -EINVAL;
}
- } else if (new_link) {
- return -EINVAL;
}
if (params->txpwr_set) {
+ int ret;
+
link_sta->pub->txpwr.type = params->txpwr.type;
if (params->txpwr.type == NL80211_TX_POWER_LIMITED)
link_sta->pub->txpwr.power = params->txpwr.power;
@@ -1903,7 +1922,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
ieee80211_sta_init_nss(link_sta);
- return ret;
+ return 0;
}
static int sta_apply_parameters(struct ieee80211_local *local,
@@ -2019,7 +2038,7 @@ static int sta_apply_parameters(struct ieee80211_local *local,
if (params->listen_interval >= 0)
sta->listen_interval = params->listen_interval;
- ret = sta_link_apply_parameters(local, sta, false,
+ ret = sta_link_apply_parameters(local, sta, STA_LINK_MODE_STA_MODIFY,
&params->link_sta_params);
if (ret)
return ret;
@@ -2954,8 +2973,9 @@ static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev,
memcpy(sdata->vif.bss_conf.mcast_rate, rate,
sizeof(int) * NUM_NL80211_BANDS);
- ieee80211_link_info_change_notify(sdata, &sdata->deflink,
- BSS_CHANGED_MCAST_RATE);
+ if (ieee80211_sdata_running(sdata))
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ BSS_CHANGED_MCAST_RATE);
return 0;
}
@@ -3033,6 +3053,9 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
+ if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
+ return -EOPNOTSUPP;
+
sdata = wiphy_dereference(local->hw.wiphy,
local->monitor_sdata);
if (!sdata)
@@ -3095,7 +3118,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
if (has_monitor) {
sdata = wiphy_dereference(local->hw.wiphy,
local->monitor_sdata);
- if (sdata) {
+ if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) {
sdata->deflink.user_power_level = local->user_power_level;
if (txp_type != sdata->vif.bss_conf.txpower_type)
update_txp_type = true;
@@ -3462,7 +3485,7 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy,
if (err)
goto out_unlock;
- wiphy_delayed_work_queue(wiphy, &sdata->deflink.dfs_cac_timer_work,
+ wiphy_delayed_work_queue(wiphy, &sdata->dfs_cac_timer_work,
msecs_to_jiffies(cac_time_ms));
out_unlock:
@@ -3478,12 +3501,8 @@ static void ieee80211_end_cac(struct wiphy *wiphy,
lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry(sdata, &local->interfaces, list) {
- /* it might be waiting for the local->mtx, but then
- * by the time it gets it, sdata->wdev.cac_started
- * will no longer be true
- */
wiphy_delayed_work_cancel(wiphy,
- &sdata->deflink.dfs_cac_timer_work);
+ &sdata->dfs_cac_timer_work);
if (sdata->wdev.cac_started) {
ieee80211_link_release_channel(&sdata->deflink);
@@ -3633,10 +3652,10 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id)
continue;
wiphy_work_queue(iter->local->hw.wiphy,
- &iter->deflink.csa_finalize_work);
+ &iter->deflink.csa.finalize_work);
}
}
- wiphy_work_queue(local->hw.wiphy, &link_data->csa_finalize_work);
+ wiphy_work_queue(local->hw.wiphy, &link_data->csa.finalize_work);
rcu_read_unlock();
}
@@ -3648,7 +3667,7 @@ void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, bool block_t
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_local *local = sdata->local;
- sdata->csa_blocked_tx = block_tx;
+ sdata->csa_blocked_queues = block_tx;
sdata_info(sdata, "channel switch failed, disconnecting\n");
wiphy_work_queue(local->hw.wiphy, &ifmgd->csa_connection_drop_work);
}
@@ -3723,7 +3742,7 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data)
}
if (!cfg80211_chandef_identical(&link_conf->chanreq.oper,
- &link_data->csa_chanreq.oper))
+ &link_data->csa.chanreq.oper))
return -EINVAL;
link_conf->csa_active = false;
@@ -3734,17 +3753,13 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data)
ieee80211_link_info_change_notify(sdata, link_data, changed);
- if (sdata->csa_blocked_tx) {
- ieee80211_wake_vif_queues(local, sdata,
- IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->csa_blocked_tx = false;
- }
+ ieee80211_vif_unblock_queues_csa(sdata);
err = drv_post_channel_switch(link_data);
if (err)
return err;
- cfg80211_ch_switch_notify(sdata->dev, &link_data->csa_chanreq.oper,
+ cfg80211_ch_switch_notify(sdata->dev, &link_data->csa.chanreq.oper,
link_data->link_id);
return 0;
@@ -3765,7 +3780,7 @@ static void ieee80211_csa_finalize(struct ieee80211_link_data *link_data)
void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work)
{
struct ieee80211_link_data *link =
- container_of(work, struct ieee80211_link_data, csa_finalize_work);
+ container_of(work, struct ieee80211_link_data, csa.finalize_work);
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
@@ -3914,13 +3929,13 @@ static int ieee80211_set_csa_beacon(struct ieee80211_link_data *link_data,
return 0;
}
-static void ieee80211_color_change_abort(struct ieee80211_sub_if_data *sdata)
+static void ieee80211_color_change_abort(struct ieee80211_link_data *link)
{
- sdata->vif.bss_conf.color_change_active = false;
+ link->conf->color_change_active = false;
- ieee80211_free_next_beacon(&sdata->deflink);
+ ieee80211_free_next_beacon(link);
- cfg80211_color_change_aborted_notify(sdata->dev);
+ cfg80211_color_change_aborted_notify(link->sdata->dev, link->link_id);
}
static int
@@ -3996,7 +4011,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
goto out;
/* if reservation is invalid then this will fail */
- err = ieee80211_check_combinations(sdata, NULL, chanctx->mode, 0);
+ err = ieee80211_check_combinations(sdata, NULL, chanctx->mode, 0, -1);
if (err) {
ieee80211_link_unreserve_chanctx(link_data);
goto out;
@@ -4004,7 +4019,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
/* if there is a color change in progress, abort it */
if (link_conf->color_change_active)
- ieee80211_color_change_abort(sdata);
+ ieee80211_color_change_abort(link_data);
err = ieee80211_set_csa_beacon(link_data, params, &changed);
if (err) {
@@ -4012,23 +4027,19 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
goto out;
}
- link_data->csa_chanreq = chanreq;
+ link_data->csa.chanreq = chanreq;
link_conf->csa_active = true;
- if (params->block_tx &&
- !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA)) {
- ieee80211_stop_vif_queues(local, sdata,
- IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->csa_blocked_tx = true;
- }
+ if (params->block_tx)
+ ieee80211_vif_block_queues_csa(sdata);
cfg80211_ch_switch_started_notify(sdata->dev,
- &link_data->csa_chanreq.oper, 0,
+ &link_data->csa.chanreq.oper, link_id,
params->count, params->block_tx);
if (changed) {
ieee80211_link_info_change_notify(sdata, link_data, changed);
- drv_channel_switch_beacon(sdata, &link_data->csa_chanreq.oper);
+ drv_channel_switch_beacon(sdata, &link_data->csa.chanreq.oper);
} else {
/* if the beacon didn't change, we can finalize immediately */
ieee80211_csa_finalize(link_data);
@@ -4662,20 +4673,22 @@ static int ieee80211_set_sar_specs(struct wiphy *wiphy,
}
static int
-ieee80211_set_after_color_change_beacon(struct ieee80211_sub_if_data *sdata,
+ieee80211_set_after_color_change_beacon(struct ieee80211_link_data *link,
u64 *changed)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP: {
int ret;
- if (!sdata->deflink.u.ap.next_beacon)
+ if (!link->u.ap.next_beacon)
return -EINVAL;
- ret = ieee80211_assign_beacon(sdata, &sdata->deflink,
- sdata->deflink.u.ap.next_beacon,
+ ret = ieee80211_assign_beacon(sdata, link,
+ link->u.ap.next_beacon,
NULL, NULL, changed);
- ieee80211_free_next_beacon(&sdata->deflink);
+ ieee80211_free_next_beacon(link);
if (ret < 0)
return ret;
@@ -4691,18 +4704,19 @@ ieee80211_set_after_color_change_beacon(struct ieee80211_sub_if_data *sdata,
}
static int
-ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata,
+ieee80211_set_color_change_beacon(struct ieee80211_link_data *link,
struct cfg80211_color_change_settings *params,
u64 *changed)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_color_change_settings color_change = {};
int err;
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
- sdata->deflink.u.ap.next_beacon =
+ link->u.ap.next_beacon =
cfg80211_beacon_dup(&params->beacon_next);
- if (!sdata->deflink.u.ap.next_beacon)
+ if (!link->u.ap.next_beacon)
return -ENOMEM;
if (params->count <= 1)
@@ -4714,11 +4728,11 @@ ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata,
params->counter_offset_presp;
color_change.count = params->count;
- err = ieee80211_assign_beacon(sdata, &sdata->deflink,
+ err = ieee80211_assign_beacon(sdata, link,
&params->beacon_color_change,
NULL, &color_change, changed);
if (err < 0) {
- ieee80211_free_next_beacon(&sdata->deflink);
+ ieee80211_free_next_beacon(link);
return err;
}
break;
@@ -4730,16 +4744,18 @@ ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata,
}
static void
-ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata,
+ieee80211_color_change_bss_config_notify(struct ieee80211_link_data *link,
u8 color, int enable, u64 changed)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+
lockdep_assert_wiphy(sdata->local->hw.wiphy);
- sdata->vif.bss_conf.he_bss_color.color = color;
- sdata->vif.bss_conf.he_bss_color.enabled = enable;
+ link->conf->he_bss_color.color = color;
+ link->conf->he_bss_color.enabled = enable;
changed |= BSS_CHANGED_HE_BSS_COLOR;
- ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed);
+ ieee80211_link_info_change_notify(sdata, link, changed);
if (!sdata->vif.bss_conf.nontransmitted && sdata->vif.mbssid_tx_vif) {
struct ieee80211_sub_if_data *child;
@@ -4756,26 +4772,27 @@ ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata,
}
}
-static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata)
+static int ieee80211_color_change_finalize(struct ieee80211_link_data *link)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
u64 changed = 0;
int err;
lockdep_assert_wiphy(local->hw.wiphy);
- sdata->vif.bss_conf.color_change_active = false;
+ link->conf->color_change_active = false;
- err = ieee80211_set_after_color_change_beacon(sdata, &changed);
+ err = ieee80211_set_after_color_change_beacon(link, &changed);
if (err) {
- cfg80211_color_change_aborted_notify(sdata->dev);
+ cfg80211_color_change_aborted_notify(sdata->dev, link->link_id);
return err;
}
- ieee80211_color_change_bss_config_notify(sdata,
- sdata->vif.bss_conf.color_change_color,
+ ieee80211_color_change_bss_config_notify(link,
+ link->conf->color_change_color,
1, changed);
- cfg80211_color_change_notify(sdata->dev);
+ cfg80211_color_change_notify(sdata->dev, link->link_id);
return 0;
}
@@ -4783,21 +4800,23 @@ static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata)
void ieee80211_color_change_finalize_work(struct wiphy *wiphy,
struct wiphy_work *work)
{
- struct ieee80211_sub_if_data *sdata =
- container_of(work, struct ieee80211_sub_if_data,
- deflink.color_change_finalize_work);
+ struct ieee80211_link_data *link =
+ container_of(work, struct ieee80211_link_data,
+ color_change_finalize_work);
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+ struct ieee80211_bss_conf *link_conf = link->conf;
struct ieee80211_local *local = sdata->local;
lockdep_assert_wiphy(local->hw.wiphy);
/* AP might have been stopped while waiting for the lock. */
- if (!sdata->vif.bss_conf.color_change_active)
+ if (!link_conf->color_change_active)
return;
if (!ieee80211_sdata_running(sdata))
return;
- ieee80211_color_change_finalize(sdata);
+ ieee80211_color_change_finalize(link);
}
void ieee80211_color_collision_detection_work(struct work_struct *work)
@@ -4808,30 +4827,60 @@ void ieee80211_color_collision_detection_work(struct work_struct *work)
color_collision_detect_work);
struct ieee80211_sub_if_data *sdata = link->sdata;
- cfg80211_obss_color_collision_notify(sdata->dev, link->color_bitmap);
+ cfg80211_obss_color_collision_notify(sdata->dev, link->color_bitmap,
+ link->link_id);
}
-void ieee80211_color_change_finish(struct ieee80211_vif *vif)
+void ieee80211_color_change_finish(struct ieee80211_vif *vif, u8 link_id)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_link_data *link;
+
+ if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS))
+ return;
+
+ rcu_read_lock();
+
+ link = rcu_dereference(sdata->link[link_id]);
+ if (WARN_ON(!link)) {
+ rcu_read_unlock();
+ return;
+ }
wiphy_work_queue(sdata->local->hw.wiphy,
- &sdata->deflink.color_change_finalize_work);
+ &link->color_change_finalize_work);
+
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(ieee80211_color_change_finish);
void
ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
- u64 color_bitmap)
+ u64 color_bitmap, u8 link_id)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
- struct ieee80211_link_data *link = &sdata->deflink;
+ struct ieee80211_link_data *link;
- if (sdata->vif.bss_conf.color_change_active || sdata->vif.bss_conf.csa_active)
+ if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS))
return;
- if (delayed_work_pending(&link->color_collision_detect_work))
+ rcu_read_lock();
+
+ link = rcu_dereference(sdata->link[link_id]);
+ if (WARN_ON(!link)) {
+ rcu_read_unlock();
+ return;
+ }
+
+ if (link->conf->color_change_active || link->conf->csa_active) {
+ rcu_read_unlock();
+ return;
+ }
+
+ if (delayed_work_pending(&link->color_collision_detect_work)) {
+ rcu_read_unlock();
return;
+ }
link->color_bitmap = color_bitmap;
/* queue the color collision detection event every 500 ms in order to
@@ -4840,6 +4889,8 @@ ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
ieee80211_queue_delayed_work(&sdata->local->hw,
&link->color_collision_detect_work,
msecs_to_jiffies(500));
+
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(ieee80211_obss_color_collision_notify);
@@ -4849,36 +4900,48 @@ ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_bss_conf *link_conf;
+ struct ieee80211_link_data *link;
+ u8 link_id = params->link_id;
u64 changed = 0;
int err;
lockdep_assert_wiphy(local->hw.wiphy);
- if (sdata->vif.bss_conf.nontransmitted)
+ if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS))
+ return -EINVAL;
+
+ link = wiphy_dereference(wiphy, sdata->link[link_id]);
+ if (!link)
+ return -ENOLINK;
+
+ link_conf = link->conf;
+
+ if (link_conf->nontransmitted)
return -EINVAL;
/* don't allow another color change if one is already active or if csa
* is active
*/
- if (sdata->vif.bss_conf.color_change_active || sdata->vif.bss_conf.csa_active) {
+ if (link_conf->color_change_active || link_conf->csa_active) {
err = -EBUSY;
goto out;
}
- err = ieee80211_set_color_change_beacon(sdata, params, &changed);
+ err = ieee80211_set_color_change_beacon(link, params, &changed);
if (err)
goto out;
- sdata->vif.bss_conf.color_change_active = true;
- sdata->vif.bss_conf.color_change_color = params->color;
+ link_conf->color_change_active = true;
+ link_conf->color_change_color = params->color;
- cfg80211_color_change_started_notify(sdata->dev, params->count);
+ cfg80211_color_change_started_notify(sdata->dev, params->count, link_id);
if (changed)
- ieee80211_color_change_bss_config_notify(sdata, 0, 0, changed);
+ ieee80211_color_change_bss_config_notify(link, 0, 0, changed);
else
/* if the beacon didn't change, we can finalize immediately */
- ieee80211_color_change_finalize(sdata);
+ ieee80211_color_change_finalize(link);
out:
@@ -4922,13 +4985,17 @@ static void ieee80211_del_intf_link(struct wiphy *wiphy,
ieee80211_vif_set_links(sdata, wdev->valid_links, 0);
}
-static int sta_add_link_station(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct link_station_parameters *params)
+static int
+ieee80211_add_link_station(struct wiphy *wiphy, struct net_device *dev,
+ struct link_station_parameters *params)
{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = wiphy_priv(wiphy);
struct sta_info *sta;
int ret;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
sta = sta_info_get_bss(sdata, params->mld_mac);
if (!sta)
return -ENOENT;
@@ -4943,7 +5010,7 @@ static int sta_add_link_station(struct ieee80211_local *local,
if (ret)
return ret;
- ret = sta_link_apply_parameters(local, sta, true, params);
+ ret = sta_link_apply_parameters(local, sta, STA_LINK_MODE_NEW, params);
if (ret) {
ieee80211_sta_free_link(sta, params->link_id);
return ret;
@@ -4954,23 +5021,15 @@ static int sta_add_link_station(struct ieee80211_local *local,
}
static int
-ieee80211_add_link_station(struct wiphy *wiphy, struct net_device *dev,
+ieee80211_mod_link_station(struct wiphy *wiphy, struct net_device *dev,
struct link_station_parameters *params)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = wiphy_priv(wiphy);
-
- lockdep_assert_wiphy(sdata->local->hw.wiphy);
-
- return sta_add_link_station(local, sdata, params);
-}
-
-static int sta_mod_link_station(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct link_station_parameters *params)
-{
struct sta_info *sta;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
sta = sta_info_get_bss(sdata, params->mld_mac);
if (!sta)
return -ENOENT;
@@ -4978,26 +5037,19 @@ static int sta_mod_link_station(struct ieee80211_local *local,
if (!(sta->sta.valid_links & BIT(params->link_id)))
return -EINVAL;
- return sta_link_apply_parameters(local, sta, false, params);
+ return sta_link_apply_parameters(local, sta, STA_LINK_MODE_LINK_MODIFY,
+ params);
}
static int
-ieee80211_mod_link_station(struct wiphy *wiphy, struct net_device *dev,
- struct link_station_parameters *params)
+ieee80211_del_link_station(struct wiphy *wiphy, struct net_device *dev,
+ struct link_station_del_parameters *params)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
- struct ieee80211_local *local = wiphy_priv(wiphy);
+ struct sta_info *sta;
lockdep_assert_wiphy(sdata->local->hw.wiphy);
- return sta_mod_link_station(local, sdata, params);
-}
-
-static int sta_del_link_station(struct ieee80211_sub_if_data *sdata,
- struct link_station_del_parameters *params)
-{
- struct sta_info *sta;
-
sta = sta_info_get_bss(sdata, params->mld_mac);
if (!sta)
return -ENOENT;
@@ -5014,17 +5066,6 @@ static int sta_del_link_station(struct ieee80211_sub_if_data *sdata,
return 0;
}
-static int
-ieee80211_del_link_station(struct wiphy *wiphy, struct net_device *dev,
- struct link_station_del_parameters *params)
-{
- struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
- lockdep_assert_wiphy(sdata->local->hw.wiphy);
-
- return sta_del_link_station(sdata, params);
-}
-
static int ieee80211_set_hw_timestamp(struct wiphy *wiphy,
struct net_device *dev,
struct cfg80211_set_hw_timestamp *hwts)
@@ -5165,4 +5206,5 @@ const struct cfg80211_ops mac80211_config_ops = {
.del_link_station = ieee80211_del_link_station,
.set_hw_timestamp = ieee80211_set_hw_timestamp,
.set_ttlm = ieee80211_set_ttlm,
+ .get_radio_mask = ieee80211_get_radio_mask,
};
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index ccacaed32817..e8567723e94d 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -47,24 +47,29 @@ int ieee80211_chanctx_refcount(struct ieee80211_local *local,
ieee80211_chanctx_num_reserved(local, ctx);
}
-static int ieee80211_num_chanctx(struct ieee80211_local *local)
+static int ieee80211_num_chanctx(struct ieee80211_local *local, int radio_idx)
{
struct ieee80211_chanctx *ctx;
int num = 0;
lockdep_assert_wiphy(local->hw.wiphy);
- list_for_each_entry(ctx, &local->chanctx_list, list)
+ list_for_each_entry(ctx, &local->chanctx_list, list) {
+ if (radio_idx >= 0 && ctx->conf.radio_idx != radio_idx)
+ continue;
num++;
+ }
return num;
}
-static bool ieee80211_can_create_new_chanctx(struct ieee80211_local *local)
+static bool ieee80211_can_create_new_chanctx(struct ieee80211_local *local,
+ int radio_idx)
{
lockdep_assert_wiphy(local->hw.wiphy);
- return ieee80211_num_chanctx(local) < ieee80211_max_num_channels(local);
+ return ieee80211_num_chanctx(local, radio_idx) <
+ ieee80211_max_num_channels(local, radio_idx);
}
static struct ieee80211_chanctx *
@@ -295,17 +300,24 @@ ieee80211_get_max_required_bw(struct ieee80211_link_data *link)
static enum nl80211_chan_width
ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
- struct ieee80211_link_data *rsvd_for)
+ struct ieee80211_link_data *rsvd_for,
+ bool check_reserved)
{
struct ieee80211_sub_if_data *sdata;
struct ieee80211_link_data *link;
enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT;
+ if (WARN_ON(check_reserved && rsvd_for))
+ return ctx->conf.def.width;
+
for_each_sdata_link(local, link) {
enum nl80211_chan_width width = NL80211_CHAN_WIDTH_20_NOHT;
- if (link != rsvd_for &&
- rcu_access_pointer(link->conf->chanctx_conf) != &ctx->conf)
+ if (check_reserved) {
+ if (link->reserved_chanctx != ctx)
+ continue;
+ } else if (link != rsvd_for &&
+ rcu_access_pointer(link->conf->chanctx_conf) != &ctx->conf)
continue;
switch (link->sdata->vif.type) {
@@ -359,7 +371,8 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
static u32
_ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
- struct ieee80211_link_data *rsvd_for)
+ struct ieee80211_link_data *rsvd_for,
+ bool check_reserved)
{
enum nl80211_chan_width max_bw;
struct cfg80211_chan_def min_def;
@@ -379,7 +392,8 @@ _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
return 0;
}
- max_bw = ieee80211_get_chanctx_max_required_bw(local, ctx, rsvd_for);
+ max_bw = ieee80211_get_chanctx_max_required_bw(local, ctx, rsvd_for,
+ check_reserved);
/* downgrade chandef up to max_bw */
min_def = ctx->conf.def;
@@ -396,12 +410,9 @@ _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
return IEEE80211_CHANCTX_CHANGE_MIN_WIDTH;
}
-/* calling this function is assuming that station vif is updated to
- * lates changes by calling ieee80211_link_update_chanreq
- */
static void ieee80211_chan_bw_change(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
- bool narrowed)
+ bool reserved, bool narrowed)
{
struct sta_info *sta;
struct ieee80211_supported_band *sband =
@@ -418,13 +429,17 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local,
continue;
for (link_id = 0; link_id < ARRAY_SIZE(sta->sdata->link); link_id++) {
- struct ieee80211_bss_conf *link_conf =
- rcu_dereference(sdata->vif.link_conf[link_id]);
+ struct ieee80211_link_data *link =
+ rcu_dereference(sdata->link[link_id]);
+ struct ieee80211_bss_conf *link_conf;
+ struct cfg80211_chan_def *new_chandef;
struct link_sta_info *link_sta;
- if (!link_conf)
+ if (!link)
continue;
+ link_conf = link->conf;
+
if (rcu_access_pointer(link_conf->chanctx_conf) != &ctx->conf)
continue;
@@ -432,7 +447,13 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local,
if (!link_sta)
continue;
- new_sta_bw = ieee80211_sta_cur_vht_bw(link_sta);
+ if (reserved)
+ new_chandef = &link->reserved.oper;
+ else
+ new_chandef = &link_conf->chanreq.oper;
+
+ new_sta_bw = _ieee80211_sta_cur_vht_bw(link_sta,
+ new_chandef);
/* nothing change */
if (new_sta_bw == link_sta->pub->bandwidth)
@@ -458,20 +479,22 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local,
*/
void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
- struct ieee80211_link_data *rsvd_for)
+ struct ieee80211_link_data *rsvd_for,
+ bool check_reserved)
{
- u32 changed = _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for);
+ u32 changed = _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for,
+ check_reserved);
if (!changed)
return;
/* check is BW narrowed */
- ieee80211_chan_bw_change(local, ctx, true);
+ ieee80211_chan_bw_change(local, ctx, false, true);
drv_change_chanctx(local, ctx, changed);
/* check is BW wider */
- ieee80211_chan_bw_change(local, ctx, false);
+ ieee80211_chan_bw_change(local, ctx, false, false);
}
static void _ieee80211_change_chanctx(struct ieee80211_local *local,
@@ -505,10 +528,10 @@ static void _ieee80211_change_chanctx(struct ieee80211_local *local,
* due to maybe not returning from it, e.g in case new context was added
* first time with all parameters up to date.
*/
- ieee80211_chan_bw_change(local, old_ctx, true);
+ ieee80211_chan_bw_change(local, old_ctx, false, true);
if (ieee80211_chanreq_identical(&ctx_req, chanreq)) {
- ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for);
+ ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for, false);
return;
}
@@ -529,14 +552,14 @@ static void _ieee80211_change_chanctx(struct ieee80211_local *local,
ctx->conf.ap = chanreq->ap;
/* check if min chanctx also changed */
- changed |= _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for);
+ changed |= _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for, false);
ieee80211_add_wbrf(local, &ctx->conf.def);
drv_change_chanctx(local, ctx, changed);
/* check if BW is wider */
- ieee80211_chan_bw_change(local, old_ctx, false);
+ ieee80211_chan_bw_change(local, old_ctx, false, false);
}
static void ieee80211_change_chanctx(struct ieee80211_local *local,
@@ -547,8 +570,10 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local,
_ieee80211_change_chanctx(local, ctx, old_ctx, chanreq, NULL);
}
+/* Note: if successful, the returned chanctx is reserved for the link */
static struct ieee80211_chanctx *
ieee80211_find_chanctx(struct ieee80211_local *local,
+ struct ieee80211_link_data *link,
const struct ieee80211_chan_req *chanreq,
enum ieee80211_chanctx_mode mode)
{
@@ -560,6 +585,9 @@ ieee80211_find_chanctx(struct ieee80211_local *local,
if (mode == IEEE80211_CHANCTX_EXCLUSIVE)
return NULL;
+ if (WARN_ON(link->reserved_chanctx))
+ return NULL;
+
list_for_each_entry(ctx, &local->chanctx_list, list) {
const struct ieee80211_chan_req *compat;
@@ -578,6 +606,16 @@ ieee80211_find_chanctx(struct ieee80211_local *local,
if (!compat)
continue;
+ /*
+ * Reserve the chanctx temporarily, as the driver might change
+ * active links during callbacks we make into it below and/or
+ * later during assignment, which could (otherwise) cause the
+ * context to actually be removed.
+ */
+ link->reserved_chanctx = ctx;
+ list_add(&link->reserved_chanctx_list,
+ &ctx->reserved_links);
+
ieee80211_change_chanctx(local, ctx, ctx, compat);
return ctx;
@@ -623,7 +661,8 @@ ieee80211_chanctx_radar_required(struct ieee80211_local *local,
static struct ieee80211_chanctx *
ieee80211_alloc_chanctx(struct ieee80211_local *local,
const struct ieee80211_chan_req *chanreq,
- enum ieee80211_chanctx_mode mode)
+ enum ieee80211_chanctx_mode mode,
+ int radio_idx)
{
struct ieee80211_chanctx *ctx;
@@ -641,7 +680,8 @@ ieee80211_alloc_chanctx(struct ieee80211_local *local,
ctx->conf.rx_chains_dynamic = 1;
ctx->mode = mode;
ctx->conf.radar_enabled = false;
- _ieee80211_recalc_chanctx_min_def(local, ctx, NULL);
+ ctx->conf.radio_idx = radio_idx;
+ _ieee80211_recalc_chanctx_min_def(local, ctx, NULL, false);
return ctx;
}
@@ -673,48 +713,55 @@ static int ieee80211_add_chanctx(struct ieee80211_local *local,
static struct ieee80211_chanctx *
ieee80211_new_chanctx(struct ieee80211_local *local,
const struct ieee80211_chan_req *chanreq,
- enum ieee80211_chanctx_mode mode)
+ enum ieee80211_chanctx_mode mode,
+ bool assign_on_failure,
+ int radio_idx)
{
struct ieee80211_chanctx *ctx;
int err;
lockdep_assert_wiphy(local->hw.wiphy);
- ctx = ieee80211_alloc_chanctx(local, chanreq, mode);
+ ctx = ieee80211_alloc_chanctx(local, chanreq, mode, radio_idx);
if (!ctx)
return ERR_PTR(-ENOMEM);
err = ieee80211_add_chanctx(local, ctx);
- if (err) {
+ if (!assign_on_failure && err) {
kfree(ctx);
return ERR_PTR(err);
}
+ /* We ignored a driver error, see _ieee80211_set_active_links */
+ WARN_ON_ONCE(err && !local->in_reconfig);
list_add_rcu(&ctx->list, &local->chanctx_list);
return ctx;
}
static void ieee80211_del_chanctx(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx)
+ struct ieee80211_chanctx *ctx,
+ bool skip_idle_recalc)
{
lockdep_assert_wiphy(local->hw.wiphy);
drv_remove_chanctx(local, ctx);
- ieee80211_recalc_idle(local);
+ if (!skip_idle_recalc)
+ ieee80211_recalc_idle(local);
ieee80211_remove_wbrf(local, &ctx->conf.def);
}
static void ieee80211_free_chanctx(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx)
+ struct ieee80211_chanctx *ctx,
+ bool skip_idle_recalc)
{
lockdep_assert_wiphy(local->hw.wiphy);
WARN_ON_ONCE(ieee80211_chanctx_refcount(local, ctx) != 0);
list_del_rcu(&ctx->list);
- ieee80211_del_chanctx(local, ctx);
+ ieee80211_del_chanctx(local, ctx, skip_idle_recalc);
kfree_rcu(ctx, rcu_head);
}
@@ -754,13 +801,24 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local,
/* TDLS peers can sometimes affect the chandef width */
list_for_each_entry(sta, &local->sta_list, list) {
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_chan_req tdls_chanreq = {};
+ int tdls_link_id;
+
if (!sta->uploaded ||
!test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW) ||
!test_sta_flag(sta, WLAN_STA_AUTHORIZED) ||
!sta->tdls_chandef.chan)
continue;
+ tdls_link_id = ieee80211_tdls_sta_link_id(sta);
+ link = sdata_dereference(sdata->link[tdls_link_id], sdata);
+ if (!link)
+ continue;
+
+ if (rcu_access_pointer(link->conf->chanctx_conf) != conf)
+ continue;
+
tdls_chanreq.oper = sta->tdls_chandef;
/* note this always fills and returns &tmp if compat */
@@ -791,14 +849,15 @@ static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local,
}
static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link,
- struct ieee80211_chanctx *new_ctx)
+ struct ieee80211_chanctx *new_ctx,
+ bool assign_on_failure)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *curr_ctx = NULL;
bool new_idle;
- int ret = 0;
+ int ret;
if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_NAN))
return -EOPNOTSUPP;
@@ -816,30 +875,35 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link,
if (new_ctx) {
/* recalc considering the link we'll use it for now */
- ieee80211_recalc_chanctx_min_def(local, new_ctx, link);
+ ieee80211_recalc_chanctx_min_def(local, new_ctx, link, false);
ret = drv_assign_vif_chanctx(local, sdata, link->conf, new_ctx);
- if (ret)
- goto out;
-
- conf = &new_ctx->conf;
- list_add(&link->assigned_chanctx_list,
- &new_ctx->assigned_links);
+ if (assign_on_failure || !ret) {
+ /* Need to continue, see _ieee80211_set_active_links */
+ WARN_ON_ONCE(ret && !local->in_reconfig);
+ ret = 0;
+
+ /* succeeded, so commit it to the data structures */
+ conf = &new_ctx->conf;
+ list_add(&link->assigned_chanctx_list,
+ &new_ctx->assigned_links);
+ }
+ } else {
+ ret = 0;
}
-out:
rcu_assign_pointer(link->conf->chanctx_conf, conf);
if (curr_ctx && ieee80211_chanctx_num_assigned(local, curr_ctx) > 0) {
ieee80211_recalc_chanctx_chantype(local, curr_ctx);
ieee80211_recalc_smps_chanctx(local, curr_ctx);
ieee80211_recalc_radar_chanctx(local, curr_ctx);
- ieee80211_recalc_chanctx_min_def(local, curr_ctx, NULL);
+ ieee80211_recalc_chanctx_min_def(local, curr_ctx, NULL, false);
}
if (new_ctx && ieee80211_chanctx_num_assigned(local, new_ctx) > 0) {
ieee80211_recalc_txpower(sdata, false);
- ieee80211_recalc_chanctx_min_def(local, new_ctx, NULL);
+ ieee80211_recalc_chanctx_min_def(local, new_ctx, NULL, false);
}
if (conf) {
@@ -1019,13 +1083,114 @@ int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link)
list_del_rcu(&ctx->list);
kfree_rcu(ctx, rcu_head);
} else {
- ieee80211_free_chanctx(sdata->local, ctx);
+ ieee80211_free_chanctx(sdata->local, ctx, false);
}
}
return 0;
}
+static struct ieee80211_chanctx *
+ieee80211_replace_chanctx(struct ieee80211_local *local,
+ const struct ieee80211_chan_req *chanreq,
+ enum ieee80211_chanctx_mode mode,
+ struct ieee80211_chanctx *curr_ctx)
+{
+ struct ieee80211_chanctx *new_ctx, *ctx;
+ struct wiphy *wiphy = local->hw.wiphy;
+ const struct wiphy_radio *radio;
+
+ if (!curr_ctx || (curr_ctx->replace_state ==
+ IEEE80211_CHANCTX_WILL_BE_REPLACED) ||
+ !list_empty(&curr_ctx->reserved_links)) {
+ /*
+ * Another link already requested this context for a
+ * reservation. Find another one hoping all links assigned
+ * to it will also switch soon enough.
+ *
+ * TODO: This needs a little more work as some cases
+ * (more than 2 chanctx capable devices) may fail which could
+ * otherwise succeed provided some channel context juggling was
+ * performed.
+ *
+ * Consider ctx1..3, link1..6, each ctx has 2 links. link1 and
+ * link2 from ctx1 request new different chandefs starting 2
+ * in-place reserations with ctx4 and ctx5 replacing ctx1 and
+ * ctx2 respectively. Next link5 and link6 from ctx3 reserve
+ * ctx4. If link3 and link4 remain on ctx2 as they are then this
+ * fails unless `replace_ctx` from ctx5 is replaced with ctx3.
+ */
+ list_for_each_entry(ctx, &local->chanctx_list, list) {
+ if (ctx->replace_state !=
+ IEEE80211_CHANCTX_REPLACE_NONE)
+ continue;
+
+ if (!list_empty(&ctx->reserved_links))
+ continue;
+
+ if (ctx->conf.radio_idx >= 0) {
+ radio = &wiphy->radio[ctx->conf.radio_idx];
+ if (!cfg80211_radio_chandef_valid(radio, &chanreq->oper))
+ continue;
+ }
+
+ curr_ctx = ctx;
+ break;
+ }
+ }
+
+ /*
+ * If that's true then all available contexts already have reservations
+ * and cannot be used.
+ */
+ if (!curr_ctx || (curr_ctx->replace_state ==
+ IEEE80211_CHANCTX_WILL_BE_REPLACED) ||
+ !list_empty(&curr_ctx->reserved_links))
+ return ERR_PTR(-EBUSY);
+
+ new_ctx = ieee80211_alloc_chanctx(local, chanreq, mode, -1);
+ if (!new_ctx)
+ return ERR_PTR(-ENOMEM);
+
+ new_ctx->replace_ctx = curr_ctx;
+ new_ctx->replace_state = IEEE80211_CHANCTX_REPLACES_OTHER;
+
+ curr_ctx->replace_ctx = new_ctx;
+ curr_ctx->replace_state = IEEE80211_CHANCTX_WILL_BE_REPLACED;
+
+ list_add_rcu(&new_ctx->list, &local->chanctx_list);
+
+ return new_ctx;
+}
+
+static bool
+ieee80211_find_available_radio(struct ieee80211_local *local,
+ const struct ieee80211_chan_req *chanreq,
+ int *radio_idx)
+{
+ struct wiphy *wiphy = local->hw.wiphy;
+ const struct wiphy_radio *radio;
+ int i;
+
+ *radio_idx = -1;
+ if (!wiphy->n_radio)
+ return true;
+
+ for (i = 0; i < wiphy->n_radio; i++) {
+ radio = &wiphy->radio[i];
+ if (!cfg80211_radio_chandef_valid(radio, &chanreq->oper))
+ continue;
+
+ if (!ieee80211_can_create_new_chanctx(local, i))
+ continue;
+
+ *radio_idx = i;
+ return true;
+ }
+
+ return false;
+}
+
int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
const struct ieee80211_chan_req *chanreq,
enum ieee80211_chanctx_mode mode,
@@ -1033,7 +1198,8 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
{
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
- struct ieee80211_chanctx *new_ctx, *curr_ctx, *ctx;
+ struct ieee80211_chanctx *new_ctx, *curr_ctx;
+ int radio_idx;
lockdep_assert_wiphy(local->hw.wiphy);
@@ -1043,75 +1209,15 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
new_ctx = ieee80211_find_reservation_chanctx(local, chanreq, mode);
if (!new_ctx) {
- if (ieee80211_can_create_new_chanctx(local)) {
- new_ctx = ieee80211_new_chanctx(local, chanreq, mode);
- if (IS_ERR(new_ctx))
- return PTR_ERR(new_ctx);
- } else {
- if (!curr_ctx ||
- (curr_ctx->replace_state ==
- IEEE80211_CHANCTX_WILL_BE_REPLACED) ||
- !list_empty(&curr_ctx->reserved_links)) {
- /*
- * Another link already requested this context
- * for a reservation. Find another one hoping
- * all links assigned to it will also switch
- * soon enough.
- *
- * TODO: This needs a little more work as some
- * cases (more than 2 chanctx capable devices)
- * may fail which could otherwise succeed
- * provided some channel context juggling was
- * performed.
- *
- * Consider ctx1..3, link1..6, each ctx has 2
- * links. link1 and link2 from ctx1 request new
- * different chandefs starting 2 in-place
- * reserations with ctx4 and ctx5 replacing
- * ctx1 and ctx2 respectively. Next link5 and
- * link6 from ctx3 reserve ctx4. If link3 and
- * link4 remain on ctx2 as they are then this
- * fails unless `replace_ctx` from ctx5 is
- * replaced with ctx3.
- */
- list_for_each_entry(ctx, &local->chanctx_list,
- list) {
- if (ctx->replace_state !=
- IEEE80211_CHANCTX_REPLACE_NONE)
- continue;
-
- if (!list_empty(&ctx->reserved_links))
- continue;
-
- curr_ctx = ctx;
- break;
- }
- }
-
- /*
- * If that's true then all available contexts already
- * have reservations and cannot be used.
- */
- if (!curr_ctx ||
- (curr_ctx->replace_state ==
- IEEE80211_CHANCTX_WILL_BE_REPLACED) ||
- !list_empty(&curr_ctx->reserved_links))
- return -EBUSY;
-
- new_ctx = ieee80211_alloc_chanctx(local, chanreq, mode);
- if (!new_ctx)
- return -ENOMEM;
-
- new_ctx->replace_ctx = curr_ctx;
- new_ctx->replace_state =
- IEEE80211_CHANCTX_REPLACES_OTHER;
-
- curr_ctx->replace_ctx = new_ctx;
- curr_ctx->replace_state =
- IEEE80211_CHANCTX_WILL_BE_REPLACED;
-
- list_add_rcu(&new_ctx->list, &local->chanctx_list);
- }
+ if (ieee80211_can_create_new_chanctx(local, -1) &&
+ ieee80211_find_available_radio(local, chanreq, &radio_idx))
+ new_ctx = ieee80211_new_chanctx(local, chanreq, mode,
+ false, radio_idx);
+ else
+ new_ctx = ieee80211_replace_chanctx(local, chanreq,
+ mode, curr_ctx);
+ if (IS_ERR(new_ctx))
+ return PTR_ERR(new_ctx);
}
list_add(&link->reserved_chanctx_list, &new_ctx->reserved_links);
@@ -1134,11 +1240,11 @@ ieee80211_link_chanctx_reservation_complete(struct ieee80211_link_data *link)
case NL80211_IFTYPE_MESH_POINT:
case NL80211_IFTYPE_OCB:
wiphy_work_queue(sdata->local->hw.wiphy,
- &link->csa_finalize_work);
+ &link->csa.finalize_work);
break;
case NL80211_IFTYPE_STATION:
wiphy_delayed_work_queue(sdata->local->hw.wiphy,
- &link->u.mgd.chswitch_work, 0);
+ &link->u.mgd.csa.switch_work, 0);
break;
case NL80211_IFTYPE_UNSPECIFIED:
case NL80211_IFTYPE_AP_VLAN:
@@ -1235,7 +1341,7 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link)
CHANCTX_SWMODE_REASSIGN_VIF);
if (err) {
if (ieee80211_chanctx_refcount(local, new_ctx) == 0)
- ieee80211_free_chanctx(local, new_ctx);
+ ieee80211_free_chanctx(local, new_ctx, false);
goto out;
}
@@ -1249,9 +1355,9 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link)
ieee80211_check_fast_xmit_iface(sdata);
if (ieee80211_chanctx_refcount(local, old_ctx) == 0)
- ieee80211_free_chanctx(local, old_ctx);
+ ieee80211_free_chanctx(local, old_ctx, false);
- ieee80211_recalc_chanctx_min_def(local, new_ctx, NULL);
+ ieee80211_recalc_chanctx_min_def(local, new_ctx, NULL, false);
ieee80211_recalc_smps_chanctx(local, new_ctx);
ieee80211_recalc_radar_chanctx(local, new_ctx);
@@ -1300,10 +1406,10 @@ ieee80211_link_use_reserved_assign(struct ieee80211_link_data *link)
list_del(&link->reserved_chanctx_list);
link->reserved_chanctx = NULL;
- err = ieee80211_assign_link_chanctx(link, new_ctx);
+ err = ieee80211_assign_link_chanctx(link, new_ctx, false);
if (err) {
if (ieee80211_chanctx_refcount(local, new_ctx) == 0)
- ieee80211_free_chanctx(local, new_ctx);
+ ieee80211_free_chanctx(local, new_ctx, false);
goto out;
}
@@ -1400,7 +1506,7 @@ static int ieee80211_chsw_switch_ctxs(struct ieee80211_local *local)
if (!list_empty(&ctx->replace_ctx->assigned_links))
continue;
- ieee80211_del_chanctx(local, ctx->replace_ctx);
+ ieee80211_del_chanctx(local, ctx->replace_ctx, false);
err = ieee80211_add_chanctx(local, ctx);
if (err)
goto err;
@@ -1417,7 +1523,7 @@ err:
if (!list_empty(&ctx->replace_ctx->assigned_links))
continue;
- ieee80211_del_chanctx(local, ctx);
+ ieee80211_del_chanctx(local, ctx, false);
WARN_ON(ieee80211_add_chanctx(local, ctx->replace_ctx));
}
@@ -1517,6 +1623,31 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
goto err;
}
+ /* update station rate control and min width before switch */
+ list_for_each_entry(ctx, &local->chanctx_list, list) {
+ struct ieee80211_link_data *link;
+
+ if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER)
+ continue;
+
+ if (WARN_ON(!ctx->replace_ctx)) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ list_for_each_entry(link, &ctx->reserved_links,
+ reserved_chanctx_list) {
+ if (!ieee80211_link_has_in_place_reservation(link))
+ continue;
+
+ ieee80211_chan_bw_change(local,
+ ieee80211_link_get_chanctx(link),
+ true, true);
+ }
+
+ ieee80211_recalc_chanctx_min_def(local, ctx, NULL, true);
+ }
+
/*
* All necessary vifs are ready. Perform the switch now depending on
* reservations and driver capabilities.
@@ -1584,7 +1715,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
ieee80211_recalc_chanctx_chantype(local, ctx);
ieee80211_recalc_smps_chanctx(local, ctx);
ieee80211_recalc_radar_chanctx(local, ctx);
- ieee80211_recalc_chanctx_min_def(local, ctx, NULL);
+ ieee80211_recalc_chanctx_min_def(local, ctx, NULL, false);
list_for_each_entry_safe(link, link_tmp, &ctx->reserved_links,
reserved_chanctx_list) {
@@ -1597,6 +1728,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
link->reserved_chanctx = NULL;
ieee80211_link_chanctx_reservation_complete(link);
+ ieee80211_chan_bw_change(local, ctx, false, false);
}
/*
@@ -1669,7 +1801,8 @@ err:
return err;
}
-static void __ieee80211_link_release_channel(struct ieee80211_link_data *link)
+void __ieee80211_link_release_channel(struct ieee80211_link_data *link,
+ bool skip_idle_recalc)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_bss_conf *link_conf = link->conf;
@@ -1695,9 +1828,9 @@ static void __ieee80211_link_release_channel(struct ieee80211_link_data *link)
ieee80211_link_unreserve_chanctx(link);
}
- ieee80211_assign_link_chanctx(link, NULL);
+ ieee80211_assign_link_chanctx(link, NULL, false);
if (ieee80211_chanctx_refcount(local, ctx) == 0)
- ieee80211_free_chanctx(local, ctx);
+ ieee80211_free_chanctx(local, ctx, skip_idle_recalc);
link->radar_required = false;
@@ -1706,14 +1839,17 @@ static void __ieee80211_link_release_channel(struct ieee80211_link_data *link)
ieee80211_vif_use_reserved_switch(local);
}
-int ieee80211_link_use_channel(struct ieee80211_link_data *link,
- const struct ieee80211_chan_req *chanreq,
- enum ieee80211_chanctx_mode mode)
+int _ieee80211_link_use_channel(struct ieee80211_link_data *link,
+ const struct ieee80211_chan_req *chanreq,
+ enum ieee80211_chanctx_mode mode,
+ bool assign_on_failure)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx *ctx;
u8 radar_detect_width = 0;
+ bool reserved = false;
+ int radio_idx;
int ret;
lockdep_assert_wiphy(local->hw.wiphy);
@@ -1734,15 +1870,21 @@ int ieee80211_link_use_channel(struct ieee80211_link_data *link,
link->radar_required = ret;
ret = ieee80211_check_combinations(sdata, &chanreq->oper, mode,
- radar_detect_width);
+ radar_detect_width, -1);
if (ret < 0)
goto out;
- __ieee80211_link_release_channel(link);
-
- ctx = ieee80211_find_chanctx(local, chanreq, mode);
- if (!ctx)
- ctx = ieee80211_new_chanctx(local, chanreq, mode);
+ __ieee80211_link_release_channel(link, false);
+
+ ctx = ieee80211_find_chanctx(local, link, chanreq, mode);
+ /* Note: context is now reserved */
+ if (ctx)
+ reserved = true;
+ else if (!ieee80211_find_available_radio(local, chanreq, &radio_idx))
+ ctx = ERR_PTR(-EBUSY);
+ else
+ ctx = ieee80211_new_chanctx(local, chanreq, mode,
+ assign_on_failure, radio_idx);
if (IS_ERR(ctx)) {
ret = PTR_ERR(ctx);
goto out;
@@ -1750,11 +1892,19 @@ int ieee80211_link_use_channel(struct ieee80211_link_data *link,
ieee80211_link_update_chanreq(link, chanreq);
- ret = ieee80211_assign_link_chanctx(link, ctx);
+ ret = ieee80211_assign_link_chanctx(link, ctx, assign_on_failure);
+
+ if (reserved) {
+ /* remove reservation */
+ WARN_ON(link->reserved_chanctx != ctx);
+ link->reserved_chanctx = NULL;
+ list_del(&link->reserved_chanctx_list);
+ }
+
if (ret) {
/* if assign fails refcount stays the same */
if (ieee80211_chanctx_refcount(local, ctx) == 0)
- ieee80211_free_chanctx(local, ctx);
+ ieee80211_free_chanctx(local, ctx, false);
goto out;
}
@@ -1947,7 +2097,7 @@ void ieee80211_link_release_channel(struct ieee80211_link_data *link)
lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (rcu_access_pointer(link->conf->chanctx_conf))
- __ieee80211_link_release_channel(link);
+ __ieee80211_link_release_channel(link, false);
}
void ieee80211_link_vlan_copy_chanctx(struct ieee80211_link_data *link)
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 2f68e92a7404..02b5476a4376 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -483,7 +483,6 @@ static const char *hw_flag_names[] = {
FLAG(REPORTS_LOW_ACK),
FLAG(SUPPORTS_TX_FRAG),
FLAG(SUPPORTS_TDLS_BUFFER_STA),
- FLAG(DEAUTH_NEED_MGD_TX_PREP),
FLAG(DOESNT_SUPPORT_QOS_NDP),
FLAG(BUFF_MMPDU_TXQ),
FLAG(SUPPORTS_VHT_EXT_NSS_BW),
@@ -498,6 +497,7 @@ static const char *hw_flag_names[] = {
FLAG(DETECTS_COLOR_COLLISION),
FLAG(MLO_MCAST_MULTI_LINK_TX),
FLAG(DISALLOW_PUNCTURING),
+ FLAG(DISALLOW_PUNCTURING_5GHZ),
FLAG(HANDLES_QUIET_CSA),
#undef FLAG
};
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index dce37ba8ebe3..fe868b521622 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -33,7 +33,7 @@ int drv_start(struct ieee80211_local *local)
return ret;
}
-void drv_stop(struct ieee80211_local *local)
+void drv_stop(struct ieee80211_local *local, bool suspend)
{
might_sleep();
lockdep_assert_wiphy(local->hw.wiphy);
@@ -41,8 +41,8 @@ void drv_stop(struct ieee80211_local *local)
if (WARN_ON(!local->started))
return;
- trace_drv_stop(local);
- local->ops->stop(&local->hw);
+ trace_drv_stop(local, suspend);
+ local->ops->stop(&local->hw, suspend);
trace_drv_return_void(local);
/* sync away all work on the tasklet before clearing started */
@@ -311,6 +311,18 @@ int drv_assign_vif_chanctx(struct ieee80211_local *local,
might_sleep();
lockdep_assert_wiphy(local->hw.wiphy);
+ /*
+ * We should perhaps push emulate chanctx down and only
+ * make it call ->config() when the chanctx is actually
+ * assigned here (and unassigned below), but that's yet
+ * another change to all drivers to add assign/unassign
+ * emulation callbacks. Maybe later.
+ */
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+ local->emulate_chanctx &&
+ !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
+ return 0;
+
if (!check_sdata_in_driver(sdata))
return -EIO;
@@ -338,6 +350,11 @@ void drv_unassign_vif_chanctx(struct ieee80211_local *local,
might_sleep();
lockdep_assert_wiphy(local->hw.wiphy);
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+ local->emulate_chanctx &&
+ !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
+ return;
+
if (!check_sdata_in_driver(sdata))
return;
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 5d078c0a2323..d382d9729e85 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -88,7 +88,7 @@ static inline int drv_get_et_sset_count(struct ieee80211_sub_if_data *sdata,
}
int drv_start(struct ieee80211_local *local);
-void drv_stop(struct ieee80211_local *local);
+void drv_stop(struct ieee80211_local *local, bool suspend);
#ifdef CONFIG_PM
static inline int drv_suspend(struct ieee80211_local *local,
@@ -1150,6 +1150,9 @@ drv_pre_channel_switch(struct ieee80211_sub_if_data *sdata,
if (!check_sdata_in_driver(sdata))
return -EIO;
+ if (!ieee80211_vif_link_active(&sdata->vif, ch_switch->link_id))
+ return 0;
+
trace_drv_pre_channel_switch(local, sdata, ch_switch);
if (local->ops->pre_channel_switch)
ret = local->ops->pre_channel_switch(&local->hw, &sdata->vif,
@@ -1171,6 +1174,9 @@ drv_post_channel_switch(struct ieee80211_link_data *link)
if (!check_sdata_in_driver(sdata))
return -EIO;
+ if (!ieee80211_vif_link_active(&sdata->vif, link->link_id))
+ return 0;
+
trace_drv_post_channel_switch(local, sdata);
if (local->ops->post_channel_switch)
ret = local->ops->post_channel_switch(&local->hw, &sdata->vif,
@@ -1191,6 +1197,9 @@ drv_abort_channel_switch(struct ieee80211_link_data *link)
if (!check_sdata_in_driver(sdata))
return;
+ if (!ieee80211_vif_link_active(&sdata->vif, link->link_id))
+ return;
+
trace_drv_abort_channel_switch(local, sdata);
if (local->ops->abort_channel_switch)
@@ -1210,6 +1219,9 @@ drv_channel_switch_rx_beacon(struct ieee80211_sub_if_data *sdata,
if (!check_sdata_in_driver(sdata))
return;
+ if (!ieee80211_vif_link_active(&sdata->vif, ch_switch->link_id))
+ return;
+
trace_drv_channel_switch_rx_beacon(local, sdata, ch_switch);
if (local->ops->channel_switch_rx_beacon)
local->ops->channel_switch_rx_beacon(&local->hw, &sdata->vif,
diff --git a/net/mac80211/drop.h b/net/mac80211/drop.h
index 12a6f0e9eca6..59e3ec4dc960 100644
--- a/net/mac80211/drop.h
+++ b/net/mac80211/drop.h
@@ -2,7 +2,7 @@
/*
* mac80211 drop reason list
*
- * Copyright (C) 2023 Intel Corporation
+ * Copyright (C) 2023-2024 Intel Corporation
*/
#ifndef MAC80211_DROP_H
@@ -66,6 +66,7 @@ typedef unsigned int __bitwise ieee80211_rx_result;
R(RX_DROP_U_UNEXPECTED_STA_4ADDR) \
R(RX_DROP_U_UNEXPECTED_VLAN_MCAST) \
R(RX_DROP_U_NOT_PORT_CONTROL) \
+ R(RX_DROP_U_UNKNOWN_ACTION_REJECTED) \
/* this line for the trailing \ - add before this */
/* having two enums allows for checking ieee80211_rx_result use with sparse */
diff --git a/net/mac80211/he.c b/net/mac80211/he.c
index 9f5ffdc9db28..ecbb042dd043 100644
--- a/net/mac80211/he.c
+++ b/net/mac80211/he.c
@@ -230,15 +230,21 @@ ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif,
if (!he_spr_ie_elem)
return;
+
+ he_obss_pd->sr_ctrl = he_spr_ie_elem->he_sr_control;
data = he_spr_ie_elem->optional;
if (he_spr_ie_elem->he_sr_control &
IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT)
- data++;
+ he_obss_pd->non_srg_max_offset = *data++;
+
if (he_spr_ie_elem->he_sr_control &
IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT) {
- he_obss_pd->max_offset = *data++;
he_obss_pd->min_offset = *data++;
+ he_obss_pd->max_offset = *data++;
+ memcpy(he_obss_pd->bss_color_bitmap, data, 8);
+ data += 8;
+ memcpy(he_obss_pd->partial_bssid_bitmap, data, 8);
he_obss_pd->enable = true;
}
}
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index c3330aea4da3..79caeb485fd5 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -475,7 +475,7 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
else if (sdata->vif.type == NL80211_IFTYPE_STATION)
- memcpy(mgmt->bssid, sdata->deflink.u.mgd.bssid, ETH_ALEN);
+ memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
memcpy(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN);
@@ -580,7 +580,7 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
/* we'll do more on status of this frame */
info = IEEE80211_SKB_CB(skb);
info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
- /* we have 12 bits, and need 6: link_id 4, smps 2 */
+ /* we have 13 bits, and need 6: link_id 4, smps 2 */
info->status_data = IEEE80211_STATUS_TYPE_SMPS |
u16_encode_bits(status_link_id << 2 | smps,
IEEE80211_STATUS_SUBDATA_MASK);
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 7ace5cdc6c26..3f74bbceeca5 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -9,7 +9,7 @@
* Copyright 2009, Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2016 Intel Deutschland GmbH
- * Copyright(c) 2018-2023 Intel Corporation
+ * Copyright(c) 2018-2024 Intel Corporation
*/
#include <linux/delay.h>
@@ -533,12 +533,12 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed)
IEEE80211_PRIVACY(ifibss->privacy));
/* XXX: should not really modify cfg80211 data */
if (cbss) {
- cbss->channel = sdata->deflink.csa_chanreq.oper.chan;
+ cbss->channel = sdata->deflink.csa.chanreq.oper.chan;
cfg80211_put_bss(sdata->local->hw.wiphy, cbss);
}
}
- ifibss->chandef = sdata->deflink.csa_chanreq.oper;
+ ifibss->chandef = sdata->deflink.csa.chanreq.oper;
/* generate the beacon */
return ieee80211_ibss_csa_beacon(sdata, NULL, changed);
@@ -785,7 +785,8 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
err = ieee80211_parse_ch_switch_ie(sdata, elems,
ifibss->chandef.chan->band,
vht_cap_info, &conn,
- ifibss->bssid, &csa_ie);
+ ifibss->bssid, false,
+ &csa_ie);
/* can't switch to destination channel, fail */
if (err < 0)
goto disconnect;
@@ -1745,7 +1746,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
IEEE80211_CHANCTX_SHARED : IEEE80211_CHANCTX_EXCLUSIVE;
ret = ieee80211_check_combinations(sdata, &params->chandef, chanmode,
- radar_detect_width);
+ radar_detect_width, -1);
if (ret < 0)
return ret;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index bd507d6b65e3..a3485e4c6132 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -26,6 +26,7 @@
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <linux/rbtree.h>
+#include <kunit/visibility.h>
#include <net/ieee80211_radiotap.h>
#include <net/cfg80211.h>
#include <net/mac80211.h>
@@ -89,7 +90,8 @@ enum ieee80211_status_data {
IEEE80211_STATUS_TYPE_MASK = 0x00f,
IEEE80211_STATUS_TYPE_INVALID = 0,
IEEE80211_STATUS_TYPE_SMPS = 1,
- IEEE80211_STATUS_SUBDATA_MASK = 0xff0,
+ IEEE80211_STATUS_TYPE_NEG_TTLM = 2,
+ IEEE80211_STATUS_SUBDATA_MASK = 0x1ff0,
};
static inline bool
@@ -595,6 +597,7 @@ struct ieee80211_if_managed {
/* TID-to-link mapping support */
struct wiphy_delayed_work ttlm_work;
struct ieee80211_adv_ttlm_info ttlm_info;
+ struct wiphy_work teardown_ttlm_work;
/* dialog token enumerator for neg TTLM request */
u8 dialog_token_alloc;
@@ -684,7 +687,7 @@ struct mesh_csa_settings {
};
/**
- * struct mesh_table
+ * struct mesh_table - mesh hash table
*
* @known_gates: list of known mesh gates and their mpaths by the station. The
* gate's mpath may or may not be resolved and active.
@@ -972,9 +975,15 @@ struct ieee80211_link_data_managed {
bool disable_wmm_tracking;
bool operating_11g_mode;
- bool csa_waiting_bcn;
- bool csa_ignored_same_chan;
- struct wiphy_delayed_work chswitch_work;
+ struct {
+ struct wiphy_delayed_work switch_work;
+ struct cfg80211_chan_def ap_chandef;
+ struct ieee80211_parsed_tpe tpe;
+ unsigned long time;
+ bool waiting_bcn;
+ bool ignored_same_chan;
+ bool blocked_tx;
+ } csa;
struct wiphy_work request_smps_work;
/* used to reconfigure hardware SM PS */
@@ -1033,11 +1042,13 @@ struct ieee80211_link_data {
struct ieee80211_key __rcu *default_mgmt_key;
struct ieee80211_key __rcu *default_beacon_key;
- struct wiphy_work csa_finalize_work;
bool operating_11g_mode;
- struct ieee80211_chan_req csa_chanreq;
+ struct {
+ struct wiphy_work finalize_work;
+ struct ieee80211_chan_req chanreq;
+ } csa;
struct wiphy_work color_change_finalize_work;
struct delayed_work color_collision_detect_work;
@@ -1056,7 +1067,6 @@ struct ieee80211_link_data {
int ap_power_level; /* in dBm */
bool radar_required;
- struct wiphy_delayed_work dfs_cac_timer_work;
union {
struct ieee80211_link_data_managed mgd;
@@ -1092,7 +1102,7 @@ struct ieee80211_sub_if_data {
unsigned long state;
- bool csa_blocked_tx;
+ bool csa_blocked_queues;
char name[IFNAMSIZ];
@@ -1155,10 +1165,14 @@ struct ieee80211_sub_if_data {
struct ieee80211_link_data deflink;
struct ieee80211_link_data __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS];
+ struct wiphy_delayed_work dfs_cac_timer_work;
+
/* for ieee80211_set_active_links_async() */
struct wiphy_work activate_links_work;
u16 desired_active_links;
+ u16 restart_active_links;
+
#ifdef CONFIG_MAC80211_DEBUGFS
struct {
struct dentry *subdir_stations;
@@ -1703,7 +1717,6 @@ struct ieee802_11_elems {
const struct ieee80211_he_spr *he_spr;
const struct ieee80211_mu_edca_param_set *mu_edca_param_set;
const struct ieee80211_he_6ghz_capa *he_6ghz_capa;
- const struct ieee80211_tx_pwr_env *tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT];
const u8 *uora_element;
const u8 *mesh_id;
const u8 *peering;
@@ -1741,6 +1754,10 @@ struct ieee802_11_elems {
const struct ieee80211_bandwidth_indication *bandwidth_indication;
const struct ieee80211_ttlm_elem *ttlm[IEEE80211_TTLM_MAX_CNT];
+ /* not the order in the psd values is per element, not per chandef */
+ struct ieee80211_parsed_tpe tpe;
+ struct ieee80211_parsed_tpe csa_tpe;
+
/* length of them, respectively */
u8 ext_capab_len;
u8 ssid_len;
@@ -1759,8 +1776,6 @@ struct ieee802_11_elems {
u8 perr_len;
u8 country_elem_len;
u8 bssid_index_len;
- u8 tx_pwr_env_len[IEEE80211_TPE_MAX_IE_COUNT];
- u8 tx_pwr_env_num;
u8 eht_cap_len;
/* mult-link element can be de-fragmented and thus u8 is not sufficient */
@@ -1808,6 +1823,9 @@ ieee80211_have_rx_timestamp(struct ieee80211_rx_status *status)
void ieee80211_vif_inc_num_mcast(struct ieee80211_sub_if_data *sdata);
void ieee80211_vif_dec_num_mcast(struct ieee80211_sub_if_data *sdata);
+void ieee80211_vif_block_queues_csa(struct ieee80211_sub_if_data *sdata);
+void ieee80211_vif_unblock_queues_csa(struct ieee80211_sub_if_data *sdata);
+
/* This function returns the number of multicast stations connected to this
* interface. It returns -1 if that number is not tracked, that is for netdevs
* not in AP or AP_VLAN mode or when using 4addr.
@@ -1840,6 +1858,8 @@ void ieee80211_link_info_change_notify(struct ieee80211_sub_if_data *sdata,
void ieee80211_configure_filter(struct ieee80211_local *local);
u64 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata);
+void ieee80211_handle_queued_frames(struct ieee80211_local *local);
+
u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local);
int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb,
u64 *cookie, gfp_t gfp);
@@ -1963,6 +1983,7 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local);
void ieee80211_offchannel_return(struct ieee80211_local *local);
void ieee80211_roc_setup(struct ieee80211_local *local);
void ieee80211_start_next_roc(struct ieee80211_local *local);
+void ieee80211_reconfig_roc(struct ieee80211_local *local);
void ieee80211_roc_purge(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata);
int ieee80211_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev,
@@ -2137,9 +2158,21 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
const struct ieee80211_vht_cap *vht_cap_ie2,
struct link_sta_info *link_sta);
enum ieee80211_sta_rx_bandwidth
-ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta);
+_ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta,
+ struct cfg80211_chan_def *chandef);
+static inline enum ieee80211_sta_rx_bandwidth
+ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta)
+{
+ return _ieee80211_sta_cap_rx_bw(link_sta, NULL);
+}
enum ieee80211_sta_rx_bandwidth
-ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta);
+_ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta,
+ struct cfg80211_chan_def *chandef);
+static inline enum ieee80211_sta_rx_bandwidth
+ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta)
+{
+ return _ieee80211_sta_cur_vht_bw(link_sta, NULL);
+}
void ieee80211_sta_init_nss(struct link_sta_info *link_sta);
enum ieee80211_sta_rx_bandwidth
ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width);
@@ -2197,6 +2230,8 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
* @conn: contains information about own capabilities and restrictions
* to decide which channel switch announcements can be accepted
* @bssid: the currently connected bssid (for reporting)
+ * @unprot_action: whether the frame was an unprotected frame or not,
+ * used for reporting
* @csa_ie: parsed 802.11 csa elements on count, mode, chandef and mesh ttl.
* All of them will be filled with if success only.
* Return: 0 on success, <0 on error and >0 if there is nothing to parse.
@@ -2206,12 +2241,12 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
enum nl80211_band current_band,
u32 vht_cap_info,
struct ieee80211_conn_settings *conn,
- u8 *bssid,
+ u8 *bssid, bool unprot_action,
struct ieee80211_csa_ie *csa_ie);
/* Suspend/resume and hw reconfiguration */
int ieee80211_reconfig(struct ieee80211_local *local);
-void ieee80211_stop_device(struct ieee80211_local *local);
+void ieee80211_stop_device(struct ieee80211_local *local, bool suspend);
int __ieee80211_suspend(struct ieee80211_hw *hw,
struct cfg80211_wowlan *wowlan);
@@ -2238,6 +2273,7 @@ int ieee80211_frame_duration(enum nl80211_band band, size_t len,
void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
struct ieee80211_tx_queue_params *qparam,
int ac);
+void ieee80211_clear_tpe(struct ieee80211_parsed_tpe *tpe);
void ieee80211_set_wmm_default(struct ieee80211_link_data *link,
bool bss_notify, bool enable_qos);
void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
@@ -2549,9 +2585,19 @@ bool ieee80211_chanreq_identical(const struct ieee80211_chan_req *a,
const struct ieee80211_chan_req *b);
int __must_check
+_ieee80211_link_use_channel(struct ieee80211_link_data *link,
+ const struct ieee80211_chan_req *req,
+ enum ieee80211_chanctx_mode mode,
+ bool assign_on_failure);
+
+static inline int __must_check
ieee80211_link_use_channel(struct ieee80211_link_data *link,
const struct ieee80211_chan_req *req,
- enum ieee80211_chanctx_mode mode);
+ enum ieee80211_chanctx_mode mode)
+{
+ return _ieee80211_link_use_channel(link, req, mode, false);
+}
+
int __must_check
ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
const struct ieee80211_chan_req *req,
@@ -2565,6 +2611,8 @@ int __must_check
ieee80211_link_change_chanreq(struct ieee80211_link_data *link,
const struct ieee80211_chan_req *req,
u64 *changed);
+void __ieee80211_link_release_channel(struct ieee80211_link_data *link,
+ bool skip_idle_recalc);
void ieee80211_link_release_channel(struct ieee80211_link_data *link);
void ieee80211_link_vlan_copy_chanctx(struct ieee80211_link_data *link);
void ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link,
@@ -2576,7 +2624,8 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
struct ieee80211_chanctx *chanctx);
void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
- struct ieee80211_link_data *rsvd_for);
+ struct ieee80211_link_data *rsvd_for,
+ bool check_reserved);
bool ieee80211_is_radar_required(struct ieee80211_local *local);
void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work);
@@ -2591,8 +2640,9 @@ void ieee80211_recalc_dtim(struct ieee80211_local *local,
int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
const struct cfg80211_chan_def *chandef,
enum ieee80211_chanctx_mode chanmode,
- u8 radar_detect);
-int ieee80211_max_num_channels(struct ieee80211_local *local);
+ u8 radar_detect, int radio_idx);
+int ieee80211_max_num_channels(struct ieee80211_local *local, int radio_idx);
+u32 ieee80211_get_radio_mask(struct wiphy *wiphy, struct net_device *dev);
void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx);
@@ -2664,6 +2714,11 @@ void ieee80211_remove_wbrf(struct ieee80211_local *local, struct cfg80211_chan_d
#define VISIBLE_IF_MAC80211_KUNIT
ieee80211_rx_result
ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx);
+int ieee80211_calc_chandef_subchan_offset(const struct cfg80211_chan_def *ap,
+ u8 n_partial_subchans);
+void ieee80211_rearrange_tpe_psd(struct ieee80211_parsed_tpe_psd *psd,
+ const struct cfg80211_chan_def *ap,
+ const struct cfg80211_chan_def *used);
#else
#define EXPORT_SYMBOL_IF_MAC80211_KUNIT(sym)
#define VISIBLE_IF_MAC80211_KUNIT static
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 395de62d9cb2..b4ad66af3af3 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -397,7 +397,7 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
}
}
- return ieee80211_check_combinations(sdata, NULL, 0, 0);
+ return ieee80211_check_combinations(sdata, NULL, 0, 0, -1);
}
static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata,
@@ -543,18 +543,14 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
sdata->vif.bss_conf.csa_active = false;
if (sdata->vif.type == NL80211_IFTYPE_STATION)
- sdata->deflink.u.mgd.csa_waiting_bcn = false;
- if (sdata->csa_blocked_tx) {
- ieee80211_wake_vif_queues(local, sdata,
- IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->csa_blocked_tx = false;
- }
+ sdata->deflink.u.mgd.csa.waiting_bcn = false;
+ ieee80211_vif_unblock_queues_csa(sdata);
- wiphy_work_cancel(local->hw.wiphy, &sdata->deflink.csa_finalize_work);
+ wiphy_work_cancel(local->hw.wiphy, &sdata->deflink.csa.finalize_work);
wiphy_work_cancel(local->hw.wiphy,
&sdata->deflink.color_change_finalize_work);
wiphy_delayed_work_cancel(local->hw.wiphy,
- &sdata->deflink.dfs_cac_timer_work);
+ &sdata->dfs_cac_timer_work);
if (sdata->wdev.cac_started) {
chandef = sdata->vif.bss_conf.chanreq.oper;
@@ -686,14 +682,19 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
ieee80211_del_virtual_monitor(local);
ieee80211_recalc_idle(local);
+ ieee80211_recalc_offload(local);
if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE))
break;
fallthrough;
default:
- if (going_down)
- drv_remove_interface(local, sdata);
+ if (!going_down)
+ break;
+ drv_remove_interface(local, sdata);
+
+ /* Clear private driver data to prevent reuse */
+ memset(sdata->vif.drv_priv, 0, local->hw.vif_data_size);
}
ieee80211_recalc_ps(local);
@@ -702,7 +703,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
wiphy_delayed_work_flush(local->hw.wiphy, &local->scan_work);
if (local->open_count == 0) {
- ieee80211_stop_device(local);
+ ieee80211_stop_device(local, false);
/* no reconfiguring after stop! */
return;
@@ -815,12 +816,6 @@ static void ieee80211_uninit(struct net_device *dev)
ieee80211_teardown_sdata(IEEE80211_DEV_TO_SUB_IF(dev));
}
-static void
-ieee80211_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
-{
- dev_fetch_sw_netstats(stats, dev->tstats);
-}
-
static int ieee80211_netdev_setup_tc(struct net_device *dev,
enum tc_setup_type type, void *type_data)
{
@@ -837,7 +832,6 @@ static const struct net_device_ops ieee80211_dataif_ops = {
.ndo_start_xmit = ieee80211_subif_start_xmit,
.ndo_set_rx_mode = ieee80211_set_multicast_list,
.ndo_set_mac_address = ieee80211_change_mac,
- .ndo_get_stats64 = ieee80211_get_stats64,
.ndo_setup_tc = ieee80211_netdev_setup_tc,
};
@@ -877,7 +871,6 @@ static const struct net_device_ops ieee80211_monitorif_ops = {
.ndo_set_rx_mode = ieee80211_set_multicast_list,
.ndo_set_mac_address = ieee80211_change_mac,
.ndo_select_queue = ieee80211_monitor_select_queue,
- .ndo_get_stats64 = ieee80211_get_stats64,
};
static int ieee80211_netdev_fill_forward_path(struct net_device_path_ctx *ctx,
@@ -945,7 +938,6 @@ static const struct net_device_ops ieee80211_dataif_8023_ops = {
.ndo_start_xmit = ieee80211_subif_start_xmit_8023,
.ndo_set_rx_mode = ieee80211_set_multicast_list,
.ndo_set_mac_address = ieee80211_change_mac,
- .ndo_get_stats64 = ieee80211_get_stats64,
.ndo_fill_forward_path = ieee80211_netdev_fill_forward_path,
.ndo_setup_tc = ieee80211_netdev_setup_tc,
};
@@ -1121,9 +1113,6 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
struct ieee80211_sub_if_data *sdata;
int ret;
- if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
- return 0;
-
ASSERT_RTNL();
lockdep_assert_wiphy(local->hw.wiphy);
@@ -1145,11 +1134,13 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
ieee80211_set_default_queues(sdata);
- ret = drv_add_interface(local, sdata);
- if (WARN_ON(ret)) {
- /* ok .. stupid driver, it asked for this! */
- kfree(sdata);
- return ret;
+ if (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) {
+ ret = drv_add_interface(local, sdata);
+ if (WARN_ON(ret)) {
+ /* ok .. stupid driver, it asked for this! */
+ kfree(sdata);
+ return ret;
+ }
}
set_bit(SDATA_STATE_RUNNING, &sdata->state);
@@ -1187,9 +1178,6 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata;
- if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
- return;
-
ASSERT_RTNL();
lockdep_assert_wiphy(local->hw.wiphy);
@@ -1209,7 +1197,8 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
ieee80211_link_release_channel(&sdata->deflink);
- drv_remove_interface(local, sdata);
+ if (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
+ drv_remove_interface(local, sdata);
kfree(sdata);
}
@@ -1448,7 +1437,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
drv_remove_interface(local, sdata);
err_stop:
if (!local->open_count)
- drv_stop(local);
+ drv_stop(local, false);
err_del_bss:
sdata->bss = NULL;
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
@@ -1458,11 +1447,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
return res;
}
-static void ieee80211_if_free(struct net_device *dev)
-{
- free_percpu(dev->tstats);
-}
-
static void ieee80211_if_setup(struct net_device *dev)
{
ether_setup(dev);
@@ -1470,7 +1454,6 @@ static void ieee80211_if_setup(struct net_device *dev)
dev->priv_flags |= IFF_NO_QUEUE;
dev->netdev_ops = &ieee80211_dataif_ops;
dev->needs_free_netdev = true;
- dev->priv_destructor = ieee80211_if_free;
}
static void ieee80211_iface_process_skb(struct ieee80211_local *local,
@@ -1699,8 +1682,13 @@ static void ieee80211_activate_links_work(struct wiphy *wiphy,
struct ieee80211_sub_if_data *sdata =
container_of(work, struct ieee80211_sub_if_data,
activate_links_work);
+ struct ieee80211_local *local = wiphy_priv(wiphy);
+
+ if (local->in_reconfig)
+ return;
ieee80211_set_active_links(&sdata->vif, sdata->desired_active_links);
+ sdata->desired_active_links = 0;
}
/*
@@ -1741,6 +1729,8 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
wiphy_work_init(&sdata->work, ieee80211_iface_work);
wiphy_work_init(&sdata->activate_links_work,
ieee80211_activate_links_work);
+ wiphy_delayed_work_init(&sdata->dfs_cac_timer_work,
+ ieee80211_dfs_cac_timer_work);
switch (type) {
case NL80211_IFTYPE_P2P_GO:
@@ -2096,11 +2086,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
dev_net_set(ndev, wiphy_net(local->hw.wiphy));
- ndev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!ndev->tstats) {
- free_netdev(ndev);
- return -ENOMEM;
- }
+ ndev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
ndev->needed_headroom = local->tx_headroom +
4*6 /* four MAC addresses */
@@ -2113,7 +2099,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ret = dev_alloc_name(ndev, ndev->name);
if (ret < 0) {
- ieee80211_if_free(ndev);
free_netdev(ndev);
return ret;
}
@@ -2358,3 +2343,26 @@ void ieee80211_vif_dec_num_mcast(struct ieee80211_sub_if_data *sdata)
else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
atomic_dec(&sdata->u.vlan.num_mcast_sta);
}
+
+void ieee80211_vif_block_queues_csa(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+
+ if (ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA))
+ return;
+
+ ieee80211_stop_vif_queues(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
+ sdata->csa_blocked_queues = true;
+}
+
+void ieee80211_vif_unblock_queues_csa(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+
+ if (sdata->csa_blocked_queues) {
+ ieee80211_wake_vif_queues(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
+ sdata->csa_blocked_queues = false;
+ }
+}
diff --git a/net/mac80211/link.c b/net/mac80211/link.c
index 685ec66b4264..1a211b8d4057 100644
--- a/net/mac80211/link.c
+++ b/net/mac80211/link.c
@@ -37,7 +37,7 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
link_conf->link_id = link_id;
link_conf->vif = &sdata->vif;
- wiphy_work_init(&link->csa_finalize_work,
+ wiphy_work_init(&link->csa.finalize_work,
ieee80211_csa_finalize_work);
wiphy_work_init(&link->color_change_finalize_work,
ieee80211_color_change_finalize_work);
@@ -45,8 +45,6 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
ieee80211_color_collision_detection_work);
INIT_LIST_HEAD(&link->assigned_chanctx_list);
INIT_LIST_HEAD(&link->reserved_chanctx_list);
- wiphy_delayed_work_init(&link->dfs_cac_timer_work,
- ieee80211_dfs_cac_timer_work);
if (!deflink) {
switch (sdata->vif.type) {
@@ -74,7 +72,9 @@ void ieee80211_link_stop(struct ieee80211_link_data *link)
cancel_delayed_work_sync(&link->color_collision_detect_work);
wiphy_work_cancel(link->sdata->local->hw.wiphy,
- &link->csa_finalize_work);
+ &link->color_change_finalize_work);
+ wiphy_work_cancel(link->sdata->local->hw.wiphy,
+ &link->csa.finalize_work);
ieee80211_link_release_channel(link);
}
@@ -358,7 +358,19 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata,
ieee80211_teardown_tdls_peers(link);
- ieee80211_link_release_channel(link);
+ __ieee80211_link_release_channel(link, true);
+
+ /*
+ * If CSA is (still) active while the link is deactivated,
+ * just schedule the channel switch work for the time we
+ * had previously calculated, and we'll take the process
+ * from there.
+ */
+ if (link->conf->csa_active)
+ wiphy_delayed_work_queue(local->hw.wiphy,
+ &link->u.mgd.csa.switch_work,
+ link->u.mgd.csa.time -
+ jiffies);
}
list_for_each_entry(sta, &local->sta_list, list) {
@@ -404,9 +416,24 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata,
link = sdata_dereference(sdata->link[link_id], sdata);
- ret = ieee80211_link_use_channel(link,
- &link->conf->chanreq,
- IEEE80211_CHANCTX_SHARED);
+ /*
+ * This call really should not fail. Unfortunately, it appears
+ * that this may happen occasionally with some drivers. Should
+ * it happen, we are stuck in a bad place as going backwards is
+ * not really feasible.
+ *
+ * So lets just tell link_use_channel that it must not fail to
+ * assign the channel context (from mac80211's perspective) and
+ * assume the driver is going to trigger a recovery flow if it
+ * had a failure.
+ * That really is not great nor guaranteed to work. But at least
+ * the internal mac80211 state remains consistent and there is
+ * a chance that we can recover.
+ */
+ ret = _ieee80211_link_use_channel(link,
+ &link->conf->chanreq,
+ IEEE80211_CHANCTX_SHARED,
+ true);
WARN_ON_ONCE(ret);
ieee80211_mgd_set_link_qos_params(link);
@@ -450,10 +477,13 @@ int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links)
if (WARN_ON(!active_links))
return -EINVAL;
+ old_active = sdata->vif.active_links;
+ if (old_active == active_links)
+ return 0;
+
if (!drv_can_activate_links(local, sdata, active_links))
return -EINVAL;
- old_active = sdata->vif.active_links;
if (old_active & active_links) {
/*
* if there's at least one link that stays active across
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 4eaea0a9975b..a3104b6ea6f0 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -148,7 +148,7 @@ static u32 ieee80211_calc_hw_conf_chan(struct ieee80211_local *local,
offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
/* force it also for scanning, since drivers might config differently */
- if (offchannel_flag || local->scanning ||
+ if (offchannel_flag || local->scanning || local->in_reconfig ||
!cfg80211_chandef_identical(&local->hw.conf.chandef, &chandef)) {
local->hw.conf.chandef = chandef;
changed |= IEEE80211_CONF_CHANGE_CHANNEL;
@@ -337,6 +337,8 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
might_sleep();
+ WARN_ON_ONCE(ieee80211_vif_is_mld(&sdata->vif));
+
if (!changed || sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
return;
@@ -369,7 +371,6 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
if (changed & ~BSS_CHANGED_VIF_CFG_FLAGS) {
u64 ch = changed & ~BSS_CHANGED_VIF_CFG_FLAGS;
- /* FIXME: should be for each link */
trace_drv_link_info_changed(local, sdata, &sdata->vif.bss_conf,
changed);
if (local->ops->link_info_changed)
@@ -423,9 +424,9 @@ u64 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata)
BSS_CHANGED_ERP_SLOT;
}
-static void ieee80211_tasklet_handler(struct tasklet_struct *t)
+/* context: requires softirqs disabled */
+void ieee80211_handle_queued_frames(struct ieee80211_local *local)
{
- struct ieee80211_local *local = from_tasklet(local, t, tasklet);
struct sk_buff *skb;
while ((skb = skb_dequeue(&local->skb_queue)) ||
@@ -450,6 +451,13 @@ static void ieee80211_tasklet_handler(struct tasklet_struct *t)
}
}
+static void ieee80211_tasklet_handler(struct tasklet_struct *t)
+{
+ struct ieee80211_local *local = from_tasklet(local, t, tasklet);
+
+ ieee80211_handle_queued_frames(local);
+}
+
static void ieee80211_restart_work(struct work_struct *work)
{
struct ieee80211_local *local =
@@ -1083,6 +1091,27 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
return 0;
}
+static bool
+ieee80211_ifcomb_check(const struct ieee80211_iface_combination *c, int n_comb)
+{
+ int i, j;
+
+ for (i = 0; i < n_comb; i++, c++) {
+ /* DFS is not supported with multi-channel combinations yet */
+ if (c->radar_detect_widths &&
+ c->num_different_channels > 1)
+ return false;
+
+ /* mac80211 doesn't support more than one IBSS interface */
+ for (j = 0; j < c->n_limits; j++)
+ if ((c->limits[j].types & BIT(NL80211_IFTYPE_ADHOC)) &&
+ c->limits[j].max > 1)
+ return false;
+ }
+
+ return true;
+}
+
int ieee80211_register_hw(struct ieee80211_hw *hw)
{
struct ieee80211_local *local = hw_to_local(hw);
@@ -1153,9 +1182,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
if (WARN_ON(!ieee80211_hw_check(hw, AP_LINK_PS)))
return -EINVAL;
-
- if (WARN_ON(ieee80211_hw_check(hw, DEAUTH_NEED_MGD_TX_PREP)))
- return -EINVAL;
}
#ifdef CONFIG_PM
@@ -1172,17 +1198,20 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
if (comb->num_different_channels > 1)
return -EINVAL;
}
- } else {
- /* DFS is not supported with multi-channel combinations yet */
- for (i = 0; i < local->hw.wiphy->n_iface_combinations; i++) {
- const struct ieee80211_iface_combination *comb;
+ }
- comb = &local->hw.wiphy->iface_combinations[i];
+ if (hw->wiphy->n_radio) {
+ for (i = 0; i < hw->wiphy->n_radio; i++) {
+ const struct wiphy_radio *radio = &hw->wiphy->radio[i];
- if (comb->radar_detect_widths &&
- comb->num_different_channels > 1)
+ if (!ieee80211_ifcomb_check(radio->iface_combinations,
+ radio->n_iface_combinations))
return -EINVAL;
}
+ } else {
+ if (!ieee80211_ifcomb_check(hw->wiphy->iface_combinations,
+ hw->wiphy->n_iface_combinations))
+ return -EINVAL;
}
/* Only HW csum features are currently compatible with mac80211 */
@@ -1312,18 +1341,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR);
hw->wiphy->software_iftypes |= BIT(NL80211_IFTYPE_MONITOR);
- /* mac80211 doesn't support more than one IBSS interface right now */
- for (i = 0; i < hw->wiphy->n_iface_combinations; i++) {
- const struct ieee80211_iface_combination *c;
- int j;
-
- c = &hw->wiphy->iface_combinations[i];
-
- for (j = 0; j < c->n_limits; j++)
- if ((c->limits[j].types & BIT(NL80211_IFTYPE_ADHOC)) &&
- c->limits[j].max > 1)
- return -EINVAL;
- }
local->int_scan_req = kzalloc(sizeof(*local->int_scan_req) +
sizeof(void *) * channels, GFP_KERNEL);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index cbc9b5e40cb3..f94e4be0be12 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1312,7 +1312,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
memset(&params, 0, sizeof(params));
err = ieee80211_parse_ch_switch_ie(sdata, elems, sband->band,
vht_cap_info, &conn,
- sdata->vif.addr,
+ sdata->vif.addr, false,
&csa_ie);
if (err < 0)
return false;
@@ -1776,6 +1776,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
ifmsh->last_preq = jiffies;
ifmsh->next_perr = jiffies;
ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_NONE;
+ ifmsh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE;
/* Allocate all mesh structures when creating the first mesh interface. */
if (!mesh_allocated)
ieee80211s_init();
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index a6b62169f084..c0a5c75cddcb 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -1017,10 +1017,23 @@ void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata,
*/
void mesh_path_flush_pending(struct mesh_path *mpath)
{
+ struct ieee80211_sub_if_data *sdata = mpath->sdata;
+ struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+ struct mesh_preq_queue *preq, *tmp;
struct sk_buff *skb;
while ((skb = skb_dequeue(&mpath->frame_queue)) != NULL)
mesh_path_discard_frame(mpath->sdata, skb);
+
+ spin_lock_bh(&ifmsh->mesh_preq_queue_lock);
+ list_for_each_entry_safe(preq, tmp, &ifmsh->preq_queue.list, list) {
+ if (ether_addr_equal(mpath->dst, preq->dst)) {
+ list_del(&preq->list);
+ kfree(preq);
+ --ifmsh->preq_queue_len;
+ }
+ }
+ spin_unlock_bh(&ifmsh->mesh_preq_queue_lock);
}
/**
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 3bbb216a0fc8..4779a18ab75d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -599,14 +599,225 @@ static bool ieee80211_chandef_usable(struct ieee80211_sub_if_data *sdata,
ieee80211_hw_check(&sdata->local->hw, DISALLOW_PUNCTURING))
return false;
+ if (chandef->punctured && chandef->chan->band == NL80211_BAND_5GHZ &&
+ ieee80211_hw_check(&sdata->local->hw, DISALLOW_PUNCTURING_5GHZ))
+ return false;
+
return true;
}
+static int ieee80211_chandef_num_subchans(const struct cfg80211_chan_def *c)
+{
+ if (c->width == NL80211_CHAN_WIDTH_80P80)
+ return 4 + 4;
+
+ return nl80211_chan_width_to_mhz(c->width) / 20;
+}
+
+static int ieee80211_chandef_num_widths(const struct cfg80211_chan_def *c)
+{
+ switch (c->width) {
+ case NL80211_CHAN_WIDTH_20:
+ case NL80211_CHAN_WIDTH_20_NOHT:
+ return 1;
+ case NL80211_CHAN_WIDTH_40:
+ return 2;
+ case NL80211_CHAN_WIDTH_80P80:
+ case NL80211_CHAN_WIDTH_80:
+ return 3;
+ case NL80211_CHAN_WIDTH_160:
+ return 4;
+ case NL80211_CHAN_WIDTH_320:
+ return 5;
+ default:
+ WARN_ON(1);
+ return 0;
+ }
+}
+
+VISIBLE_IF_MAC80211_KUNIT int
+ieee80211_calc_chandef_subchan_offset(const struct cfg80211_chan_def *ap,
+ u8 n_partial_subchans)
+{
+ int n = ieee80211_chandef_num_subchans(ap);
+ struct cfg80211_chan_def tmp = *ap;
+ int offset = 0;
+
+ /*
+ * Given a chandef (in this context, it's the AP's) and a number
+ * of subchannels that we want to look at ('n_partial_subchans'),
+ * calculate the offset in number of subchannels between the full
+ * and the subset with the desired width.
+ */
+
+ /* same number of subchannels means no offset, obviously */
+ if (n == n_partial_subchans)
+ return 0;
+
+ /* don't WARN - misconfigured APs could cause this if their N > width */
+ if (n < n_partial_subchans)
+ return 0;
+
+ while (ieee80211_chandef_num_subchans(&tmp) > n_partial_subchans) {
+ u32 prev = tmp.center_freq1;
+
+ ieee80211_chandef_downgrade(&tmp, NULL);
+
+ /*
+ * if center_freq moved up, half the original channels
+ * are gone now but were below, so increase offset
+ */
+ if (prev < tmp.center_freq1)
+ offset += ieee80211_chandef_num_subchans(&tmp);
+ }
+
+ /*
+ * 80+80 with secondary 80 below primary - four subchannels for it
+ * (we cannot downgrade *to* 80+80, so no need to consider 'tmp')
+ */
+ if (ap->width == NL80211_CHAN_WIDTH_80P80 &&
+ ap->center_freq2 < ap->center_freq1)
+ offset += 4;
+
+ return offset;
+}
+EXPORT_SYMBOL_IF_MAC80211_KUNIT(ieee80211_calc_chandef_subchan_offset);
+
+VISIBLE_IF_MAC80211_KUNIT void
+ieee80211_rearrange_tpe_psd(struct ieee80211_parsed_tpe_psd *psd,
+ const struct cfg80211_chan_def *ap,
+ const struct cfg80211_chan_def *used)
+{
+ u8 needed = ieee80211_chandef_num_subchans(used);
+ u8 have = ieee80211_chandef_num_subchans(ap);
+ u8 tmp[IEEE80211_TPE_PSD_ENTRIES_320MHZ];
+ u8 offset;
+
+ if (!psd->valid)
+ return;
+
+ /* if N is zero, all defaults were used, no point in rearranging */
+ if (!psd->n)
+ goto out;
+
+ BUILD_BUG_ON(sizeof(tmp) != sizeof(psd->power));
+
+ /*
+ * This assumes that 'N' is consistent with the HE channel, as
+ * it should be (otherwise the AP is broken).
+ *
+ * In psd->power we have values in the order 0..N, 0..K, where
+ * N+K should cover the entire channel per 'ap', but even if it
+ * doesn't then we've pre-filled 'unlimited' as defaults.
+ *
+ * But this is all the wrong order, we want to have them in the
+ * order of the 'used' channel.
+ *
+ * So for example, we could have a 320 MHz EHT AP, which has the
+ * HE channel as 80 MHz (e.g. due to puncturing, which doesn't
+ * seem to be considered for the TPE), as follows:
+ *
+ * EHT 320: | | | | | | | | | | | | | | | | |
+ * HE 80: | | | | |
+ * used 160: | | | | | | | | |
+ *
+ * N entries: |--|--|--|--|
+ * K entries: |--|--|--|--|--|--|--|--| |--|--|--|--|
+ * power idx: 4 5 6 7 8 9 10 11 0 1 2 3 12 13 14 15
+ * full chan: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ * used chan: 0 1 2 3 4 5 6 7
+ *
+ * The idx in the power array ('power idx') is like this since it
+ * comes directly from the element's N and K entries in their
+ * element order, and those are this way for HE compatibility.
+ *
+ * Rearrange them as desired here, first by putting them into the
+ * 'full chan' order, and then selecting the necessary subset for
+ * the 'used chan'.
+ */
+
+ /* first reorder according to AP channel */
+ offset = ieee80211_calc_chandef_subchan_offset(ap, psd->n);
+ for (int i = 0; i < have; i++) {
+ if (i < offset)
+ tmp[i] = psd->power[i + psd->n];
+ else if (i < offset + psd->n)
+ tmp[i] = psd->power[i - offset];
+ else
+ tmp[i] = psd->power[i];
+ }
+
+ /*
+ * and then select the subset for the used channel
+ * (set everything to defaults first in case a driver is confused)
+ */
+ memset(psd->power, IEEE80211_TPE_PSD_NO_LIMIT, sizeof(psd->power));
+ offset = ieee80211_calc_chandef_subchan_offset(ap, needed);
+ for (int i = 0; i < needed; i++)
+ psd->power[i] = tmp[offset + i];
+
+out:
+ /* limit, but don't lie if there are defaults in the data */
+ if (needed < psd->count)
+ psd->count = needed;
+}
+EXPORT_SYMBOL_IF_MAC80211_KUNIT(ieee80211_rearrange_tpe_psd);
+
+static void ieee80211_rearrange_tpe(struct ieee80211_parsed_tpe *tpe,
+ const struct cfg80211_chan_def *ap,
+ const struct cfg80211_chan_def *used)
+{
+ /* ignore this completely for narrow/invalid channels */
+ if (!ieee80211_chandef_num_subchans(ap) ||
+ !ieee80211_chandef_num_subchans(used)) {
+ ieee80211_clear_tpe(tpe);
+ return;
+ }
+
+ for (int i = 0; i < 2; i++) {
+ int needed_pwr_count;
+
+ ieee80211_rearrange_tpe_psd(&tpe->psd_local[i], ap, used);
+ ieee80211_rearrange_tpe_psd(&tpe->psd_reg_client[i], ap, used);
+
+ /* limit this to the widths we actually need */
+ needed_pwr_count = ieee80211_chandef_num_widths(used);
+ if (needed_pwr_count < tpe->max_local[i].count)
+ tpe->max_local[i].count = needed_pwr_count;
+ if (needed_pwr_count < tpe->max_reg_client[i].count)
+ tpe->max_reg_client[i].count = needed_pwr_count;
+ }
+}
+
+/*
+ * The AP part of the channel request is used to distinguish settings
+ * to the device used for wider bandwidth OFDMA. This is used in the
+ * channel context code to assign two channel contexts even if they're
+ * both for the same channel, if the AP bandwidths are incompatible.
+ * If not EHT (or driver override) then ap.chan == NULL indicates that
+ * there's no wider BW OFDMA used.
+ */
+static void ieee80211_set_chanreq_ap(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_chan_req *chanreq,
+ struct ieee80211_conn_settings *conn,
+ struct cfg80211_chan_def *ap_chandef)
+{
+ chanreq->ap.chan = NULL;
+
+ if (conn->mode < IEEE80211_CONN_MODE_EHT)
+ return;
+ if (sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW)
+ return;
+
+ chanreq->ap = *ap_chandef;
+}
+
static struct ieee802_11_elems *
ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata,
struct ieee80211_conn_settings *conn,
struct cfg80211_bss *cbss, int link_id,
- struct ieee80211_chan_req *chanreq)
+ struct ieee80211_chan_req *chanreq,
+ struct cfg80211_chan_def *ap_chandef)
{
const struct cfg80211_bss_ies *ies = rcu_dereference(cbss->ies);
struct ieee80211_bss *bss = (void *)cbss->priv;
@@ -619,7 +830,6 @@ ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata,
};
struct ieee802_11_elems *elems;
struct ieee80211_supported_band *sband;
- struct cfg80211_chan_def ap_chandef;
enum ieee80211_conn_mode ap_mode;
int ret;
@@ -630,7 +840,7 @@ again:
return ERR_PTR(-ENOMEM);
ap_mode = ieee80211_determine_ap_chan(sdata, channel, bss->vht_cap_info,
- elems, false, conn, &ap_chandef);
+ elems, false, conn, ap_chandef);
/* this should be impossible since parsing depends on our mode */
if (WARN_ON(ap_mode > conn->mode)) {
@@ -697,14 +907,9 @@ again:
break;
}
- chanreq->oper = ap_chandef;
+ chanreq->oper = *ap_chandef;
- /* wider-bandwidth OFDMA is only done in EHT */
- if (conn->mode >= IEEE80211_CONN_MODE_EHT &&
- !(sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW))
- chanreq->ap = ap_chandef;
- else
- chanreq->ap.chan = NULL;
+ ieee80211_set_chanreq_ap(sdata, chanreq, conn, ap_chandef);
while (!ieee80211_chandef_usable(sdata, &chanreq->oper,
IEEE80211_CHAN_DISABLED)) {
@@ -734,7 +939,7 @@ again:
IEEE80211_CONN_BW_LIMIT_160);
}
- if (chanreq->oper.width != ap_chandef.width || ap_mode != conn->mode)
+ if (chanreq->oper.width != ap_chandef->width || ap_mode != conn->mode)
sdata_info(sdata,
"regulatory prevented using AP config, downgraded\n");
@@ -786,6 +991,7 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link,
struct ieee80211_channel *channel = link->conf->chanreq.oper.chan;
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_chan_req chanreq = {};
+ struct cfg80211_chan_def ap_chandef;
enum ieee80211_conn_mode ap_mode;
u32 vht_cap_info = 0;
u16 ht_opmode;
@@ -801,7 +1007,7 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link,
ap_mode = ieee80211_determine_ap_chan(sdata, channel, vht_cap_info,
elems, true, &link->u.mgd.conn,
- &chanreq.ap);
+ &ap_chandef);
if (ap_mode != link->u.mgd.conn.mode) {
link_info(link,
@@ -811,10 +1017,9 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link,
return -EINVAL;
}
- chanreq.oper = chanreq.ap;
- if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_EHT ||
- sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW)
- chanreq.ap.chan = NULL;
+ chanreq.oper = ap_chandef;
+ ieee80211_set_chanreq_ap(sdata, &chanreq, &link->u.mgd.conn,
+ &ap_chandef);
/*
* if HT operation mode changed store the new one -
@@ -839,6 +1044,16 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link,
ieee80211_min_bw_limit_from_chandef(&chanreq.oper))
ieee80211_chandef_downgrade(&chanreq.oper, NULL);
+ if (ap_chandef.chan->band == NL80211_BAND_6GHZ &&
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HE) {
+ ieee80211_rearrange_tpe(&elems->tpe, &ap_chandef,
+ &chanreq.oper);
+ if (memcmp(&link->conf->tpe, &elems->tpe, sizeof(elems->tpe))) {
+ link->conf->tpe = elems->tpe;
+ *changed |= BSS_CHANGED_TPE;
+ }
+ }
+
if (ieee80211_chanreq_identical(&chanreq, &link->conf->chanreq))
return 0;
@@ -1858,12 +2073,12 @@ void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
}
/* spectrum management related things */
-static void ieee80211_chswitch_work(struct wiphy *wiphy,
- struct wiphy_work *work)
+static void ieee80211_csa_switch_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
{
struct ieee80211_link_data *link =
container_of(work, struct ieee80211_link_data,
- u.mgd.chswitch_work.work);
+ u.mgd.csa.switch_work.work);
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -1881,6 +2096,18 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy,
return;
/*
+ * If the link isn't active (now), we cannot wait for beacons, won't
+ * have a reserved chanctx, etc. Just switch over the chandef and
+ * update cfg80211 directly.
+ */
+ if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) {
+ link->conf->chanreq = link->csa.chanreq;
+ cfg80211_ch_switch_notify(sdata->dev, &link->csa.chanreq.oper,
+ link->link_id);
+ return;
+ }
+
+ /*
* using reservation isn't immediate as it may be deferred until later
* with multi-vif. once reservation is complete it will re-schedule the
* work with no reserved_chanctx so verify chandef to check if it
@@ -1898,9 +2125,9 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy,
ret = ieee80211_link_use_reserved_context(link);
if (ret) {
- sdata_info(sdata,
- "failed to use reserved channel context, disconnecting (err=%d)\n",
- ret);
+ link_info(link,
+ "failed to use reserved channel context, disconnecting (err=%d)\n",
+ ret);
wiphy_work_queue(sdata->local->hw.wiphy,
&ifmgd->csa_connection_drop_work);
}
@@ -1908,15 +2135,29 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy,
}
if (!ieee80211_chanreq_identical(&link->conf->chanreq,
- &link->csa_chanreq)) {
- sdata_info(sdata,
- "failed to finalize channel switch, disconnecting\n");
+ &link->csa.chanreq)) {
+ link_info(link,
+ "failed to finalize channel switch, disconnecting\n");
wiphy_work_queue(sdata->local->hw.wiphy,
&ifmgd->csa_connection_drop_work);
return;
}
- link->u.mgd.csa_waiting_bcn = true;
+ link->u.mgd.csa.waiting_bcn = true;
+
+ /* apply new TPE restrictions immediately on the new channel */
+ if (link->u.mgd.csa.ap_chandef.chan->band == NL80211_BAND_6GHZ &&
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HE) {
+ ieee80211_rearrange_tpe(&link->u.mgd.csa.tpe,
+ &link->u.mgd.csa.ap_chandef,
+ &link->conf->chanreq.oper);
+ if (memcmp(&link->conf->tpe, &link->u.mgd.csa.tpe,
+ sizeof(link->u.mgd.csa.tpe))) {
+ link->conf->tpe = link->u.mgd.csa.tpe;
+ ieee80211_link_info_change_notify(sdata, link,
+ BSS_CHANGED_TPE);
+ }
+ }
ieee80211_sta_reset_beacon_monitor(sdata);
ieee80211_sta_reset_conn_monitor(sdata);
@@ -1925,7 +2166,6 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy,
static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
- struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
int ret;
@@ -1933,25 +2173,22 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link)
WARN_ON(!link->conf->csa_active);
- if (sdata->csa_blocked_tx) {
- ieee80211_wake_vif_queues(local, sdata,
- IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->csa_blocked_tx = false;
- }
+ ieee80211_vif_unblock_queues_csa(sdata);
link->conf->csa_active = false;
- link->u.mgd.csa_waiting_bcn = false;
+ link->u.mgd.csa.blocked_tx = false;
+ link->u.mgd.csa.waiting_bcn = false;
ret = drv_post_channel_switch(link);
if (ret) {
- sdata_info(sdata,
- "driver post channel switch failed, disconnecting\n");
+ link_info(link,
+ "driver post channel switch failed, disconnecting\n");
wiphy_work_queue(sdata->local->hw.wiphy,
&ifmgd->csa_connection_drop_work);
return;
}
- cfg80211_ch_switch_notify(sdata->dev, &link->reserved.oper,
+ cfg80211_ch_switch_notify(sdata->dev, &link->conf->chanreq.oper,
link->link_id);
}
@@ -1966,7 +2203,8 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success,
if (!success) {
sdata_info(sdata,
- "driver channel switch failed, disconnecting\n");
+ "driver channel switch failed (link %d), disconnecting\n",
+ link_id);
wiphy_work_queue(sdata->local->hw.wiphy,
&sdata->u.mgd.csa_connection_drop_work);
} else {
@@ -1979,7 +2217,7 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success,
}
wiphy_delayed_work_queue(sdata->local->hw.wiphy,
- &link->u.mgd.chswitch_work, 0);
+ &link->u.mgd.csa.switch_work, 0);
}
rcu_read_unlock();
@@ -1999,125 +2237,308 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link)
ieee80211_link_unreserve_chanctx(link);
- if (sdata->csa_blocked_tx) {
- ieee80211_wake_vif_queues(local, sdata,
- IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->csa_blocked_tx = false;
- }
+ ieee80211_vif_unblock_queues_csa(sdata);
link->conf->csa_active = false;
+ link->u.mgd.csa.blocked_tx = false;
drv_abort_channel_switch(link);
}
+struct sta_csa_rnr_iter_data {
+ struct ieee80211_link_data *link;
+ struct ieee80211_channel *chan;
+ u8 mld_id;
+};
+
+static enum cfg80211_rnr_iter_ret
+ieee80211_sta_csa_rnr_iter(void *_data, u8 type,
+ const struct ieee80211_neighbor_ap_info *info,
+ const u8 *tbtt_info, u8 tbtt_info_len)
+{
+ struct sta_csa_rnr_iter_data *data = _data;
+ struct ieee80211_link_data *link = data->link;
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ const struct ieee80211_tbtt_info_ge_11 *ti;
+ enum nl80211_band band;
+ unsigned int center_freq;
+ int link_id;
+
+ if (type != IEEE80211_TBTT_INFO_TYPE_TBTT)
+ return RNR_ITER_CONTINUE;
+
+ if (tbtt_info_len < sizeof(*ti))
+ return RNR_ITER_CONTINUE;
+
+ ti = (const void *)tbtt_info;
+
+ if (ti->mld_params.mld_id != data->mld_id)
+ return RNR_ITER_CONTINUE;
+
+ link_id = le16_get_bits(ti->mld_params.params,
+ IEEE80211_RNR_MLD_PARAMS_LINK_ID);
+ if (link_id != data->link->link_id)
+ return RNR_ITER_CONTINUE;
+
+ /* we found the entry for our link! */
+
+ /* this AP is confused, it had this right before ... just disconnect */
+ if (!ieee80211_operating_class_to_band(info->op_class, &band)) {
+ link_info(link,
+ "AP now has invalid operating class in RNR, disconnect\n");
+ wiphy_work_queue(sdata->local->hw.wiphy,
+ &ifmgd->csa_connection_drop_work);
+ return RNR_ITER_BREAK;
+ }
+
+ center_freq = ieee80211_channel_to_frequency(info->channel, band);
+ data->chan = ieee80211_get_channel(sdata->local->hw.wiphy, center_freq);
+
+ return RNR_ITER_BREAK;
+}
+
+static void
+ieee80211_sta_other_link_csa_disappeared(struct ieee80211_link_data *link,
+ struct ieee802_11_elems *elems)
+{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ struct sta_csa_rnr_iter_data data = {
+ .link = link,
+ };
+
+ /*
+ * If we get here, we see a beacon from another link without
+ * CSA still being reported for it, so now we have to check
+ * if the CSA was aborted or completed. This may not even be
+ * perfectly possible if the CSA was only done for changing
+ * the puncturing, but in that case if the link in inactive
+ * we don't really care, and if it's an active link (or when
+ * it's activated later) we'll get a beacon and adjust.
+ */
+
+ if (WARN_ON(!elems->ml_basic))
+ return;
+
+ data.mld_id = ieee80211_mle_get_mld_id((const void *)elems->ml_basic);
+
+ /*
+ * So in order to do this, iterate the RNR element(s) and see
+ * what channel is reported now.
+ */
+ cfg80211_iter_rnr(elems->ie_start, elems->total_len,
+ ieee80211_sta_csa_rnr_iter, &data);
+
+ if (!data.chan) {
+ link_info(link,
+ "couldn't find (valid) channel in RNR for CSA, disconnect\n");
+ wiphy_work_queue(sdata->local->hw.wiphy,
+ &ifmgd->csa_connection_drop_work);
+ return;
+ }
+
+ /*
+ * If it doesn't match the CSA, then assume it aborted. This
+ * may erroneously detect that it was _not_ aborted when it
+ * was in fact aborted, but only changed the bandwidth or the
+ * puncturing configuration, but we don't have enough data to
+ * detect that.
+ */
+ if (data.chan != link->csa.chanreq.oper.chan)
+ ieee80211_sta_abort_chanswitch(link);
+}
+
+enum ieee80211_csa_source {
+ IEEE80211_CSA_SOURCE_BEACON,
+ IEEE80211_CSA_SOURCE_OTHER_LINK,
+ IEEE80211_CSA_SOURCE_PROT_ACTION,
+ IEEE80211_CSA_SOURCE_UNPROT_ACTION,
+};
+
static void
ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
u64 timestamp, u32 device_timestamp,
- struct ieee802_11_elems *elems,
- bool beacon)
+ struct ieee802_11_elems *full_elems,
+ struct ieee802_11_elems *csa_elems,
+ enum ieee80211_csa_source source)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- struct cfg80211_bss *cbss = link->conf->bss;
+ struct ieee80211_chanctx *chanctx = NULL;
struct ieee80211_chanctx_conf *conf;
- struct ieee80211_chanctx *chanctx;
- enum nl80211_band current_band;
- struct ieee80211_csa_ie csa_ie;
+ struct ieee80211_csa_ie csa_ie = {};
struct ieee80211_channel_switch ch_switch = {
.link_id = link->link_id,
+ .timestamp = timestamp,
+ .device_timestamp = device_timestamp,
};
- struct ieee80211_bss *bss;
- unsigned long timeout;
+ unsigned long now;
int res;
lockdep_assert_wiphy(local->hw.wiphy);
- if (!cbss)
- return;
+ if (csa_elems) {
+ struct cfg80211_bss *cbss = link->conf->bss;
+ enum nl80211_band current_band;
+ struct ieee80211_bss *bss;
- current_band = cbss->channel->band;
- bss = (void *)cbss->priv;
- res = ieee80211_parse_ch_switch_ie(sdata, elems, current_band,
- bss->vht_cap_info,
- &link->u.mgd.conn,
- link->u.mgd.bssid, &csa_ie);
+ if (WARN_ON(!cbss))
+ return;
- if (!res) {
- ch_switch.timestamp = timestamp;
- ch_switch.device_timestamp = device_timestamp;
- ch_switch.block_tx = csa_ie.mode;
- ch_switch.chandef = csa_ie.chanreq.oper;
- ch_switch.count = csa_ie.count;
- ch_switch.delay = csa_ie.max_switch_time;
+ current_band = cbss->channel->band;
+ bss = (void *)cbss->priv;
+
+ res = ieee80211_parse_ch_switch_ie(sdata, csa_elems,
+ current_band,
+ bss->vht_cap_info,
+ &link->u.mgd.conn,
+ link->u.mgd.bssid,
+ source == IEEE80211_CSA_SOURCE_UNPROT_ACTION,
+ &csa_ie);
+ if (res == 0) {
+ ch_switch.block_tx = csa_ie.mode;
+ ch_switch.chandef = csa_ie.chanreq.oper;
+ ch_switch.count = csa_ie.count;
+ ch_switch.delay = csa_ie.max_switch_time;
+ }
+
+ link->u.mgd.csa.tpe = csa_elems->csa_tpe;
+ } else {
+ /*
+ * If there was no per-STA profile for this link, we
+ * get called with csa_elems == NULL. This of course means
+ * there are no CSA elements, so set res=1 indicating
+ * no more CSA.
+ */
+ res = 1;
}
- if (res < 0)
+ if (res < 0) {
+ /* ignore this case, not a protected frame */
+ if (source == IEEE80211_CSA_SOURCE_UNPROT_ACTION)
+ return;
goto drop_connection;
+ }
if (link->conf->csa_active) {
- /* already processing - disregard action frames */
- if (!beacon)
+ switch (source) {
+ case IEEE80211_CSA_SOURCE_PROT_ACTION:
+ case IEEE80211_CSA_SOURCE_UNPROT_ACTION:
+ /* already processing - disregard action frames */
return;
+ case IEEE80211_CSA_SOURCE_BEACON:
+ if (link->u.mgd.csa.waiting_bcn) {
+ ieee80211_chswitch_post_beacon(link);
+ /*
+ * If the CSA is still present after the switch
+ * we need to consider it as a new CSA (possibly
+ * to self). This happens by not returning here
+ * so we'll get to the check below.
+ */
+ } else if (res) {
+ ieee80211_sta_abort_chanswitch(link);
+ return;
+ } else {
+ drv_channel_switch_rx_beacon(sdata, &ch_switch);
+ return;
+ }
+ break;
+ case IEEE80211_CSA_SOURCE_OTHER_LINK:
+ /* active link: we want to see the beacon to continue */
+ if (ieee80211_vif_link_active(&sdata->vif,
+ link->link_id))
+ return;
- if (link->u.mgd.csa_waiting_bcn) {
- ieee80211_chswitch_post_beacon(link);
- /*
- * If the CSA IE is still present in the beacon after
- * the switch, we need to consider it as a new CSA
- * (possibly to self) - this happens by not returning
- * here so we'll get to the check below.
- */
- } else if (res) {
- ieee80211_sta_abort_chanswitch(link);
- return;
- } else {
- drv_channel_switch_rx_beacon(sdata, &ch_switch);
+ /* switch work ran, so just complete the process */
+ if (link->u.mgd.csa.waiting_bcn) {
+ ieee80211_chswitch_post_beacon(link);
+ /*
+ * If the CSA is still present after the switch
+ * we need to consider it as a new CSA (possibly
+ * to self). This happens by not returning here
+ * so we'll get to the check below.
+ */
+ break;
+ }
+
+ /* link still has CSA but we already know, do nothing */
+ if (!res)
+ return;
+
+ /* check in the RNR if the CSA aborted */
+ ieee80211_sta_other_link_csa_disappeared(link,
+ full_elems);
return;
}
}
- /* nothing to do at all - no active CSA nor a new one */
- if (res)
+ /* no active CSA nor a new one */
+ if (res) {
+ /*
+ * However, we may have stopped queues when receiving a public
+ * action frame that couldn't be protected, if it had the quiet
+ * bit set. This is a trade-off, we want to be quiet as soon as
+ * possible, but also don't trust the public action frame much,
+ * as it can't be protected.
+ */
+ if (unlikely(link->u.mgd.csa.blocked_tx)) {
+ link->u.mgd.csa.blocked_tx = false;
+ ieee80211_vif_unblock_queues_csa(sdata);
+ }
return;
+ }
+
+ /*
+ * We don't really trust public action frames, but block queues (go to
+ * quiet mode) for them anyway, we should get a beacon soon to either
+ * know what the CSA really is, or figure out the public action frame
+ * was actually an attack.
+ */
+ if (source == IEEE80211_CSA_SOURCE_UNPROT_ACTION) {
+ if (csa_ie.mode) {
+ link->u.mgd.csa.blocked_tx = true;
+ ieee80211_vif_block_queues_csa(sdata);
+ }
+ return;
+ }
if (link->conf->chanreq.oper.chan->band !=
csa_ie.chanreq.oper.chan->band) {
- sdata_info(sdata,
- "AP %pM switches to different band (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n",
- link->u.mgd.bssid,
- csa_ie.chanreq.oper.chan->center_freq,
- csa_ie.chanreq.oper.width,
- csa_ie.chanreq.oper.center_freq1,
- csa_ie.chanreq.oper.center_freq2);
+ link_info(link,
+ "AP %pM switches to different band (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n",
+ link->u.mgd.bssid,
+ csa_ie.chanreq.oper.chan->center_freq,
+ csa_ie.chanreq.oper.width,
+ csa_ie.chanreq.oper.center_freq1,
+ csa_ie.chanreq.oper.center_freq2);
goto drop_connection;
}
if (!cfg80211_chandef_usable(local->hw.wiphy, &csa_ie.chanreq.oper,
IEEE80211_CHAN_DISABLED)) {
- sdata_info(sdata,
- "AP %pM switches to unsupported channel "
- "(%d.%03d MHz, width:%d, CF1/2: %d.%03d/%d MHz), "
- "disconnecting\n",
- link->u.mgd.bssid,
- csa_ie.chanreq.oper.chan->center_freq,
- csa_ie.chanreq.oper.chan->freq_offset,
- csa_ie.chanreq.oper.width,
- csa_ie.chanreq.oper.center_freq1,
- csa_ie.chanreq.oper.freq1_offset,
- csa_ie.chanreq.oper.center_freq2);
+ link_info(link,
+ "AP %pM switches to unsupported channel (%d.%03d MHz, width:%d, CF1/2: %d.%03d/%d MHz), disconnecting\n",
+ link->u.mgd.bssid,
+ csa_ie.chanreq.oper.chan->center_freq,
+ csa_ie.chanreq.oper.chan->freq_offset,
+ csa_ie.chanreq.oper.width,
+ csa_ie.chanreq.oper.center_freq1,
+ csa_ie.chanreq.oper.freq1_offset,
+ csa_ie.chanreq.oper.center_freq2);
goto drop_connection;
}
if (cfg80211_chandef_identical(&csa_ie.chanreq.oper,
&link->conf->chanreq.oper) &&
- (!csa_ie.mode || !beacon)) {
- if (link->u.mgd.csa_ignored_same_chan)
+ (!csa_ie.mode || source != IEEE80211_CSA_SOURCE_BEACON)) {
+ if (link->u.mgd.csa.ignored_same_chan)
return;
- sdata_info(sdata,
- "AP %pM tries to chanswitch to same channel, ignore\n",
- link->u.mgd.bssid);
- link->u.mgd.csa_ignored_same_chan = true;
+ link_info(link,
+ "AP %pM tries to chanswitch to same channel, ignore\n",
+ link->u.mgd.bssid);
+ link->u.mgd.csa.ignored_same_chan = true;
return;
}
@@ -2132,63 +2553,78 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
conf = rcu_dereference_protected(link->conf->chanctx_conf,
lockdep_is_held(&local->hw.wiphy->mtx));
- if (!conf) {
- sdata_info(sdata,
- "no channel context assigned to vif?, disconnecting\n");
+ if (ieee80211_vif_link_active(&sdata->vif, link->link_id) && !conf) {
+ link_info(link,
+ "no channel context assigned to vif?, disconnecting\n");
goto drop_connection;
}
- chanctx = container_of(conf, struct ieee80211_chanctx, conf);
+ if (conf)
+ chanctx = container_of(conf, struct ieee80211_chanctx, conf);
if (!ieee80211_hw_check(&local->hw, CHANCTX_STA_CSA)) {
- sdata_info(sdata,
- "driver doesn't support chan-switch with channel contexts\n");
+ link_info(link,
+ "driver doesn't support chan-switch with channel contexts\n");
goto drop_connection;
}
if (drv_pre_channel_switch(sdata, &ch_switch)) {
- sdata_info(sdata,
- "preparing for channel switch failed, disconnecting\n");
+ link_info(link,
+ "preparing for channel switch failed, disconnecting\n");
goto drop_connection;
}
- res = ieee80211_link_reserve_chanctx(link, &csa_ie.chanreq,
- chanctx->mode, false);
- if (res) {
- sdata_info(sdata,
- "failed to reserve channel context for channel switch, disconnecting (err=%d)\n",
- res);
- goto drop_connection;
+ link->u.mgd.csa.ap_chandef = csa_ie.chanreq.ap;
+
+ link->csa.chanreq.oper = csa_ie.chanreq.oper;
+ ieee80211_set_chanreq_ap(sdata, &link->csa.chanreq, &link->u.mgd.conn,
+ &csa_ie.chanreq.ap);
+
+ if (chanctx) {
+ res = ieee80211_link_reserve_chanctx(link, &link->csa.chanreq,
+ chanctx->mode, false);
+ if (res) {
+ link_info(link,
+ "failed to reserve channel context for channel switch, disconnecting (err=%d)\n",
+ res);
+ goto drop_connection;
+ }
}
link->conf->csa_active = true;
- link->csa_chanreq = csa_ie.chanreq;
- link->u.mgd.csa_ignored_same_chan = false;
+ link->u.mgd.csa.ignored_same_chan = false;
link->u.mgd.beacon_crc_valid = false;
+ link->u.mgd.csa.blocked_tx = csa_ie.mode;
- if (csa_ie.mode &&
- !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA)) {
- ieee80211_stop_vif_queues(local, sdata,
- IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->csa_blocked_tx = true;
- }
+ if (csa_ie.mode)
+ ieee80211_vif_block_queues_csa(sdata);
cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chanreq.oper,
link->link_id, csa_ie.count,
csa_ie.mode);
- if (local->ops->channel_switch) {
- /* use driver's channel switch callback */
+ /* we may have to handle timeout for deactivated link in software */
+ now = jiffies;
+ link->u.mgd.csa.time = now +
+ TU_TO_JIFFIES((max_t(int, csa_ie.count, 1) - 1) *
+ link->conf->beacon_int);
+
+ if (ieee80211_vif_link_active(&sdata->vif, link->link_id) &&
+ local->ops->channel_switch) {
+ /*
+ * Use driver's channel switch callback, the driver will
+ * later call ieee80211_chswitch_done(). It may deactivate
+ * the link as well, we handle that elsewhere and queue
+ * the csa.switch_work for the calculated time then.
+ */
drv_channel_switch(local, sdata, &ch_switch);
return;
}
/* channel switch handled in software */
- timeout = TU_TO_JIFFIES((max_t(int, csa_ie.count, 1) - 1) *
- cbss->beacon_interval);
wiphy_delayed_work_queue(local->hw.wiphy,
- &link->u.mgd.chswitch_work,
- timeout);
+ &link->u.mgd.csa.switch_work,
+ link->u.mgd.csa.time - now);
return;
drop_connection:
/*
@@ -2199,7 +2635,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
* reset when the disconnection worker runs.
*/
link->conf->csa_active = true;
- sdata->csa_blocked_tx =
+ link->u.mgd.csa.blocked_tx = csa_ie.mode;
+ sdata->csa_blocked_queues =
csa_ie.mode && !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA);
wiphy_work_queue(sdata->local->hw.wiphy,
@@ -2594,16 +3031,15 @@ void ieee80211_dynamic_ps_timer(struct timer_list *t)
void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work)
{
- struct ieee80211_link_data *link =
- container_of(work, struct ieee80211_link_data,
+ struct ieee80211_sub_if_data *sdata =
+ container_of(work, struct ieee80211_sub_if_data,
dfs_cac_timer_work.work);
- struct cfg80211_chan_def chandef = link->conf->chanreq.oper;
- struct ieee80211_sub_if_data *sdata = link->sdata;
+ struct cfg80211_chan_def chandef = sdata->vif.bss_conf.chanreq.oper;
lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (sdata->wdev.cac_started) {
- ieee80211_link_release_channel(link);
+ ieee80211_link_release_channel(&sdata->deflink);
cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_FINISHED,
GFP_KERNEL);
@@ -3085,6 +3521,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
u64 changed = 0;
struct ieee80211_prep_tx_info info = {
.subtype = stype,
+ .was_assoc = true,
+ .link_id = ffs(sdata->vif.active_links) - 1,
};
lockdep_assert_wiphy(local->hw.wiphy);
@@ -3133,29 +3571,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
/* deauthenticate/disassociate now */
if (tx || frame_buf) {
- /*
- * In multi channel scenarios guarantee that the virtual
- * interface is granted immediate airtime to transmit the
- * deauthentication frame by calling mgd_prepare_tx, if the
- * driver requested so.
- */
- if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP)) {
- for (link_id = 0; link_id < ARRAY_SIZE(sdata->link);
- link_id++) {
- struct ieee80211_link_data *link;
-
- link = sdata_dereference(sdata->link[link_id],
- sdata);
- if (!link)
- continue;
- if (link->u.mgd.have_beacon)
- break;
- }
- if (link_id == IEEE80211_MLD_MAX_NUM_LINKS) {
- info.link_id = ffs(sdata->vif.active_links) - 1;
- drv_mgd_prepare_tx(sdata->local, sdata, &info);
- }
- }
+ drv_mgd_prepare_tx(sdata->local, sdata, &info);
ieee80211_send_deauth_disassoc(sdata, sdata->vif.cfg.ap_addr,
sdata->vif.cfg.ap_addr, stype,
@@ -3252,13 +3668,10 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
}
sdata->vif.bss_conf.csa_active = false;
- sdata->deflink.u.mgd.csa_waiting_bcn = false;
- sdata->deflink.u.mgd.csa_ignored_same_chan = false;
- if (sdata->csa_blocked_tx) {
- ieee80211_wake_vif_queues(local, sdata,
- IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->csa_blocked_tx = false;
- }
+ sdata->deflink.u.mgd.csa.blocked_tx = false;
+ sdata->deflink.u.mgd.csa.waiting_bcn = false;
+ sdata->deflink.u.mgd.csa.ignored_same_chan = false;
+ ieee80211_vif_unblock_queues_csa(sdata);
/* existing TX TSPEC sessions no longer exist */
memset(ifmgd->tx_tspec, 0, sizeof(ifmgd->tx_tspec));
@@ -3266,9 +3679,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
sdata->vif.bss_conf.power_type = IEEE80211_REG_UNSET_AP;
sdata->vif.bss_conf.pwr_reduction = 0;
- sdata->vif.bss_conf.tx_pwr_env_num = 0;
- memset(sdata->vif.bss_conf.tx_pwr_env, 0,
- sizeof(sdata->vif.bss_conf.tx_pwr_env));
+ ieee80211_clear_tpe(&sdata->vif.bss_conf.tpe);
sdata->vif.cfg.eml_cap = 0;
sdata->vif.cfg.eml_med_sync_delay = 0;
@@ -3278,8 +3689,17 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
sizeof(sdata->u.mgd.ttlm_info));
wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work);
+ memset(&sdata->vif.neg_ttlm, 0, sizeof(sdata->vif.neg_ttlm));
wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
&ifmgd->neg_ttlm_timeout_work);
+
+ sdata->u.mgd.removed_links = 0;
+ wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
+ &sdata->u.mgd.ml_reconf_work);
+
+ wiphy_work_cancel(sdata->local->hw.wiphy,
+ &ifmgd->teardown_ttlm_work);
+
ieee80211_vif_set_links(sdata, 0, 0);
ifmgd->mcast_seq_last = IEEE80211_SN_MODULO;
@@ -3563,19 +3983,32 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
- bool tx;
+ bool tx = false;
lockdep_assert_wiphy(local->hw.wiphy);
if (!ifmgd->associated)
return;
- /*
- * MLO drivers should have HANDLES_QUIET_CSA, so that csa_blocked_tx
- * is always false; if they don't then this may try to transmit the
- * frame but queues will be stopped.
- */
- tx = !sdata->csa_blocked_tx;
+ /* only transmit if we have a link that makes that worthwhile */
+ for (unsigned int link_id = 0;
+ link_id < ARRAY_SIZE(sdata->link);
+ link_id++) {
+ struct ieee80211_link_data *link;
+
+ if (!ieee80211_vif_link_active(&sdata->vif, link_id))
+ continue;
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (WARN_ON_ONCE(!link))
+ continue;
+
+ if (link->u.mgd.csa.blocked_tx)
+ continue;
+
+ tx = true;
+ break;
+ }
if (!ifmgd->driver_disconnect) {
unsigned int link_id;
@@ -3607,12 +4040,9 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
tx, frame_buf);
/* the other links will be destroyed */
sdata->vif.bss_conf.csa_active = false;
- sdata->deflink.u.mgd.csa_waiting_bcn = false;
- if (sdata->csa_blocked_tx) {
- ieee80211_wake_vif_queues(local, sdata,
- IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->csa_blocked_tx = false;
- }
+ sdata->deflink.u.mgd.csa.waiting_bcn = false;
+ sdata->deflink.u.mgd.csa.blocked_tx = false;
+ ieee80211_vif_unblock_queues_csa(sdata);
ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), tx,
WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
@@ -4422,38 +4852,12 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
if (elems->he_operation &&
link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HE &&
elems->he_cap) {
- const struct ieee80211_he_6ghz_oper *he_6ghz_oper;
-
ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
elems->he_cap,
elems->he_cap_len,
elems->he_6ghz_capa,
link_sta);
- he_6ghz_oper = ieee80211_he_6ghz_oper(elems->he_operation);
-
- if (is_6ghz && he_6ghz_oper) {
- switch (u8_get_bits(he_6ghz_oper->control,
- IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) {
- case IEEE80211_6GHZ_CTRL_REG_LPI_AP:
- bss_conf->power_type = IEEE80211_REG_LPI_AP;
- break;
- case IEEE80211_6GHZ_CTRL_REG_SP_AP:
- bss_conf->power_type = IEEE80211_REG_SP_AP;
- break;
- case IEEE80211_6GHZ_CTRL_REG_VLP_AP:
- bss_conf->power_type = IEEE80211_REG_VLP_AP;
- break;
- default:
- bss_conf->power_type = IEEE80211_REG_UNSET_AP;
- break;
- }
- } else if (is_6ghz) {
- link_info(link,
- "HE 6 GHz operation missing (on %d MHz), expect issues\n",
- bss_conf->chanreq.oper.chan->center_freq);
- }
-
bss_conf->he_support = link_sta->pub->he_cap.has_he;
if (elems->rsnx && elems->rsnx_len &&
(elems->rsnx[0] & WLAN_RSNX_CAPA_PROTECTED_TWT) &&
@@ -4995,6 +5399,23 @@ ieee80211_determine_our_sta_mode_assoc(struct ieee80211_sub_if_data *sdata,
conn->bw_limit, tmp.bw_limit);
}
+static enum ieee80211_ap_reg_power
+ieee80211_ap_power_type(u8 control)
+{
+ switch (u8_get_bits(control, IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) {
+ case IEEE80211_6GHZ_CTRL_REG_LPI_AP:
+ case IEEE80211_6GHZ_CTRL_REG_INDOOR_LPI_AP:
+ return IEEE80211_REG_LPI_AP;
+ case IEEE80211_6GHZ_CTRL_REG_SP_AP:
+ case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP:
+ return IEEE80211_REG_SP_AP;
+ case IEEE80211_6GHZ_CTRL_REG_VLP_AP:
+ return IEEE80211_REG_VLP_AP;
+ default:
+ return IEEE80211_REG_UNSET_AP;
+ }
+}
+
static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
struct ieee80211_link_data *link,
int link_id,
@@ -5004,15 +5425,15 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ;
struct ieee80211_chan_req chanreq = {};
+ struct cfg80211_chan_def ap_chandef;
struct ieee802_11_elems *elems;
int ret;
- u32 i;
lockdep_assert_wiphy(local->hw.wiphy);
rcu_read_lock();
elems = ieee80211_determine_chan_mode(sdata, conn, cbss, link_id,
- &chanreq);
+ &chanreq, &ap_chandef);
if (IS_ERR(elems)) {
rcu_read_unlock();
@@ -5027,26 +5448,23 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
}
if (link && is_6ghz && conn->mode >= IEEE80211_CONN_MODE_HE) {
- struct ieee80211_bss_conf *bss_conf;
- u8 j = 0;
-
- bss_conf = link->conf;
+ const struct ieee80211_he_6ghz_oper *he_6ghz_oper;
if (elems->pwr_constr_elem)
- bss_conf->pwr_reduction = *elems->pwr_constr_elem;
-
- BUILD_BUG_ON(ARRAY_SIZE(bss_conf->tx_pwr_env) !=
- ARRAY_SIZE(elems->tx_pwr_env));
+ link->conf->pwr_reduction = *elems->pwr_constr_elem;
- for (i = 0; i < elems->tx_pwr_env_num; i++) {
- if (elems->tx_pwr_env_len[i] > sizeof(bss_conf->tx_pwr_env[j]))
- continue;
+ he_6ghz_oper = ieee80211_he_6ghz_oper(elems->he_operation);
+ if (he_6ghz_oper)
+ link->conf->power_type =
+ ieee80211_ap_power_type(he_6ghz_oper->control);
+ else
+ link_info(link,
+ "HE 6 GHz operation missing (on %d MHz), expect issues\n",
+ cbss->channel->center_freq);
- bss_conf->tx_pwr_env_num++;
- memcpy(&bss_conf->tx_pwr_env[j], elems->tx_pwr_env[i],
- elems->tx_pwr_env_len[i]);
- j++;
- }
+ link->conf->tpe = elems->tpe;
+ ieee80211_rearrange_tpe(&link->conf->tpe, &ap_chandef,
+ &chanreq.oper);
}
rcu_read_unlock();
/* the element data was RCU protected so no longer valid anyway */
@@ -6125,13 +6543,140 @@ static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata,
}
}
+static void
+ieee80211_mgd_check_cross_link_csa(struct ieee80211_sub_if_data *sdata,
+ int reporting_link_id,
+ struct ieee802_11_elems *elems)
+{
+ const struct element *sta_profiles[IEEE80211_MLD_MAX_NUM_LINKS] = {};
+ ssize_t sta_profiles_len[IEEE80211_MLD_MAX_NUM_LINKS] = {};
+ const struct element *sub;
+ const u8 *subelems;
+ size_t subelems_len;
+ u8 common_size;
+ int link_id;
+
+ if (!ieee80211_mle_size_ok((u8 *)elems->ml_basic, elems->ml_basic_len))
+ return;
+
+ common_size = ieee80211_mle_common_size((u8 *)elems->ml_basic);
+ subelems = (u8 *)elems->ml_basic + common_size;
+ subelems_len = elems->ml_basic_len - common_size;
+
+ for_each_element_id(sub, IEEE80211_MLE_SUBELEM_PER_STA_PROFILE,
+ subelems, subelems_len) {
+ struct ieee80211_mle_per_sta_profile *prof = (void *)sub->data;
+ struct ieee80211_link_data *link;
+ ssize_t len;
+
+ if (!ieee80211_mle_basic_sta_prof_size_ok(sub->data,
+ sub->datalen))
+ continue;
+
+ link_id = le16_get_bits(prof->control,
+ IEEE80211_MLE_STA_CONTROL_LINK_ID);
+ /* need a valid link ID, but also not our own, both AP bugs */
+ if (link_id == reporting_link_id ||
+ link_id >= IEEE80211_MLD_MAX_NUM_LINKS)
+ continue;
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (!link)
+ continue;
+
+ len = cfg80211_defragment_element(sub, subelems, subelems_len,
+ NULL, 0,
+ IEEE80211_MLE_SUBELEM_FRAGMENT);
+ if (WARN_ON(len < 0))
+ continue;
+
+ sta_profiles[link_id] = sub;
+ sta_profiles_len[link_id] = len;
+ }
+
+ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
+ struct ieee80211_mle_per_sta_profile *prof;
+ struct ieee802_11_elems *prof_elems;
+ struct ieee80211_link_data *link;
+ ssize_t len;
+
+ if (link_id == reporting_link_id)
+ continue;
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (!link)
+ continue;
+
+ if (!sta_profiles[link_id]) {
+ prof_elems = NULL;
+ goto handle;
+ }
+
+ /* we can defragment in-place, won't use the buffer again */
+ len = cfg80211_defragment_element(sta_profiles[link_id],
+ subelems, subelems_len,
+ (void *)sta_profiles[link_id],
+ sta_profiles_len[link_id],
+ IEEE80211_MLE_SUBELEM_FRAGMENT);
+ if (WARN_ON(len != sta_profiles_len[link_id]))
+ continue;
+
+ prof = (void *)sta_profiles[link_id];
+ prof_elems = ieee802_11_parse_elems(prof->variable +
+ (prof->sta_info_len - 1),
+ len -
+ (prof->sta_info_len - 1),
+ false, NULL);
+
+ /* memory allocation failed - let's hope that's transient */
+ if (!prof_elems)
+ continue;
+
+handle:
+ /*
+ * FIXME: the timings here are obviously incorrect,
+ * but only older Intel drivers seem to care, and
+ * those don't have MLO. If you really need this,
+ * the problem is having to calculate it with the
+ * TSF offset etc. The device_timestamp is still
+ * correct, of course.
+ */
+ ieee80211_sta_process_chanswitch(link, 0, 0, elems, prof_elems,
+ IEEE80211_CSA_SOURCE_OTHER_LINK);
+ kfree(prof_elems);
+ }
+}
+
+static bool ieee80211_mgd_ssid_mismatch(struct ieee80211_sub_if_data *sdata,
+ const struct ieee802_11_elems *elems)
+{
+ struct ieee80211_vif_cfg *cfg = &sdata->vif.cfg;
+ static u8 zero_ssid[IEEE80211_MAX_SSID_LEN];
+
+ if (!elems->ssid)
+ return false;
+
+ /* hidden SSID: zero length */
+ if (elems->ssid_len == 0)
+ return false;
+
+ if (elems->ssid_len != cfg->ssid_len)
+ return true;
+
+ /* hidden SSID: zeroed out */
+ if (memcmp(elems->ssid, zero_ssid, elems->ssid_len))
+ return false;
+
+ return memcmp(elems->ssid, cfg->ssid, cfg->ssid_len);
+}
+
static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
struct ieee80211_hdr *hdr, size_t len,
struct ieee80211_rx_status *rx_status)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
+ struct ieee80211_bss_conf *bss_conf = link->conf;
struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg;
struct ieee80211_mgmt *mgmt = (void *) hdr;
size_t baselen;
@@ -6175,7 +6720,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
parse_params.len = len - baselen;
rcu_read_lock();
- chanctx_conf = rcu_dereference(link->conf->chanctx_conf);
+ chanctx_conf = rcu_dereference(bss_conf->chanctx_conf);
if (!chanctx_conf) {
rcu_read_unlock();
return;
@@ -6205,11 +6750,11 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
ifmgd->assoc_data->need_beacon = false;
if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY) &&
!ieee80211_is_s1g_beacon(hdr->frame_control)) {
- link->conf->sync_tsf =
+ bss_conf->sync_tsf =
le64_to_cpu(mgmt->u.beacon.timestamp);
- link->conf->sync_device_ts =
+ bss_conf->sync_device_ts =
rx_status->device_timestamp;
- link->conf->sync_dtim_count = elems->dtim_count;
+ bss_conf->sync_dtim_count = elems->dtim_count;
}
if (elems->mbssid_config_ie)
@@ -6233,7 +6778,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
}
if (!ifmgd->associated ||
- !ieee80211_rx_our_beacon(bssid, link->conf->bss))
+ !ieee80211_rx_our_beacon(bssid, bss_conf->bss))
return;
bssid = link->u.mgd.bssid;
@@ -6260,12 +6805,21 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
*/
if (!ieee80211_is_s1g_beacon(hdr->frame_control))
ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4);
- parse_params.bss = link->conf->bss;
+ parse_params.bss = bss_conf->bss;
parse_params.filter = care_about_ies;
parse_params.crc = ncrc;
elems = ieee802_11_parse_elems_full(&parse_params);
if (!elems)
return;
+
+ if (rx_status->flag & RX_FLAG_DECRYPTED &&
+ ieee80211_mgd_ssid_mismatch(sdata, elems)) {
+ sdata_info(sdata, "SSID mismatch for AP %pM, disconnect\n",
+ sdata->vif.cfg.ap_addr);
+ __ieee80211_disconnect(sdata);
+ return;
+ }
+
ncrc = elems->crc;
if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) &&
@@ -6332,11 +6886,11 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
*/
if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY) &&
!ieee80211_is_s1g_beacon(hdr->frame_control)) {
- link->conf->sync_tsf =
+ bss_conf->sync_tsf =
le64_to_cpu(mgmt->u.beacon.timestamp);
- link->conf->sync_device_ts =
+ bss_conf->sync_device_ts =
rx_status->device_timestamp;
- link->conf->sync_dtim_count = elems->dtim_count;
+ bss_conf->sync_dtim_count = elems->dtim_count;
}
if ((ncrc == link->u.mgd.beacon_crc && link->u.mgd.beacon_crc_valid) ||
@@ -6349,7 +6903,11 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
ieee80211_sta_process_chanswitch(link, rx_status->mactime,
rx_status->device_timestamp,
- elems, true);
+ elems, elems,
+ IEEE80211_CSA_SOURCE_BEACON);
+
+ /* note that after this elems->ml_basic can no longer be used fully */
+ ieee80211_mgd_check_cross_link_csa(sdata, rx_status->link_id, elems);
if (!link->u.mgd.disable_wmm_tracking &&
ieee80211_sta_wmm_params(local, link, elems->wmm_param,
@@ -6395,10 +6953,10 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
goto free;
}
- if (WARN_ON(!link->conf->chanreq.oper.chan))
+ if (WARN_ON(!bss_conf->chanreq.oper.chan))
goto free;
- sband = local->hw.wiphy->bands[link->conf->chanreq.oper.chan->band];
+ sband = local->hw.wiphy->bands[bss_conf->chanreq.oper.chan->band];
changed |= ieee80211_recalc_twt_req(sdata, sband, link, link_sta, elems);
@@ -6803,6 +7361,60 @@ void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata,
__ieee80211_disconnect(sdata);
}
+static void ieee80211_teardown_ttlm_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
+{
+ u16 new_dormant_links;
+ struct ieee80211_sub_if_data *sdata =
+ container_of(work, struct ieee80211_sub_if_data,
+ u.mgd.teardown_ttlm_work);
+
+ if (!sdata->vif.neg_ttlm.valid)
+ return;
+
+ memset(&sdata->vif.neg_ttlm, 0, sizeof(sdata->vif.neg_ttlm));
+ new_dormant_links =
+ sdata->vif.dormant_links & ~sdata->vif.suspended_links;
+ sdata->vif.suspended_links = 0;
+ ieee80211_vif_set_links(sdata, sdata->vif.valid_links,
+ new_dormant_links);
+ ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_MLD_TTLM |
+ BSS_CHANGED_MLD_VALID_LINKS);
+}
+
+void ieee80211_send_teardown_neg_ttlm(struct ieee80211_vif *vif)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_mgmt *mgmt;
+ struct sk_buff *skb;
+ int frame_len = offsetofend(struct ieee80211_mgmt,
+ u.action.u.ttlm_tear_down);
+ struct ieee80211_tx_info *info;
+
+ skb = dev_alloc_skb(local->hw.extra_tx_headroom + frame_len);
+ if (!skb)
+ return;
+
+ skb_reserve(skb, local->hw.extra_tx_headroom);
+ mgmt = skb_put_zero(skb, frame_len);
+ mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+ IEEE80211_STYPE_ACTION);
+ memcpy(mgmt->da, sdata->vif.cfg.ap_addr, ETH_ALEN);
+ memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
+ memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
+
+ mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT;
+ mgmt->u.action.u.ttlm_tear_down.action_code =
+ WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN;
+
+ info = IEEE80211_SKB_CB(skb);
+ info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+ info->status_data = IEEE80211_STATUS_TYPE_NEG_TTLM;
+ ieee80211_tx_skb(sdata, skb);
+}
+EXPORT_SYMBOL(ieee80211_send_teardown_neg_ttlm);
+
void ieee80211_sta_rx_queued_ext(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
@@ -6829,6 +7441,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_link_data *link = &sdata->deflink;
struct ieee80211_rx_status *rx_status;
+ struct ieee802_11_elems *elems;
struct ieee80211_mgmt *mgmt;
u16 fc;
int ies_len;
@@ -6872,9 +7485,8 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
!ether_addr_equal(mgmt->bssid, sdata->vif.cfg.ap_addr))
break;
- if (mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) {
- struct ieee802_11_elems *elems;
-
+ switch (mgmt->u.action.category) {
+ case WLAN_CATEGORY_SPECTRUM_MGMT:
ies_len = skb->len -
offsetof(struct ieee80211_mgmt,
u.action.u.chan_switch.variable);
@@ -6887,15 +7499,20 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
mgmt->u.action.u.chan_switch.variable,
ies_len, true, NULL);
- if (elems && !elems->parse_error)
+ if (elems && !elems->parse_error) {
+ enum ieee80211_csa_source src =
+ IEEE80211_CSA_SOURCE_PROT_ACTION;
+
ieee80211_sta_process_chanswitch(link,
rx_status->mactime,
rx_status->device_timestamp,
- elems, false);
+ elems, elems,
+ src);
+ }
kfree(elems);
- } else if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) {
- struct ieee802_11_elems *elems;
-
+ break;
+ case WLAN_CATEGORY_PUBLIC:
+ case WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION:
ies_len = skb->len -
offsetof(struct ieee80211_mgmt,
u.action.u.ext_chan_switch.variable);
@@ -6912,6 +7529,14 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
ies_len, true, NULL);
if (elems && !elems->parse_error) {
+ enum ieee80211_csa_source src;
+
+ if (mgmt->u.action.category ==
+ WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION)
+ src = IEEE80211_CSA_SOURCE_PROT_ACTION;
+ else
+ src = IEEE80211_CSA_SOURCE_UNPROT_ACTION;
+
/* for the handling code pretend it was an IE */
elems->ext_chansw_ie =
&mgmt->u.action.u.ext_chan_switch.data;
@@ -6919,10 +7544,12 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
ieee80211_sta_process_chanswitch(link,
rx_status->mactime,
rx_status->device_timestamp,
- elems, false);
+ elems, elems,
+ src);
}
kfree(elems);
+ break;
}
break;
}
@@ -7242,7 +7869,7 @@ static void ieee80211_sta_bcn_mon_timer(struct timer_list *t)
return;
if (sdata->vif.bss_conf.csa_active &&
- !sdata->deflink.u.mgd.csa_waiting_bcn)
+ !sdata->deflink.u.mgd.csa.waiting_bcn)
return;
if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)
@@ -7266,7 +7893,7 @@ static void ieee80211_sta_conn_mon_timer(struct timer_list *t)
return;
if (sdata->vif.bss_conf.csa_active &&
- !sdata->deflink.u.mgd.csa_waiting_bcn)
+ !sdata->deflink.u.mgd.csa.waiting_bcn)
return;
sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr);
@@ -7434,6 +8061,8 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
ieee80211_tid_to_link_map_work);
wiphy_delayed_work_init(&ifmgd->neg_ttlm_timeout_work,
ieee80211_neg_ttlm_timeout_work);
+ wiphy_work_init(&ifmgd->teardown_ttlm_work,
+ ieee80211_teardown_ttlm_work);
ifmgd->flags = 0;
ifmgd->powersave = sdata->wdev.ps;
@@ -7475,8 +8104,10 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link)
else
link->u.mgd.req_smps = IEEE80211_SMPS_OFF;
- wiphy_delayed_work_init(&link->u.mgd.chswitch_work,
- ieee80211_chswitch_work);
+ wiphy_delayed_work_init(&link->u.mgd.csa.switch_work,
+ ieee80211_csa_switch_work);
+
+ ieee80211_clear_tpe(&link->conf->tpe);
if (sdata->u.mgd.assoc_data)
ether_addr_copy(link->conf->addr,
@@ -8220,6 +8851,14 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
if (req->ap_mld_addr) {
uapsd_supported = true;
+ if (req->flags & (ASSOC_REQ_DISABLE_HT |
+ ASSOC_REQ_DISABLE_VHT |
+ ASSOC_REQ_DISABLE_HE |
+ ASSOC_REQ_DISABLE_EHT)) {
+ err = -EINVAL;
+ goto err_free;
+ }
+
for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) {
struct ieee80211_supported_band *sband;
struct cfg80211_bss *link_cbss = req->links[i].bss;
@@ -8232,19 +8871,13 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
if (!bss->wmm_used) {
err = -EINVAL;
- goto err_free;
- }
-
- if (req->flags & (ASSOC_REQ_DISABLE_HT |
- ASSOC_REQ_DISABLE_VHT |
- ASSOC_REQ_DISABLE_HE |
- ASSOC_REQ_DISABLE_EHT)) {
- err = -EINVAL;
+ req->links[i].error = err;
goto err_free;
}
if (link_cbss->channel->band == NL80211_BAND_S1GHZ) {
err = -EINVAL;
+ req->links[i].error = err;
goto err_free;
}
@@ -8603,7 +9236,7 @@ void ieee80211_mgd_stop_link(struct ieee80211_link_data *link)
wiphy_work_cancel(link->sdata->local->hw.wiphy,
&link->u.mgd.recalc_smps);
wiphy_delayed_work_cancel(link->sdata->local->hw.wiphy,
- &link->u.mgd.chswitch_work);
+ &link->u.mgd.csa.switch_work);
}
void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata)
@@ -8623,11 +9256,6 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata)
&ifmgd->csa_connection_drop_work);
wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
&ifmgd->tdls_peer_del_work);
- wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
- &ifmgd->ml_reconf_work);
- wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work);
- wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
- &ifmgd->neg_ttlm_timeout_work);
if (ifmgd->assoc_data)
ieee80211_destroy_assoc_data(sdata, ASSOC_TIMEOUT);
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 221695d841fd..28d03196ef75 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -8,7 +8,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2019, 2022-2023 Intel Corporation
+ * Copyright (C) 2019, 2022-2024 Intel Corporation
*/
#include <linux/export.h>
#include <net/mac80211.h>
@@ -413,6 +413,39 @@ void ieee80211_start_next_roc(struct ieee80211_local *local)
}
}
+void ieee80211_reconfig_roc(struct ieee80211_local *local)
+{
+ struct ieee80211_roc_work *roc, *tmp;
+
+ /*
+ * In the software implementation can just continue with the
+ * interruption due to reconfig, roc_work is still queued if
+ * needed.
+ */
+ if (!local->ops->remain_on_channel)
+ return;
+
+ /* flush work so nothing from the driver is still pending */
+ wiphy_work_flush(local->hw.wiphy, &local->hw_roc_start);
+ wiphy_work_flush(local->hw.wiphy, &local->hw_roc_done);
+
+ list_for_each_entry_safe(roc, tmp, &local->roc_list, list) {
+ if (!roc->started)
+ break;
+
+ if (!roc->hw_begun) {
+ /* it didn't start in HW yet, so we can restart it */
+ roc->started = false;
+ continue;
+ }
+
+ /* otherwise destroy it and tell userspace */
+ ieee80211_roc_notify_destroy(roc);
+ }
+
+ ieee80211_start_next_roc(local);
+}
+
static void __ieee80211_roc_work(struct ieee80211_local *local)
{
struct ieee80211_roc_work *roc;
@@ -897,8 +930,18 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
break;
}
- if (ether_addr_equal(conf->addr, mgmt->sa))
+ if (ether_addr_equal(conf->addr, mgmt->sa)) {
+ /* If userspace requested Tx on a specific link
+ * use the same link id if the link bss is matching
+ * the requested chan.
+ */
+ if (sdata->vif.valid_links &&
+ params->link_id >= 0 && params->link_id == i &&
+ params->chan == chanctx_conf->def.chan)
+ link_id = i;
+
break;
+ }
chanctx_conf = NULL;
}
diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c
index 55e5497f8978..279c5143b335 100644
--- a/net/mac80211/parse.c
+++ b/net/mac80211/parse.c
@@ -111,7 +111,7 @@ ieee80211_parse_extension_element(u32 *crc,
if (params->mode < IEEE80211_CONN_MODE_HE)
break;
if (len >= sizeof(*elems->he_spr) &&
- len >= ieee80211_he_spr_size(data))
+ len >= ieee80211_he_spr_size(data) - 1)
elems->he_spr = data;
break;
case WLAN_EID_EXT_HE_6GHZ_CAPA:
@@ -187,6 +187,84 @@ ieee80211_parse_extension_element(u32 *crc,
*crc = crc32_be(*crc, (void *)elem, elem->datalen + 2);
}
+static void ieee80211_parse_tpe(struct ieee80211_parsed_tpe *tpe,
+ const u8 *data, u8 len)
+{
+ const struct ieee80211_tx_pwr_env *env = (const void *)data;
+ u8 count, interpret, category;
+ u8 *out, N, *cnt_out = NULL, *N_out = NULL;
+
+ if (!ieee80211_valid_tpe_element(data, len))
+ return;
+
+ count = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_COUNT);
+ interpret = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_INTERPRET);
+ category = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_CATEGORY);
+
+ switch (interpret) {
+ case IEEE80211_TPE_LOCAL_EIRP:
+ out = tpe->max_local[category].power;
+ cnt_out = &tpe->max_local[category].count;
+ tpe->max_local[category].valid = true;
+ break;
+ case IEEE80211_TPE_REG_CLIENT_EIRP:
+ out = tpe->max_reg_client[category].power;
+ cnt_out = &tpe->max_reg_client[category].count;
+ tpe->max_reg_client[category].valid = true;
+ break;
+ case IEEE80211_TPE_LOCAL_EIRP_PSD:
+ out = tpe->psd_local[category].power;
+ cnt_out = &tpe->psd_local[category].count;
+ N_out = &tpe->psd_local[category].n;
+ tpe->psd_local[category].valid = true;
+ break;
+ case IEEE80211_TPE_REG_CLIENT_EIRP_PSD:
+ out = tpe->psd_reg_client[category].power;
+ cnt_out = &tpe->psd_reg_client[category].count;
+ N_out = &tpe->psd_reg_client[category].n;
+ tpe->psd_reg_client[category].valid = true;
+ break;
+ }
+
+ switch (interpret) {
+ case IEEE80211_TPE_LOCAL_EIRP:
+ case IEEE80211_TPE_REG_CLIENT_EIRP:
+ /* count was validated <= 3, plus 320 MHz */
+ BUILD_BUG_ON(IEEE80211_TPE_EIRP_ENTRIES_320MHZ < 5);
+ memcpy(out, env->variable, count + 1);
+ *cnt_out = count + 1;
+ /* separately take 320 MHz if present */
+ if (count == 3 && len > sizeof(*env) + count + 1) {
+ out[4] = env->variable[4];
+ *cnt_out = 5;
+ }
+ break;
+ case IEEE80211_TPE_LOCAL_EIRP_PSD:
+ case IEEE80211_TPE_REG_CLIENT_EIRP_PSD:
+ if (!count) {
+ memset(out, env->variable[0],
+ IEEE80211_TPE_PSD_ENTRIES_320MHZ);
+ *cnt_out = IEEE80211_TPE_PSD_ENTRIES_320MHZ;
+ break;
+ }
+
+ N = 1 << (count - 1);
+ memcpy(out, env->variable, N);
+ *cnt_out = N;
+ *N_out = N;
+
+ if (len > sizeof(*env) + N) {
+ int K = u8_get_bits(env->variable[N],
+ IEEE80211_TX_PWR_ENV_EXT_COUNT);
+
+ K = min(K, IEEE80211_TPE_PSD_ENTRIES_320MHZ - N);
+ memcpy(out + N, env->variable + N + 1, K);
+ (*cnt_out) += K;
+ }
+ break;
+ }
+}
+
static u32
_ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params,
struct ieee80211_elems_parse *elems_parse,
@@ -529,6 +607,13 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params,
elem_parse_failed =
IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
}
+
+ subelem = cfg80211_find_ext_elem(WLAN_EID_TX_POWER_ENVELOPE,
+ pos, elen);
+ if (subelem)
+ ieee80211_parse_tpe(&elems->csa_tpe,
+ subelem->data + 1,
+ subelem->datalen - 1);
break;
case WLAN_EID_COUNTRY:
elems->country_elem = pos;
@@ -593,16 +678,9 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params,
elems->rsnx_len = elen;
break;
case WLAN_EID_TX_POWER_ENVELOPE:
- if (elen < 1 ||
- elen > sizeof(struct ieee80211_tx_pwr_env))
+ if (params->mode < IEEE80211_CONN_MODE_HE)
break;
-
- if (elems->tx_pwr_env_num >= ARRAY_SIZE(elems->tx_pwr_env))
- break;
-
- elems->tx_pwr_env[elems->tx_pwr_env_num] = (void *)pos;
- elems->tx_pwr_env_len[elems->tx_pwr_env_num] = elen;
- elems->tx_pwr_env_num++;
+ ieee80211_parse_tpe(&elems->tpe, pos, elen);
break;
case WLAN_EID_EXTENSION:
ieee80211_parse_extension_element(calc_crc ?
@@ -889,6 +967,10 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params)
elems->ie_start = params->start;
elems->total_len = params->len;
+ /* set all TPE entries to unlimited (but invalid) */
+ ieee80211_clear_tpe(&elems->tpe);
+ ieee80211_clear_tpe(&elems->csa_tpe);
+
nontransmitted_profile = elems_parse->scratch_pos;
nontransmitted_profile_len =
ieee802_11_find_bssid_profile(params->start, params->len,
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index c1fa26e09479..d823d58303e8 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Portions
- * Copyright (C) 2020-2021, 2023 Intel Corporation
+ * Copyright (C) 2020-2021, 2023-2024 Intel Corporation
*/
#include <net/mac80211.h>
#include <net/rtnetlink.h>
@@ -171,7 +171,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
WARN_ON(!list_empty(&local->chanctx_list));
/* stop hardware - this must stop RX */
- ieee80211_stop_device(local);
+ ieee80211_stop_device(local, true);
suspend:
local->suspended = true;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 6e24864f9a40..59ad24a71141 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3358,6 +3358,7 @@ static void
ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx)
{
struct ieee80211_mgmt *mgmt = (void *)rx->skb->data;
+ struct ieee80211_bss_conf *bss_conf;
const struct element *ie;
size_t baselen;
@@ -3368,7 +3369,9 @@ ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx)
if (ieee80211_hw_check(&rx->local->hw, DETECTS_COLOR_COLLISION))
return;
- if (rx->sdata->vif.bss_conf.csa_active)
+ bss_conf = rx->link->conf;
+ if (bss_conf->csa_active || bss_conf->color_change_active ||
+ !bss_conf->he_bss_color.enabled)
return;
baselen = mgmt->u.beacon.variable - rx->skb->data;
@@ -3380,7 +3383,6 @@ ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx)
rx->skb->len - baselen);
if (ie && ie->datalen >= sizeof(struct ieee80211_he_operation) &&
ie->datalen >= ieee80211_he_oper_size(ie->data + 1)) {
- struct ieee80211_bss_conf *bss_conf = &rx->sdata->vif.bss_conf;
const struct ieee80211_he_operation *he_oper;
u8 color;
@@ -3393,7 +3395,8 @@ ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx)
IEEE80211_HE_OPERATION_BSS_COLOR_MASK);
if (color == bss_conf->he_bss_color.color)
ieee80211_obss_color_collision_notify(&rx->sdata->vif,
- BIT_ULL(color));
+ BIT_ULL(color),
+ bss_conf->link_id);
}
}
@@ -3616,6 +3619,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
break;
case WLAN_CATEGORY_PUBLIC:
+ case WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION:
if (len < IEEE80211_MIN_ACTION_SIZE + 1)
goto invalid;
if (sdata->vif.type != NL80211_IFTYPE_STATION)
@@ -3969,8 +3973,8 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx)
__ieee80211_tx_skb_tid_band(rx->sdata, nskb, 7, -1,
status->band);
}
- dev_kfree_skb(rx->skb);
- return RX_QUEUED;
+
+ return RX_DROP_U_UNKNOWN_ACTION_REJECTED;
}
static ieee80211_rx_result debug_noinline
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 73850312580f..b5f2df61c7f6 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -358,7 +358,8 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata)
struct cfg80211_scan_request *req;
struct cfg80211_chan_def chandef;
u8 bands_used = 0;
- int i, ielen, n_chans;
+ int i, ielen;
+ u32 *n_chans;
u32 flags = 0;
req = rcu_dereference_protected(local->scan_req,
@@ -368,34 +369,34 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata)
return false;
if (ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS)) {
+ local->hw_scan_req->req.n_channels = req->n_channels;
+
for (i = 0; i < req->n_channels; i++) {
local->hw_scan_req->req.channels[i] = req->channels[i];
bands_used |= BIT(req->channels[i]->band);
}
-
- n_chans = req->n_channels;
} else {
do {
if (local->hw_scan_band == NUM_NL80211_BANDS)
return false;
- n_chans = 0;
+ n_chans = &local->hw_scan_req->req.n_channels;
+ *n_chans = 0;
for (i = 0; i < req->n_channels; i++) {
if (req->channels[i]->band !=
local->hw_scan_band)
continue;
- local->hw_scan_req->req.channels[n_chans] =
+ local->hw_scan_req->req.channels[(*n_chans)++] =
req->channels[i];
- n_chans++;
+
bands_used |= BIT(req->channels[i]->band);
}
local->hw_scan_band++;
- } while (!n_chans);
+ } while (!*n_chans);
}
- local->hw_scan_req->req.n_channels = n_chans;
ieee80211_prepare_scan_chandef(&chandef);
if (req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT)
@@ -708,19 +709,11 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
return -EBUSY;
/* For an MLO connection, if a link ID was specified, validate that it
- * is indeed active. If no link ID was specified, select one of the
- * active links.
+ * is indeed active.
*/
- if (ieee80211_vif_is_mld(&sdata->vif)) {
- if (req->tsf_report_link_id >= 0) {
- if (!(sdata->vif.active_links &
- BIT(req->tsf_report_link_id)))
- return -EINVAL;
- } else {
- req->tsf_report_link_id =
- __ffs(sdata->vif.active_links);
- }
- }
+ if (ieee80211_vif_is_mld(&sdata->vif) && req->tsf_report_link_id >= 0 &&
+ !(sdata->vif.active_links & BIT(req->tsf_report_link_id)))
+ return -EINVAL;
if (!__ieee80211_can_leave_ch(sdata))
return -EBUSY;
@@ -752,15 +745,21 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
local->hw_scan_ies_bufsize *= n_bands;
}
- local->hw_scan_req = kmalloc(
- sizeof(*local->hw_scan_req) +
- req->n_channels * sizeof(req->channels[0]) +
- local->hw_scan_ies_bufsize, GFP_KERNEL);
+ local->hw_scan_req = kmalloc(struct_size(local->hw_scan_req,
+ req.channels,
+ req->n_channels) +
+ local->hw_scan_ies_bufsize,
+ GFP_KERNEL);
if (!local->hw_scan_req)
return -ENOMEM;
local->hw_scan_req->req.ssids = req->ssids;
local->hw_scan_req->req.n_ssids = req->n_ssids;
+ /* None of the channels are actually set
+ * up but let UBSAN know the boundaries.
+ */
+ local->hw_scan_req->req.n_channels = req->n_channels;
+
ies = (u8 *)local->hw_scan_req +
sizeof(*local->hw_scan_req) +
req->n_channels * sizeof(req->channels[0]);
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 327c74e296e2..073ff9e0f397 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2008, Intel Corporation
* Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2018, 2020, 2022-2023 Intel Corporation
+ * Copyright (C) 2018, 2020, 2022-2024 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -155,6 +155,7 @@ validate_chandef_by_6ghz_he_eht_oper(struct ieee80211_sub_if_data *sdata,
struct ieee80211_eht_operation _oper;
struct ieee80211_eht_operation_info _oper_info;
} __packed eht;
+ const struct ieee80211_eht_operation *eht_oper;
if (conn->mode < IEEE80211_CONN_MODE_HE) {
chandef->chan = NULL;
@@ -203,19 +204,18 @@ validate_chandef_by_6ghz_he_eht_oper(struct ieee80211_sub_if_data *sdata,
}
if (conn->mode < IEEE80211_CONN_MODE_EHT) {
- if (!ieee80211_chandef_he_6ghz_oper(local, &he._oper,
- NULL, chandef))
- chandef->chan = NULL;
+ eht_oper = NULL;
} else {
eht._oper.params = IEEE80211_EHT_OPER_INFO_PRESENT;
eht._oper_info.control = he._6ghz_oper.control;
eht._oper_info.ccfs0 = he._6ghz_oper.ccfs0;
eht._oper_info.ccfs1 = he._6ghz_oper.ccfs1;
-
- if (!ieee80211_chandef_he_6ghz_oper(local, &he._oper,
- &eht._oper, chandef))
- chandef->chan = NULL;
+ eht_oper = &eht._oper;
}
+
+ if (!ieee80211_chandef_he_6ghz_oper(local, &he._oper,
+ eht_oper, chandef))
+ chandef->chan = NULL;
}
int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
@@ -223,7 +223,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
enum nl80211_band current_band,
u32 vht_cap_info,
struct ieee80211_conn_settings *conn,
- u8 *bssid,
+ u8 *bssid, bool unprot_action,
struct ieee80211_csa_ie *csa_ie)
{
enum nl80211_band new_band = current_band;
@@ -258,8 +258,10 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
if (!ieee80211_operating_class_to_band(new_op_class, &new_band)) {
new_op_class = 0;
- sdata_info(sdata, "cannot understand ECSA IE operating class, %d, ignoring\n",
- ext_chansw_elem->new_operating_class);
+ if (!unprot_action)
+ sdata_info(sdata,
+ "cannot understand ECSA IE operating class, %d, ignoring\n",
+ ext_chansw_elem->new_operating_class);
} else {
new_chan_no = ext_chansw_elem->new_ch_num;
csa_ie->count = ext_chansw_elem->count;
@@ -293,9 +295,10 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band);
new_chan = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq);
if (!new_chan || new_chan->flags & IEEE80211_CHAN_DISABLED) {
- sdata_info(sdata,
- "BSS %pM switches to unsupported channel (%d MHz), disconnecting\n",
- bssid, new_freq);
+ if (!unprot_action)
+ sdata_info(sdata,
+ "BSS %pM switches to unsupported channel (%d MHz), disconnecting\n",
+ bssid, new_freq);
return -EINVAL;
}
@@ -340,6 +343,9 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
break;
}
+ /* capture the AP configuration */
+ csa_ie->chanreq.ap = csa_ie->chanreq.oper;
+
/* parse one of the Elements to build a new chandef */
memset(&new_chandef, 0, sizeof(new_chandef));
new_chandef.chan = new_chan;
@@ -348,6 +354,10 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
new_chandef = csa_ie->chanreq.oper;
/* and update the width accordingly */
ieee80211_chandef_eht_oper(&bwi->info, &new_chandef);
+
+ if (bwi->params & IEEE80211_BW_IND_DIS_SUBCH_PRESENT)
+ new_chandef.punctured =
+ get_unaligned_le16(bwi->info.optional);
} else if (!wide_bw_chansw_ie || !wbcs_elem_to_chandef(wide_bw_chansw_ie,
&new_chandef)) {
if (!ieee80211_operating_class_to_chandef(new_op_class, new_chan,
@@ -364,6 +374,9 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
/* if data is there validate the bandwidth & use it */
if (new_chandef.chan) {
+ /* capture the AP chandef before (potential) downgrading */
+ csa_ie->chanreq.ap = new_chandef;
+
if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_320 &&
new_chandef.width == NL80211_CHAN_WIDTH_320)
ieee80211_chandef_downgrade(&new_chandef, NULL);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index da5fdd6f5c85..aa22f09e6d14 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1724,7 +1724,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
skb_queue_head_init(&pending);
/* sync with ieee80211_tx_h_unicast_ps_buf */
- spin_lock(&sta->ps_lock);
+ spin_lock_bh(&sta->ps_lock);
/* Send all buffered frames to the station */
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
int count = skb_queue_len(&pending), tmp;
@@ -1753,7 +1753,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
*/
clear_sta_flag(sta, WLAN_STA_PSPOLL);
clear_sta_flag(sta, WLAN_STA_UAPSD);
- spin_unlock(&sta->ps_lock);
+ spin_unlock_bh(&sta->ps_lock);
atomic_dec(&ps->num_sta_ps);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index a52fb76386d0..9195d5a2de0a 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -727,6 +727,12 @@ struct sta_info {
struct ieee80211_sta sta;
};
+static inline int ieee80211_tdls_sta_link_id(struct sta_info *sta)
+{
+ /* TDLS STA can only have a single link */
+ return sta->sta.valid_links ? __ffs(sta->sta.valid_links) : 0;
+}
+
static inline enum nl80211_plink_state sta_plink_state(struct sta_info *sta)
{
#ifdef CONFIG_MAC80211_MESH
@@ -886,7 +892,7 @@ void sta_info_stop(struct ieee80211_local *local);
/**
* __sta_info_flush - flush matching STA entries from the STA table
*
- * Returns the number of removed STA entries.
+ * Return: the number of removed STA entries.
*
* @sdata: sdata to remove all stations from
* @vlans: if the given interface is an AP interface, also flush VLANs
@@ -900,7 +906,7 @@ int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans,
/**
* sta_info_flush - flush matching STA entries from the STA table
*
- * Returns the number of removed STA entries.
+ * Return: the number of removed STA entries.
*
* @sdata: sdata to remove all stations from
* @link_id: if given (>=0), all those STA entries using @link_id only
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 1708b33cdc5e..dd8f857a1fbc 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -5,7 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2008-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright 2021-2023 Intel Corporation
+ * Copyright 2021-2024 Intel Corporation
*/
#include <linux/export.h>
@@ -696,6 +696,23 @@ static void ieee80211_handle_smps_status(struct ieee80211_sub_if_data *sdata,
wiphy_work_queue(sdata->local->hw.wiphy, &link->u.mgd.recalc_smps);
}
+static void
+ieee80211_handle_teardown_ttlm_status(struct ieee80211_sub_if_data *sdata,
+ bool acked)
+{
+ if (!sdata || !ieee80211_sdata_running(sdata))
+ return;
+
+ if (!acked)
+ return;
+
+ if (sdata->vif.type != NL80211_IFTYPE_STATION)
+ return;
+
+ wiphy_work_queue(sdata->local->hw.wiphy,
+ &sdata->u.mgd.teardown_ttlm_work);
+}
+
static void ieee80211_report_used_skb(struct ieee80211_local *local,
struct sk_buff *skb, bool dropped,
ktime_t ack_hwtstamp)
@@ -773,6 +790,9 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
ieee80211_handle_smps_status(sdata, acked,
info->status_data);
break;
+ case IEEE80211_STATUS_TYPE_NEG_TTLM:
+ ieee80211_handle_teardown_ttlm_status(sdata, acked);
+ break;
}
rcu_read_unlock();
}
diff --git a/net/mac80211/tests/Makefile b/net/mac80211/tests/Makefile
index 4fdaf3feaca3..511dfa226699 100644
--- a/net/mac80211/tests/Makefile
+++ b/net/mac80211/tests/Makefile
@@ -1,3 +1,3 @@
-mac80211-tests-y += module.o elems.o mfp.o
+mac80211-tests-y += module.o elems.o mfp.o tpe.o
obj-$(CONFIG_MAC80211_KUNIT_TEST) += mac80211-tests.o
diff --git a/net/mac80211/tests/tpe.c b/net/mac80211/tests/tpe.c
new file mode 100644
index 000000000000..dd63303a2985
--- /dev/null
+++ b/net/mac80211/tests/tpe.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KUnit tests for TPE element handling
+ *
+ * Copyright (C) 2024 Intel Corporation
+ */
+#include <kunit/test.h>
+#include "../ieee80211_i.h"
+
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
+
+static struct ieee80211_channel chan6g_1 = {
+ .band = NL80211_BAND_6GHZ,
+ .center_freq = 5955,
+};
+
+static struct ieee80211_channel chan6g_33 = {
+ .band = NL80211_BAND_6GHZ,
+ .center_freq = 6115,
+};
+
+static struct ieee80211_channel chan6g_61 = {
+ .band = NL80211_BAND_6GHZ,
+ .center_freq = 6255,
+};
+
+static const struct subchan_test_case {
+ const char *desc;
+ struct cfg80211_chan_def c;
+ u8 n;
+ int expect;
+} subchan_offset_cases[] = {
+ {
+ .desc = "identical 20 MHz",
+ .c.width = NL80211_CHAN_WIDTH_20,
+ .c.chan = &chan6g_1,
+ .c.center_freq1 = 5955,
+ .n = 1,
+ .expect = 0,
+ },
+ {
+ .desc = "identical 40 MHz",
+ .c.width = NL80211_CHAN_WIDTH_40,
+ .c.chan = &chan6g_1,
+ .c.center_freq1 = 5965,
+ .n = 2,
+ .expect = 0,
+ },
+ {
+ .desc = "identical 80+80 MHz",
+ /* not really is valid? doesn't matter for the test */
+ .c.width = NL80211_CHAN_WIDTH_80P80,
+ .c.chan = &chan6g_1,
+ .c.center_freq1 = 5985,
+ .c.center_freq2 = 6225,
+ .n = 16,
+ .expect = 0,
+ },
+ {
+ .desc = "identical 320 MHz",
+ .c.width = NL80211_CHAN_WIDTH_320,
+ .c.chan = &chan6g_1,
+ .c.center_freq1 = 6105,
+ .n = 16,
+ .expect = 0,
+ },
+ {
+ .desc = "lower 160 MHz of 320 MHz",
+ .c.width = NL80211_CHAN_WIDTH_320,
+ .c.chan = &chan6g_1,
+ .c.center_freq1 = 6105,
+ .n = 8,
+ .expect = 0,
+ },
+ {
+ .desc = "upper 160 MHz of 320 MHz",
+ .c.width = NL80211_CHAN_WIDTH_320,
+ .c.chan = &chan6g_61,
+ .c.center_freq1 = 6105,
+ .n = 8,
+ .expect = 8,
+ },
+ {
+ .desc = "upper 160 MHz of 320 MHz, go to 40",
+ .c.width = NL80211_CHAN_WIDTH_320,
+ .c.chan = &chan6g_61,
+ .c.center_freq1 = 6105,
+ .n = 2,
+ .expect = 8 + 4 + 2,
+ },
+ {
+ .desc = "secondary 80 above primary in 80+80 MHz",
+ /* not really is valid? doesn't matter for the test */
+ .c.width = NL80211_CHAN_WIDTH_80P80,
+ .c.chan = &chan6g_1,
+ .c.center_freq1 = 5985,
+ .c.center_freq2 = 6225,
+ .n = 4,
+ .expect = 0,
+ },
+ {
+ .desc = "secondary 80 below primary in 80+80 MHz",
+ /* not really is valid? doesn't matter for the test */
+ .c.width = NL80211_CHAN_WIDTH_80P80,
+ .c.chan = &chan6g_61,
+ .c.center_freq1 = 6225,
+ .c.center_freq2 = 5985,
+ .n = 4,
+ .expect = 4,
+ },
+ {
+ .desc = "secondary 80 below primary in 80+80 MHz, go to 20",
+ /* not really is valid? doesn't matter for the test */
+ .c.width = NL80211_CHAN_WIDTH_80P80,
+ .c.chan = &chan6g_61,
+ .c.center_freq1 = 6225,
+ .c.center_freq2 = 5985,
+ .n = 1,
+ .expect = 7,
+ },
+};
+
+KUNIT_ARRAY_PARAM_DESC(subchan_offset, subchan_offset_cases, desc);
+
+static void subchan_offset(struct kunit *test)
+{
+ const struct subchan_test_case *params = test->param_value;
+ int offset;
+
+ KUNIT_ASSERT_EQ(test, cfg80211_chandef_valid(&params->c), true);
+
+ offset = ieee80211_calc_chandef_subchan_offset(&params->c, params->n);
+
+ KUNIT_EXPECT_EQ(test, params->expect, offset);
+}
+
+static const struct psd_reorder_test_case {
+ const char *desc;
+ struct cfg80211_chan_def ap, used;
+ struct ieee80211_parsed_tpe_psd psd, out;
+} psd_reorder_cases[] = {
+ {
+ .desc = "no changes, 320 MHz",
+
+ .ap.width = NL80211_CHAN_WIDTH_320,
+ .ap.chan = &chan6g_1,
+ .ap.center_freq1 = 6105,
+
+ .used.width = NL80211_CHAN_WIDTH_320,
+ .used.chan = &chan6g_1,
+ .used.center_freq1 = 6105,
+
+ .psd.valid = true,
+ .psd.count = 16,
+ .psd.n = 8,
+ .psd.power = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+
+ .out.valid = true,
+ .out.count = 16,
+ .out.n = 8,
+ .out.power = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ },
+ {
+ .desc = "no changes, 320 MHz, 160 MHz used, n=0",
+
+ .ap.width = NL80211_CHAN_WIDTH_320,
+ .ap.chan = &chan6g_1,
+ .ap.center_freq1 = 6105,
+
+ .used.width = NL80211_CHAN_WIDTH_160,
+ .used.chan = &chan6g_1,
+ .used.center_freq1 = 6025,
+
+ .psd.valid = true,
+ .psd.count = 16,
+ .psd.n = 0,
+ .psd.power = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, },
+
+ .out.valid = true,
+ .out.count = 8,
+ .out.n = 0,
+ .out.power = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, },
+ },
+ {
+ .desc = "320 MHz, HE is 80, used 160, all lower",
+
+ .ap.width = NL80211_CHAN_WIDTH_320,
+ .ap.chan = &chan6g_1,
+ .ap.center_freq1 = 6105,
+
+ .used.width = NL80211_CHAN_WIDTH_160,
+ .used.chan = &chan6g_1,
+ .used.center_freq1 = 6025,
+
+ .psd.valid = true,
+ .psd.count = 16,
+ .psd.n = 4,
+ .psd.power = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+
+ .out.valid = true,
+ .out.count = 8,
+ .out.n = 4,
+ .out.power = { 0, 1, 2, 3, 4, 5, 6, 7, 127, 127, 127, 127, 127, 127, 127, 127},
+ },
+ {
+ .desc = "320 MHz, HE is 80, used 160, all upper",
+ /*
+ * EHT: | | | | | | | | | | | | | | | | |
+ * HE: | | | | |
+ * used: | | | | | | | | |
+ */
+
+ .ap.width = NL80211_CHAN_WIDTH_320,
+ .ap.chan = &chan6g_61,
+ .ap.center_freq1 = 6105,
+
+ .used.width = NL80211_CHAN_WIDTH_160,
+ .used.chan = &chan6g_61,
+ .used.center_freq1 = 6185,
+
+ .psd.valid = true,
+ .psd.count = 16,
+ .psd.n = 4,
+ .psd.power = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+
+ .out.valid = true,
+ .out.count = 8,
+ .out.n = 4,
+ .out.power = { 12, 13, 14, 15, 0, 1, 2, 3, 127, 127, 127, 127, 127, 127, 127, 127},
+ },
+ {
+ .desc = "320 MHz, HE is 80, used 160, split",
+ /*
+ * EHT: | | | | | | | | | | | | | | | | |
+ * HE: | | | | |
+ * used: | | | | | | | | |
+ */
+
+ .ap.width = NL80211_CHAN_WIDTH_320,
+ .ap.chan = &chan6g_33,
+ .ap.center_freq1 = 6105,
+
+ .used.width = NL80211_CHAN_WIDTH_160,
+ .used.chan = &chan6g_33,
+ .used.center_freq1 = 6185,
+
+ .psd.valid = true,
+ .psd.count = 16,
+ .psd.n = 4,
+ .psd.power = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+
+ .out.valid = true,
+ .out.count = 8,
+ .out.n = 4,
+ .out.power = { 0, 1, 2, 3, 12, 13, 14, 15, 127, 127, 127, 127, 127, 127, 127, 127},
+ },
+};
+
+KUNIT_ARRAY_PARAM_DESC(psd_reorder, psd_reorder_cases, desc);
+
+static void psd_reorder(struct kunit *test)
+{
+ const struct psd_reorder_test_case *params = test->param_value;
+ struct ieee80211_parsed_tpe_psd tmp = params->psd;
+
+ KUNIT_ASSERT_EQ(test, cfg80211_chandef_valid(&params->ap), true);
+ KUNIT_ASSERT_EQ(test, cfg80211_chandef_valid(&params->used), true);
+
+ ieee80211_rearrange_tpe_psd(&tmp, &params->ap, &params->used);
+ KUNIT_EXPECT_MEMEQ(test, &tmp, &params->out, sizeof(tmp));
+}
+
+static struct kunit_case tpe_test_cases[] = {
+ KUNIT_CASE_PARAM(subchan_offset, subchan_offset_gen_params),
+ KUNIT_CASE_PARAM(psd_reorder, psd_reorder_gen_params),
+ {}
+};
+
+static struct kunit_suite tpe = {
+ .name = "mac80211-tpe",
+ .test_cases = tpe_test_cases,
+};
+
+kunit_test_suite(tpe);
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 8e758b5074bd..dc498cd8cd91 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -33,7 +33,7 @@
__string(vif_name, sdata->name)
#define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \
__entry->p2p = sdata->vif.p2p; \
- __assign_str(vif_name, sdata->name)
+ __assign_str(vif_name)
#define VIF_PR_FMT " vif:%s(%d%s)"
#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : ""
@@ -328,9 +328,18 @@ TRACE_EVENT(drv_set_wakeup,
TP_printk(LOCAL_PR_FMT " enabled:%d", LOCAL_PR_ARG, __entry->enabled)
);
-DEFINE_EVENT(local_only_evt, drv_stop,
- TP_PROTO(struct ieee80211_local *local),
- TP_ARGS(local)
+TRACE_EVENT(drv_stop,
+ TP_PROTO(struct ieee80211_local *local, bool suspend),
+ TP_ARGS(local, suspend),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(bool, suspend)
+ ),
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ __entry->suspend = suspend;
+ ),
+ TP_printk(LOCAL_PR_FMT " suspend:%d", LOCAL_PR_ARG, __entry->suspend)
);
DEFINE_EVENT(local_sdata_addr_evt, drv_add_interface,
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index cfd0a62d0152..edba4a31844f 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1609,8 +1609,8 @@ int ieee80211_txq_setup_flows(struct ieee80211_local *local)
local->cparams.target = MS2TIME(20);
local->cparams.ecn = true;
- local->cvars = kcalloc(fq->flows_cnt, sizeof(local->cvars[0]),
- GFP_KERNEL);
+ local->cvars = kvcalloc(fq->flows_cnt, sizeof(local->cvars[0]),
+ GFP_KERNEL);
if (!local->cvars) {
spin_lock_bh(&fq->lock);
fq_reset(fq, fq_skb_free_func);
@@ -1630,7 +1630,7 @@ void ieee80211_txq_teardown_flows(struct ieee80211_local *local)
{
struct fq *fq = &local->fq;
- kfree(local->cvars);
+ kvfree(local->cvars);
local->cvars = NULL;
spin_lock_bh(&fq->lock);
@@ -1768,7 +1768,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
break;
}
sdata = rcu_dereference(local->monitor_sdata);
- if (sdata) {
+ if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) {
vif = &sdata->vif;
info->hw_queue =
vif->hw_queue[skb_get_queue_mapping(skb)];
@@ -2774,8 +2774,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
if (tdls_peer) {
/* For TDLS only one link can be valid with peer STA */
- int tdls_link_id = sta->sta.valid_links ?
- __ffs(sta->sta.valid_links) : 0;
+ int tdls_link_id = ieee80211_tdls_sta_link_id(sta);
struct ieee80211_link_data *link;
/* DA SA BSSID */
@@ -3101,8 +3100,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
case NL80211_IFTYPE_STATION:
if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
/* For TDLS only one link can be valid with peer STA */
- int tdls_link_id = sta->sta.valid_links ?
- __ffs(sta->sta.valid_links) : 0;
+ int tdls_link_id = ieee80211_tdls_sta_link_id(sta);
struct ieee80211_link_data *link;
/* DA SA BSSID */
@@ -3959,7 +3957,8 @@ begin:
break;
}
tx.sdata = rcu_dereference(local->monitor_sdata);
- if (tx.sdata) {
+ if (tx.sdata &&
+ ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) {
vif = &tx.sdata->vif;
info->hw_queue =
vif->hw_queue[skb_get_queue_mapping(skb)];
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index a237cbcf7b49..c7ad9bc5973a 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -776,7 +776,7 @@ static void __iterate_interfaces(struct ieee80211_local *local,
sdata = rcu_dereference_check(local->monitor_sdata,
lockdep_is_held(&local->iflist_mtx) ||
lockdep_is_held(&local->hw.wiphy->mtx));
- if (sdata &&
+ if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) &&
(iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL || !active_only ||
sdata->flags & IEEE80211_SDATA_IN_DRIVER))
iterator(data, sdata->vif.addr, &sdata->vif);
@@ -1565,8 +1565,12 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
return supp_rates;
}
-void ieee80211_stop_device(struct ieee80211_local *local)
+void ieee80211_stop_device(struct ieee80211_local *local, bool suspend)
{
+ local_bh_disable();
+ ieee80211_handle_queued_frames(local);
+ local_bh_enable();
+
ieee80211_led_radio(local, false);
ieee80211_mod_tpt_led_trig(local, 0, IEEE80211_TPT_LEDTRIG_FL_RADIO);
@@ -1574,7 +1578,7 @@ void ieee80211_stop_device(struct ieee80211_local *local)
flush_workqueue(local->workqueue);
wiphy_work_flush(local->hw.wiphy, NULL);
- drv_stop(local);
+ drv_stop(local, suspend);
}
static void ieee80211_flush_completed_scan(struct ieee80211_local *local,
@@ -1841,7 +1845,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
/* add interfaces */
sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata);
- if (sdata) {
+ if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) {
/* in HW restart it exists already */
WARN_ON(local->resuming);
res = drv_add_interface(local, sdata);
@@ -1932,6 +1936,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
old);
}
+ sdata->restart_active_links = active_links;
+
for (link_id = 0;
link_id < ARRAY_SIZE(sdata->vif.link_conf);
link_id++) {
@@ -2059,9 +2065,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
WARN_ON(1);
break;
}
-
- if (active_links)
- ieee80211_set_active_links(&sdata->vif, active_links);
}
ieee80211_recalc_ps(local);
@@ -2102,6 +2105,20 @@ int ieee80211_reconfig(struct ieee80211_local *local)
list_for_each_entry(sdata, &local->interfaces, list)
ieee80211_reenable_keys(sdata);
+ /* re-enable multi-link for client interfaces */
+ list_for_each_entry(sdata, &local->interfaces, list) {
+ if (sdata->restart_active_links)
+ ieee80211_set_active_links(&sdata->vif,
+ sdata->restart_active_links);
+ /*
+ * If a link switch was scheduled before the restart, and ran
+ * before reconfig, it will do nothing, so re-schedule.
+ */
+ if (sdata->desired_active_links)
+ wiphy_work_queue(sdata->local->hw.wiphy,
+ &sdata->activate_links_work);
+ }
+
/* Reconfigure sched scan if it was interrupted by FW restart */
sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata,
lockdep_is_held(&local->hw.wiphy->mtx));
@@ -2162,8 +2179,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
local->in_reconfig = false;
barrier();
- /* Restart deferred ROCs */
- ieee80211_start_next_roc(local);
+ ieee80211_reconfig_roc(local);
/* Requeue all works */
list_for_each_entry(sdata, &local->interfaces, list)
@@ -2320,7 +2336,7 @@ void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata,
chanctx = container_of(chanctx_conf, struct ieee80211_chanctx,
conf);
- ieee80211_recalc_chanctx_min_def(local, chanctx, NULL);
+ ieee80211_recalc_chanctx_min_def(local, chanctx, NULL, false);
}
}
@@ -3136,6 +3152,8 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_local *local,
} else {
ieee80211_chandef_eht_oper((const void *)eht_oper->optional,
&he_chandef);
+ he_chandef.punctured =
+ ieee80211_eht_oper_dis_subchan_bitmap(eht_oper);
}
if (!cfg80211_chandef_valid(&he_chandef))
@@ -3441,12 +3459,8 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local)
lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry(sdata, &local->interfaces, list) {
- /* it might be waiting for the local->mtx, but then
- * by the time it gets it, sdata->wdev.cac_started
- * will no longer be true
- */
wiphy_delayed_work_cancel(local->hw.wiphy,
- &sdata->deflink.dfs_cac_timer_work);
+ &sdata->dfs_cac_timer_work);
if (sdata->wdev.cac_started) {
chandef = sdata->vif.bss_conf.chanreq.oper;
@@ -3918,19 +3932,103 @@ static u8 ieee80211_chanctx_radar_detect(struct ieee80211_local *local,
return radar_detect;
}
+static u32
+__ieee80211_get_radio_mask(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_bss_conf *link_conf;
+ struct ieee80211_chanctx_conf *conf;
+ unsigned int link_id;
+ u32 mask = 0;
+
+ for_each_vif_active_link(&sdata->vif, link_conf, link_id) {
+ conf = sdata_dereference(link_conf->chanctx_conf, sdata);
+ if (!conf || conf->radio_idx < 0)
+ continue;
+
+ mask |= BIT(conf->radio_idx);
+ }
+
+ return mask;
+}
+
+u32 ieee80211_get_radio_mask(struct wiphy *wiphy, struct net_device *dev)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+ return __ieee80211_get_radio_mask(sdata);
+}
+
+static bool
+ieee80211_sdata_uses_radio(struct ieee80211_sub_if_data *sdata, int radio_idx)
+{
+ if (radio_idx < 0)
+ return true;
+
+ return __ieee80211_get_radio_mask(sdata) & BIT(radio_idx);
+}
+
+static int
+ieee80211_fill_ifcomb_params(struct ieee80211_local *local,
+ struct iface_combination_params *params,
+ const struct cfg80211_chan_def *chandef,
+ struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_sub_if_data *sdata_iter;
+ struct ieee80211_chanctx *ctx;
+ int total = !!sdata;
+
+ list_for_each_entry(ctx, &local->chanctx_list, list) {
+ if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED)
+ continue;
+
+ if (params->radio_idx >= 0 &&
+ ctx->conf.radio_idx != params->radio_idx)
+ continue;
+
+ params->radar_detect |=
+ ieee80211_chanctx_radar_detect(local, ctx);
+
+ if (chandef && ctx->mode != IEEE80211_CHANCTX_EXCLUSIVE &&
+ cfg80211_chandef_compatible(chandef, &ctx->conf.def))
+ continue;
+
+ params->num_different_channels++;
+ }
+
+ list_for_each_entry(sdata_iter, &local->interfaces, list) {
+ struct wireless_dev *wdev_iter;
+
+ wdev_iter = &sdata_iter->wdev;
+
+ if (sdata_iter == sdata ||
+ !ieee80211_sdata_running(sdata_iter) ||
+ cfg80211_iftype_allowed(local->hw.wiphy,
+ wdev_iter->iftype, 0, 1))
+ continue;
+
+ if (!ieee80211_sdata_uses_radio(sdata_iter, params->radio_idx))
+ continue;
+
+ params->iftype_num[wdev_iter->iftype]++;
+ total++;
+ }
+
+ return total;
+}
+
int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
const struct cfg80211_chan_def *chandef,
enum ieee80211_chanctx_mode chanmode,
- u8 radar_detect)
+ u8 radar_detect, int radio_idx)
{
+ bool shared = chanmode == IEEE80211_CHANCTX_SHARED;
struct ieee80211_local *local = sdata->local;
- struct ieee80211_sub_if_data *sdata_iter;
enum nl80211_iftype iftype = sdata->wdev.iftype;
- struct ieee80211_chanctx *ctx;
- int total = 1;
struct iface_combination_params params = {
.radar_detect = radar_detect,
+ .radio_idx = radio_idx,
};
+ int total;
lockdep_assert_wiphy(local->hw.wiphy);
@@ -3967,37 +4065,9 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
if (iftype != NL80211_IFTYPE_UNSPECIFIED)
params.iftype_num[iftype] = 1;
- list_for_each_entry(ctx, &local->chanctx_list, list) {
- if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED)
- continue;
- params.radar_detect |=
- ieee80211_chanctx_radar_detect(local, ctx);
- if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) {
- params.num_different_channels++;
- continue;
- }
- if (chandef && chanmode == IEEE80211_CHANCTX_SHARED &&
- cfg80211_chandef_compatible(chandef,
- &ctx->conf.def))
- continue;
- params.num_different_channels++;
- }
-
- list_for_each_entry_rcu(sdata_iter, &local->interfaces, list) {
- struct wireless_dev *wdev_iter;
-
- wdev_iter = &sdata_iter->wdev;
-
- if (sdata_iter == sdata ||
- !ieee80211_sdata_running(sdata_iter) ||
- cfg80211_iftype_allowed(local->hw.wiphy,
- wdev_iter->iftype, 0, 1))
- continue;
-
- params.iftype_num[wdev_iter->iftype]++;
- total++;
- }
-
+ total = ieee80211_fill_ifcomb_params(local, &params,
+ shared ? chandef : NULL,
+ sdata);
if (total == 1 && !params.radar_detect)
return 0;
@@ -4014,28 +4084,17 @@ ieee80211_iter_max_chans(const struct ieee80211_iface_combination *c,
c->num_different_channels);
}
-int ieee80211_max_num_channels(struct ieee80211_local *local)
+int ieee80211_max_num_channels(struct ieee80211_local *local, int radio_idx)
{
- struct ieee80211_sub_if_data *sdata;
- struct ieee80211_chanctx *ctx;
u32 max_num_different_channels = 1;
int err;
- struct iface_combination_params params = {0};
+ struct iface_combination_params params = {
+ .radio_idx = radio_idx,
+ };
lockdep_assert_wiphy(local->hw.wiphy);
- list_for_each_entry(ctx, &local->chanctx_list, list) {
- if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED)
- continue;
-
- params.num_different_channels++;
-
- params.radar_detect |=
- ieee80211_chanctx_radar_detect(local, ctx);
- }
-
- list_for_each_entry_rcu(sdata, &local->interfaces, list)
- params.iftype_num[sdata->wdev.iftype]++;
+ ieee80211_fill_ifcomb_params(local, &params, NULL, NULL);
err = cfg80211_iter_combinations(local->hw.wiphy, &params,
ieee80211_iter_max_chans,
@@ -4323,3 +4382,28 @@ ieee80211_min_bw_limit_from_chandef(struct cfg80211_chan_def *chandef)
return IEEE80211_CONN_BW_LIMIT_20;
}
}
+
+void ieee80211_clear_tpe(struct ieee80211_parsed_tpe *tpe)
+{
+ for (int i = 0; i < 2; i++) {
+ tpe->max_local[i].valid = false;
+ memset(tpe->max_local[i].power,
+ IEEE80211_TPE_MAX_TX_PWR_NO_CONSTRAINT,
+ sizeof(tpe->max_local[i].power));
+
+ tpe->max_reg_client[i].valid = false;
+ memset(tpe->max_reg_client[i].power,
+ IEEE80211_TPE_MAX_TX_PWR_NO_CONSTRAINT,
+ sizeof(tpe->max_reg_client[i].power));
+
+ tpe->psd_local[i].valid = false;
+ memset(tpe->psd_local[i].power,
+ IEEE80211_TPE_PSD_NO_LIMIT,
+ sizeof(tpe->psd_local[i].power));
+
+ tpe->psd_reg_client[i].valid = false;
+ memset(tpe->psd_reg_client[i].power,
+ IEEE80211_TPE_PSD_NO_LIMIT,
+ sizeof(tpe->psd_reg_client[i].power));
+ }
+}
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 642891cafbaf..bf6ef45af757 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -351,7 +351,8 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
/* FIXME: move this to some better location - parses HE/EHT now */
enum ieee80211_sta_rx_bandwidth
-ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta)
+_ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta,
+ struct cfg80211_chan_def *chandef)
{
unsigned int link_id = link_sta->link_id;
struct ieee80211_sub_if_data *sdata = link_sta->sta->sdata;
@@ -361,44 +362,43 @@ ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta)
u32 cap_width;
if (he_cap->has_he) {
- struct ieee80211_bss_conf *link_conf;
- enum ieee80211_sta_rx_bandwidth ret;
+ enum nl80211_band band;
u8 info;
- rcu_read_lock();
- link_conf = rcu_dereference(sdata->vif.link_conf[link_id]);
+ if (chandef) {
+ band = chandef->chan->band;
+ } else {
+ struct ieee80211_bss_conf *link_conf;
+
+ rcu_read_lock();
+ link_conf = rcu_dereference(sdata->vif.link_conf[link_id]);
+ band = link_conf->chanreq.oper.chan->band;
+ rcu_read_unlock();
+ }
- if (eht_cap->has_eht &&
- link_conf->chanreq.oper.chan->band == NL80211_BAND_6GHZ) {
+ if (eht_cap->has_eht && band == NL80211_BAND_6GHZ) {
info = eht_cap->eht_cap_elem.phy_cap_info[0];
- if (info & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ) {
- ret = IEEE80211_STA_RX_BW_320;
- goto out;
- }
+ if (info & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ)
+ return IEEE80211_STA_RX_BW_320;
}
info = he_cap->he_cap_elem.phy_cap_info[0];
- if (link_conf->chanreq.oper.chan->band == NL80211_BAND_2GHZ) {
+ if (band == NL80211_BAND_2GHZ) {
if (info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G)
- ret = IEEE80211_STA_RX_BW_40;
- else
- ret = IEEE80211_STA_RX_BW_20;
- goto out;
+ return IEEE80211_STA_RX_BW_40;
+ return IEEE80211_STA_RX_BW_20;
}
if (info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G ||
info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G)
- ret = IEEE80211_STA_RX_BW_160;
- else if (info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G)
- ret = IEEE80211_STA_RX_BW_80;
- else
- ret = IEEE80211_STA_RX_BW_20;
-out:
- rcu_read_unlock();
+ return IEEE80211_STA_RX_BW_160;
- return ret;
+ if (info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G)
+ return IEEE80211_STA_RX_BW_80;
+
+ return IEEE80211_STA_RX_BW_20;
}
if (!vht_cap->vht_supported)
@@ -503,22 +503,29 @@ ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width)
/* FIXME: rename/move - this deals with everything not just VHT */
enum ieee80211_sta_rx_bandwidth
-ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta)
+_ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta,
+ struct cfg80211_chan_def *chandef)
{
struct sta_info *sta = link_sta->sta;
- struct ieee80211_bss_conf *link_conf;
enum nl80211_chan_width bss_width;
enum ieee80211_sta_rx_bandwidth bw;
- rcu_read_lock();
- link_conf = rcu_dereference(sta->sdata->vif.link_conf[link_sta->link_id]);
- if (WARN_ON(!link_conf))
- bss_width = NL80211_CHAN_WIDTH_20_NOHT;
- else
+ if (chandef) {
+ bss_width = chandef->width;
+ } else {
+ struct ieee80211_bss_conf *link_conf;
+
+ rcu_read_lock();
+ link_conf = rcu_dereference(sta->sdata->vif.link_conf[link_sta->link_id]);
+ if (WARN_ON_ONCE(!link_conf)) {
+ rcu_read_unlock();
+ return IEEE80211_STA_RX_BW_20;
+ }
bss_width = link_conf->chanreq.oper.width;
- rcu_read_unlock();
+ rcu_read_unlock();
+ }
- bw = ieee80211_sta_cap_rx_bw(link_sta);
+ bw = _ieee80211_sta_cap_rx_bw(link_sta, chandef);
bw = min(bw, link_sta->cur_max_bandwidth);
/* Don't consider AP's bandwidth for TDLS peers, section 11.23.1 of
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index e40529b8c5c9..047a33797020 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -895,7 +895,8 @@ ieee80211_crypto_aes_cmac_256_encrypt(struct ieee80211_tx_data *tx)
info = IEEE80211_SKB_CB(skb);
- if (info->control.hw_key)
+ if (info->control.hw_key &&
+ !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIE))
return TX_CONTINUE;
if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie)))
@@ -911,6 +912,9 @@ ieee80211_crypto_aes_cmac_256_encrypt(struct ieee80211_tx_data *tx)
bip_ipn_set64(mmie->sequence_number, pn64);
+ if (info->control.hw_key)
+ return TX_CONTINUE;
+
bip_aad(skb, aad);
/* MIC = AES-256-CMAC(IGTK, AAD || Management Frame Body || MMIE, 128)
@@ -1040,7 +1044,8 @@ ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx)
info = IEEE80211_SKB_CB(skb);
- if (info->control.hw_key)
+ if (info->control.hw_key &&
+ !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIE))
return TX_CONTINUE;
if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie)))
@@ -1056,6 +1061,9 @@ ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx)
bip_ipn_set64(mmie->sequence_number, pn64);
+ if (info->control.hw_key)
+ return TX_CONTINUE;
+
bip_aad(skb, aad);
hdr = (struct ieee80211_hdr *)skb->data;
diff --git a/net/mac802154/main.c b/net/mac802154/main.c
index 9ab7396668d2..21b7c3b280b4 100644
--- a/net/mac802154/main.c
+++ b/net/mac802154/main.c
@@ -161,8 +161,10 @@ void ieee802154_configure_durations(struct wpan_phy *phy,
}
phy->symbol_duration = duration;
- phy->lifs_period = (IEEE802154_LIFS_PERIOD * phy->symbol_duration) / NSEC_PER_SEC;
- phy->sifs_period = (IEEE802154_SIFS_PERIOD * phy->symbol_duration) / NSEC_PER_SEC;
+ phy->lifs_period =
+ (IEEE802154_LIFS_PERIOD * phy->symbol_duration) / NSEC_PER_USEC;
+ phy->sifs_period =
+ (IEEE802154_SIFS_PERIOD * phy->symbol_duration) / NSEC_PER_USEC;
}
EXPORT_SYMBOL(ieee802154_configure_durations);
@@ -184,10 +186,10 @@ static void ieee802154_setup_wpan_phy_pib(struct wpan_phy *wpan_phy)
* Should be done when all drivers sets this value.
*/
- wpan_phy->lifs_period =
- (IEEE802154_LIFS_PERIOD * wpan_phy->symbol_duration) / 1000;
- wpan_phy->sifs_period =
- (IEEE802154_SIFS_PERIOD * wpan_phy->symbol_duration) / 1000;
+ wpan_phy->lifs_period = (IEEE802154_LIFS_PERIOD *
+ wpan_phy->symbol_duration) / NSEC_PER_USEC;
+ wpan_phy->sifs_period = (IEEE802154_SIFS_PERIOD *
+ wpan_phy->symbol_duration) / NSEC_PER_USEC;
}
int ieee802154_register_hw(struct ieee802154_hw *hw)
diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index 2a6f1ed763c9..6fbed5bb5c3e 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -34,8 +34,8 @@ void ieee802154_xmit_sync_worker(struct work_struct *work)
if (res)
goto err_tx;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += skb->len;
+ DEV_STATS_INC(dev, tx_packets);
+ DEV_STATS_ADD(dev, tx_bytes, skb->len);
ieee802154_xmit_complete(&local->hw, skb, false);
@@ -90,8 +90,8 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb)
if (ret)
goto err_wake_netif_queue;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += len;
+ DEV_STATS_INC(dev, tx_packets);
+ DEV_STATS_ADD(dev, tx_bytes, len);
} else {
local->tx_skb = skb;
queue_work(local->workqueue, &local->sync_tx_work);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 6dab883a08dd..0e6c94a8c2bc 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -594,7 +594,7 @@ static struct net_device *inet_fib_lookup_dev(struct net *net,
struct in_addr daddr;
memcpy(&daddr, addr, sizeof(struct in_addr));
- rt = ip_route_output(net, daddr.s_addr, 0, 0, 0);
+ rt = ip_route_output(net, daddr.s_addr, 0, 0, 0, RT_SCOPE_UNIVERSE);
if (IS_ERR(rt))
return ERR_CAST(rt);
@@ -1154,7 +1154,7 @@ static int mpls_netconf_fill_devconf(struct sk_buff *skb, struct mpls_dev *mdev,
if ((all || type == NETCONFA_INPUT) &&
nla_put_s32(skb, NETCONFA_INPUT,
- mdev->input_enabled) < 0)
+ READ_ONCE(mdev->input_enabled)) < 0)
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -1303,11 +1303,12 @@ static int mpls_netconf_dump_devconf(struct sk_buff *skb,
{
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
- struct hlist_head *head;
+ struct {
+ unsigned long ifindex;
+ } *ctx = (void *)cb->ctx;
struct net_device *dev;
struct mpls_dev *mdev;
- int idx, s_idx;
- int h, s_h;
+ int err = 0;
if (cb->strict_check) {
struct netlink_ext_ack *extack = cb->extack;
@@ -1324,46 +1325,29 @@ static int mpls_netconf_dump_devconf(struct sk_buff *skb,
}
}
- s_h = cb->args[0];
- s_idx = idx = cb->args[1];
-
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &net->dev_index_head[h];
- rcu_read_lock();
- cb->seq = net->dev_base_seq;
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- mdev = mpls_dev_get(dev);
- if (!mdev)
- goto cont;
- if (mpls_netconf_fill_devconf(skb, mdev,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq,
- RTM_NEWNETCONF,
- NLM_F_MULTI,
- NETCONFA_ALL) < 0) {
- rcu_read_unlock();
- goto done;
- }
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-cont:
- idx++;
- }
- rcu_read_unlock();
+ rcu_read_lock();
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ mdev = mpls_dev_get(dev);
+ if (!mdev)
+ continue;
+ err = mpls_netconf_fill_devconf(skb, mdev,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWNETCONF,
+ NLM_F_MULTI,
+ NETCONFA_ALL);
+ if (err < 0)
+ break;
}
-done:
- cb->args[0] = h;
- cb->args[1] = idx;
+ rcu_read_unlock();
- return skb->len;
+ return err;
}
#define MPLS_PERDEV_SYSCTL_OFFSET(field) \
(&((struct mpls_dev *)0)->field)
-static int mpls_conf_proc(struct ctl_table *ctl, int write,
+static int mpls_conf_proc(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int oval = *(int *)ctl->data;
@@ -1393,13 +1377,13 @@ static const struct ctl_table mpls_dev_table[] = {
.proc_handler = mpls_conf_proc,
.data = MPLS_PERDEV_SYSCTL_OFFSET(input_enabled),
},
- { }
};
static int mpls_dev_sysctl_register(struct net_device *dev,
struct mpls_dev *mdev)
{
char path[sizeof("net/mpls/conf/") + IFNAMSIZ];
+ size_t table_size = ARRAY_SIZE(mpls_dev_table);
struct net *net = dev_net(dev);
struct ctl_table *table;
int i;
@@ -1411,7 +1395,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev,
/* Table data contains only offsets relative to the base of
* the mdev at this point, so make them absolute.
*/
- for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++) {
+ for (i = 0; i < table_size; i++) {
table[i].data = (char *)mdev + (uintptr_t)table[i].data;
table[i].extra1 = mdev;
table[i].extra2 = net;
@@ -1419,8 +1403,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev,
snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name);
- mdev->sysctl = register_net_sysctl_sz(net, path, table,
- ARRAY_SIZE(mpls_dev_table));
+ mdev->sysctl = register_net_sysctl_sz(net, path, table, table_size);
if (!mdev->sysctl)
goto free;
@@ -1438,7 +1421,7 @@ static void mpls_dev_sysctl_unregister(struct net_device *dev,
struct mpls_dev *mdev)
{
struct net *net = dev_net(dev);
- struct ctl_table *table;
+ const struct ctl_table *table;
if (!mdev->sysctl)
return;
@@ -2617,7 +2600,7 @@ nolabels:
return -ENOMEM;
}
-static int mpls_platform_labels(struct ctl_table *table, int write,
+static int mpls_platform_labels(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = table->data;
@@ -2669,11 +2652,11 @@ static const struct ctl_table mpls_table[] = {
.extra1 = SYSCTL_ONE,
.extra2 = &ttl_max,
},
- { }
};
static int mpls_net_init(struct net *net)
{
+ size_t table_size = ARRAY_SIZE(mpls_table);
struct ctl_table *table;
int i;
@@ -2689,11 +2672,11 @@ static int mpls_net_init(struct net *net)
/* Table data contains only offsets relative to the base of
* the mdev at this point, so make them absolute.
*/
- for (i = 0; i < ARRAY_SIZE(mpls_table) - 1; i++)
+ for (i = 0; i < table_size; i++)
table[i].data = (char *)net + (uintptr_t)table[i].data;
net->mpls.ctl = register_net_sysctl_sz(net, "net/mpls", table,
- ARRAY_SIZE(mpls_table));
+ table_size);
if (net->mpls.ctl == NULL) {
kfree(table);
return -ENOMEM;
@@ -2706,7 +2689,7 @@ static void mpls_net_exit(struct net *net)
{
struct mpls_route __rcu **platform_label;
size_t platform_labels;
- struct ctl_table *table;
+ const struct ctl_table *table;
unsigned int index;
table = net->mpls.ctl->ctl_table_arg;
@@ -2773,7 +2756,8 @@ static int __init mpls_init(void)
mpls_getroute, mpls_dump_routes, 0);
rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_GETNETCONF,
mpls_netconf_get_devconf,
- mpls_netconf_dump_devconf, 0);
+ mpls_netconf_dump_devconf,
+ RTNL_FLAG_DUMP_UNLOCKED);
err = ipgre_tunnel_encap_add_mpls_ops();
if (err)
pr_err("Can't add mpls over gre tunnel ops\n");
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 8fc790f2a01b..4385fd3b13be 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -81,7 +81,7 @@ static int mpls_xmit(struct sk_buff *skb)
ttl = net->mpls.default_ttl;
else
ttl = ip_hdr(skb)->ttl;
- rt = (struct rtable *)dst;
+ rt = dst_rtable(dst);
} else if (dst->ops->family == AF_INET6) {
if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
ttl = tun_encap_info->default_ttl;
@@ -90,7 +90,7 @@ static int mpls_xmit(struct sk_buff *skb)
ttl = net->mpls.default_ttl;
else
ttl = ipv6_hdr(skb)->hop_limit;
- rt6 = (struct rt6_info *)dst;
+ rt6 = dst_rt6_info(dst);
} else {
goto drop;
}
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 13fe0748dde8..99382c317ebb 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -92,10 +92,65 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
pernet->allow_join_initial_addr_port = 1;
pernet->stale_loss_cnt = 4;
pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
- strcpy(pernet->scheduler, "default");
+ strscpy(pernet->scheduler, "default", sizeof(pernet->scheduler));
}
#ifdef CONFIG_SYSCTL
+static int mptcp_set_scheduler(const struct net *net, const char *name)
+{
+ struct mptcp_pernet *pernet = mptcp_get_pernet(net);
+ struct mptcp_sched_ops *sched;
+ int ret = 0;
+
+ rcu_read_lock();
+ sched = mptcp_sched_find(name);
+ if (sched)
+ strscpy(pernet->scheduler, name, MPTCP_SCHED_NAME_MAX);
+ else
+ ret = -ENOENT;
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static int proc_scheduler(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ const struct net *net = current->nsproxy->net_ns;
+ char val[MPTCP_SCHED_NAME_MAX];
+ struct ctl_table tbl = {
+ .data = val,
+ .maxlen = MPTCP_SCHED_NAME_MAX,
+ };
+ int ret;
+
+ strscpy(val, mptcp_get_scheduler(net), MPTCP_SCHED_NAME_MAX);
+
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ ret = mptcp_set_scheduler(net, val);
+
+ return ret;
+}
+
+static int proc_available_schedulers(const struct ctl_table *ctl,
+ int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table tbl = { .maxlen = MPTCP_SCHED_BUF_MAX, };
+ int ret;
+
+ tbl.data = kmalloc(tbl.maxlen, GFP_USER);
+ if (!tbl.data)
+ return -ENOMEM;
+
+ mptcp_get_available_schedulers(tbl.data, MPTCP_SCHED_BUF_MAX);
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
+ kfree(tbl.data);
+
+ return ret;
+}
+
static struct ctl_table mptcp_sysctl_table[] = {
{
.procname = "enabled",
@@ -148,7 +203,13 @@ static struct ctl_table mptcp_sysctl_table[] = {
.procname = "scheduler",
.maxlen = MPTCP_SCHED_NAME_MAX,
.mode = 0644,
- .proc_handler = proc_dostring,
+ .proc_handler = proc_scheduler,
+ },
+ {
+ .procname = "available_schedulers",
+ .maxlen = MPTCP_SCHED_BUF_MAX,
+ .mode = 0644,
+ .proc_handler = proc_available_schedulers,
},
{
.procname = "close_timeout",
@@ -156,7 +217,6 @@ static struct ctl_table mptcp_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- {}
};
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
@@ -178,7 +238,8 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[4].data = &pernet->stale_loss_cnt;
table[5].data = &pernet->pm_type;
table[6].data = &pernet->scheduler;
- table[7].data = &pernet->close_timeout;
+ /* table[7] is for available_schedulers which is read-only info */
+ table[8].data = &pernet->close_timeout;
hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
ARRAY_SIZE(mptcp_sysctl_table));
@@ -198,7 +259,7 @@ err_alloc:
static void mptcp_pernet_del_table(struct mptcp_pernet *pernet)
{
- struct ctl_table *table = pernet->ctl_table_hdr->ctl_table_arg;
+ const struct ctl_table *table = pernet->ctl_table_hdr->ctl_table_arg;
unregister_net_sysctl_table(pernet->ctl_table_hdr);
diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c
index 3ae46b545d2c..2d3efb405437 100644
--- a/net/mptcp/diag.c
+++ b/net/mptcp/diag.c
@@ -94,7 +94,7 @@ static size_t subflow_get_info_size(const struct sock *sk)
nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ */
nla_total_size_64bit(8) + /* MPTCP_SUBFLOW_ATTR_MAP_SEQ */
nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_MAP_SFSEQ */
- nla_total_size(2) + /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */
+ nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */
nla_total_size(2) + /* MPTCP_SUBFLOW_ATTR_MAP_DATALEN */
nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_FLAGS */
nla_total_size(1) + /* MPTCP_SUBFLOW_ATTR_ID_REM */
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index c30405e76833..7884217f33eb 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -19,7 +19,9 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS),
SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN),
SNMP_MIB_ITEM("MPJoinSynRx", MPTCP_MIB_JOINSYNRX),
+ SNMP_MIB_ITEM("MPJoinSynBackupRx", MPTCP_MIB_JOINSYNBACKUPRX),
SNMP_MIB_ITEM("MPJoinSynAckRx", MPTCP_MIB_JOINSYNACKRX),
+ SNMP_MIB_ITEM("MPJoinSynAckBackupRx", MPTCP_MIB_JOINSYNACKBACKUPRX),
SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC),
SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index dd7fd1f246b5..66aa67f49d03 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -1,5 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include <net/inet_common.h>
+
enum linux_mptcp_mib_field {
MPTCP_MIB_NUM = 0,
MPTCP_MIB_MPCAPABLEPASSIVE, /* Received SYN with MP_CAPABLE */
@@ -12,7 +14,9 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */
MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */
MPTCP_MIB_JOINSYNRX, /* Received a SYN + MP_JOIN */
+ MPTCP_MIB_JOINSYNBACKUPRX, /* Received a SYN + MP_JOIN + backup flag */
MPTCP_MIB_JOINSYNACKRX, /* Received a SYN/ACK + MP_JOIN */
+ MPTCP_MIB_JOINSYNACKBACKUPRX, /* Received a SYN/ACK + MP_JOIN + backup flag */
MPTCP_MIB_JOINSYNACKMAC, /* HMAC was wrong on SYN/ACK + MP_JOIN */
MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 27ca42c77b02..ac2f1a54cc43 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -909,7 +909,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
return true;
} else if (subflow_req->mp_join) {
opts->suboptions = OPTION_MPTCP_MPJ_SYNACK;
- opts->backup = subflow_req->backup;
+ opts->backup = subflow_req->request_bkup;
opts->join_id = subflow_req->local_id;
opts->thmac = subflow_req->thmac;
opts->nonce = subflow_req->local_nonce;
@@ -958,7 +958,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (subflow->remote_key_valid &&
(((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) ||
- ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo))) {
+ ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) &&
+ (!mp_opt->echo || subflow->mp_join)))) {
/* subflows are fully established as soon as we get any
* additional ack, including ADD_ADDR.
*/
@@ -1068,6 +1069,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
__mptcp_snd_una_update(msk, new_snd_una);
__mptcp_data_acked(sk);
}
+ msk->last_ack_recv = tcp_jiffies32;
mptcp_data_unlock(sk);
trace_ack_update_msk(mp_opt->data_ack,
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 55406720c607..23bb89c94e90 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -426,6 +426,18 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
return mptcp_pm_nl_get_local_id(msk, &skc_local);
}
+bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc)
+{
+ struct mptcp_addr_info skc_local;
+
+ mptcp_local_address((struct sock_common *)skc, &skc_local);
+
+ if (mptcp_pm_is_userspace(msk))
+ return mptcp_userspace_pm_is_backup(msk, &skc_local);
+
+ return mptcp_pm_nl_is_backup(msk, &skc_local);
+}
+
int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
u8 *flags, int *ifindex)
{
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 5c17d39146ea..4cae2aa7be5c 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -14,6 +14,7 @@
#include "protocol.h"
#include "mib.h"
+#include "mptcp_pm_gen.h"
static int pm_nl_pernet_id;
@@ -347,7 +348,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
if (add_entry) {
- if (mptcp_pm_is_kernel(msk))
+ if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk)))
return false;
sk_reset_timer(sk, &add_entry->add_timer,
@@ -470,7 +471,6 @@ static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_con
slow = lock_sock_fast(ssk);
if (prio) {
subflow->send_mp_prio = 1;
- subflow->backup = backup;
subflow->request_bkup = backup;
}
@@ -512,8 +512,8 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info)
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
+ struct mptcp_pm_addr_entry *local, *signal_and_subflow = NULL;
struct sock *sk = (struct sock *)msk;
- struct mptcp_pm_addr_entry *local;
unsigned int add_addr_signal_max;
unsigned int local_addr_max;
struct pm_nl_pernet *pernet;
@@ -555,8 +555,6 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
/* check first for announce */
if (msk->pm.add_addr_signaled < add_addr_signal_max) {
- local = select_signal_address(pernet, msk);
-
/* due to racing events on both ends we can reach here while
* previous add address is still running: if we invoke now
* mptcp_pm_announce_addr(), that will fail and the
@@ -567,16 +565,26 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL))
return;
- if (local) {
- if (mptcp_pm_alloc_anno_list(msk, &local->addr)) {
- __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
- msk->pm.add_addr_signaled++;
- mptcp_pm_announce_addr(msk, &local->addr, false);
- mptcp_pm_nl_addr_send_ack(msk);
- }
- }
+ local = select_signal_address(pernet, msk);
+ if (!local)
+ goto subflow;
+
+ /* If the alloc fails, we are on memory pressure, not worth
+ * continuing, and trying to create subflows.
+ */
+ if (!mptcp_pm_alloc_anno_list(msk, &local->addr))
+ return;
+
+ __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
+ msk->pm.add_addr_signaled++;
+ mptcp_pm_announce_addr(msk, &local->addr, false);
+ mptcp_pm_nl_addr_send_ack(msk);
+
+ if (local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
+ signal_and_subflow = local;
}
+subflow:
/* check if should create a new subflow */
while (msk->pm.local_addr_used < local_addr_max &&
msk->pm.subflows < subflows_max) {
@@ -584,9 +592,14 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
bool fullmesh;
int i, nr;
- local = select_local_address(pernet, msk);
- if (!local)
- break;
+ if (signal_and_subflow) {
+ local = signal_and_subflow;
+ signal_and_subflow = NULL;
+ } else {
+ local = select_local_address(pernet, msk);
+ if (!local)
+ break;
+ }
fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
@@ -676,6 +689,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
unsigned int add_addr_accept_max;
struct mptcp_addr_info remote;
unsigned int subflows_max;
+ bool sf_created = false;
int i, nr;
add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk);
@@ -703,15 +717,18 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
if (nr == 0)
return;
- msk->pm.add_addr_accepted++;
- if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
- msk->pm.subflows >= subflows_max)
- WRITE_ONCE(msk->pm.accept_addr, false);
-
spin_unlock_bh(&msk->pm.lock);
for (i = 0; i < nr; i++)
- __mptcp_subflow_connect(sk, &addrs[i], &remote);
+ if (__mptcp_subflow_connect(sk, &addrs[i], &remote) == 0)
+ sf_created = true;
spin_lock_bh(&msk->pm.lock);
+
+ if (sf_created) {
+ msk->pm.add_addr_accepted++;
+ if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
+ msk->pm.subflows >= subflows_max)
+ WRITE_ONCE(msk->pm.accept_addr, false);
+ }
}
void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
@@ -813,10 +830,13 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
spin_lock_bh(&msk->pm.lock);
removed = true;
- __MPTCP_INC_STATS(sock_net(sk), rm_type);
+ if (rm_type == MPTCP_MIB_RMSUBFLOW)
+ __MPTCP_INC_STATS(sock_net(sk), rm_type);
}
if (rm_type == MPTCP_MIB_RMSUBFLOW)
__set_bit(rm_id ? rm_id : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap);
+ else if (rm_type == MPTCP_MIB_RMADDR)
+ __MPTCP_INC_STATS(sock_net(sk), rm_type);
if (!removed)
continue;
@@ -1094,6 +1114,24 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc
return ret;
}
+bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc)
+{
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
+ struct mptcp_pm_addr_entry *entry;
+ bool backup = false;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
+ if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) {
+ backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return backup;
+}
+
#define MPTCP_PM_CMD_GRP_OFFSET 0
#define MPTCP_PM_EV_GRP_OFFSET 1
@@ -1303,8 +1341,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
- if (addr.addr.port && !(addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
- GENL_SET_ERR_MSG(info, "flags must have signal when using port");
+ if (addr.addr.port && !address_use_port(&addr)) {
+ GENL_SET_ERR_MSG(info, "flags must have signal and not subflow when using port");
return -EINVAL;
}
@@ -1393,6 +1431,7 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
ret = remove_anno_list_by_saddr(msk, addr);
if (ret || force) {
spin_lock_bh(&msk->pm.lock);
+ msk->pm.add_addr_signaled -= ret;
mptcp_pm_remove_addr(msk, &list);
spin_unlock_bh(&msk->pm.lock);
}
@@ -1526,16 +1565,25 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list)
{
struct mptcp_rm_list alist = { .nr = 0 };
struct mptcp_pm_addr_entry *entry;
+ int anno_nr = 0;
list_for_each_entry(entry, rm_list, list) {
- if ((remove_anno_list_by_saddr(msk, &entry->addr) ||
- lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) &&
- alist.nr < MPTCP_RM_IDS_MAX)
- alist.ids[alist.nr++] = entry->addr.id;
+ if (alist.nr >= MPTCP_RM_IDS_MAX)
+ break;
+
+ /* only delete if either announced or matching a subflow */
+ if (remove_anno_list_by_saddr(msk, &entry->addr))
+ anno_nr++;
+ else if (!lookup_subflow_by_saddr(&msk->conn_list,
+ &entry->addr))
+ continue;
+
+ alist.ids[alist.nr++] = entry->addr.id;
}
if (alist.nr) {
spin_lock_bh(&msk->pm.lock);
+ msk->pm.add_addr_signaled -= anno_nr;
mptcp_pm_remove_addr(msk, &alist);
spin_unlock_bh(&msk->pm.lock);
}
@@ -1548,17 +1596,18 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
struct mptcp_pm_addr_entry *entry;
list_for_each_entry(entry, rm_list, list) {
- if (lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) &&
- slist.nr < MPTCP_RM_IDS_MAX)
+ if (slist.nr < MPTCP_RM_IDS_MAX &&
+ lookup_subflow_by_saddr(&msk->conn_list, &entry->addr))
slist.ids[slist.nr++] = entry->addr.id;
- if (remove_anno_list_by_saddr(msk, &entry->addr) &&
- alist.nr < MPTCP_RM_IDS_MAX)
+ if (alist.nr < MPTCP_RM_IDS_MAX &&
+ remove_anno_list_by_saddr(msk, &entry->addr))
alist.ids[alist.nr++] = entry->addr.id;
}
if (alist.nr) {
spin_lock_bh(&msk->pm.lock);
+ msk->pm.add_addr_signaled -= alist.nr;
mptcp_pm_remove_addr(msk, &alist);
spin_unlock_bh(&msk->pm.lock);
}
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 9f5d422d5ef6..8eaa9fbe3e34 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -6,6 +6,7 @@
#include "protocol.h"
#include "mib.h"
+#include "mptcp_pm_gen.h"
void mptcp_free_local_addr_list(struct mptcp_sock *msk)
{
@@ -164,6 +165,24 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true);
}
+bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk,
+ struct mptcp_addr_info *skc)
+{
+ struct mptcp_pm_addr_entry *entry;
+ bool backup = false;
+
+ spin_lock_bh(&msk->pm.lock);
+ list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (mptcp_addresses_equal(&entry->addr, skc, false)) {
+ backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
+ break;
+ }
+ }
+ spin_unlock_bh(&msk->pm.lock);
+
+ return backup;
+}
+
int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 965eb69dc5de..0d536b183a6c 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -20,6 +20,7 @@
#include <net/transp_v6.h>
#endif
#include <net/mptcp.h>
+#include <net/hotdata.h>
#include <net/xfrm.h>
#include <asm/ioctls.h>
#include "protocol.h"
@@ -349,8 +350,10 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
skb_orphan(skb);
/* try to fetch required memory from subflow */
- if (!mptcp_rmem_schedule(sk, ssk, skb->truesize))
+ if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED);
goto drop;
+ }
has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
@@ -706,6 +709,8 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
}
} while (more_data_avail);
+ if (moved > 0)
+ msk->last_data_recv = tcp_jiffies32;
*bytes += moved;
return done;
}
@@ -841,10 +846,8 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
sk_rbuf = ssk_rbuf;
/* over limit? can't append more skbs to msk, Also, no need to wake-up*/
- if (__mptcp_rmem(sk) > sk_rbuf) {
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED);
+ if (__mptcp_rmem(sk) > sk_rbuf)
return;
- }
/* Wake-up the reader only for in-sequence data */
mptcp_data_lock(sk);
@@ -1270,7 +1273,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
i = skb_shinfo(skb)->nr_frags;
can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset);
- if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
+ if (!can_coalesce && i >= READ_ONCE(net_hotdata.sysctl_max_skb_frags)) {
tcp_mark_push(tcp_sk(ssk), skb);
goto alloc_skb;
}
@@ -1419,13 +1422,15 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
}
mptcp_for_each_subflow(msk, subflow) {
+ bool backup = subflow->backup || subflow->request_bkup;
+
trace_mptcp_subflow_get_send(subflow);
ssk = mptcp_subflow_tcp_sock(subflow);
if (!mptcp_subflow_active(subflow))
continue;
tout = max(tout, mptcp_timeout_from_subflow(subflow));
- nr_active += !subflow->backup;
+ nr_active += !backup;
pace = subflow->avg_pacing_rate;
if (unlikely(!pace)) {
/* init pacing rate from socket */
@@ -1436,9 +1441,9 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
}
linger_time = div_u64((u64)READ_ONCE(ssk->sk_wmem_queued) << 32, pace);
- if (linger_time < send_info[subflow->backup].linger_time) {
- send_info[subflow->backup].ssk = ssk;
- send_info[subflow->backup].linger_time = linger_time;
+ if (linger_time < send_info[backup].linger_time) {
+ send_info[backup].ssk = ssk;
+ send_info[backup].linger_time = linger_time;
}
}
__mptcp_set_timeout(sk, tout);
@@ -1556,6 +1561,8 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
err = copied;
out:
+ if (err > 0)
+ msk->last_data_sent = tcp_jiffies32;
return err;
}
@@ -2035,13 +2042,13 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
do_div(grow, msk->rcvq_space.space);
rcvwin += (grow << 1);
- rcvbuf = min_t(u64, __tcp_space_from_win(scaling_ratio, rcvwin),
+ rcvbuf = min_t(u64, mptcp_space_from_win(sk, rcvwin),
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
u32 window_clamp;
- window_clamp = __tcp_win_from_space(scaling_ratio, rcvbuf);
+ window_clamp = mptcp_win_from_space(sk, rcvbuf);
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
/* Make subflows follow along. If we do not do this, we
@@ -2056,7 +2063,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
ssk = mptcp_subflow_tcp_sock(subflow);
slow = lock_sock_fast(ssk);
WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf);
- tcp_sk(ssk)->window_clamp = window_clamp;
+ WRITE_ONCE(tcp_sk(ssk)->window_clamp, window_clamp);
tcp_cleanup_rbuf(ssk, 1);
unlock_sock_fast(ssk, slow);
}
@@ -2197,7 +2204,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk))
continue;
- /* only the master socket status is relevant here. The exit
+ /* only the MPTCP socket status is relevant here. The exit
* conditions mirror closely tcp_recvmsg()
*/
if (copied >= target)
@@ -2565,7 +2572,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
slow = lock_sock_fast(tcp_sk);
if (tcp_sk->sk_state != TCP_CLOSE) {
- tcp_send_active_reset(tcp_sk, GFP_ATOMIC);
+ mptcp_send_active_reset_reason(tcp_sk);
tcp_set_state(tcp_sk, TCP_CLOSE);
}
unlock_sock_fast(tcp_sk, slow);
@@ -2793,6 +2800,9 @@ static void __mptcp_init_sock(struct sock *sk)
WRITE_ONCE(msk->allow_infinite_fallback, true);
msk->recovery = false;
msk->subflow_id = 1;
+ msk->last_data_sent = tcp_jiffies32;
+ msk->last_data_recv = tcp_jiffies32;
+ msk->last_ack_recv = tcp_jiffies32;
mptcp_pm_data_init(msk);
@@ -2806,7 +2816,8 @@ static void mptcp_ca_reset(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
tcp_assign_congestion_control(sk);
- strcpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name);
+ strscpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name,
+ sizeof(mptcp_sk(sk)->ca_name));
/* no need to keep a reference to the ops, the name will suffice */
tcp_cleanup_congestion_control(sk);
@@ -2907,9 +2918,14 @@ void mptcp_set_state(struct sock *sk, int state)
if (oldstate != TCP_ESTABLISHED)
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
break;
-
+ case TCP_CLOSE_WAIT:
+ /* Unlike TCP, MPTCP sk would not have the TCP_SYN_RECV state:
+ * MPTCP "accepted" sockets will be created later on. So no
+ * transition from TCP_SYN_RECV to TCP_CLOSE_WAIT.
+ */
+ break;
default:
- if (oldstate == TCP_ESTABLISHED)
+ if (oldstate == TCP_ESTABLISHED || oldstate == TCP_CLOSE_WAIT)
MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
}
@@ -3512,7 +3528,7 @@ void mptcp_subflow_process_delegated(struct sock *ssk, long status)
static int mptcp_hash(struct sock *sk)
{
/* should never be called,
- * we hash the TCP subflows not the master socket
+ * we hash the TCP subflows not the MPTCP socket
*/
WARN_ON_ONCE(1);
return 0;
@@ -3726,6 +3742,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
WRITE_ONCE(msk->write_seq, subflow->idsn);
WRITE_ONCE(msk->snd_nxt, subflow->idsn);
+ WRITE_ONCE(msk->snd_una, subflow->idsn);
if (likely(!__mptcp_check_fallback(msk)))
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE);
@@ -3873,11 +3890,10 @@ unlock:
}
static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
struct sock *ssk, *newsk;
- int err;
pr_debug("msk=%p", msk);
@@ -3889,9 +3905,9 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
return -EINVAL;
pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
- newsk = inet_csk_accept(ssk, flags, &err, kern);
+ newsk = inet_csk_accept(ssk, arg);
if (!newsk)
- return err;
+ return arg->err;
pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
if (sk_is_mptcp(newsk)) {
@@ -3912,7 +3928,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
newsk = new_mptcp_sock;
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
- newsk->sk_kern_sock = kern;
+ newsk->sk_kern_sock = arg->kern;
lock_sock(newsk);
__inet_accept(sock, newsock, newsk);
@@ -3941,7 +3957,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
}
} else {
tcpfallback:
- newsk->sk_kern_sock = kern;
+ newsk->sk_kern_sock = arg->kern;
lock_sock(newsk);
__inet_accept(sock, newsock, newsk);
/* we are being invoked after accepting a non-mp-capable
@@ -4161,7 +4177,7 @@ int __init mptcp_proto_v6_init(void)
int err;
mptcp_v6_prot = mptcp_prot;
- strcpy(mptcp_v6_prot.name, "MPTCPv6");
+ strscpy(mptcp_v6_prot.name, "MPTCPv6", sizeof(mptcp_v6_prot.name));
mptcp_v6_prot.slab = NULL;
mptcp_v6_prot.obj_size = sizeof(struct mptcp6_sock);
mptcp_v6_prot.ipv6_pinfo_offset = offsetof(struct mptcp6_sock, np);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index a10ebf3ee10a..60c6b073d65f 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -12,8 +12,7 @@
#include <net/inet_connection_sock.h>
#include <uapi/linux/mptcp.h>
#include <net/genetlink.h>
-
-#include "mptcp_pm_gen.h"
+#include <net/rstreason.h>
#define MPTCP_SUPPORTED_VERSION 1
@@ -282,6 +281,9 @@ struct mptcp_sock {
u64 bytes_acked;
u64 snd_una;
u64 wnd_end;
+ u32 last_data_sent;
+ u32 last_data_recv;
+ u32 last_ack_recv;
unsigned long timer_ival;
u32 token;
int rmem_released;
@@ -308,6 +310,9 @@ struct mptcp_sock {
free_first:1,
rcvspace_init:1;
u32 notsent_lowat;
+ int keepalive_cnt;
+ int keepalive_idle;
+ int keepalive_intvl;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
@@ -381,6 +386,11 @@ static inline int mptcp_win_from_space(const struct sock *sk, int space)
return __tcp_win_from_space(mptcp_sk(sk)->scaling_ratio, space);
}
+static inline int mptcp_space_from_win(const struct sock *sk, int win)
+{
+ return __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, win);
+}
+
static inline int __mptcp_space(const struct sock *sk)
{
return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
@@ -438,6 +448,7 @@ struct mptcp_subflow_request_sock {
u16 mp_capable : 1,
mp_join : 1,
backup : 1,
+ request_bkup : 1,
csum_reqd : 1,
allow_join_id0 : 1;
u8 local_id;
@@ -558,7 +569,7 @@ struct mptcp_subflow_context {
static inline struct mptcp_subflow_context *
mptcp_subflow_ctx(const struct sock *sk)
{
- struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
/* Use RCU on icsk_ulp_data only for sock diag code */
return (__force struct mptcp_subflow_context *)icsk->icsk_ulp_data;
@@ -578,6 +589,43 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
WRITE_ONCE(subflow->local_id, -1);
}
+/* Convert reset reasons in MPTCP to enum sk_rst_reason type */
+static inline enum sk_rst_reason
+sk_rst_convert_mptcp_reason(u32 reason)
+{
+ switch (reason) {
+ case MPTCP_RST_EUNSPEC:
+ return SK_RST_REASON_MPTCP_RST_EUNSPEC;
+ case MPTCP_RST_EMPTCP:
+ return SK_RST_REASON_MPTCP_RST_EMPTCP;
+ case MPTCP_RST_ERESOURCE:
+ return SK_RST_REASON_MPTCP_RST_ERESOURCE;
+ case MPTCP_RST_EPROHIBIT:
+ return SK_RST_REASON_MPTCP_RST_EPROHIBIT;
+ case MPTCP_RST_EWQ2BIG:
+ return SK_RST_REASON_MPTCP_RST_EWQ2BIG;
+ case MPTCP_RST_EBADPERF:
+ return SK_RST_REASON_MPTCP_RST_EBADPERF;
+ case MPTCP_RST_EMIDDLEBOX:
+ return SK_RST_REASON_MPTCP_RST_EMIDDLEBOX;
+ default:
+ /* It should not happen, or else errors may occur
+ * in MPTCP layer
+ */
+ return SK_RST_REASON_ERROR;
+ }
+}
+
+static inline void
+mptcp_send_active_reset_reason(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ enum sk_rst_reason reason;
+
+ reason = sk_rst_convert_mptcp_reason(subflow->reset_reason);
+ tcp_send_active_reset(sk, GFP_ATOMIC, reason);
+}
+
static inline u64
mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow)
{
@@ -642,6 +690,7 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
unsigned int mptcp_close_timeout(const struct sock *sk);
int mptcp_get_pm_type(const struct net *net);
const char *mptcp_get_scheduler(const struct net *net);
+void mptcp_get_available_schedulers(char *buf, size_t maxlen);
void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt);
@@ -1060,6 +1109,9 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
+bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc);
+bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
+bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb);
int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
struct netlink_callback *cb);
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 4ab0693c069c..4a7fd0508ad2 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -51,6 +51,28 @@ struct mptcp_sched_ops *mptcp_sched_find(const char *name)
return ret;
}
+/* Build string with list of available scheduler values.
+ * Similar to tcp_get_available_congestion_control()
+ */
+void mptcp_get_available_schedulers(char *buf, size_t maxlen)
+{
+ struct mptcp_sched_ops *sched;
+ size_t offs = 0;
+
+ rcu_read_lock();
+ spin_lock(&mptcp_sched_list_lock);
+ list_for_each_entry_rcu(sched, &mptcp_sched_list, list) {
+ offs += snprintf(buf + offs, maxlen - offs,
+ "%s%s",
+ offs == 0 ? "" : " ", sched->name);
+
+ if (WARN_ON_ONCE(offs >= maxlen))
+ break;
+ }
+ spin_unlock(&mptcp_sched_list_lock);
+ rcu_read_unlock();
+}
+
int mptcp_register_scheduler(struct mptcp_sched_ops *sched)
{
if (!sched->get_subflow)
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 73fdf423de44..2026a9a36f80 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -181,8 +181,6 @@ static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
switch (optname) {
case SO_KEEPALIVE:
- mptcp_sol_socket_sync_intval(msk, optname, val);
- return 0;
case SO_DEBUG:
case SO_MARK:
case SO_PRIORITY:
@@ -618,12 +616,37 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t
}
if (ret == 0)
- strcpy(msk->ca_name, name);
+ strscpy(msk->ca_name, name, sizeof(msk->ca_name));
release_sock(sk);
return ret;
}
+static int __mptcp_setsockopt_set_val(struct mptcp_sock *msk, int max,
+ int (*set_val)(struct sock *, int),
+ int *msk_val, int val)
+{
+ struct mptcp_subflow_context *subflow;
+ int err = 0;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ int ret;
+
+ lock_sock(ssk);
+ ret = set_val(ssk, val);
+ err = err ? : ret;
+ release_sock(ssk);
+ }
+
+ if (!err) {
+ *msk_val = val;
+ sockopt_seq_inc(msk);
+ }
+
+ return err;
+}
+
static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val)
{
struct mptcp_subflow_context *subflow;
@@ -820,6 +843,22 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
case TCP_NODELAY:
ret = __mptcp_setsockopt_sol_tcp_nodelay(msk, val);
break;
+ case TCP_KEEPIDLE:
+ ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPIDLE,
+ &tcp_sock_set_keepidle_locked,
+ &msk->keepalive_idle, val);
+ break;
+ case TCP_KEEPINTVL:
+ ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPINTVL,
+ &tcp_sock_set_keepintvl,
+ &msk->keepalive_intvl, val);
+ break;
+ case TCP_KEEPCNT:
+ ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPCNT,
+ &tcp_sock_set_keepcnt,
+ &msk->keepalive_cnt,
+ val);
+ break;
default:
ret = -ENOPROTOOPT;
}
@@ -898,6 +937,7 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
struct sock *sk = (struct sock *)msk;
u32 flags = 0;
bool slow;
+ u32 now;
memset(info, 0, sizeof(*info));
@@ -926,11 +966,6 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
if (READ_ONCE(msk->can_ack))
flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED;
info->mptcpi_flags = flags;
- mptcp_data_lock(sk);
- info->mptcpi_snd_una = msk->snd_una;
- info->mptcpi_rcv_nxt = msk->ack_seq;
- info->mptcpi_bytes_acked = msk->bytes_acked;
- mptcp_data_unlock(sk);
slow = lock_sock_fast(sk);
info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
@@ -942,7 +977,17 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
info->mptcpi_bytes_retrans = msk->bytes_retrans;
info->mptcpi_subflows_total = info->mptcpi_subflows +
__mptcp_has_initial_subflow(msk);
+ now = tcp_jiffies32;
+ info->mptcpi_last_data_sent = jiffies_to_msecs(now - msk->last_data_sent);
+ info->mptcpi_last_data_recv = jiffies_to_msecs(now - msk->last_data_recv);
unlock_sock_fast(sk, slow);
+
+ mptcp_data_lock(sk);
+ info->mptcpi_last_ack_recv = jiffies_to_msecs(now - msk->last_ack_recv);
+ info->mptcpi_snd_una = msk->snd_una;
+ info->mptcpi_rcv_nxt = msk->ack_seq;
+ info->mptcpi_bytes_acked = msk->bytes_acked;
+ mptcp_data_unlock(sk);
}
EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
@@ -954,6 +999,10 @@ static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, in
if (get_user(len, optlen))
return -EFAULT;
+ /* When used only to check if a fallback to TCP happened. */
+ if (len == 0)
+ return 0;
+
len = min_t(unsigned int, len, sizeof(struct mptcp_info));
mptcp_diag_fill_info(msk, &m_info);
@@ -1322,6 +1371,8 @@ static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval,
static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
char __user *optval, int __user *optlen)
{
+ struct sock *sk = (void *)msk;
+
switch (optname) {
case TCP_ULP:
case TCP_CONGESTION:
@@ -1340,8 +1391,22 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
return mptcp_put_int_option(msk, optval, optlen, msk->cork);
case TCP_NODELAY:
return mptcp_put_int_option(msk, optval, optlen, msk->nodelay);
+ case TCP_KEEPIDLE:
+ return mptcp_put_int_option(msk, optval, optlen,
+ msk->keepalive_idle ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_time) / HZ);
+ case TCP_KEEPINTVL:
+ return mptcp_put_int_option(msk, optval, optlen,
+ msk->keepalive_intvl ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_intvl) / HZ);
+ case TCP_KEEPCNT:
+ return mptcp_put_int_option(msk, optval, optlen,
+ msk->keepalive_cnt ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_probes));
case TCP_NOTSENT_LOWAT:
return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat);
+ case TCP_IS_MPTCP:
+ return mptcp_put_int_option(msk, optval, optlen, 1);
}
return -EOPNOTSUPP;
}
@@ -1457,6 +1522,9 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
tcp_set_congestion_control(ssk, msk->ca_name, false, true);
__tcp_sock_set_cork(ssk, !!msk->cork);
__tcp_sock_set_nodelay(ssk, !!msk->nodelay);
+ tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle);
+ tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl);
+ tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt);
inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk));
inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
@@ -1511,7 +1579,7 @@ int mptcp_set_rcvlowat(struct sock *sk, int val)
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
return 0;
- space = __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, val);
+ space = mptcp_space_from_win(sk, val);
if (space <= sk->sk_rcvbuf)
return 0;
@@ -1523,7 +1591,7 @@ int mptcp_set_rcvlowat(struct sock *sk, int val)
slow = lock_sock_fast(ssk);
WRITE_ONCE(ssk->sk_rcvbuf, space);
- tcp_sk(ssk)->window_clamp = val;
+ WRITE_ONCE(tcp_sk(ssk)->window_clamp, val);
unlock_sock_fast(ssk, slow);
}
return 0;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 6042a47da61b..a21c712350c3 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -20,6 +20,7 @@
#include <net/transp_v6.h>
#endif
#include <net/mptcp.h>
+
#include "protocol.h"
#include "mib.h"
@@ -99,6 +100,7 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req)
return NULL;
}
subflow_req->local_id = local_id;
+ subflow_req->request_bkup = mptcp_pm_is_backup(msk, (struct sock_common *)req);
return msk;
}
@@ -150,8 +152,10 @@ static int subflow_check_req(struct request_sock *req,
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
* TCP option space.
*/
- if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
+ if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info)) {
+ subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP);
return -EINVAL;
+ }
#endif
mptcp_get_options(skb, &mp_opt);
@@ -165,6 +169,9 @@ static int subflow_check_req(struct request_sock *req,
return 0;
} else if (opt_mp_join) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
+
+ if (mp_opt.backup)
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNBACKUPRX);
}
if (opt_mp_capable && listener->request_mptcp) {
@@ -219,6 +226,7 @@ again:
ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport));
if (!mptcp_pm_sport_in_anno_list(subflow_req->msk, sk_listener)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTSYNRX);
+ subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
return -EPERM;
}
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTSYNRX);
@@ -227,10 +235,12 @@ again:
subflow_req_create_thmac(subflow_req);
if (unlikely(req->syncookie)) {
- if (mptcp_can_accept_new_subflow(subflow_req->msk))
- subflow_init_req_cookie_join_save(subflow_req, skb);
- else
+ if (!mptcp_can_accept_new_subflow(subflow_req->msk)) {
+ subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
return -EPERM;
+ }
+
+ subflow_init_req_cookie_join_save(subflow_req, skb);
}
pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
@@ -281,10 +291,21 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
}
EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req);
+static enum sk_rst_reason mptcp_get_rst_reason(const struct sk_buff *skb)
+{
+ const struct mptcp_ext *mpext = mptcp_get_ext(skb);
+
+ if (!mpext)
+ return SK_RST_REASON_NOT_SPECIFIED;
+
+ return sk_rst_convert_mptcp_reason(mpext->reset_reason);
+}
+
static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
- struct request_sock *req)
+ struct request_sock *req,
+ u32 tw_isn)
{
struct dst_entry *dst;
int err;
@@ -292,7 +313,7 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
tcp_rsk(req)->is_mptcp = 1;
subflow_init_req(req, sk);
- dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req);
+ dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req, tw_isn);
if (!dst)
return NULL;
@@ -302,7 +323,8 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
dst_release(dst);
if (!req->syncookie)
- tcp_request_sock_ops.send_reset(sk, skb);
+ tcp_request_sock_ops.send_reset(sk, skb,
+ mptcp_get_rst_reason(skb));
return NULL;
}
@@ -351,7 +373,8 @@ static int subflow_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
- struct request_sock *req)
+ struct request_sock *req,
+ u32 tw_isn)
{
struct dst_entry *dst;
int err;
@@ -359,7 +382,7 @@ static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
tcp_rsk(req)->is_mptcp = 1;
subflow_init_req(req, sk);
- dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req);
+ dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req, tw_isn);
if (!dst)
return NULL;
@@ -369,7 +392,8 @@ static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
dst_release(dst);
if (!req->syncookie)
- tcp6_request_sock_ops.send_reset(sk, skb);
+ tcp6_request_sock_ops.send_reset(sk, skb,
+ mptcp_get_rst_reason(skb));
return NULL;
}
#endif
@@ -405,7 +429,7 @@ void mptcp_subflow_reset(struct sock *ssk)
/* must hold: tcp_done() could drop last reference on parent */
sock_hold(sk);
- tcp_send_active_reset(ssk, GFP_ATOMIC);
+ mptcp_send_active_reset_reason(ssk);
tcp_done(ssk);
if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags))
mptcp_schedule_work(sk);
@@ -557,6 +581,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->mp_join = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
+ if (subflow->backup)
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKBACKUPRX);
+
if (subflow_use_different_dport(msk, sk)) {
pr_debug("synack inet_dport=%d %d",
ntohs(inet_sk(sk)->inet_dport),
@@ -594,6 +621,8 @@ static int subflow_chk_local_id(struct sock *sk)
return err;
subflow_set_local_id(subflow, err);
+ subflow->request_bkup = mptcp_pm_is_backup(msk, (struct sock_common *)sk);
+
return 0;
}
@@ -774,6 +803,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct mptcp_subflow_request_sock *subflow_req;
struct mptcp_options_received mp_opt;
bool fallback, fallback_is_fatal;
+ enum sk_rst_reason reason;
struct mptcp_sock *owner;
struct sock *child;
@@ -873,13 +903,18 @@ create_child:
ntohs(inet_sk((struct sock *)owner)->inet_sport));
if (!mptcp_pm_sport_in_anno_list(owner, sk)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTACKRX);
+ subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
goto dispose_child;
}
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTACKRX);
}
- if (!mptcp_finish_join(child))
+ if (!mptcp_finish_join(child)) {
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(child);
+
+ subflow_add_reset_reason(skb, subflow->reset_reason);
goto dispose_child;
+ }
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
tcp_rsk(req)->drop_req = true;
@@ -887,7 +922,7 @@ create_child:
}
/* check for expected invariant - should never trigger, just help
- * catching eariler subtle bugs
+ * catching earlier subtle bugs
*/
WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
(!mptcp_subflow_ctx(child) ||
@@ -899,7 +934,8 @@ dispose_child:
tcp_rsk(req)->drop_req = true;
inet_csk_prepare_for_destroy_sock(child);
tcp_done(child);
- req->rsk_ops->send_reset(sk, skb);
+ reason = mptcp_get_rst_reason(skb);
+ req->rsk_ops->send_reset(sk, skb, reason);
/* The last child reference will be released by the caller */
return child;
@@ -1092,6 +1128,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
}
if (mpext->data_fin == 1) {
+ u64 data_fin_seq;
+
if (data_len == 1) {
bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq,
mpext->dsn64);
@@ -1104,26 +1142,26 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
*/
skb_ext_del(skb, SKB_EXT_MPTCP);
return MAPPING_OK;
- } else {
- if (updated)
- mptcp_schedule_work((struct sock *)msk);
-
- return MAPPING_DATA_FIN;
}
- } else {
- u64 data_fin_seq = mpext->data_seq + data_len - 1;
- /* If mpext->data_seq is a 32-bit value, data_fin_seq
- * must also be limited to 32 bits.
- */
- if (!mpext->dsn64)
- data_fin_seq &= GENMASK_ULL(31, 0);
+ if (updated)
+ mptcp_schedule_work((struct sock *)msk);
- mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64);
- pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d",
- data_fin_seq, mpext->dsn64);
+ return MAPPING_DATA_FIN;
}
+ data_fin_seq = mpext->data_seq + data_len - 1;
+
+ /* If mpext->data_seq is a 32-bit value, data_fin_seq must also
+ * be limited to 32 bits.
+ */
+ if (!mpext->dsn64)
+ data_fin_seq &= GENMASK_ULL(31, 0);
+
+ mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64);
+ pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d",
+ data_fin_seq, mpext->dsn64);
+
/* Adjust for DATA_FIN using 1 byte of sequence space */
data_len--;
}
@@ -1192,14 +1230,22 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
bool fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
- u32 incr;
+ struct tcp_sock *tp = tcp_sk(ssk);
+ u32 offset, incr, avail_len;
- incr = limit >= skb->len ? skb->len + fin : limit;
+ offset = tp->copied_seq - TCP_SKB_CB(skb)->seq;
+ if (WARN_ON_ONCE(offset > skb->len))
+ goto out;
- pr_debug("discarding=%d len=%d seq=%d", incr, skb->len,
- subflow->map_subflow_seq);
+ avail_len = skb->len - offset;
+ incr = limit >= avail_len ? avail_len + fin : limit;
+
+ pr_debug("discarding=%d len=%d offset=%d seq=%d", incr, skb->len,
+ offset, subflow->map_subflow_seq);
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA);
tcp_sk(ssk)->copied_seq += incr;
+
+out:
if (!before(tcp_sk(ssk)->copied_seq, TCP_SKB_CB(skb)->end_seq))
sk_eat_skb(ssk, skb);
if (mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len)
@@ -1234,7 +1280,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
unsigned long fail_tout;
- /* greceful failure can happen only on the MPC subflow */
+ /* graceful failure can happen only on the MPC subflow */
if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first)))
return;
@@ -1336,7 +1382,7 @@ reset:
tcp_set_state(ssk, TCP_CLOSE);
while ((skb = skb_peek(&ssk->sk_receive_queue)))
sk_eat_skb(ssk, skb);
- tcp_send_active_reset(ssk, GFP_ATOMIC);
+ mptcp_send_active_reset_reason(ssk);
WRITE_ONCE(subflow->data_avail, false);
return false;
}
@@ -1690,7 +1736,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
mptcp_sockopt_sync_locked(mptcp_sk(sk), sf->sk);
release_sock(sf->sk);
- /* the newly created socket really belongs to the owning MPTCP master
+ /* the newly created socket really belongs to the owning MPTCP
* socket, even if for additional subflows the allocation is performed
* by a kernel workqueue. Adjust inode references, so that the
* procfs/diag interfaces really show this one belonging to the correct
@@ -1976,6 +2022,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->fully_established = 1;
new_ctx->remote_key_valid = 1;
new_ctx->backup = subflow_req->backup;
+ new_ctx->request_bkup = subflow_req->request_bkup;
WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id);
new_ctx->token = subflow_req->token;
new_ctx->thmac = subflow_req->thmac;
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 374412ed780b..ef0f8f73826f 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -325,6 +325,7 @@ struct ncsi_dev_priv {
spinlock_t lock; /* Protect the NCSI device */
unsigned int package_probe_id;/* Current ID during probe */
unsigned int package_num; /* Number of packages */
+ unsigned int channel_probe_id;/* Current cahnnel ID during probe */
struct list_head packages; /* List of packages */
struct ncsi_channel *hot_channel; /* Channel was ever active */
struct ncsi_request requests[256]; /* Request table */
@@ -343,6 +344,7 @@ struct ncsi_dev_priv {
bool multi_package; /* Enable multiple packages */
bool mlx_multi_host; /* Enable multi host Mellanox */
u32 package_whitelist; /* Packages to configure */
+ unsigned char channel_count; /* Num of channels to probe */
};
struct ncsi_cmd_arg {
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 745c788f1d1d..5ecf611c8820 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -510,17 +510,19 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
break;
case ncsi_dev_state_suspend_gls:
- ndp->pending_req_num = np->channel_num;
+ ndp->pending_req_num = 1;
nca.type = NCSI_PKT_CMD_GLS;
nca.package = np->id;
+ nca.channel = ndp->channel_probe_id;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ goto error;
+ ndp->channel_probe_id++;
- nd->state = ncsi_dev_state_suspend_dcnt;
- NCSI_FOR_EACH_CHANNEL(np, nc) {
- nca.channel = nc->id;
- ret = ncsi_xmit_cmd(&nca);
- if (ret)
- goto error;
+ if (ndp->channel_probe_id == ndp->channel_count) {
+ ndp->channel_probe_id = 0;
+ nd->state = ncsi_dev_state_suspend_dcnt;
}
break;
@@ -1345,7 +1347,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
{
struct ncsi_dev *nd = &ndp->ndev;
struct ncsi_package *np;
- struct ncsi_channel *nc;
struct ncsi_cmd_arg nca;
unsigned char index;
int ret;
@@ -1423,23 +1424,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
nd->state = ncsi_dev_state_probe_cis;
break;
- case ncsi_dev_state_probe_cis:
- ndp->pending_req_num = NCSI_RESERVED_CHANNEL;
-
- /* Clear initial state */
- nca.type = NCSI_PKT_CMD_CIS;
- nca.package = ndp->active_package->id;
- for (index = 0; index < NCSI_RESERVED_CHANNEL; index++) {
- nca.channel = index;
- ret = ncsi_xmit_cmd(&nca);
- if (ret)
- goto error;
- }
-
- nd->state = ncsi_dev_state_probe_gvi;
- if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY))
- nd->state = ncsi_dev_state_probe_keep_phy;
- break;
case ncsi_dev_state_probe_keep_phy:
ndp->pending_req_num = 1;
@@ -1452,14 +1436,17 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
nd->state = ncsi_dev_state_probe_gvi;
break;
+ case ncsi_dev_state_probe_cis:
case ncsi_dev_state_probe_gvi:
case ncsi_dev_state_probe_gc:
case ncsi_dev_state_probe_gls:
np = ndp->active_package;
- ndp->pending_req_num = np->channel_num;
+ ndp->pending_req_num = 1;
- /* Retrieve version, capability or link status */
- if (nd->state == ncsi_dev_state_probe_gvi)
+ /* Clear initial state Retrieve version, capability or link status */
+ if (nd->state == ncsi_dev_state_probe_cis)
+ nca.type = NCSI_PKT_CMD_CIS;
+ else if (nd->state == ncsi_dev_state_probe_gvi)
nca.type = NCSI_PKT_CMD_GVI;
else if (nd->state == ncsi_dev_state_probe_gc)
nca.type = NCSI_PKT_CMD_GC;
@@ -1467,19 +1454,29 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
nca.type = NCSI_PKT_CMD_GLS;
nca.package = np->id;
- NCSI_FOR_EACH_CHANNEL(np, nc) {
- nca.channel = nc->id;
- ret = ncsi_xmit_cmd(&nca);
- if (ret)
- goto error;
- }
+ nca.channel = ndp->channel_probe_id;
- if (nd->state == ncsi_dev_state_probe_gvi)
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ goto error;
+
+ if (nd->state == ncsi_dev_state_probe_cis) {
+ nd->state = ncsi_dev_state_probe_gvi;
+ if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) && ndp->channel_probe_id == 0)
+ nd->state = ncsi_dev_state_probe_keep_phy;
+ } else if (nd->state == ncsi_dev_state_probe_gvi) {
nd->state = ncsi_dev_state_probe_gc;
- else if (nd->state == ncsi_dev_state_probe_gc)
+ } else if (nd->state == ncsi_dev_state_probe_gc) {
nd->state = ncsi_dev_state_probe_gls;
- else
+ } else {
+ nd->state = ncsi_dev_state_probe_cis;
+ ndp->channel_probe_id++;
+ }
+
+ if (ndp->channel_probe_id == ndp->channel_count) {
+ ndp->channel_probe_id = 0;
nd->state = ncsi_dev_state_probe_dp;
+ }
break;
case ncsi_dev_state_probe_dp:
ndp->pending_req_num = 1;
@@ -1780,6 +1777,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
ndp->requests[i].ndp = ndp;
timer_setup(&ndp->requests[i].timer, ncsi_request_timeout, 0);
}
+ ndp->channel_count = NCSI_RESERVED_CHANNEL;
spin_lock_irqsave(&ncsi_dev_lock, flags);
list_add_tail_rcu(&ndp->node, &ncsi_dev_list);
@@ -1813,6 +1811,7 @@ int ncsi_start_dev(struct ncsi_dev *nd)
if (!(ndp->flags & NCSI_DEV_PROBED)) {
ndp->package_probe_id = 0;
+ ndp->channel_probe_id = 0;
nd->state = ncsi_dev_state_probe;
schedule_work(&ndp->work);
return 0;
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index bee290d0f48b..e28be33bdf2c 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -795,12 +795,13 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr)
struct ncsi_rsp_gc_pkt *rsp;
struct ncsi_dev_priv *ndp = nr->ndp;
struct ncsi_channel *nc;
+ struct ncsi_package *np;
size_t size;
/* Find the channel */
rsp = (struct ncsi_rsp_gc_pkt *)skb_network_header(nr->rsp);
ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
- NULL, &nc);
+ &np, &nc);
if (!nc)
return -ENODEV;
@@ -835,6 +836,7 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr)
*/
nc->vlan_filter.bitmap = U64_MAX;
nc->vlan_filter.n_vids = rsp->vlan_cnt;
+ np->ndp->channel_count = rsp->channel_cnt;
return 0;
}
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 614815a3ed73..f0aa4d7ef499 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -142,8 +142,13 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
# flow table infrastructure
obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
- nf_flow_table_offload.o
+ nf_flow_table_offload.o nf_flow_table_xdp.o
nf_flow_table-$(CONFIG_NF_FLOW_TABLE_PROCFS) += nf_flow_table_procfs.o
+ifeq ($(CONFIG_NF_FLOW_TABLE),m)
+nf_flow_table-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_flow_table_bpf.o
+else ifeq ($(CONFIG_NF_FLOW_TABLE),y)
+nf_flow_table-$(CONFIG_DEBUG_INFO_BTF) += nf_flow_table_bpf.o
+endif
obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 3126911f5042..b00fc285b334 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -815,12 +815,21 @@ int __init netfilter_init(void)
if (ret < 0)
goto err;
+#ifdef CONFIG_LWTUNNEL
+ ret = netfilter_lwtunnel_init();
+ if (ret < 0)
+ goto err_lwtunnel_pernet;
+#endif
ret = netfilter_log_init();
if (ret < 0)
- goto err_pernet;
+ goto err_log_pernet;
return 0;
-err_pernet:
+err_log_pernet:
+#ifdef CONFIG_LWTUNNEL
+ netfilter_lwtunnel_fini();
+err_lwtunnel_pernet:
+#endif
unregister_pernet_subsys(&netfilter_net_ops);
err:
return ret;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 3184cc6be4c9..61431690cbd5 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -53,12 +53,13 @@ MODULE_DESCRIPTION("core IP set support");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
/* When the nfnl mutex or ip_set_ref_lock is held: */
-#define ip_set_dereference(p) \
- rcu_dereference_protected(p, \
+#define ip_set_dereference(inst) \
+ rcu_dereference_protected((inst)->ip_set_list, \
lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \
- lockdep_is_held(&ip_set_ref_lock))
+ lockdep_is_held(&ip_set_ref_lock) || \
+ (inst)->is_deleted)
#define ip_set(inst, id) \
- ip_set_dereference((inst)->ip_set_list)[id]
+ ip_set_dereference(inst)[id]
#define ip_set_ref_netlink(inst,id) \
rcu_dereference_raw((inst)->ip_set_list)[id]
#define ip_set_dereference_nfnl(p) \
@@ -1133,7 +1134,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info,
if (!list)
goto cleanup;
/* nfnl mutex is held, both lists are valid */
- tmp = ip_set_dereference(inst->ip_set_list);
+ tmp = ip_set_dereference(inst);
memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max);
rcu_assign_pointer(inst->ip_set_list, list);
/* Make sure all current packets have passed through */
@@ -1172,23 +1173,50 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
.len = IPSET_MAXNAMELEN - 1 },
};
+/* In order to return quickly when destroying a single set, it is split
+ * into two stages:
+ * - Cancel garbage collector
+ * - Destroy the set itself via call_rcu()
+ */
+
static void
-ip_set_destroy_set(struct ip_set *set)
+ip_set_destroy_set_rcu(struct rcu_head *head)
{
- pr_debug("set: %s\n", set->name);
+ struct ip_set *set = container_of(head, struct ip_set, rcu);
- /* Must call it without holding any lock */
set->variant->destroy(set);
module_put(set->type->me);
kfree(set);
}
static void
-ip_set_destroy_set_rcu(struct rcu_head *head)
+_destroy_all_sets(struct ip_set_net *inst)
{
- struct ip_set *set = container_of(head, struct ip_set, rcu);
+ struct ip_set *set;
+ ip_set_id_t i;
+ bool need_wait = false;
- ip_set_destroy_set(set);
+ /* First cancel gc's: set:list sets are flushed as well */
+ for (i = 0; i < inst->ip_set_max; i++) {
+ set = ip_set(inst, i);
+ if (set) {
+ set->variant->cancel_gc(set);
+ if (set->type->features & IPSET_TYPE_NAME)
+ need_wait = true;
+ }
+ }
+ /* Must wait for flush to be really finished */
+ if (need_wait)
+ rcu_barrier();
+ for (i = 0; i < inst->ip_set_max; i++) {
+ set = ip_set(inst, i);
+ if (set) {
+ ip_set(inst, i) = NULL;
+ set->variant->destroy(set);
+ module_put(set->type->me);
+ kfree(set);
+ }
+ }
}
static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
@@ -1202,11 +1230,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;
-
/* Commands are serialized and references are
* protected by the ip_set_ref_lock.
* External systems (i.e. xt_set) must call
- * ip_set_put|get_nfnl_* functions, that way we
+ * ip_set_nfnl_get_* functions, that way we
* can safely check references here.
*
* list:set timer can only decrement the reference
@@ -1214,8 +1241,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
* without holding the lock.
*/
if (!attr[IPSET_ATTR_SETNAME]) {
- /* Must wait for flush to be really finished in list:set */
- rcu_barrier();
read_lock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
@@ -1226,15 +1251,7 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
}
inst->is_destroyed = true;
read_unlock_bh(&ip_set_ref_lock);
- for (i = 0; i < inst->ip_set_max; i++) {
- s = ip_set(inst, i);
- if (s) {
- ip_set(inst, i) = NULL;
- /* Must cancel garbage collectors */
- s->variant->cancel_gc(s);
- ip_set_destroy_set(s);
- }
- }
+ _destroy_all_sets(inst);
/* Modified by ip_set_destroy() only, which is serialized */
inst->is_destroyed = false;
} else {
@@ -1255,12 +1272,12 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
features = s->type->features;
ip_set(inst, i) = NULL;
read_unlock_bh(&ip_set_ref_lock);
+ /* Must cancel garbage collectors */
+ s->variant->cancel_gc(s);
if (features & IPSET_TYPE_NAME) {
/* Must wait for flush to be really finished */
rcu_barrier();
}
- /* Must cancel garbage collectors */
- s->variant->cancel_gc(s);
call_rcu(&s->rcu, ip_set_destroy_set_rcu);
}
return 0;
@@ -2365,30 +2382,25 @@ ip_set_net_init(struct net *net)
}
static void __net_exit
-ip_set_net_exit(struct net *net)
+ip_set_net_pre_exit(struct net *net)
{
struct ip_set_net *inst = ip_set_pernet(net);
- struct ip_set *set = NULL;
- ip_set_id_t i;
-
inst->is_deleted = true; /* flag for ip_set_nfnl_put */
+}
- nfnl_lock(NFNL_SUBSYS_IPSET);
- for (i = 0; i < inst->ip_set_max; i++) {
- set = ip_set(inst, i);
- if (set) {
- ip_set(inst, i) = NULL;
- set->variant->cancel_gc(set);
- ip_set_destroy_set(set);
- }
- }
- nfnl_unlock(NFNL_SUBSYS_IPSET);
+static void __net_exit
+ip_set_net_exit(struct net *net)
+{
+ struct ip_set_net *inst = ip_set_pernet(net);
+
+ _destroy_all_sets(inst);
kvfree(rcu_dereference_protected(inst->ip_set_list, 1));
}
static struct pernet_operations ip_set_net_ops = {
.init = ip_set_net_init,
+ .pre_exit = ip_set_net_pre_exit,
.exit = ip_set_net_exit,
.id = &ip_set_net_id,
.size = sizeof(struct ip_set_net),
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 6c3f28bc59b3..bfae7066936b 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -79,7 +79,7 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb,
struct set_elem *e;
int ret;
- list_for_each_entry(e, &map->members, list) {
+ list_for_each_entry_rcu(e, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
@@ -99,7 +99,7 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb,
struct set_elem *e;
int ret;
- list_for_each_entry(e, &map->members, list) {
+ list_for_each_entry_rcu(e, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
@@ -188,9 +188,10 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct list_set *map = set->data;
struct set_adt_elem *d = value;
struct set_elem *e, *next, *prev = NULL;
- int ret;
+ int ret = 0;
- list_for_each_entry(e, &map->members, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(e, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
@@ -201,6 +202,7 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (d->before == 0) {
ret = 1;
+ goto out;
} else if (d->before > 0) {
next = list_next_entry(e, list);
ret = !list_is_last(&e->list, &map->members) &&
@@ -208,9 +210,11 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
} else {
ret = prev && prev->id == d->refid;
}
- return ret;
+ goto out;
}
- return 0;
+out:
+ rcu_read_unlock();
+ return ret;
}
static void
@@ -239,7 +243,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
/* Find where to add the new entry */
n = prev = next = NULL;
- list_for_each_entry(e, &map->members, list) {
+ list_for_each_entry_rcu(e, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
@@ -316,9 +320,9 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext,
{
struct list_set *map = set->data;
struct set_adt_elem *d = value;
- struct set_elem *e, *next, *prev = NULL;
+ struct set_elem *e, *n, *next, *prev = NULL;
- list_for_each_entry(e, &map->members, list) {
+ list_for_each_entry_safe(e, n, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
@@ -424,14 +428,8 @@ static void
list_set_destroy(struct ip_set *set)
{
struct list_set *map = set->data;
- struct set_elem *e, *n;
- list_for_each_entry_safe(e, n, &map->members, list) {
- list_del(&e->list);
- ip_set_put_byindex(map->net, e->id);
- ip_set_ext_destroy(set, e);
- kfree(e);
- }
+ WARN_ON_ONCE(!list_empty(&map->members));
kfree(map);
set->data = NULL;
@@ -549,6 +547,9 @@ list_set_cancel_gc(struct ip_set *set)
if (SET_WITH_TIMEOUT(set))
timer_shutdown_sync(&map->gc);
+
+ /* Flush list to drop references to other ipsets */
+ list_set_flush(set);
}
static const struct ip_set_type_variant set_variant = {
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index a2c16b501087..c7a8a08b7308 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1550,6 +1550,7 @@ static int ipvs_gre_decap(struct netns_ipvs *ipvs, struct sk_buff *skb,
if (!dest)
goto unk;
if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ IP_TUNNEL_DECLARE_FLAGS(flags);
__be16 type;
/* Only support version 0 and C (csum) */
@@ -1560,7 +1561,10 @@ static int ipvs_gre_decap(struct netns_ipvs *ipvs, struct sk_buff *skb,
if (type != htons(ETH_P_IP))
goto unk;
*proto = IPPROTO_IPIP;
- return gre_calc_hlen(gre_flags_to_tnl_flags(greh->flags));
+
+ gre_flags_to_tnl_flags(flags, greh->flags);
+
+ return gre_calc_hlen(flags);
}
unk:
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 143a341bbc0a..dc6ddc4abbe2 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -94,6 +94,7 @@ static void update_defense_level(struct netns_ipvs *ipvs)
{
struct sysinfo i;
int availmem;
+ int amemthresh;
int nomem;
int to_change = -1;
@@ -105,7 +106,8 @@ static void update_defense_level(struct netns_ipvs *ipvs)
/* si_swapinfo(&i); */
/* availmem = availmem - (i.totalswap - i.freeswap); */
- nomem = (availmem < ipvs->sysctl_amemthresh);
+ amemthresh = max(READ_ONCE(ipvs->sysctl_amemthresh), 0);
+ nomem = (availmem < amemthresh);
local_bh_disable();
@@ -145,9 +147,8 @@ static void update_defense_level(struct netns_ipvs *ipvs)
break;
case 1:
if (nomem) {
- ipvs->drop_rate = ipvs->drop_counter
- = ipvs->sysctl_amemthresh /
- (ipvs->sysctl_amemthresh-availmem);
+ ipvs->drop_counter = amemthresh / (amemthresh - availmem);
+ ipvs->drop_rate = ipvs->drop_counter;
ipvs->sysctl_drop_packet = 2;
} else {
ipvs->drop_rate = 0;
@@ -155,9 +156,8 @@ static void update_defense_level(struct netns_ipvs *ipvs)
break;
case 2:
if (nomem) {
- ipvs->drop_rate = ipvs->drop_counter
- = ipvs->sysctl_amemthresh /
- (ipvs->sysctl_amemthresh-availmem);
+ ipvs->drop_counter = amemthresh / (amemthresh - availmem);
+ ipvs->drop_rate = ipvs->drop_counter;
} else {
ipvs->drop_rate = 0;
ipvs->sysctl_drop_packet = 1;
@@ -1459,18 +1459,18 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
if (ret < 0)
goto out_err;
- /* Bind the ct retriever */
- RCU_INIT_POINTER(svc->pe, pe);
- pe = NULL;
-
/* Update the virtual service counters */
if (svc->port == FTPPORT)
atomic_inc(&ipvs->ftpsvc_counter);
else if (svc->port == 0)
atomic_inc(&ipvs->nullsvc_counter);
- if (svc->pe && svc->pe->conn_out)
+ if (pe && pe->conn_out)
atomic_inc(&ipvs->conn_out_counter);
+ /* Bind the ct retriever */
+ RCU_INIT_POINTER(svc->pe, pe);
+ pe = NULL;
+
/* Count only IPv4 services for old get/setsockopt interface */
if (svc->af == AF_INET)
ipvs->num_services++;
@@ -1846,7 +1846,7 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs)
#ifdef CONFIG_SYSCTL
static int
-proc_do_defense_mode(struct ctl_table *table, int write,
+proc_do_defense_mode(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct netns_ipvs *ipvs = table->extra2;
@@ -1873,7 +1873,7 @@ proc_do_defense_mode(struct ctl_table *table, int write,
}
static int
-proc_do_sync_threshold(struct ctl_table *table, int write,
+proc_do_sync_threshold(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct netns_ipvs *ipvs = table->extra2;
@@ -1901,7 +1901,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
}
static int
-proc_do_sync_ports(struct ctl_table *table, int write,
+proc_do_sync_ports(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = table->data;
@@ -1924,7 +1924,8 @@ proc_do_sync_ports(struct ctl_table *table, int write,
return rc;
}
-static int ipvs_proc_est_cpumask_set(struct ctl_table *table, void *buffer)
+static int ipvs_proc_est_cpumask_set(const struct ctl_table *table,
+ void *buffer)
{
struct netns_ipvs *ipvs = table->extra2;
cpumask_var_t *valp = table->data;
@@ -1962,8 +1963,8 @@ out:
return ret;
}
-static int ipvs_proc_est_cpumask_get(struct ctl_table *table, void *buffer,
- size_t size)
+static int ipvs_proc_est_cpumask_get(const struct ctl_table *table,
+ void *buffer, size_t size)
{
struct netns_ipvs *ipvs = table->extra2;
cpumask_var_t *valp = table->data;
@@ -1983,7 +1984,7 @@ static int ipvs_proc_est_cpumask_get(struct ctl_table *table, void *buffer,
return ret;
}
-static int ipvs_proc_est_cpulist(struct ctl_table *table, int write,
+static int ipvs_proc_est_cpulist(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
@@ -2010,7 +2011,7 @@ static int ipvs_proc_est_cpulist(struct ctl_table *table, int write,
return ret;
}
-static int ipvs_proc_est_nice(struct ctl_table *table, int write,
+static int ipvs_proc_est_nice(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct netns_ipvs *ipvs = table->extra2;
@@ -2040,7 +2041,7 @@ static int ipvs_proc_est_nice(struct ctl_table *table, int write,
return ret;
}
-static int ipvs_proc_run_estimation(struct ctl_table *table, int write,
+static int ipvs_proc_run_estimation(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct netns_ipvs *ipvs = table->extra2;
@@ -2263,7 +2264,6 @@ static struct ctl_table vs_vars[] = {
.proc_handler = proc_dointvec,
},
#endif
- { }
};
#endif
@@ -4270,6 +4270,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
struct ctl_table *tbl;
int idx, ret;
size_t ctl_table_size = ARRAY_SIZE(vs_vars);
+ bool unpriv = net->user_ns != &init_user_ns;
atomic_set(&ipvs->dropentry, 0);
spin_lock_init(&ipvs->dropentry_lock);
@@ -4284,12 +4285,6 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
if (tbl == NULL)
return -ENOMEM;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- tbl[0].procname = NULL;
- ctl_table_size = 0;
- }
} else
tbl = vs_vars;
/* Initialize sysctl defaults */
@@ -4315,10 +4310,17 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
ipvs->sysctl_sync_ports = 1;
tbl[idx++].data = &ipvs->sysctl_sync_ports;
tbl[idx++].data = &ipvs->sysctl_sync_persist_mode;
+
ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
+ if (unpriv)
+ tbl[idx].mode = 0444;
tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
+
ipvs->sysctl_sync_sock_size = 0;
+ if (unpriv)
+ tbl[idx].mode = 0444;
tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
+
tbl[idx++].data = &ipvs->sysctl_cache_bypass;
tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
tbl[idx++].data = &ipvs->sysctl_sloppy_tcp;
@@ -4341,15 +4343,22 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
+
ipvs->sysctl_run_estimation = 1;
+ if (unpriv)
+ tbl[idx].mode = 0444;
tbl[idx].extra2 = ipvs;
tbl[idx++].data = &ipvs->sysctl_run_estimation;
ipvs->est_cpulist_valid = 0;
+ if (unpriv)
+ tbl[idx].mode = 0444;
tbl[idx].extra2 = ipvs;
tbl[idx++].data = &ipvs->sysctl_est_cpulist;
ipvs->sysctl_est_nice = IPVS_EST_NICE;
+ if (unpriv)
+ tbl[idx].mode = 0444;
tbl[idx].extra2 = ipvs;
tbl[idx++].data = &ipvs->sysctl_est_nice;
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 8ceec7a2fa8f..2423513d701d 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -123,7 +123,6 @@ static struct ctl_table vs_vars_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
#endif
@@ -563,10 +562,8 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
return -ENOMEM;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- ipvs->lblc_ctl_table[0].procname = NULL;
+ if (net->user_ns != &init_user_ns)
vars_table_size = 0;
- }
} else
ipvs->lblc_ctl_table = vs_vars_table;
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 0fb64707213f..cdb1d4bf6761 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -294,7 +294,6 @@ static struct ctl_table vs_vars_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
#endif
@@ -749,10 +748,8 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
return -ENOMEM;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- ipvs->lblcr_ctl_table[0].procname = NULL;
+ if (net->user_ns != &init_user_ns)
vars_table_size = 0;
- }
} else
ipvs->lblcr_ctl_table = vs_vars_table;
ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 1e689c714127..83e452916403 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -126,7 +126,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
if (sctph->source != cp->vport || payload_csum ||
skb->ip_summed == CHECKSUM_PARTIAL) {
sctph->source = cp->vport;
- if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb))
+ if (!skb_is_gso(skb))
sctp_nat_csum(skb, sctph, sctphoff);
} else {
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -175,7 +175,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
(skb->ip_summed == CHECKSUM_PARTIAL &&
!(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) {
sctph->dest = cp->dport;
- if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb))
+ if (!skb_is_gso(skb))
sctp_nat_csum(skb, sctph, sctphoff);
} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 65e0259178da..3313bceb6cc9 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -180,7 +180,7 @@ static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb,
(!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
(addr_type & IPV6_ADDR_LOOPBACK);
old_rt_is_local = __ip_vs_is_local_route6(
- (struct rt6_info *)skb_dst(skb));
+ dst_rt6_info(skb_dst(skb)));
} else
#endif
{
@@ -318,7 +318,7 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
if (dest) {
dest_dst = __ip_vs_dst_check(dest);
if (likely(dest_dst))
- rt = (struct rtable *) dest_dst->dst_cache;
+ rt = dst_rtable(dest_dst->dst_cache);
else {
dest_dst = ip_vs_dest_dst_alloc();
spin_lock_bh(&dest->dst_lock);
@@ -390,10 +390,10 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
skb->ip_summed == CHECKSUM_PARTIAL)
mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
} else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
- __be16 tflags = 0;
+ IP_TUNNEL_DECLARE_FLAGS(tflags) = { };
if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
- tflags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, tflags);
mtu -= gre_calc_hlen(tflags);
}
if (mtu < 68) {
@@ -481,7 +481,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
if (dest) {
dest_dst = __ip_vs_dst_check(dest);
if (likely(dest_dst))
- rt = (struct rt6_info *) dest_dst->dst_cache;
+ rt = dst_rt6_info(dest_dst->dst_cache);
else {
u32 cookie;
@@ -501,7 +501,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
ip_vs_dest_dst_free(dest_dst);
goto err_unreach;
}
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
cookie = rt6_get_cookie(rt);
__ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
spin_unlock_bh(&dest->dst_lock);
@@ -517,7 +517,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
rt_mode);
if (!dst)
goto err_unreach;
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
}
local = __ip_vs_is_local_route6(rt);
@@ -553,10 +553,10 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
skb->ip_summed == CHECKSUM_PARTIAL)
mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
} else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
- __be16 tflags = 0;
+ IP_TUNNEL_DECLARE_FLAGS(tflags) = { };
if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
- tflags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, tflags);
mtu -= gre_calc_hlen(tflags);
}
if (mtu < IPV6_MIN_MTU) {
@@ -862,7 +862,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_RDR);
if (local < 0)
goto tx_error;
- rt = (struct rt6_info *) skb_dst(skb);
+ rt = dst_rt6_info(skb_dst(skb));
/*
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
@@ -1082,11 +1082,11 @@ ipvs_gre_encap(struct net *net, struct sk_buff *skb,
{
__be16 proto = *next_protocol == IPPROTO_IPIP ?
htons(ETH_P_IP) : htons(ETH_P_IPV6);
- __be16 tflags = 0;
+ IP_TUNNEL_DECLARE_FLAGS(tflags) = { };
size_t hdrlen;
if (cp->dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
- tflags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, tflags);
hdrlen = gre_calc_hlen(tflags);
gre_build_header(skb, hdrlen, tflags, proto, 0, 0);
@@ -1165,11 +1165,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
max_headroom += sizeof(struct udphdr) + gue_hdrlen;
} else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ IP_TUNNEL_DECLARE_FLAGS(tflags) = { };
size_t gre_hdrlen;
- __be16 tflags = 0;
if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
- tflags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, tflags);
gre_hdrlen = gre_calc_hlen(tflags);
max_headroom += gre_hdrlen;
@@ -1288,7 +1288,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
if (local)
return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
- rt = (struct rt6_info *) skb_dst(skb);
+ rt = dst_rt6_info(skb_dst(skb));
tdev = rt->dst.dev;
/*
@@ -1310,11 +1310,11 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
max_headroom += sizeof(struct udphdr) + gue_hdrlen;
} else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ IP_TUNNEL_DECLARE_FLAGS(tflags) = { };
size_t gre_hdrlen;
- __be16 tflags = 0;
if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
- tflags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, tflags);
gre_hdrlen = gre_calc_hlen(tflags);
max_headroom += gre_hdrlen;
@@ -1590,7 +1590,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
&cp->daddr.in6, NULL, ipvsh, 0, rt_mode);
if (local < 0)
goto tx_error;
- rt = (struct rt6_info *) skb_dst(skb);
+ rt = dst_rt6_info(skb_dst(skb));
/*
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 8715617b02fe..34ba14e59e95 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -321,7 +321,6 @@ insert_tree(struct net *net,
struct nf_conncount_rb *rbconn;
struct nf_conncount_tuple *conn;
unsigned int count = 0, gc_count = 0;
- u8 keylen = data->keylen;
bool do_gc = true;
spin_lock_bh(&nf_conncount_locks[hash]);
@@ -333,7 +332,7 @@ restart:
rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node);
parent = *rbnode;
- diff = key_diff(key, rbconn->key, keylen);
+ diff = key_diff(key, rbconn->key, data->keylen);
if (diff < 0) {
rbnode = &((*rbnode)->rb_left);
} else if (diff > 0) {
@@ -378,7 +377,7 @@ restart:
conn->tuple = *tuple;
conn->zone = *zone;
- memcpy(rbconn->key, key, sizeof(u32) * keylen);
+ memcpy(rbconn->key, key, sizeof(u32) * data->keylen);
nf_conncount_list_init(&rbconn->list);
list_add(&conn->node, &rbconn->list.head);
@@ -403,7 +402,6 @@ count_tree(struct net *net,
struct rb_node *parent;
struct nf_conncount_rb *rbconn;
unsigned int hash;
- u8 keylen = data->keylen;
hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS;
root = &data->root[hash];
@@ -414,7 +412,7 @@ count_tree(struct net *net,
rbconn = rb_entry(parent, struct nf_conncount_rb, node);
- diff = key_diff(key, rbconn->key, keylen);
+ diff = key_diff(key, rbconn->key, data->keylen);
if (diff < 0) {
parent = rcu_dereference_raw(parent->rb_left);
} else if (diff > 0) {
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index d2492d050fe6..4a136fc3a9c0 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -32,7 +32,9 @@
* -EINVAL - Passed NULL for bpf_tuple pointer
* -EINVAL - opts->reserved is not 0
* -EINVAL - netns_id is less than -1
- * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
+ * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (16) or 12
+ * -EINVAL - opts->ct_zone_id set when
+ opts__sz isn't NF_BPF_CT_OPTS_SZ (16)
* -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
* -ENONET - No network namespace found for netns_id
* -ENOENT - Conntrack lookup could not find entry for tuple
@@ -42,6 +44,8 @@
* Values:
* IPPROTO_TCP, IPPROTO_UDP
* @dir: - connection tracking tuple direction.
+ * @ct_zone_id - connection tracking zone id.
+ * @ct_zone_dir - connection tracking zone direction.
* @reserved - Reserved member, will be reused for more options in future
* Values:
* 0
@@ -51,11 +55,13 @@ struct bpf_ct_opts {
s32 error;
u8 l4proto;
u8 dir;
- u8 reserved[2];
+ u16 ct_zone_id;
+ u8 ct_zone_dir;
+ u8 reserved[3];
};
enum {
- NF_BPF_CT_OPTS_SZ = 12,
+ NF_BPF_CT_OPTS_SZ = 16,
};
static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
@@ -104,12 +110,21 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
u32 timeout)
{
struct nf_conntrack_tuple otuple, rtuple;
+ struct nf_conntrack_zone ct_zone;
struct nf_conn *ct;
int err;
- if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
- opts_len != NF_BPF_CT_OPTS_SZ)
+ if (!opts || !bpf_tuple)
return ERR_PTR(-EINVAL);
+ if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == 12))
+ return ERR_PTR(-EINVAL);
+ if (opts_len == NF_BPF_CT_OPTS_SZ) {
+ if (opts->reserved[0] || opts->reserved[1] || opts->reserved[2])
+ return ERR_PTR(-EINVAL);
+ } else {
+ if (opts->ct_zone_id)
+ return ERR_PTR(-EINVAL);
+ }
if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
return ERR_PTR(-EINVAL);
@@ -130,7 +145,16 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
return ERR_PTR(-ENONET);
}
- ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
+ if (opts_len == NF_BPF_CT_OPTS_SZ) {
+ if (opts->ct_zone_dir == 0)
+ opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
+ nf_ct_zone_init(&ct_zone,
+ opts->ct_zone_id, opts->ct_zone_dir, 0);
+ } else {
+ ct_zone = nf_ct_zone_dflt;
+ }
+
+ ct = nf_conntrack_alloc(net, &ct_zone, &otuple, &rtuple,
GFP_ATOMIC);
if (IS_ERR(ct))
goto out;
@@ -152,12 +176,21 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
{
struct nf_conntrack_tuple_hash *hash;
struct nf_conntrack_tuple tuple;
+ struct nf_conntrack_zone ct_zone;
struct nf_conn *ct;
int err;
- if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
- opts_len != NF_BPF_CT_OPTS_SZ)
+ if (!opts || !bpf_tuple)
return ERR_PTR(-EINVAL);
+ if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == 12))
+ return ERR_PTR(-EINVAL);
+ if (opts_len == NF_BPF_CT_OPTS_SZ) {
+ if (opts->reserved[0] || opts->reserved[1] || opts->reserved[2])
+ return ERR_PTR(-EINVAL);
+ } else {
+ if (opts->ct_zone_id)
+ return ERR_PTR(-EINVAL);
+ }
if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
return ERR_PTR(-EPROTO);
if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
@@ -174,7 +207,16 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
return ERR_PTR(-ENONET);
}
- hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
+ if (opts_len == NF_BPF_CT_OPTS_SZ) {
+ if (opts->ct_zone_dir == 0)
+ opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
+ nf_ct_zone_init(&ct_zone,
+ opts->ct_zone_id, opts->ct_zone_dir, 0);
+ } else {
+ ct_zone = nf_ct_zone_dflt;
+ }
+
+ hash = nf_conntrack_find_get(net, &ct_zone, &tuple);
if (opts->netns_id >= 0)
put_net(net);
if (!hash)
@@ -245,7 +287,7 @@ __bpf_kfunc_start_defs();
* @opts - Additional options for allocation (documented above)
* Cannot be NULL
* @opts__sz - Length of the bpf_ct_opts structure
- * Must be NF_BPF_CT_OPTS_SZ (12)
+ * Must be NF_BPF_CT_OPTS_SZ (16) or 12
*/
__bpf_kfunc struct nf_conn___init *
bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
@@ -279,7 +321,7 @@ bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
* @opts - Additional options for lookup (documented above)
* Cannot be NULL
* @opts__sz - Length of the bpf_ct_opts structure
- * Must be NF_BPF_CT_OPTS_SZ (12)
+ * Must be NF_BPF_CT_OPTS_SZ (16) or 12
*/
__bpf_kfunc struct nf_conn *
bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
@@ -312,7 +354,7 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
* @opts - Additional options for allocation (documented above)
* Cannot be NULL
* @opts__sz - Length of the bpf_ct_opts structure
- * Must be NF_BPF_CT_OPTS_SZ (12)
+ * Must be NF_BPF_CT_OPTS_SZ (16) or 12
*/
__bpf_kfunc struct nf_conn___init *
bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
@@ -347,7 +389,7 @@ bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
* @opts - Additional options for lookup (documented above)
* Cannot be NULL
* @opts__sz - Length of the bpf_ct_opts structure
- * Must be NF_BPF_CT_OPTS_SZ (12)
+ * Must be NF_BPF_CT_OPTS_SZ (16) or 12
*/
__bpf_kfunc struct nf_conn *
bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c63868666bd9..9384426ddc06 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1090,7 +1090,7 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
* A conntrack entry can be inserted to the connection tracking table
* if there is no existing entry with an identical tuple.
*
- * If there is one, @skb (and the assocated, unconfirmed conntrack) has
+ * If there is one, @skb (and the associated, unconfirmed conntrack) has
* to be dropped. In case @skb is retransmitted, next conntrack lookup
* will find the already-existing entry.
*
@@ -1440,8 +1440,6 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
const struct nf_conntrack_l4proto *l4proto;
u8 protonum = nf_ct_protonum(ct);
- if (test_bit(IPS_OFFLOAD_BIT, &ct->status) && protonum != IPPROTO_UDP)
- return false;
if (!test_bit(IPS_ASSURED_BIT, &ct->status))
return true;
@@ -2024,7 +2022,7 @@ repeat:
goto repeat;
NF_CT_STAT_INC_ATOMIC(state->net, invalid);
- if (ret == -NF_DROP)
+ if (ret == NF_DROP)
NF_CT_STAT_INC_ATOMIC(state->net, drop);
ret = -ret;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 3b846cbdc050..4cbf71d0786b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -3420,7 +3420,8 @@ static int ctnetlink_del_expect(struct sk_buff *skb,
if (cda[CTA_EXPECT_ID]) {
__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
- if (ntohl(id) != (u32)(unsigned long)exp) {
+
+ if (id != nf_expect_get_id(exp)) {
nf_ct_expect_put(exp);
return -ENOENT;
}
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index e2db1f4ec2df..ebc4f733bb2e 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -525,7 +525,7 @@ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
dh = skb_header_pointer(skb, dataoff, sizeof(*dh), &_dh.dh);
if (!dh)
- return NF_DROP;
+ return -NF_ACCEPT;
if (dccp_error(dh, skb, dataoff, state))
return -NF_ACCEPT;
@@ -533,7 +533,7 @@ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
/* pull again, including possible 48 bit sequences and subtype header */
dh = dccp_header_pointer(skb, dataoff, dh, &_dh);
if (!dh)
- return NF_DROP;
+ return -NF_ACCEPT;
type = dh->dccph_type;
if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh, state))
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index 1020d67600a9..327b8059025d 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -62,7 +62,9 @@ static const u_int8_t noct_valid_new[] = {
[NDISC_ROUTER_ADVERTISEMENT - 130] = 1,
[NDISC_NEIGHBOUR_SOLICITATION - 130] = 1,
[NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1,
- [ICMPV6_MLD2_REPORT - 130] = 1
+ [ICMPV6_MLD2_REPORT - 130] = 1,
+ [ICMPV6_MRDISC_ADV - 130] = 1,
+ [ICMPV6_MRDISC_SOL - 130] = 1
};
bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple,
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 0ee98ce5b816..7d4f0fa8b609 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -22,9 +22,6 @@
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_timestamp.h>
-#ifdef CONFIG_LWTUNNEL
-#include <net/netfilter/nf_hooks_lwtunnel.h>
-#endif
#include <linux/rculist_nulls.h>
static bool enable_hooks __read_mostly;
@@ -527,7 +524,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_count);
static unsigned int nf_conntrack_htable_size_user __read_mostly;
static int
-nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
+nf_conntrack_hash_sysctl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
@@ -612,15 +609,10 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE,
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM,
#endif
-#ifdef CONFIG_LWTUNNEL
- NF_SYSCTL_CT_LWTUNNEL,
-#endif
- __NF_SYSCTL_CT_LAST_SYSCTL,
+ NF_SYSCTL_CT_LAST_SYSCTL,
};
-#define NF_SYSCTL_CT_LAST_SYSCTL (__NF_SYSCTL_CT_LAST_SYSCTL + 1)
-
static struct ctl_table nf_ct_sysctl_table[] = {
[NF_SYSCTL_CT_MAX] = {
.procname = "nf_conntrack_max",
@@ -948,16 +940,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.proc_handler = proc_dointvec_jiffies,
},
#endif
-#ifdef CONFIG_LWTUNNEL
- [NF_SYSCTL_CT_LWTUNNEL] = {
- .procname = "nf_hooks_lwtunnel",
- .data = NULL,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = nf_hooks_lwtunnel_sysctl_handler,
- },
-#endif
- {}
};
static struct ctl_table nf_ct_netfilter_table[] = {
@@ -968,7 +950,6 @@ static struct ctl_table nf_ct_netfilter_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
@@ -1122,7 +1103,7 @@ out_unregister_netfilter:
static void nf_conntrack_standalone_fini_sysctl(struct net *net)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
- struct ctl_table *table;
+ const struct ctl_table *table;
table = cnet->sysctl_header->ctl_table_arg;
unregister_net_sysctl_table(cnet->sysctl_header);
diff --git a/net/netfilter/nf_flow_table_bpf.c b/net/netfilter/nf_flow_table_bpf.c
new file mode 100644
index 000000000000..4a5f5195f2d2
--- /dev/null
+++ b/net/netfilter/nf_flow_table_bpf.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable Flow Table Helpers for XDP hook
+ *
+ * These are called from the XDP programs.
+ * Note that it is allowed to break compatibility for these functions since
+ * the interface they are exposed through to BPF programs is explicitly
+ * unstable.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <net/xdp.h>
+
+/* bpf_flowtable_opts - options for bpf flowtable helpers
+ * @error: out parameter, set for any encountered error
+ */
+struct bpf_flowtable_opts {
+ s32 error;
+};
+
+enum {
+ NF_BPF_FLOWTABLE_OPTS_SZ = 4,
+};
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in nf_flow_table BTF");
+
+__bpf_kfunc_start_defs();
+
+static struct flow_offload_tuple_rhash *
+bpf_xdp_flow_tuple_lookup(struct net_device *dev,
+ struct flow_offload_tuple *tuple, __be16 proto)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *nf_flow_table;
+ struct flow_offload *nf_flow;
+
+ nf_flow_table = nf_flowtable_by_dev(dev);
+ if (!nf_flow_table)
+ return ERR_PTR(-ENOENT);
+
+ tuplehash = flow_offload_lookup(nf_flow_table, tuple);
+ if (!tuplehash)
+ return ERR_PTR(-ENOENT);
+
+ nf_flow = container_of(tuplehash, struct flow_offload,
+ tuplehash[tuplehash->tuple.dir]);
+ flow_offload_refresh(nf_flow_table, nf_flow, false);
+
+ return tuplehash;
+}
+
+__bpf_kfunc struct flow_offload_tuple_rhash *
+bpf_xdp_flow_lookup(struct xdp_md *ctx, struct bpf_fib_lookup *fib_tuple,
+ struct bpf_flowtable_opts *opts, u32 opts_len)
+{
+ struct xdp_buff *xdp = (struct xdp_buff *)ctx;
+ struct flow_offload_tuple tuple = {
+ .iifidx = fib_tuple->ifindex,
+ .l3proto = fib_tuple->family,
+ .l4proto = fib_tuple->l4_protocol,
+ .src_port = fib_tuple->sport,
+ .dst_port = fib_tuple->dport,
+ };
+ struct flow_offload_tuple_rhash *tuplehash;
+ __be16 proto;
+
+ if (opts_len != NF_BPF_FLOWTABLE_OPTS_SZ) {
+ opts->error = -EINVAL;
+ return NULL;
+ }
+
+ switch (fib_tuple->family) {
+ case AF_INET:
+ tuple.src_v4.s_addr = fib_tuple->ipv4_src;
+ tuple.dst_v4.s_addr = fib_tuple->ipv4_dst;
+ proto = htons(ETH_P_IP);
+ break;
+ case AF_INET6:
+ tuple.src_v6 = *(struct in6_addr *)&fib_tuple->ipv6_src;
+ tuple.dst_v6 = *(struct in6_addr *)&fib_tuple->ipv6_dst;
+ proto = htons(ETH_P_IPV6);
+ break;
+ default:
+ opts->error = -EAFNOSUPPORT;
+ return NULL;
+ }
+
+ tuplehash = bpf_xdp_flow_tuple_lookup(xdp->rxq->dev, &tuple, proto);
+ if (IS_ERR(tuplehash)) {
+ opts->error = PTR_ERR(tuplehash);
+ return NULL;
+ }
+
+ return tuplehash;
+}
+
+__diag_pop()
+
+__bpf_kfunc_end_defs();
+
+BTF_KFUNCS_START(nf_ft_kfunc_set)
+BTF_ID_FLAGS(func, bpf_xdp_flow_lookup, KF_TRUSTED_ARGS | KF_RET_NULL)
+BTF_KFUNCS_END(nf_ft_kfunc_set)
+
+static const struct btf_kfunc_id_set nf_flow_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &nf_ft_kfunc_set,
+};
+
+int nf_flow_register_bpf(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP,
+ &nf_flow_kfunc_set);
+}
+EXPORT_SYMBOL_GPL(nf_flow_register_bpf);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index a0571339239c..5c1ff07eaee0 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -77,12 +77,8 @@ EXPORT_SYMBOL_GPL(flow_offload_alloc);
static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
{
- const struct rt6_info *rt;
-
- if (flow_tuple->l3proto == NFPROTO_IPV6) {
- rt = (const struct rt6_info *)flow_tuple->dst_cache;
- return rt6_get_cookie(rt);
- }
+ if (flow_tuple->l3proto == NFPROTO_IPV6)
+ return rt6_get_cookie(dst_rt6_info(flow_tuple->dst_cache));
return 0;
}
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 6eef15648b7b..88787b45e30d 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -98,7 +98,7 @@ static int __init nf_flow_inet_module_init(void)
nft_register_flowtable_type(&flowtable_ipv6);
nft_register_flowtable_type(&flowtable_inet);
- return 0;
+ return nf_flow_register_bpf();
}
static void __exit nf_flow_inet_module_exit(void)
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 5383bed3d3e0..c2c005234dcd 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -434,7 +434,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
return NF_ACCEPT;
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
- rt = (struct rtable *)tuplehash->tuple.dst_cache;
+ rt = dst_rtable(tuplehash->tuple.dst_cache);
memset(skb->cb, 0, sizeof(struct inet_skb_parm));
IPCB(skb)->iif = skb->dev->ifindex;
IPCB(skb)->flags = IPSKB_FORWARDED;
@@ -446,7 +446,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
- rt = (struct rtable *)tuplehash->tuple.dst_cache;
+ rt = dst_rtable(tuplehash->tuple.dst_cache);
outdev = rt->dst.dev;
skb->dev = outdev;
nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
@@ -729,7 +729,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
return NF_ACCEPT;
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
- rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+ rt = dst_rt6_info(tuplehash->tuple.dst_cache);
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
IP6CB(skb)->iif = skb->dev->ifindex;
IP6CB(skb)->flags = IP6SKB_FORWARDED;
@@ -741,7 +741,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
- rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+ rt = dst_rt6_info(tuplehash->tuple.dst_cache);
outdev = rt->dst.dev;
skb->dev = outdev;
nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index a010b25076ca..e06bc36f49fe 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -841,8 +841,8 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
struct list_head *block_cb_list)
{
struct flow_cls_offload cls_flow = {};
+ struct netlink_ext_ack extack = {};
struct flow_block_cb *block_cb;
- struct netlink_ext_ack extack;
__be16 proto = ETH_P_ALL;
int err, i = 0;
@@ -1192,7 +1192,7 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
int err;
if (!nf_flowtable_hw_offload(flowtable))
- return 0;
+ return nf_flow_offload_xdp_setup(flowtable, dev, cmd);
if (dev->netdev_ops->ndo_setup_tc)
err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
diff --git a/net/netfilter/nf_flow_table_xdp.c b/net/netfilter/nf_flow_table_xdp.c
new file mode 100644
index 000000000000..e1252d042699
--- /dev/null
+++ b/net/netfilter/nf_flow_table_xdp.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
+#include <net/flow_offload.h>
+#include <net/netfilter/nf_flow_table.h>
+
+struct flow_offload_xdp_ft {
+ struct list_head head;
+ struct nf_flowtable *ft;
+ struct rcu_head rcuhead;
+};
+
+struct flow_offload_xdp {
+ struct hlist_node hnode;
+ unsigned long net_device_addr;
+ struct list_head head;
+};
+
+#define NF_XDP_HT_BITS 4
+static DEFINE_HASHTABLE(nf_xdp_hashtable, NF_XDP_HT_BITS);
+static DEFINE_MUTEX(nf_xdp_hashtable_lock);
+
+/* caller must hold rcu read lock */
+struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev)
+{
+ unsigned long key = (unsigned long)dev;
+ struct flow_offload_xdp *iter;
+
+ hash_for_each_possible_rcu(nf_xdp_hashtable, iter, hnode, key) {
+ if (key == iter->net_device_addr) {
+ struct flow_offload_xdp_ft *ft_elem;
+
+ /* The user is supposed to insert a given net_device
+ * just into a single nf_flowtable so we always return
+ * the first element here.
+ */
+ ft_elem = list_first_or_null_rcu(&iter->head,
+ struct flow_offload_xdp_ft,
+ head);
+ return ft_elem ? ft_elem->ft : NULL;
+ }
+ }
+
+ return NULL;
+}
+
+static int nf_flowtable_by_dev_insert(struct nf_flowtable *ft,
+ const struct net_device *dev)
+{
+ struct flow_offload_xdp *iter, *elem = NULL;
+ unsigned long key = (unsigned long)dev;
+ struct flow_offload_xdp_ft *ft_elem;
+
+ ft_elem = kzalloc(sizeof(*ft_elem), GFP_KERNEL_ACCOUNT);
+ if (!ft_elem)
+ return -ENOMEM;
+
+ ft_elem->ft = ft;
+
+ mutex_lock(&nf_xdp_hashtable_lock);
+
+ hash_for_each_possible(nf_xdp_hashtable, iter, hnode, key) {
+ if (key == iter->net_device_addr) {
+ elem = iter;
+ break;
+ }
+ }
+
+ if (!elem) {
+ elem = kzalloc(sizeof(*elem), GFP_KERNEL_ACCOUNT);
+ if (!elem)
+ goto err_unlock;
+
+ elem->net_device_addr = key;
+ INIT_LIST_HEAD(&elem->head);
+ hash_add_rcu(nf_xdp_hashtable, &elem->hnode, key);
+ }
+ list_add_tail_rcu(&ft_elem->head, &elem->head);
+
+ mutex_unlock(&nf_xdp_hashtable_lock);
+
+ return 0;
+
+err_unlock:
+ mutex_unlock(&nf_xdp_hashtable_lock);
+ kfree(ft_elem);
+
+ return -ENOMEM;
+}
+
+static void nf_flowtable_by_dev_remove(struct nf_flowtable *ft,
+ const struct net_device *dev)
+{
+ struct flow_offload_xdp *iter, *elem = NULL;
+ unsigned long key = (unsigned long)dev;
+
+ mutex_lock(&nf_xdp_hashtable_lock);
+
+ hash_for_each_possible(nf_xdp_hashtable, iter, hnode, key) {
+ if (key == iter->net_device_addr) {
+ elem = iter;
+ break;
+ }
+ }
+
+ if (elem) {
+ struct flow_offload_xdp_ft *ft_elem, *ft_next;
+
+ list_for_each_entry_safe(ft_elem, ft_next, &elem->head, head) {
+ if (ft_elem->ft == ft) {
+ list_del_rcu(&ft_elem->head);
+ kfree_rcu(ft_elem, rcuhead);
+ }
+ }
+
+ if (list_empty(&elem->head))
+ hash_del_rcu(&elem->hnode);
+ else
+ elem = NULL;
+ }
+
+ mutex_unlock(&nf_xdp_hashtable_lock);
+
+ if (elem) {
+ synchronize_rcu();
+ kfree(elem);
+ }
+}
+
+int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable,
+ struct net_device *dev,
+ enum flow_block_command cmd)
+{
+ switch (cmd) {
+ case FLOW_BLOCK_BIND:
+ return nf_flowtable_by_dev_insert(flowtable, dev);
+ case FLOW_BLOCK_UNBIND:
+ nf_flowtable_by_dev_remove(flowtable, dev);
+ return 0;
+ }
+
+ WARN_ON_ONCE(1);
+ return 0;
+}
diff --git a/net/netfilter/nf_hooks_lwtunnel.c b/net/netfilter/nf_hooks_lwtunnel.c
index 00e89ffd78f6..2d890dd04ff8 100644
--- a/net/netfilter/nf_hooks_lwtunnel.c
+++ b/net/netfilter/nf_hooks_lwtunnel.c
@@ -3,6 +3,9 @@
#include <linux/sysctl.h>
#include <net/lwtunnel.h>
#include <net/netfilter/nf_hooks_lwtunnel.h>
+#include <linux/netfilter.h>
+
+#include "nf_internals.h"
static inline int nf_hooks_lwtunnel_get(void)
{
@@ -25,7 +28,7 @@ static inline int nf_hooks_lwtunnel_set(int enable)
}
#ifdef CONFIG_SYSCTL
-int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
+int nf_hooks_lwtunnel_sysctl_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int proc_nf_hooks_lwtunnel_enabled = 0;
@@ -50,4 +53,71 @@ int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
return ret;
}
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_sysctl_handler);
+
+static struct ctl_table nf_lwtunnel_sysctl_table[] = {
+ {
+ .procname = "nf_hooks_lwtunnel",
+ .data = NULL,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = nf_hooks_lwtunnel_sysctl_handler,
+ },
+};
+
+static int __net_init nf_lwtunnel_net_init(struct net *net)
+{
+ struct ctl_table_header *hdr;
+ struct ctl_table *table;
+
+ table = nf_lwtunnel_sysctl_table;
+ if (!net_eq(net, &init_net)) {
+ table = kmemdup(nf_lwtunnel_sysctl_table,
+ sizeof(nf_lwtunnel_sysctl_table),
+ GFP_KERNEL);
+ if (!table)
+ goto err_alloc;
+ }
+
+ hdr = register_net_sysctl_sz(net, "net/netfilter", table,
+ ARRAY_SIZE(nf_lwtunnel_sysctl_table));
+ if (!hdr)
+ goto err_reg;
+
+ net->nf.nf_lwtnl_dir_header = hdr;
+
+ return 0;
+err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(table);
+err_alloc:
+ return -ENOMEM;
+}
+
+static void __net_exit nf_lwtunnel_net_exit(struct net *net)
+{
+ const struct ctl_table *table;
+
+ table = net->nf.nf_lwtnl_dir_header->ctl_table_arg;
+ unregister_net_sysctl_table(net->nf.nf_lwtnl_dir_header);
+ if (!net_eq(net, &init_net))
+ kfree(table);
+}
+
+static struct pernet_operations nf_lwtunnel_net_ops = {
+ .init = nf_lwtunnel_net_init,
+ .exit = nf_lwtunnel_net_exit,
+};
+
+int __init netfilter_lwtunnel_init(void)
+{
+ return register_pernet_subsys(&nf_lwtunnel_net_ops);
+}
+
+void netfilter_lwtunnel_fini(void)
+{
+ unregister_pernet_subsys(&nf_lwtunnel_net_ops);
+}
+#else
+int __init netfilter_lwtunnel_init(void) { return 0; }
+void netfilter_lwtunnel_fini(void) {}
#endif /* CONFIG_SYSCTL */
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 832ae64179f0..25403023060b 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -29,6 +29,12 @@ void nf_queue_nf_hook_drop(struct net *net);
/* nf_log.c */
int __init netfilter_log_init(void);
+#ifdef CONFIG_LWTUNNEL
+/* nf_hooks_lwtunnel.c */
+int __init netfilter_lwtunnel_init(void);
+void netfilter_lwtunnel_fini(void);
+#endif
+
/* core.c */
void nf_hook_entries_delete_raw(struct nf_hook_entries __rcu **pp,
const struct nf_hook_ops *reg);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 370f8231385c..6dd0de33eebd 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -395,7 +395,7 @@ static const struct seq_operations nflog_seq_ops = {
#ifdef CONFIG_SYSCTL
static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3];
-static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1];
+static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO];
static struct ctl_table_header *nf_log_sysctl_fhdr;
static struct ctl_table nf_log_sysctl_ftable[] = {
@@ -406,10 +406,9 @@ static struct ctl_table nf_log_sysctl_ftable[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
-static int nf_log_proc_dostring(struct ctl_table *table, int write,
+static int nf_log_proc_dostring(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
const struct nf_logger *logger;
@@ -514,7 +513,7 @@ err_alloc:
static void netfilter_log_sysctl_exit(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
table = net->nf.nf_log_dir_header->ctl_table_arg;
unregister_net_sysctl_table(net->nf.nf_log_dir_header);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 167074283ea9..0a2f79346958 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -153,14 +153,18 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
{
struct nft_trans *trans;
- trans = kzalloc(sizeof(struct nft_trans) + size, gfp);
+ trans = kzalloc(size, gfp);
if (trans == NULL)
return NULL;
INIT_LIST_HEAD(&trans->list);
- INIT_LIST_HEAD(&trans->binding_list);
trans->msg_type = msg_type;
- trans->ctx = *ctx;
+
+ trans->net = ctx->net;
+ trans->table = ctx->table;
+ trans->seq = ctx->seq;
+ trans->flags = ctx->flags;
+ trans->report = ctx->report;
return trans;
}
@@ -171,10 +175,26 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL);
}
+static struct nft_trans_binding *nft_trans_get_binding(struct nft_trans *trans)
+{
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWCHAIN:
+ case NFT_MSG_NEWSET:
+ return container_of(trans, struct nft_trans_binding, nft_trans);
+ }
+
+ return NULL;
+}
+
static void nft_trans_list_del(struct nft_trans *trans)
{
+ struct nft_trans_binding *trans_binding;
+
list_del(&trans->list);
- list_del(&trans->binding_list);
+
+ trans_binding = nft_trans_get_binding(trans);
+ if (trans_binding)
+ list_del(&trans_binding->binding_list);
}
static void nft_trans_destroy(struct nft_trans *trans)
@@ -236,7 +256,7 @@ static void __nft_chain_trans_bind(const struct nft_ctx *ctx,
nft_trans_chain_bound(trans) = bind;
break;
case NFT_MSG_NEWRULE:
- if (trans->ctx.chain == chain)
+ if (nft_trans_rule_chain(trans) == chain)
nft_trans_rule_bound(trans) = bind;
break;
}
@@ -372,21 +392,26 @@ static void nf_tables_unregister_hook(struct net *net,
static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans)
{
struct nftables_pernet *nft_net = nft_pernet(net);
+ struct nft_trans_binding *binding;
+
+ list_add_tail(&trans->list, &nft_net->commit_list);
+
+ binding = nft_trans_get_binding(trans);
+ if (!binding)
+ return;
switch (trans->msg_type) {
case NFT_MSG_NEWSET:
if (!nft_trans_set_update(trans) &&
nft_set_is_anonymous(nft_trans_set(trans)))
- list_add_tail(&trans->binding_list, &nft_net->binding_list);
+ list_add_tail(&binding->binding_list, &nft_net->binding_list);
break;
case NFT_MSG_NEWCHAIN:
if (!nft_trans_chain_update(trans) &&
nft_chain_binding(nft_trans_chain(trans)))
- list_add_tail(&trans->binding_list, &nft_net->binding_list);
+ list_add_tail(&binding->binding_list, &nft_net->binding_list);
break;
}
-
- list_add_tail(&trans->list, &nft_net->commit_list);
}
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
@@ -416,11 +441,28 @@ static int nft_deltable(struct nft_ctx *ctx)
return err;
}
-static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
+static struct nft_trans *
+nft_trans_alloc_chain(const struct nft_ctx *ctx, int msg_type)
{
+ struct nft_trans_chain *trans_chain;
struct nft_trans *trans;
trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain));
+ if (!trans)
+ return NULL;
+
+ trans_chain = nft_trans_container_chain(trans);
+ INIT_LIST_HEAD(&trans_chain->nft_trans_binding.binding_list);
+ trans_chain->chain = ctx->chain;
+
+ return trans;
+}
+
+static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc_chain(ctx, msg_type);
if (trans == NULL)
return ERR_PTR(-ENOMEM);
@@ -432,7 +474,6 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID]));
}
}
- nft_trans_chain(trans) = ctx->chain;
nft_trans_commit_list_add_tail(ctx->net, trans);
return trans;
@@ -505,6 +546,7 @@ static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID]));
}
nft_trans_rule(trans) = rule;
+ nft_trans_rule_chain(trans) = ctx->chain;
nft_trans_commit_list_add_tail(ctx->net, trans);
return trans;
@@ -560,12 +602,16 @@ static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
struct nft_set *set,
const struct nft_set_desc *desc)
{
+ struct nft_trans_set *trans_set;
struct nft_trans *trans;
trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set));
if (trans == NULL)
return -ENOMEM;
+ trans_set = nft_trans_container_set(trans);
+ INIT_LIST_HEAD(&trans_set->nft_trans_binding.binding_list);
+
if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) {
nft_trans_set_id(trans) =
ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
@@ -1217,11 +1263,11 @@ static bool nft_table_pending_update(const struct nft_ctx *ctx)
return true;
list_for_each_entry(trans, &nft_net->commit_list, list) {
- if (trans->ctx.table == ctx->table &&
+ if (trans->table == ctx->table &&
((trans->msg_type == NFT_MSG_NEWCHAIN &&
nft_trans_chain_update(trans)) ||
(trans->msg_type == NFT_MSG_DELCHAIN &&
- nft_is_base_chain(trans->ctx.chain))))
+ nft_is_base_chain(nft_trans_chain(trans)))))
return true;
}
@@ -1615,15 +1661,15 @@ static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info,
return nft_flush_table(&ctx);
}
-static void nf_tables_table_destroy(struct nft_ctx *ctx)
+static void nf_tables_table_destroy(struct nft_table *table)
{
- if (WARN_ON(ctx->table->use > 0))
+ if (WARN_ON(table->use > 0))
return;
- rhltable_destroy(&ctx->table->chains_ht);
- kfree(ctx->table->name);
- kfree(ctx->table->udata);
- kfree(ctx->table);
+ rhltable_destroy(&table->chains_ht);
+ kfree(table->name);
+ kfree(table->udata);
+ kfree(table);
}
void nft_register_chain_type(const struct nft_chain_type *ctype)
@@ -2049,18 +2095,19 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
return newstats;
}
-static void nft_chain_stats_replace(struct nft_trans *trans)
+static void nft_chain_stats_replace(struct nft_trans_chain *trans)
{
- struct nft_base_chain *chain = nft_base_chain(trans->ctx.chain);
+ const struct nft_trans *t = &trans->nft_trans_binding.nft_trans;
+ struct nft_base_chain *chain = nft_base_chain(trans->chain);
- if (!nft_trans_chain_stats(trans))
+ if (!trans->stats)
return;
- nft_trans_chain_stats(trans) =
- rcu_replace_pointer(chain->stats, nft_trans_chain_stats(trans),
- lockdep_commit_lock_is_held(trans->ctx.net));
+ trans->stats =
+ rcu_replace_pointer(chain->stats, trans->stats,
+ lockdep_commit_lock_is_held(t->net));
- if (!nft_trans_chain_stats(trans))
+ if (!trans->stats)
static_branch_inc(&nft_counters_enabled);
}
@@ -2078,9 +2125,9 @@ static void nf_tables_chain_free_chain_rules(struct nft_chain *chain)
kvfree(chain->blob_next);
}
-void nf_tables_chain_destroy(struct nft_ctx *ctx)
+void nf_tables_chain_destroy(struct nft_chain *chain)
{
- struct nft_chain *chain = ctx->chain;
+ const struct nft_table *table = chain->table;
struct nft_hook *hook, *next;
if (WARN_ON(chain->use > 0))
@@ -2092,7 +2139,7 @@ void nf_tables_chain_destroy(struct nft_ctx *ctx)
if (nft_is_base_chain(chain)) {
struct nft_base_chain *basechain = nft_base_chain(chain);
- if (nft_base_chain_netdev(ctx->family, basechain->ops.hooknum)) {
+ if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) {
list_for_each_entry_safe(hook, next,
&basechain->hook_list, list) {
list_del_rcu(&hook->list);
@@ -2581,7 +2628,7 @@ err_chain_add:
err_trans:
nft_use_dec_restore(&table->use);
err_destroy_chain:
- nf_tables_chain_destroy(ctx);
+ nf_tables_chain_destroy(chain);
return err;
}
@@ -2698,8 +2745,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
}
err = -ENOMEM;
- trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN,
- sizeof(struct nft_trans_chain));
+ trans = nft_trans_alloc_chain(ctx, NFT_MSG_NEWCHAIN);
if (trans == NULL)
goto err_trans;
@@ -2725,7 +2771,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
err = -EEXIST;
list_for_each_entry(tmp, &nft_net->commit_list, list) {
if (tmp->msg_type == NFT_MSG_NEWCHAIN &&
- tmp->ctx.table == table &&
+ tmp->table == table &&
nft_trans_chain_update(tmp) &&
nft_trans_chain_name(tmp) &&
strcmp(name, nft_trans_chain_name(tmp)) == 0) {
@@ -2774,13 +2820,11 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
struct nft_trans *trans;
list_for_each_entry(trans, &nft_net->commit_list, list) {
- struct nft_chain *chain = trans->ctx.chain;
-
if (trans->msg_type == NFT_MSG_NEWCHAIN &&
- chain->table == table &&
+ nft_trans_chain(trans)->table == table &&
id == nft_trans_chain_id(trans) &&
- nft_active_genmask(chain, genmask))
- return chain;
+ nft_active_genmask(nft_trans_chain(trans), genmask))
+ return nft_trans_chain(trans);
}
return ERR_PTR(-ENOENT);
}
@@ -2915,8 +2959,7 @@ static int nft_delchain_hook(struct nft_ctx *ctx,
list_move(&hook->list, &chain_del_list);
}
- trans = nft_trans_alloc(ctx, NFT_MSG_DELCHAIN,
- sizeof(struct nft_trans_chain));
+ trans = nft_trans_alloc_chain(ctx, NFT_MSG_DELCHAIN);
if (!trans) {
err = -ENOMEM;
goto err_chain_del_hook;
@@ -3333,7 +3376,7 @@ err_expr_parse:
return ERR_PTR(err);
}
-int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
+int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src, gfp_t gfp)
{
int err;
@@ -3341,7 +3384,7 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
return -EINVAL;
dst->ops = src->ops;
- err = src->ops->clone(dst, src);
+ err = src->ops->clone(dst, src, gfp);
if (err < 0)
return err;
@@ -3823,6 +3866,15 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
nf_tables_rule_destroy(ctx, rule);
}
+/** nft_chain_validate - loop detection and hook validation
+ *
+ * @ctx: context containing call depth and base chain
+ * @chain: chain to validate
+ *
+ * Walk through the rules of the given chain and chase all jumps/gotos
+ * and set lookups until either the jump limit is hit or all reachable
+ * chains have been validated.
+ */
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
{
struct nft_expr *expr, *last;
@@ -3844,6 +3896,9 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
if (!expr->ops->validate)
continue;
+ /* This may call nft_chain_validate() recursively,
+ * callers that do so must increment ctx->level.
+ */
err = expr->ops->validate(ctx, expr, &data);
if (err < 0)
return err;
@@ -4188,7 +4243,7 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
list_for_each_entry(trans, &nft_net->commit_list, list) {
if (trans->msg_type == NFT_MSG_NEWRULE &&
- trans->ctx.chain == chain &&
+ nft_trans_rule_chain(trans) == chain &&
id == nft_trans_rule_id(trans))
return nft_trans_rule(trans);
}
@@ -5740,8 +5795,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext),
- set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE,
- set->dlen) < 0)
+ nft_set_datatype(set), set->dlen) < 0)
goto nla_put_failure;
if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS) &&
@@ -6525,7 +6579,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
if (!expr)
goto err_expr;
- err = nft_expr_clone(expr, set->exprs[i]);
+ err = nft_expr_clone(expr, set->exprs[i], GFP_KERNEL_ACCOUNT);
if (err < 0) {
kfree(expr);
goto err_expr;
@@ -6564,7 +6618,7 @@ static int nft_set_elem_expr_setup(struct nft_ctx *ctx,
for (i = 0; i < num_exprs; i++) {
expr = nft_setelem_expr_at(elem_expr, elem_expr->size);
- err = nft_expr_clone(expr, expr_array[i]);
+ err = nft_expr_clone(expr, expr_array[i], GFP_KERNEL_ACCOUNT);
if (err < 0)
goto err_elem_expr_setup;
@@ -7776,6 +7830,9 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
if (WARN_ON_ONCE(!type))
return -ENOENT;
+ if (!obj->ops->update)
+ return 0;
+
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj);
@@ -7963,6 +8020,19 @@ cont:
return skb->len;
}
+static int nf_tables_dumpreset_obj(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk));
+ int ret;
+
+ mutex_lock(&nft_net->commit_mutex);
+ ret = nf_tables_dump_obj(skb, cb);
+ mutex_unlock(&nft_net->commit_mutex);
+
+ return ret;
+}
+
static int nf_tables_dump_obj_start(struct netlink_callback *cb)
{
struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
@@ -7979,12 +8049,18 @@ static int nf_tables_dump_obj_start(struct netlink_callback *cb)
if (nla[NFTA_OBJ_TYPE])
ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
- if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
- ctx->reset = true;
-
return 0;
}
+static int nf_tables_dumpreset_obj_start(struct netlink_callback *cb)
+{
+ struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
+
+ ctx->reset = true;
+
+ return nf_tables_dump_obj_start(cb);
+}
+
static int nf_tables_dump_obj_done(struct netlink_callback *cb)
{
struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
@@ -7995,8 +8071,9 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
}
/* called with rcu_read_lock held */
-static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
- const struct nlattr * const nla[])
+static struct sk_buff *
+nf_tables_getobj_single(u32 portid, const struct nfnl_info *info,
+ const struct nlattr * const nla[], bool reset)
{
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
@@ -8005,72 +8082,109 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
struct net *net = info->net;
struct nft_object *obj;
struct sk_buff *skb2;
- bool reset = false;
u32 objtype;
int err;
- if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
- struct netlink_dump_control c = {
- .start = nf_tables_dump_obj_start,
- .dump = nf_tables_dump_obj,
- .done = nf_tables_dump_obj_done,
- .module = THIS_MODULE,
- .data = (void *)nla,
- };
-
- return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
- }
-
if (!nla[NFTA_OBJ_NAME] ||
!nla[NFTA_OBJ_TYPE])
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask, 0);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
- return PTR_ERR(table);
+ return ERR_CAST(table);
}
objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask);
if (IS_ERR(obj)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
- return PTR_ERR(obj);
+ return ERR_CAST(obj);
}
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb2)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
- if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
- reset = true;
+ err = nf_tables_fill_obj_info(skb2, net, portid,
+ info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
+ family, table, obj, reset);
+ if (err < 0) {
+ kfree_skb(skb2);
+ return ERR_PTR(err);
+ }
- if (reset) {
- const struct nftables_pernet *nft_net;
- char *buf;
+ return skb2;
+}
+
+static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ u32 portid = NETLINK_CB(skb).portid;
+ struct sk_buff *skb2;
- nft_net = nft_pernet(net);
- buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq);
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .start = nf_tables_dump_obj_start,
+ .dump = nf_tables_dump_obj,
+ .done = nf_tables_dump_obj_done,
+ .module = THIS_MODULE,
+ .data = (void *)nla,
+ };
- audit_log_nfcfg(buf,
- family,
- 1,
- AUDIT_NFT_OP_OBJ_RESET,
- GFP_ATOMIC);
- kfree(buf);
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
}
- err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid,
- info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
- family, table, obj, reset);
- if (err < 0)
- goto err_fill_obj_info;
+ skb2 = nf_tables_getobj_single(portid, info, nla, false);
+ if (IS_ERR(skb2))
+ return PTR_ERR(skb2);
- return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+ return nfnetlink_unicast(skb2, info->net, portid);
+}
-err_fill_obj_info:
- kfree_skb(skb2);
- return err;
+static int nf_tables_getobj_reset(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct nftables_pernet *nft_net = nft_pernet(info->net);
+ u32 portid = NETLINK_CB(skb).portid;
+ struct net *net = info->net;
+ struct sk_buff *skb2;
+ char *buf;
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .start = nf_tables_dumpreset_obj_start,
+ .dump = nf_tables_dumpreset_obj,
+ .done = nf_tables_dump_obj_done,
+ .module = THIS_MODULE,
+ .data = (void *)nla,
+ };
+
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+ }
+
+ if (!try_module_get(THIS_MODULE))
+ return -EINVAL;
+ rcu_read_unlock();
+ mutex_lock(&nft_net->commit_mutex);
+ skb2 = nf_tables_getobj_single(portid, info, nla, true);
+ mutex_unlock(&nft_net->commit_mutex);
+ rcu_read_lock();
+ module_put(THIS_MODULE);
+
+ if (IS_ERR(skb2))
+ return PTR_ERR(skb2);
+
+ buf = kasprintf(GFP_ATOMIC, "%.*s:%u",
+ nla_len(nla[NFTA_OBJ_TABLE]),
+ (char *)nla_data(nla[NFTA_OBJ_TABLE]),
+ nft_net->base_seq);
+ audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1,
+ AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC);
+ kfree(buf);
+
+ return nfnetlink_unicast(skb2, net, portid);
}
static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
@@ -9353,7 +9467,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_obj_policy,
},
[NFT_MSG_GETOBJ_RESET] = {
- .call = nf_tables_getobj,
+ .call = nf_tables_getobj_reset,
.type = NFNL_CB_RCU,
.attr_count = NFTA_OBJ_MAX,
.policy = nft_obj_policy,
@@ -9415,51 +9529,53 @@ static int nf_tables_validate(struct net *net)
*
* We defer the drop policy until the transaction has been finalized.
*/
-static void nft_chain_commit_drop_policy(struct nft_trans *trans)
+static void nft_chain_commit_drop_policy(struct nft_trans_chain *trans)
{
struct nft_base_chain *basechain;
- if (nft_trans_chain_policy(trans) != NF_DROP)
+ if (trans->policy != NF_DROP)
return;
- if (!nft_is_base_chain(trans->ctx.chain))
+ if (!nft_is_base_chain(trans->chain))
return;
- basechain = nft_base_chain(trans->ctx.chain);
+ basechain = nft_base_chain(trans->chain);
basechain->policy = NF_DROP;
}
-static void nft_chain_commit_update(struct nft_trans *trans)
+static void nft_chain_commit_update(struct nft_trans_chain *trans)
{
+ struct nft_table *table = trans->nft_trans_binding.nft_trans.table;
struct nft_base_chain *basechain;
- if (nft_trans_chain_name(trans)) {
- rhltable_remove(&trans->ctx.table->chains_ht,
- &trans->ctx.chain->rhlhead,
+ if (trans->name) {
+ rhltable_remove(&table->chains_ht,
+ &trans->chain->rhlhead,
nft_chain_ht_params);
- swap(trans->ctx.chain->name, nft_trans_chain_name(trans));
- rhltable_insert_key(&trans->ctx.table->chains_ht,
- trans->ctx.chain->name,
- &trans->ctx.chain->rhlhead,
+ swap(trans->chain->name, trans->name);
+ rhltable_insert_key(&table->chains_ht,
+ trans->chain->name,
+ &trans->chain->rhlhead,
nft_chain_ht_params);
}
- if (!nft_is_base_chain(trans->ctx.chain))
+ if (!nft_is_base_chain(trans->chain))
return;
nft_chain_stats_replace(trans);
- basechain = nft_base_chain(trans->ctx.chain);
+ basechain = nft_base_chain(trans->chain);
- switch (nft_trans_chain_policy(trans)) {
+ switch (trans->policy) {
case NF_DROP:
case NF_ACCEPT:
- basechain->policy = nft_trans_chain_policy(trans);
+ basechain->policy = trans->policy;
break;
}
}
-static void nft_obj_commit_update(struct nft_trans *trans)
+static void nft_obj_commit_update(const struct nft_ctx *ctx,
+ struct nft_trans *trans)
{
struct nft_object *newobj;
struct nft_object *obj;
@@ -9467,18 +9583,25 @@ static void nft_obj_commit_update(struct nft_trans *trans)
obj = nft_trans_obj(trans);
newobj = nft_trans_obj_newobj(trans);
- if (obj->ops->update)
- obj->ops->update(obj, newobj);
+ if (WARN_ON_ONCE(!obj->ops->update))
+ return;
- nft_obj_destroy(&trans->ctx, newobj);
+ obj->ops->update(obj, newobj);
+ nft_obj_destroy(ctx, newobj);
}
static void nft_commit_release(struct nft_trans *trans)
{
+ struct nft_ctx ctx = {
+ .net = trans->net,
+ };
+
+ nft_ctx_update(&ctx, trans);
+
switch (trans->msg_type) {
case NFT_MSG_DELTABLE:
case NFT_MSG_DESTROYTABLE:
- nf_tables_table_destroy(&trans->ctx);
+ nf_tables_table_destroy(trans->table);
break;
case NFT_MSG_NEWCHAIN:
free_percpu(nft_trans_chain_stats(trans));
@@ -9489,25 +9612,25 @@ static void nft_commit_release(struct nft_trans *trans)
if (nft_trans_chain_update(trans))
nft_hooks_destroy(&nft_trans_chain_hooks(trans));
else
- nf_tables_chain_destroy(&trans->ctx);
+ nf_tables_chain_destroy(nft_trans_chain(trans));
break;
case NFT_MSG_DELRULE:
case NFT_MSG_DESTROYRULE:
- nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+ nf_tables_rule_destroy(&ctx, nft_trans_rule(trans));
break;
case NFT_MSG_DELSET:
case NFT_MSG_DESTROYSET:
- nft_set_destroy(&trans->ctx, nft_trans_set(trans));
+ nft_set_destroy(&ctx, nft_trans_set(trans));
break;
case NFT_MSG_DELSETELEM:
case NFT_MSG_DESTROYSETELEM:
- nf_tables_set_elem_destroy(&trans->ctx,
+ nf_tables_set_elem_destroy(&ctx,
nft_trans_elem_set(trans),
nft_trans_elem_priv(trans));
break;
case NFT_MSG_DELOBJ:
case NFT_MSG_DESTROYOBJ:
- nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
+ nft_obj_destroy(&ctx, nft_trans_obj(trans));
break;
case NFT_MSG_DELFLOWTABLE:
case NFT_MSG_DESTROYFLOWTABLE:
@@ -9519,7 +9642,7 @@ static void nft_commit_release(struct nft_trans *trans)
}
if (trans->put_net)
- put_net(trans->ctx.net);
+ put_net(trans->net);
kfree(trans);
}
@@ -9638,10 +9761,10 @@ static void nf_tables_commit_chain_prepare_cancel(struct net *net)
struct nft_trans *trans, *next;
list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) {
- struct nft_chain *chain = trans->ctx.chain;
-
if (trans->msg_type == NFT_MSG_NEWRULE ||
trans->msg_type == NFT_MSG_DELRULE) {
+ struct nft_chain *chain = nft_trans_rule_chain(trans);
+
kvfree(chain->blob_next);
chain->blob_next = NULL;
}
@@ -9999,7 +10122,7 @@ static void nf_tables_commit_release(struct net *net)
trans = list_last_entry(&nft_net->commit_list,
struct nft_trans, list);
- get_net(trans->ctx.net);
+ get_net(trans->net);
WARN_ON_ONCE(trans->put_net);
trans->put_net = true;
@@ -10143,12 +10266,15 @@ static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq)
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nftables_pernet *nft_net = nft_pernet(net);
+ const struct nlmsghdr *nlh = nlmsg_hdr(skb);
+ struct nft_trans_binding *trans_binding;
struct nft_trans *trans, *next;
unsigned int base_seq, gc_seq;
LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
struct nft_chain *chain;
struct nft_table *table;
+ struct nft_ctx ctx;
LIST_HEAD(adl);
int err;
@@ -10157,7 +10283,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
return 0;
}
- list_for_each_entry(trans, &nft_net->binding_list, binding_list) {
+ nft_ctx_init(&ctx, net, skb, nlh, NFPROTO_UNSPEC, NULL, NULL, NULL);
+
+ list_for_each_entry(trans_binding, &nft_net->binding_list, binding_list) {
+ trans = &trans_binding->nft_trans;
switch (trans->msg_type) {
case NFT_MSG_NEWSET:
if (!nft_trans_set_update(trans) &&
@@ -10175,6 +10304,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
return -EINVAL;
}
break;
+ default:
+ WARN_ONCE(1, "Unhandled bind type %d", trans->msg_type);
+ break;
}
}
@@ -10190,9 +10322,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
/* 1. Allocate space for next generation rules_gen_X[] */
list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) {
+ struct nft_table *table = trans->table;
int ret;
- ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table);
+ ret = nf_tables_commit_audit_alloc(&adl, table);
if (ret) {
nf_tables_commit_chain_prepare_cancel(net);
nf_tables_commit_audit_free(&adl);
@@ -10200,7 +10333,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
}
if (trans->msg_type == NFT_MSG_NEWRULE ||
trans->msg_type == NFT_MSG_DELRULE) {
- chain = trans->ctx.chain;
+ chain = nft_trans_rule_chain(trans);
ret = nf_tables_commit_chain_prepare(net, chain);
if (ret < 0) {
@@ -10233,70 +10366,71 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
net->nft.gencursor = nft_gencursor_next(net);
list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) {
- nf_tables_commit_audit_collect(&adl, trans->ctx.table,
- trans->msg_type);
+ struct nft_table *table = trans->table;
+
+ nft_ctx_update(&ctx, trans);
+
+ nf_tables_commit_audit_collect(&adl, table, trans->msg_type);
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
- if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) {
+ if (!(table->flags & __NFT_TABLE_F_UPDATE)) {
nft_trans_destroy(trans);
break;
}
- if (trans->ctx.table->flags & NFT_TABLE_F_DORMANT)
- nf_tables_table_disable(net, trans->ctx.table);
+ if (table->flags & NFT_TABLE_F_DORMANT)
+ nf_tables_table_disable(net, table);
- trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE;
+ table->flags &= ~__NFT_TABLE_F_UPDATE;
} else {
- nft_clear(net, trans->ctx.table);
+ nft_clear(net, table);
}
- nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE);
+ nf_tables_table_notify(&ctx, NFT_MSG_NEWTABLE);
nft_trans_destroy(trans);
break;
case NFT_MSG_DELTABLE:
case NFT_MSG_DESTROYTABLE:
- list_del_rcu(&trans->ctx.table->list);
- nf_tables_table_notify(&trans->ctx, trans->msg_type);
+ list_del_rcu(&table->list);
+ nf_tables_table_notify(&ctx, trans->msg_type);
break;
case NFT_MSG_NEWCHAIN:
if (nft_trans_chain_update(trans)) {
- nft_chain_commit_update(trans);
- nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN,
+ nft_chain_commit_update(nft_trans_container_chain(trans));
+ nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN,
&nft_trans_chain_hooks(trans));
list_splice(&nft_trans_chain_hooks(trans),
&nft_trans_basechain(trans)->hook_list);
/* trans destroyed after rcu grace period */
} else {
- nft_chain_commit_drop_policy(trans);
- nft_clear(net, trans->ctx.chain);
- nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN, NULL);
+ nft_chain_commit_drop_policy(nft_trans_container_chain(trans));
+ nft_clear(net, nft_trans_chain(trans));
+ nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, NULL);
nft_trans_destroy(trans);
}
break;
case NFT_MSG_DELCHAIN:
case NFT_MSG_DESTROYCHAIN:
if (nft_trans_chain_update(trans)) {
- nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN,
+ nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN,
&nft_trans_chain_hooks(trans));
- if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) {
+ if (!(table->flags & NFT_TABLE_F_DORMANT)) {
nft_netdev_unregister_hooks(net,
&nft_trans_chain_hooks(trans),
true);
}
} else {
- nft_chain_del(trans->ctx.chain);
- nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN,
+ nft_chain_del(nft_trans_chain(trans));
+ nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN,
NULL);
- nf_tables_unregister_hook(trans->ctx.net,
- trans->ctx.table,
- trans->ctx.chain);
+ nf_tables_unregister_hook(ctx.net, ctx.table,
+ nft_trans_chain(trans));
}
break;
case NFT_MSG_NEWRULE:
- nft_clear(trans->ctx.net, nft_trans_rule(trans));
- nf_tables_rule_notify(&trans->ctx,
- nft_trans_rule(trans),
+ nft_clear(net, nft_trans_rule(trans));
+ nf_tables_rule_notify(&ctx, nft_trans_rule(trans),
NFT_MSG_NEWRULE);
- if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)
nft_flow_rule_destroy(nft_trans_flow_rule(trans));
nft_trans_destroy(trans);
@@ -10304,14 +10438,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_DELRULE:
case NFT_MSG_DESTROYRULE:
list_del_rcu(&nft_trans_rule(trans)->list);
- nf_tables_rule_notify(&trans->ctx,
- nft_trans_rule(trans),
+ nf_tables_rule_notify(&ctx, nft_trans_rule(trans),
trans->msg_type);
- nft_rule_expr_deactivate(&trans->ctx,
- nft_trans_rule(trans),
+ nft_rule_expr_deactivate(&ctx, nft_trans_rule(trans),
NFT_TRANS_COMMIT);
- if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)
nft_flow_rule_destroy(nft_trans_flow_rule(trans));
break;
case NFT_MSG_NEWSET:
@@ -10330,9 +10462,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
*/
if (nft_set_is_anonymous(nft_trans_set(trans)) &&
!list_empty(&nft_trans_set(trans)->bindings))
- nft_use_dec(&trans->ctx.table->use);
+ nft_use_dec(&table->use);
}
- nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+ nf_tables_set_notify(&ctx, nft_trans_set(trans),
NFT_MSG_NEWSET, GFP_KERNEL);
nft_trans_destroy(trans);
break;
@@ -10340,14 +10472,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_DESTROYSET:
nft_trans_set(trans)->dead = 1;
list_del_rcu(&nft_trans_set(trans)->list);
- nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+ nf_tables_set_notify(&ctx, nft_trans_set(trans),
trans->msg_type, GFP_KERNEL);
break;
case NFT_MSG_NEWSETELEM:
- te = (struct nft_trans_elem *)trans->data;
+ te = nft_trans_container_elem(trans);
nft_setelem_activate(net, te->set, te->elem_priv);
- nf_tables_setelem_notify(&trans->ctx, te->set,
+ nf_tables_setelem_notify(&ctx, te->set,
te->elem_priv,
NFT_MSG_NEWSETELEM);
if (te->set->ops->commit &&
@@ -10359,9 +10491,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
break;
case NFT_MSG_DELSETELEM:
case NFT_MSG_DESTROYSETELEM:
- te = (struct nft_trans_elem *)trans->data;
+ te = nft_trans_container_elem(trans);
- nf_tables_setelem_notify(&trans->ctx, te->set,
+ nf_tables_setelem_notify(&ctx, te->set,
te->elem_priv,
trans->msg_type);
nft_setelem_remove(net, te->set, te->elem_priv);
@@ -10377,13 +10509,13 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
break;
case NFT_MSG_NEWOBJ:
if (nft_trans_obj_update(trans)) {
- nft_obj_commit_update(trans);
- nf_tables_obj_notify(&trans->ctx,
+ nft_obj_commit_update(&ctx, trans);
+ nf_tables_obj_notify(&ctx,
nft_trans_obj(trans),
NFT_MSG_NEWOBJ);
} else {
nft_clear(net, nft_trans_obj(trans));
- nf_tables_obj_notify(&trans->ctx,
+ nf_tables_obj_notify(&ctx,
nft_trans_obj(trans),
NFT_MSG_NEWOBJ);
nft_trans_destroy(trans);
@@ -10392,14 +10524,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_DELOBJ:
case NFT_MSG_DESTROYOBJ:
nft_obj_del(nft_trans_obj(trans));
- nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
+ nf_tables_obj_notify(&ctx, nft_trans_obj(trans),
trans->msg_type);
break;
case NFT_MSG_NEWFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
nft_trans_flowtable(trans)->data.flags =
nft_trans_flowtable_flags(trans);
- nf_tables_flowtable_notify(&trans->ctx,
+ nf_tables_flowtable_notify(&ctx,
nft_trans_flowtable(trans),
&nft_trans_flowtable_hooks(trans),
NFT_MSG_NEWFLOWTABLE);
@@ -10407,7 +10539,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
&nft_trans_flowtable(trans)->hook_list);
} else {
nft_clear(net, nft_trans_flowtable(trans));
- nf_tables_flowtable_notify(&trans->ctx,
+ nf_tables_flowtable_notify(&ctx,
nft_trans_flowtable(trans),
NULL,
NFT_MSG_NEWFLOWTABLE);
@@ -10417,7 +10549,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_DELFLOWTABLE:
case NFT_MSG_DESTROYFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
- nf_tables_flowtable_notify(&trans->ctx,
+ nf_tables_flowtable_notify(&ctx,
nft_trans_flowtable(trans),
&nft_trans_flowtable_hooks(trans),
trans->msg_type);
@@ -10425,7 +10557,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
&nft_trans_flowtable_hooks(trans));
} else {
list_del_rcu(&nft_trans_flowtable(trans)->list);
- nf_tables_flowtable_notify(&trans->ctx,
+ nf_tables_flowtable_notify(&ctx,
nft_trans_flowtable(trans),
NULL,
trans->msg_type);
@@ -10467,28 +10599,32 @@ static void nf_tables_module_autoload(struct net *net)
static void nf_tables_abort_release(struct nft_trans *trans)
{
+ struct nft_ctx ctx = { };
+
+ nft_ctx_update(&ctx, trans);
+
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
- nf_tables_table_destroy(&trans->ctx);
+ nf_tables_table_destroy(trans->table);
break;
case NFT_MSG_NEWCHAIN:
if (nft_trans_chain_update(trans))
nft_hooks_destroy(&nft_trans_chain_hooks(trans));
else
- nf_tables_chain_destroy(&trans->ctx);
+ nf_tables_chain_destroy(nft_trans_chain(trans));
break;
case NFT_MSG_NEWRULE:
- nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+ nf_tables_rule_destroy(&ctx, nft_trans_rule(trans));
break;
case NFT_MSG_NEWSET:
- nft_set_destroy(&trans->ctx, nft_trans_set(trans));
+ nft_set_destroy(&ctx, nft_trans_set(trans));
break;
case NFT_MSG_NEWSETELEM:
nft_set_elem_destroy(nft_trans_elem_set(trans),
nft_trans_elem_priv(trans), true);
break;
case NFT_MSG_NEWOBJ:
- nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
+ nft_obj_destroy(&ctx, nft_trans_obj(trans));
break;
case NFT_MSG_NEWFLOWTABLE:
if (nft_trans_flowtable_update(trans))
@@ -10520,6 +10656,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
struct nft_trans *trans, *next;
LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
+ struct nft_ctx ctx = {
+ .net = net,
+ };
int err = 0;
if (action == NFNL_ABORT_VALIDATE &&
@@ -10528,37 +10667,41 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list,
list) {
+ struct nft_table *table = trans->table;
+
+ nft_ctx_update(&ctx, trans);
+
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
- if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) {
+ if (!(table->flags & __NFT_TABLE_F_UPDATE)) {
nft_trans_destroy(trans);
break;
}
- if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_DORMANT) {
- nf_tables_table_disable(net, trans->ctx.table);
- trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
- } else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) {
- trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT;
+ if (table->flags & __NFT_TABLE_F_WAS_DORMANT) {
+ nf_tables_table_disable(net, table);
+ table->flags |= NFT_TABLE_F_DORMANT;
+ } else if (table->flags & __NFT_TABLE_F_WAS_AWAKEN) {
+ table->flags &= ~NFT_TABLE_F_DORMANT;
}
- if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_ORPHAN) {
- trans->ctx.table->flags &= ~NFT_TABLE_F_OWNER;
- trans->ctx.table->nlpid = 0;
+ if (table->flags & __NFT_TABLE_F_WAS_ORPHAN) {
+ table->flags &= ~NFT_TABLE_F_OWNER;
+ table->nlpid = 0;
}
- trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE;
+ table->flags &= ~__NFT_TABLE_F_UPDATE;
nft_trans_destroy(trans);
} else {
- list_del_rcu(&trans->ctx.table->list);
+ list_del_rcu(&table->list);
}
break;
case NFT_MSG_DELTABLE:
case NFT_MSG_DESTROYTABLE:
- nft_clear(trans->ctx.net, trans->ctx.table);
+ nft_clear(trans->net, table);
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWCHAIN:
if (nft_trans_chain_update(trans)) {
- if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) {
+ if (!(table->flags & NFT_TABLE_F_DORMANT)) {
nft_netdev_unregister_hooks(net,
&nft_trans_chain_hooks(trans),
true);
@@ -10571,11 +10714,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
}
- nft_use_dec_restore(&trans->ctx.table->use);
- nft_chain_del(trans->ctx.chain);
- nf_tables_unregister_hook(trans->ctx.net,
- trans->ctx.table,
- trans->ctx.chain);
+ nft_use_dec_restore(&table->use);
+ nft_chain_del(nft_trans_chain(trans));
+ nf_tables_unregister_hook(trans->net, table,
+ nft_trans_chain(trans));
}
break;
case NFT_MSG_DELCHAIN:
@@ -10584,8 +10726,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
list_splice(&nft_trans_chain_hooks(trans),
&nft_trans_basechain(trans)->hook_list);
} else {
- nft_use_inc_restore(&trans->ctx.table->use);
- nft_clear(trans->ctx.net, trans->ctx.chain);
+ nft_use_inc_restore(&table->use);
+ nft_clear(trans->net, nft_trans_chain(trans));
}
nft_trans_destroy(trans);
break;
@@ -10594,20 +10736,20 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
}
- nft_use_dec_restore(&trans->ctx.chain->use);
+ nft_use_dec_restore(&nft_trans_rule_chain(trans)->use);
list_del_rcu(&nft_trans_rule(trans)->list);
- nft_rule_expr_deactivate(&trans->ctx,
+ nft_rule_expr_deactivate(&ctx,
nft_trans_rule(trans),
NFT_TRANS_ABORT);
- if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)
nft_flow_rule_destroy(nft_trans_flow_rule(trans));
break;
case NFT_MSG_DELRULE:
case NFT_MSG_DESTROYRULE:
- nft_use_inc_restore(&trans->ctx.chain->use);
- nft_clear(trans->ctx.net, nft_trans_rule(trans));
- nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans));
- if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ nft_use_inc_restore(&nft_trans_rule_chain(trans)->use);
+ nft_clear(trans->net, nft_trans_rule(trans));
+ nft_rule_expr_activate(&ctx, nft_trans_rule(trans));
+ if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)
nft_flow_rule_destroy(nft_trans_flow_rule(trans));
nft_trans_destroy(trans);
@@ -10617,7 +10759,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
}
- nft_use_dec_restore(&trans->ctx.table->use);
+ nft_use_dec_restore(&table->use);
if (nft_trans_set_bound(trans)) {
nft_trans_destroy(trans);
break;
@@ -10627,10 +10769,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_DELSET:
case NFT_MSG_DESTROYSET:
- nft_use_inc_restore(&trans->ctx.table->use);
- nft_clear(trans->ctx.net, nft_trans_set(trans));
+ nft_use_inc_restore(&table->use);
+ nft_clear(trans->net, nft_trans_set(trans));
if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
- nft_map_activate(&trans->ctx, nft_trans_set(trans));
+ nft_map_activate(&ctx, nft_trans_set(trans));
nft_trans_destroy(trans);
break;
@@ -10639,7 +10781,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
}
- te = (struct nft_trans_elem *)trans->data;
+ te = nft_trans_container_elem(trans);
nft_setelem_remove(net, te->set, te->elem_priv);
if (!nft_setelem_is_catchall(te->set, te->elem_priv))
atomic_dec(&te->set->nelems);
@@ -10652,7 +10794,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_DELSETELEM:
case NFT_MSG_DESTROYSETELEM:
- te = (struct nft_trans_elem *)trans->data;
+ te = nft_trans_container_elem(trans);
if (!nft_setelem_active_next(net, te->set, te->elem_priv)) {
nft_setelem_data_activate(net, te->set, te->elem_priv);
@@ -10670,17 +10812,17 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_NEWOBJ:
if (nft_trans_obj_update(trans)) {
- nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans));
+ nft_obj_destroy(&ctx, nft_trans_obj_newobj(trans));
nft_trans_destroy(trans);
} else {
- nft_use_dec_restore(&trans->ctx.table->use);
+ nft_use_dec_restore(&table->use);
nft_obj_del(nft_trans_obj(trans));
}
break;
case NFT_MSG_DELOBJ:
case NFT_MSG_DESTROYOBJ:
- nft_use_inc_restore(&trans->ctx.table->use);
- nft_clear(trans->ctx.net, nft_trans_obj(trans));
+ nft_use_inc_restore(&table->use);
+ nft_clear(trans->net, nft_trans_obj(trans));
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWFLOWTABLE:
@@ -10688,7 +10830,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_unregister_flowtable_net_hooks(net,
&nft_trans_flowtable_hooks(trans));
} else {
- nft_use_dec_restore(&trans->ctx.table->use);
+ nft_use_dec_restore(&table->use);
list_del_rcu(&nft_trans_flowtable(trans)->list);
nft_unregister_flowtable_net_hooks(net,
&nft_trans_flowtable(trans)->hook_list);
@@ -10700,8 +10842,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
list_splice(&nft_trans_flowtable_hooks(trans),
&nft_trans_flowtable(trans)->hook_list);
} else {
- nft_use_inc_restore(&trans->ctx.table->use);
- nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
+ nft_use_inc_restore(&table->use);
+ nft_clear(trans->net, nft_trans_flowtable(trans));
}
nft_trans_destroy(trans);
break;
@@ -10806,150 +10948,6 @@ int nft_chain_validate_hooks(const struct nft_chain *chain,
}
EXPORT_SYMBOL_GPL(nft_chain_validate_hooks);
-/*
- * Loop detection - walk through the ruleset beginning at the destination chain
- * of a new jump until either the source chain is reached (loop) or all
- * reachable chains have been traversed.
- *
- * The loop check is performed whenever a new jump verdict is added to an
- * expression or verdict map or a verdict map is bound to a new chain.
- */
-
-static int nf_tables_check_loops(const struct nft_ctx *ctx,
- const struct nft_chain *chain);
-
-static int nft_check_loops(const struct nft_ctx *ctx,
- const struct nft_set_ext *ext)
-{
- const struct nft_data *data;
- int ret;
-
- data = nft_set_ext_data(ext);
- switch (data->verdict.code) {
- case NFT_JUMP:
- case NFT_GOTO:
- ret = nf_tables_check_loops(ctx, data->verdict.chain);
- break;
- default:
- ret = 0;
- break;
- }
-
- return ret;
-}
-
-static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
- struct nft_set *set,
- const struct nft_set_iter *iter,
- struct nft_elem_priv *elem_priv)
-{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
-
- if (!nft_set_elem_active(ext, iter->genmask))
- return 0;
-
- if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
- *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
- return 0;
-
- return nft_check_loops(ctx, ext);
-}
-
-static int nft_set_catchall_loops(const struct nft_ctx *ctx,
- struct nft_set *set)
-{
- u8 genmask = nft_genmask_next(ctx->net);
- struct nft_set_elem_catchall *catchall;
- struct nft_set_ext *ext;
- int ret = 0;
-
- list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
- ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_set_elem_active(ext, genmask))
- continue;
-
- ret = nft_check_loops(ctx, ext);
- if (ret < 0)
- return ret;
- }
-
- return ret;
-}
-
-static int nf_tables_check_loops(const struct nft_ctx *ctx,
- const struct nft_chain *chain)
-{
- const struct nft_rule *rule;
- const struct nft_expr *expr, *last;
- struct nft_set *set;
- struct nft_set_binding *binding;
- struct nft_set_iter iter;
-
- if (ctx->chain == chain)
- return -ELOOP;
-
- if (fatal_signal_pending(current))
- return -EINTR;
-
- list_for_each_entry(rule, &chain->rules, list) {
- nft_rule_for_each_expr(expr, last, rule) {
- struct nft_immediate_expr *priv;
- const struct nft_data *data;
- int err;
-
- if (strcmp(expr->ops->type->name, "immediate"))
- continue;
-
- priv = nft_expr_priv(expr);
- if (priv->dreg != NFT_REG_VERDICT)
- continue;
-
- data = &priv->data;
- switch (data->verdict.code) {
- case NFT_JUMP:
- case NFT_GOTO:
- err = nf_tables_check_loops(ctx,
- data->verdict.chain);
- if (err < 0)
- return err;
- break;
- default:
- break;
- }
- }
- }
-
- list_for_each_entry(set, &ctx->table->sets, list) {
- if (!nft_is_active_next(ctx->net, set))
- continue;
- if (!(set->flags & NFT_SET_MAP) ||
- set->dtype != NFT_DATA_VERDICT)
- continue;
-
- list_for_each_entry(binding, &set->bindings, list) {
- if (!(binding->flags & NFT_SET_MAP) ||
- binding->chain != chain)
- continue;
-
- iter.genmask = nft_genmask_next(ctx->net);
- iter.type = NFT_ITER_UPDATE;
- iter.skip = 0;
- iter.count = 0;
- iter.err = 0;
- iter.fn = nf_tables_loop_check_setelem;
-
- set->ops->walk(ctx, set, &iter);
- if (!iter.err)
- iter.err = nft_set_catchall_loops(ctx, set);
-
- if (iter.err < 0)
- return iter.err;
- }
- }
-
- return 0;
-}
-
/**
* nft_parse_u32_check - fetch u32 attribute and check for maximum value
*
@@ -11062,13 +11060,16 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
if (data != NULL &&
(data->verdict.code == NFT_GOTO ||
data->verdict.code == NFT_JUMP)) {
- err = nf_tables_check_loops(ctx, data->verdict.chain);
+ err = nft_chain_validate(ctx, data->verdict.chain);
if (err < 0)
return err;
}
return 0;
default:
+ if (type != NFT_DATA_VALUE)
+ return -EINVAL;
+
if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
return -EINVAL;
if (len == 0)
@@ -11077,8 +11078,6 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
sizeof_field(struct nft_regs, data))
return -ERANGE;
- if (data != NULL && type != NFT_DATA_VALUE)
- return -EINVAL;
return 0;
}
}
@@ -11361,7 +11360,7 @@ int __nft_release_basechain(struct nft_ctx *ctx)
}
nft_chain_del(ctx->chain);
nft_use_dec(&ctx->table->use);
- nf_tables_chain_destroy(ctx);
+ nf_tables_chain_destroy(ctx->chain);
return 0;
}
@@ -11436,12 +11435,11 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
nft_obj_destroy(&ctx, obj);
}
list_for_each_entry_safe(chain, nc, &table->chains, list) {
- ctx.chain = chain;
nft_chain_del(chain);
nft_use_dec(&table->use);
- nf_tables_chain_destroy(&ctx);
+ nf_tables_chain_destroy(chain);
}
- nf_tables_table_destroy(&ctx);
+ nf_tables_table_destroy(table);
}
static void __nft_release_tables(struct net *net)
@@ -11479,8 +11477,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
gc_seq = nft_gc_seq_begin(nft_net);
- if (!list_empty(&nf_tables_destroy_list))
- nf_tables_trans_destroy_flush_work();
+ nf_tables_trans_destroy_flush_work();
again:
list_for_each_entry(table, &nft_net->tables, list) {
if (nft_table_has_owner(table) &&
@@ -11584,6 +11581,14 @@ static int __init nf_tables_module_init(void)
{
int err;
+ BUILD_BUG_ON(offsetof(struct nft_trans_table, nft_trans) != 0);
+ BUILD_BUG_ON(offsetof(struct nft_trans_chain, nft_trans_binding.nft_trans) != 0);
+ BUILD_BUG_ON(offsetof(struct nft_trans_rule, nft_trans) != 0);
+ BUILD_BUG_ON(offsetof(struct nft_trans_set, nft_trans_binding.nft_trans) != 0);
+ BUILD_BUG_ON(offsetof(struct nft_trans_elem, nft_trans) != 0);
+ BUILD_BUG_ON(offsetof(struct nft_trans_obj, nft_trans) != 0);
+ BUILD_BUG_ON(offsetof(struct nft_trans_flowtable, nft_trans) != 0);
+
err = register_pernet_subsys(&nf_tables_net_ops);
if (err < 0)
return err;
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 12ab78fa5d84..64675f1c7f29 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -513,38 +513,38 @@ static void nft_flow_rule_offload_abort(struct net *net,
int err = 0;
list_for_each_entry_continue_reverse(trans, &nft_net->commit_list, list) {
- if (trans->ctx.family != NFPROTO_NETDEV)
+ if (trans->table->family != NFPROTO_NETDEV)
continue;
switch (trans->msg_type) {
case NFT_MSG_NEWCHAIN:
- if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) ||
+ if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) ||
nft_trans_chain_update(trans))
continue;
- err = nft_flow_offload_chain(trans->ctx.chain, NULL,
+ err = nft_flow_offload_chain(nft_trans_chain(trans), NULL,
FLOW_BLOCK_UNBIND);
break;
case NFT_MSG_DELCHAIN:
- if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD))
continue;
- err = nft_flow_offload_chain(trans->ctx.chain, NULL,
+ err = nft_flow_offload_chain(nft_trans_chain(trans), NULL,
FLOW_BLOCK_BIND);
break;
case NFT_MSG_NEWRULE:
- if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD))
continue;
- err = nft_flow_offload_rule(trans->ctx.chain,
+ err = nft_flow_offload_rule(nft_trans_rule_chain(trans),
nft_trans_rule(trans),
NULL, FLOW_CLS_DESTROY);
break;
case NFT_MSG_DELRULE:
- if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD))
continue;
- err = nft_flow_offload_rule(trans->ctx.chain,
+ err = nft_flow_offload_rule(nft_trans_rule_chain(trans),
nft_trans_rule(trans),
nft_trans_flow_rule(trans),
FLOW_CLS_REPLACE);
@@ -564,46 +564,46 @@ int nft_flow_rule_offload_commit(struct net *net)
u8 policy;
list_for_each_entry(trans, &nft_net->commit_list, list) {
- if (trans->ctx.family != NFPROTO_NETDEV)
+ if (trans->table->family != NFPROTO_NETDEV)
continue;
switch (trans->msg_type) {
case NFT_MSG_NEWCHAIN:
- if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) ||
+ if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) ||
nft_trans_chain_update(trans))
continue;
policy = nft_trans_chain_policy(trans);
- err = nft_flow_offload_chain(trans->ctx.chain, &policy,
+ err = nft_flow_offload_chain(nft_trans_chain(trans), &policy,
FLOW_BLOCK_BIND);
break;
case NFT_MSG_DELCHAIN:
- if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD))
continue;
policy = nft_trans_chain_policy(trans);
- err = nft_flow_offload_chain(trans->ctx.chain, &policy,
+ err = nft_flow_offload_chain(nft_trans_chain(trans), &policy,
FLOW_BLOCK_UNBIND);
break;
case NFT_MSG_NEWRULE:
- if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD))
continue;
- if (trans->ctx.flags & NLM_F_REPLACE ||
- !(trans->ctx.flags & NLM_F_APPEND)) {
+ if (trans->flags & NLM_F_REPLACE ||
+ !(trans->flags & NLM_F_APPEND)) {
err = -EOPNOTSUPP;
break;
}
- err = nft_flow_offload_rule(trans->ctx.chain,
+ err = nft_flow_offload_rule(nft_trans_rule_chain(trans),
nft_trans_rule(trans),
nft_trans_flow_rule(trans),
FLOW_CLS_REPLACE);
break;
case NFT_MSG_DELRULE:
- if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD))
continue;
- err = nft_flow_offload_rule(trans->ctx.chain,
+ err = nft_flow_offload_rule(nft_trans_rule_chain(trans),
nft_trans_rule(trans),
NULL, FLOW_CLS_DESTROY);
break;
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index a83637e3f455..580c55268f65 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -317,7 +317,7 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
net_get_random_once(&trace_key, sizeof(trace_key));
info->skbid = (u32)siphash_3u32(hash32_ptr(skb),
- skb_get_hash(skb),
+ skb_get_hash_net(nft_net(pkt), skb),
skb->skb_iif,
&trace_key);
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index c9fbe0f707b5..932b3ddb34f1 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -427,6 +427,11 @@ replay_abort:
nfnl_unlock(subsys_id);
+ if (nlh->nlmsg_flags & NLM_F_ACK) {
+ memset(&extack, 0, sizeof(extack));
+ nfnl_err_add(&err_list, nlh, 0, &extack);
+ }
+
while (skb->len >= nlmsg_total_size(0)) {
int msglen, type;
@@ -573,6 +578,9 @@ done:
} else if (err) {
ss->abort(net, oskb, NFNL_ABORT_NONE);
netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL);
+ } else if (nlh->nlmsg_flags & NLM_F_ACK) {
+ memset(&extack, 0, sizeof(extack));
+ nfnl_err_add(&err_list, nlh, 0, &extack);
}
} else {
enum nfnl_abort_action abort_action;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index f466af4f8531..eab4f476b47f 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -366,8 +366,7 @@ static int cttimeout_default_set(struct sk_buff *skb,
__u8 l4num;
int ret;
- if (!cda[CTA_TIMEOUT_L3PROTO] ||
- !cda[CTA_TIMEOUT_L4PROTO] ||
+ if (!cda[CTA_TIMEOUT_L4PROTO] ||
!cda[CTA_TIMEOUT_DATA])
return -EINVAL;
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 00f4bd21c59b..e0716da256bf 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -169,7 +169,9 @@ instance_destroy_rcu(struct rcu_head *head)
struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
rcu);
+ rcu_read_lock();
nfqnl_flush(inst, NULL, 0);
+ rcu_read_unlock();
kfree(inst);
module_put(THIS_MODULE);
}
@@ -323,7 +325,7 @@ static void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
hooks = nf_hook_entries_head(net, pf, entry->state.hook);
i = entry->hook_index;
- if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) {
+ if (!hooks || i >= hooks->num_hook_entries) {
kfree_skb_reason(skb, SKB_DROP_REASON_NETFILTER_DROP);
nf_queue_entry_free(entry);
return;
@@ -818,10 +820,41 @@ static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
static const unsigned long flags = IPS_CONFIRMED | IPS_DYING;
- const struct nf_conn *ct = (void *)skb_nfct(entry->skb);
+ struct nf_conn *ct = (void *)skb_nfct(entry->skb);
+ unsigned long status;
+ unsigned int use;
+
+ if (!ct)
+ return false;
- if (ct && ((ct->status & flags) == IPS_DYING))
+ status = READ_ONCE(ct->status);
+ if ((status & flags) == IPS_DYING)
return true;
+
+ if (status & IPS_CONFIRMED)
+ return false;
+
+ /* in some cases skb_clone() can occur after initial conntrack
+ * pickup, but conntrack assumes exclusive skb->_nfct ownership for
+ * unconfirmed entries.
+ *
+ * This happens for br_netfilter and with ip multicast routing.
+ * We can't be solved with serialization here because one clone could
+ * have been queued for local delivery.
+ */
+ use = refcount_read(&ct->ct_general.use);
+ if (likely(use == 1))
+ return false;
+
+ /* Can't decrement further? Exclusive ownership. */
+ if (!refcount_dec_not_one(&ct->ct_general.use))
+ return false;
+
+ skb_set_nfct(entry->skb, 0);
+ /* No nf_ct_put(): we already decremented .use and it cannot
+ * drop down to 0.
+ */
+ return true;
#endif
return false;
}
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index d170758a1eb5..7010541fcca6 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -325,9 +325,6 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev,
struct nft_hook *hook, *found = NULL;
int n = 0;
- if (event != NETDEV_UNREGISTER)
- return;
-
list_for_each_entry(hook, &basechain->hook_list, list) {
if (hook->ops.dev == dev)
found = hook;
@@ -367,8 +364,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
.net = dev_net(dev),
};
- if (event != NETDEV_UNREGISTER &&
- event != NETDEV_CHANGENAME)
+ if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
nft_net = nft_pernet(ctx.net);
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index de9d1980df69..92b984fa8175 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -210,12 +210,12 @@ static void nft_connlimit_destroy(const struct nft_ctx *ctx,
nft_connlimit_do_destroy(ctx, priv);
}
-static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
struct nft_connlimit *priv_dst = nft_expr_priv(dst);
struct nft_connlimit *priv_src = nft_expr_priv(src);
- priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC);
+ priv_dst->list = kmalloc(sizeof(*priv_dst->list), gfp);
if (!priv_dst->list)
return -ENOMEM;
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index dccc68a5135a..291ed2026367 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -226,7 +226,7 @@ static void nft_counter_destroy(const struct nft_ctx *ctx,
nft_counter_do_destroy(priv);
}
-static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
struct nft_counter_percpu_priv *priv = nft_expr_priv(src);
struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
@@ -236,7 +236,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
nft_counter_fetch(priv, &total);
- cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC);
+ cpu_stats = alloc_percpu_gfp(struct nft_counter, gfp);
if (cpu_stats == NULL)
return -ENOMEM;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index c09dba57354c..b4ada3ab2167 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -35,7 +35,7 @@ static int nft_dynset_expr_setup(const struct nft_dynset *priv,
for (i = 0; i < priv->num_exprs; i++) {
expr = nft_setelem_expr_at(elem_expr, elem_expr->size);
- if (nft_expr_clone(expr, priv->expr_array[i]) < 0)
+ if (nft_expr_clone(expr, priv->expr_array[i], GFP_ATOMIC) < 0)
return -1;
elem_expr->size += priv->expr_array[i]->ops->size;
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 37cfe6dd712d..b58f62195ff3 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -35,11 +35,9 @@ int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
switch (priv->result) {
case NFT_FIB_RESULT_OIF:
case NFT_FIB_RESULT_OIFNAME:
- hooks = (1 << NF_INET_PRE_ROUTING);
- if (priv->flags & NFTA_FIB_F_IIF) {
- hooks |= (1 << NF_INET_LOCAL_IN) |
- (1 << NF_INET_FORWARD);
- }
+ hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD);
break;
case NFT_FIB_RESULT_ADDRTYPE:
if (priv->flags & NFTA_FIB_F_IIF)
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 92d47e469204..868d68302d22 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -51,7 +51,8 @@ static void nft_symhash_eval(const struct nft_expr *expr,
struct sk_buff *skb = pkt->skb;
u32 h;
- h = reciprocal_scale(__skb_get_hash_symmetric(skb), priv->modulus);
+ h = reciprocal_scale(__skb_get_hash_symmetric_net(nft_net(pkt), skb),
+ priv->modulus);
regs->data[priv->dreg] = h + priv->offset;
}
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 6475c7abc1fe..ac2422c215e5 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -221,7 +221,7 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
list_del(&rule->list);
nf_tables_rule_destroy(&chain_ctx, rule);
}
- nf_tables_chain_destroy(&chain_ctx);
+ nf_tables_chain_destroy(chain);
break;
default:
break;
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
index 8e6d7eaf9dc8..de1b6066bfa8 100644
--- a/net/netfilter/nft_last.c
+++ b/net/netfilter/nft_last.c
@@ -102,12 +102,12 @@ static void nft_last_destroy(const struct nft_ctx *ctx,
kfree(priv->last);
}
-static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
struct nft_last_priv *priv_dst = nft_expr_priv(dst);
struct nft_last_priv *priv_src = nft_expr_priv(src);
- priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC);
+ priv_dst->last = kzalloc(sizeof(*priv_dst->last), gfp);
if (!priv_dst->last)
return -ENOMEM;
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index cefa25e0dbb0..21d26b79b460 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -150,7 +150,7 @@ static void nft_limit_destroy(const struct nft_ctx *ctx,
}
static int nft_limit_clone(struct nft_limit_priv *priv_dst,
- const struct nft_limit_priv *priv_src)
+ const struct nft_limit_priv *priv_src, gfp_t gfp)
{
priv_dst->tokens_max = priv_src->tokens_max;
priv_dst->rate = priv_src->rate;
@@ -158,7 +158,7 @@ static int nft_limit_clone(struct nft_limit_priv *priv_dst,
priv_dst->burst = priv_src->burst;
priv_dst->invert = priv_src->invert;
- priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC);
+ priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), gfp);
if (!priv_dst->limit)
return -ENOMEM;
@@ -223,14 +223,15 @@ static void nft_limit_pkts_destroy(const struct nft_ctx *ctx,
nft_limit_destroy(ctx, &priv->limit);
}
-static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src,
+ gfp_t gfp)
{
struct nft_limit_priv_pkts *priv_dst = nft_expr_priv(dst);
struct nft_limit_priv_pkts *priv_src = nft_expr_priv(src);
priv_dst->cost = priv_src->cost;
- return nft_limit_clone(&priv_dst->limit, &priv_src->limit);
+ return nft_limit_clone(&priv_dst->limit, &priv_src->limit, gfp);
}
static struct nft_expr_type nft_limit_type;
@@ -281,12 +282,13 @@ static void nft_limit_bytes_destroy(const struct nft_ctx *ctx,
nft_limit_destroy(ctx, priv);
}
-static int nft_limit_bytes_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_limit_bytes_clone(struct nft_expr *dst, const struct nft_expr *src,
+ gfp_t gfp)
{
struct nft_limit_priv *priv_dst = nft_expr_priv(dst);
struct nft_limit_priv *priv_src = nft_expr_priv(src);
- return nft_limit_clone(priv_dst, priv_src);
+ return nft_limit_clone(priv_dst, priv_src, gfp);
}
static const struct nft_expr_ops nft_limit_bytes_ops = {
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index b314ca728a29..f3080fa1b226 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -132,7 +132,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
return -EINVAL;
err = nft_parse_register_store(ctx, tb[NFTA_LOOKUP_DREG],
- &priv->dreg, NULL, set->dtype,
+ &priv->dreg, NULL,
+ nft_set_datatype(set),
set->dlen);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index ba0d3683a45d..9139ce38ea7b 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -839,6 +839,9 @@ static int nft_meta_inner_init(const struct nft_ctx *ctx,
struct nft_meta *priv = nft_expr_priv(expr);
unsigned int len;
+ if (!tb[NFTA_META_KEY] || !tb[NFTA_META_DREG])
+ return -EINVAL;
+
priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
switch (priv->key) {
case NFT_META_PROTOCOL:
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 0a689c8e0295..50429cbd42da 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -45,36 +45,27 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
int mac_off = skb_mac_header(skb) - skb->data;
u8 *vlanh, *dst_u8 = (u8 *) d;
struct vlan_ethhdr veth;
- u8 vlan_hlen = 0;
-
- if ((skb->protocol == htons(ETH_P_8021AD) ||
- skb->protocol == htons(ETH_P_8021Q)) &&
- offset >= VLAN_ETH_HLEN && offset < VLAN_ETH_HLEN + VLAN_HLEN)
- vlan_hlen += VLAN_HLEN;
vlanh = (u8 *) &veth;
- if (offset < VLAN_ETH_HLEN + vlan_hlen) {
+ if (offset < VLAN_ETH_HLEN) {
u8 ethlen = len;
- if (vlan_hlen &&
- skb_copy_bits(skb, mac_off, &veth, VLAN_ETH_HLEN) < 0)
- return false;
- else if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth))
+ if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth))
return false;
- if (offset + len > VLAN_ETH_HLEN + vlan_hlen)
- ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen;
+ if (offset + len > VLAN_ETH_HLEN)
+ ethlen -= offset + len - VLAN_ETH_HLEN;
- memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen);
+ memcpy(dst_u8, vlanh + offset, ethlen);
len -= ethlen;
if (len == 0)
return true;
dst_u8 += ethlen;
- offset = ETH_HLEN + vlan_hlen;
+ offset = ETH_HLEN;
} else {
- offset -= VLAN_HLEN + vlan_hlen;
+ offset -= VLAN_HLEN;
}
return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0;
@@ -154,12 +145,12 @@ int nft_payload_inner_offset(const struct nft_pktinfo *pkt)
return pkt->inneroff;
}
-static bool nft_payload_need_vlan_copy(const struct nft_payload *priv)
+static bool nft_payload_need_vlan_adjust(u32 offset, u32 len)
{
- unsigned int len = priv->offset + priv->len;
+ unsigned int boundary = offset + len;
/* data past ether src/dst requested, copy needed */
- if (len > offsetof(struct ethhdr, h_proto))
+ if (boundary > offsetof(struct ethhdr, h_proto))
return true;
return false;
@@ -183,7 +174,7 @@ void nft_payload_eval(const struct nft_expr *expr,
goto err;
if (skb_vlan_tag_present(skb) &&
- nft_payload_need_vlan_copy(priv)) {
+ nft_payload_need_vlan_adjust(priv->offset, priv->len)) {
if (!nft_payload_copy_vlan(dest, skb,
priv->offset, priv->len))
goto err;
@@ -659,6 +650,10 @@ static int nft_payload_inner_init(const struct nft_ctx *ctx,
struct nft_payload *priv = nft_expr_priv(expr);
u32 base;
+ if (!tb[NFTA_PAYLOAD_BASE] || !tb[NFTA_PAYLOAD_OFFSET] ||
+ !tb[NFTA_PAYLOAD_LEN] || !tb[NFTA_PAYLOAD_DREG])
+ return -EINVAL;
+
base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
switch (base) {
case NFT_PAYLOAD_TUN_HEADER:
@@ -810,21 +805,79 @@ struct nft_payload_set {
u8 csum_flags;
};
+/* This is not struct vlan_hdr. */
+struct nft_payload_vlan_hdr {
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+};
+
+static bool
+nft_payload_set_vlan(const u32 *src, struct sk_buff *skb, u8 offset, u8 len,
+ int *vlan_hlen)
+{
+ struct nft_payload_vlan_hdr *vlanh;
+ __be16 vlan_proto;
+ u16 vlan_tci;
+
+ if (offset >= offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto)) {
+ *vlan_hlen = VLAN_HLEN;
+ return true;
+ }
+
+ switch (offset) {
+ case offsetof(struct vlan_ethhdr, h_vlan_proto):
+ if (len == 2) {
+ vlan_proto = nft_reg_load_be16(src);
+ skb->vlan_proto = vlan_proto;
+ } else if (len == 4) {
+ vlanh = (struct nft_payload_vlan_hdr *)src;
+ __vlan_hwaccel_put_tag(skb, vlanh->h_vlan_proto,
+ ntohs(vlanh->h_vlan_TCI));
+ } else {
+ return false;
+ }
+ break;
+ case offsetof(struct vlan_ethhdr, h_vlan_TCI):
+ if (len != 2)
+ return false;
+
+ vlan_tci = ntohs(nft_reg_load_be16(src));
+ skb->vlan_tci = vlan_tci;
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
static void nft_payload_set_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_payload_set *priv = nft_expr_priv(expr);
- struct sk_buff *skb = pkt->skb;
const u32 *src = &regs->data[priv->sreg];
- int offset, csum_offset;
+ int offset, csum_offset, vlan_hlen = 0;
+ struct sk_buff *skb = pkt->skb;
__wsum fsum, tsum;
switch (priv->base) {
case NFT_PAYLOAD_LL_HEADER:
if (!skb_mac_header_was_set(skb))
goto err;
- offset = skb_mac_header(skb) - skb->data;
+
+ if (skb_vlan_tag_present(skb) &&
+ nft_payload_need_vlan_adjust(priv->offset, priv->len)) {
+ if (!nft_payload_set_vlan(src, skb,
+ priv->offset, priv->len,
+ &vlan_hlen))
+ goto err;
+
+ if (!vlan_hlen)
+ return;
+ }
+
+ offset = skb_mac_header(skb) - skb->data - vlan_hlen;
break;
case NFT_PAYLOAD_NETWORK_HEADER:
offset = skb_network_offset(skb);
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 3ba12a7471b0..9b2d7463d3d3 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -233,7 +233,7 @@ static void nft_quota_destroy(const struct nft_ctx *ctx,
return nft_quota_do_destroy(ctx, priv);
}
-static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
struct nft_quota *priv_dst = nft_expr_priv(dst);
struct nft_quota *priv_src = nft_expr_priv(src);
@@ -241,7 +241,7 @@ static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
priv_dst->quota = priv_src->quota;
priv_dst->flags = priv_src->flags;
- priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC);
+ priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), gfp);
if (!priv_dst->consumed)
return -ENOMEM;
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 24d977138572..14d88394bcb7 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -73,14 +73,14 @@ void nft_rt_get_eval(const struct nft_expr *expr,
if (nft_pf(pkt) != NFPROTO_IPV4)
goto err;
- *dest = (__force u32)rt_nexthop((const struct rtable *)dst,
+ *dest = (__force u32)rt_nexthop(dst_rtable(dst),
ip_hdr(skb)->daddr);
break;
case NFT_RT_NEXTHOP6:
if (nft_pf(pkt) != NFPROTO_IPV6)
goto err;
- memcpy(dest, rt6_nexthop((struct rt6_info *)dst,
+ memcpy(dest, rt6_nexthop(dst_rt6_info(dst),
&ipv6_hdr(skb)->daddr),
sizeof(struct in6_addr));
break;
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 187138afac45..eb4c4a4ac7ac 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -434,7 +434,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
res_map = scratch->map + (map_index ? m->bsize_max : 0);
fill_map = scratch->map + (map_index ? 0 : m->bsize_max);
- memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
+ pipapo_resmap_init(m, res_map);
nft_pipapo_for_each_field(f, i, m) {
bool last = i == m->field_count - 1;
@@ -504,6 +504,7 @@ out:
* pipapo_get() - Get matching element reference given key data
* @net: Network namespace
* @set: nftables API set representation
+ * @m: storage containing active/existing elements
* @data: Key data to be matched against existing elements
* @genmask: If set, check that element is active in given genmask
* @tstamp: timestamp to check for expired elements
@@ -517,17 +518,15 @@ out:
*/
static struct nft_pipapo_elem *pipapo_get(const struct net *net,
const struct nft_set *set,
+ const struct nft_pipapo_match *m,
const u8 *data, u8 genmask,
u64 tstamp, gfp_t gfp)
{
struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT);
- struct nft_pipapo *priv = nft_set_priv(set);
unsigned long *res_map, *fill_map = NULL;
- const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
int i;
- m = priv->clone;
if (m->bsize_max == 0)
return ret;
@@ -543,7 +542,7 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net,
goto out;
}
- memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
+ pipapo_resmap_init(m, res_map);
nft_pipapo_for_each_field(f, i, m) {
bool last = i == m->field_count - 1;
@@ -612,9 +611,11 @@ static struct nft_elem_priv *
nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *m = rcu_dereference(priv->match);
struct nft_pipapo_elem *e;
- e = pipapo_get(net, set, (const u8 *)elem->key.val.data,
+ e = pipapo_get(net, set, m, (const u8 *)elem->key.val.data,
nft_genmask_cur(net), get_jiffies_64(),
GFP_ATOMIC);
if (IS_ERR(e))
@@ -1247,6 +1248,40 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
return 0;
}
+static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set)
+{
+#ifdef CONFIG_PROVE_LOCKING
+ const struct net *net = read_pnet(&set->net);
+
+ return lockdep_is_held(&nft_pernet(net)->commit_mutex);
+#else
+ return true;
+#endif
+}
+
+static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old);
+
+/**
+ * pipapo_maybe_clone() - Build clone for pending data changes, if not existing
+ * @set: nftables API set representation
+ *
+ * Return: newly created or existing clone, if any. NULL on allocation failure
+ */
+static struct nft_pipapo_match *pipapo_maybe_clone(const struct nft_set *set)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *m;
+
+ if (priv->clone)
+ return priv->clone;
+
+ m = rcu_dereference_protected(priv->match,
+ nft_pipapo_transaction_mutex_held(set));
+ priv->clone = pipapo_clone(m);
+
+ return priv->clone;
+}
+
/**
* nft_pipapo_insert() - Validate and insert ranged elements
* @net: Network namespace
@@ -1263,8 +1298,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
const u8 *start = (const u8 *)elem->key.val.data, *end;
- struct nft_pipapo *priv = nft_set_priv(set);
- struct nft_pipapo_match *m = priv->clone;
+ struct nft_pipapo_match *m = pipapo_maybe_clone(set);
u8 genmask = nft_genmask_next(net);
struct nft_pipapo_elem *e, *dup;
u64 tstamp = nft_net_tstamp(net);
@@ -1272,12 +1306,15 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
const u8 *start_p, *end_p;
int i, bsize_max, err = 0;
+ if (!m)
+ return -ENOMEM;
+
if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END))
end = (const u8 *)nft_set_ext_key_end(ext)->data;
else
end = start;
- dup = pipapo_get(net, set, start, genmask, tstamp, GFP_KERNEL);
+ dup = pipapo_get(net, set, m, start, genmask, tstamp, GFP_KERNEL);
if (!IS_ERR(dup)) {
/* Check if we already have the same exact entry */
const struct nft_data *dup_key, *dup_end;
@@ -1299,7 +1336,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
if (PTR_ERR(dup) == -ENOENT) {
/* Look for partially overlapping entries */
- dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp,
+ dup = pipapo_get(net, set, m, end, nft_genmask_next(net), tstamp,
GFP_KERNEL);
}
@@ -1332,8 +1369,6 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
}
/* Insert */
- priv->dirty = true;
-
bsize_max = m->bsize_max;
nft_pipapo_for_each_field(f, i, m) {
@@ -1384,7 +1419,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
* pipapo_clone() - Clone matching data to create new working copy
* @old: Existing matching data
*
- * Return: copy of matching data passed as 'old', error pointer on failure
+ * Return: copy of matching data passed as 'old' or NULL.
*/
static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
{
@@ -1394,7 +1429,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
new = kmalloc(struct_size(new, f, old->field_count), GFP_KERNEL);
if (!new)
- return ERR_PTR(-ENOMEM);
+ return NULL;
new->field_count = old->field_count;
new->bsize_max = old->bsize_max;
@@ -1466,7 +1501,7 @@ out_scratch:
free_percpu(new->scratch);
kfree(new);
- return ERR_PTR(-ENOMEM);
+ return NULL;
}
/**
@@ -1698,8 +1733,6 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
* NFT_SET_ELEM_DEAD_BIT.
*/
if (__nft_set_elem_expired(&e->ext, tstamp)) {
- priv->dirty = true;
-
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
if (!gc)
return;
@@ -1777,57 +1810,30 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
static void nft_pipapo_commit(struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
- struct nft_pipapo_match *new_clone, *old;
-
- if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
- pipapo_gc(set, priv->clone);
+ struct nft_pipapo_match *old;
- if (!priv->dirty)
+ if (!priv->clone)
return;
- new_clone = pipapo_clone(priv->clone);
- if (IS_ERR(new_clone))
- return;
+ if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
+ pipapo_gc(set, priv->clone);
- priv->dirty = false;
+ old = rcu_replace_pointer(priv->match, priv->clone,
+ nft_pipapo_transaction_mutex_held(set));
+ priv->clone = NULL;
- old = rcu_access_pointer(priv->match);
- rcu_assign_pointer(priv->match, priv->clone);
if (old)
call_rcu(&old->rcu, pipapo_reclaim_match);
-
- priv->clone = new_clone;
-}
-
-static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set)
-{
-#ifdef CONFIG_PROVE_LOCKING
- const struct net *net = read_pnet(&set->net);
-
- return lockdep_is_held(&nft_pernet(net)->commit_mutex);
-#else
- return true;
-#endif
}
static void nft_pipapo_abort(const struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
- struct nft_pipapo_match *new_clone, *m;
-
- if (!priv->dirty)
- return;
-
- m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set));
- new_clone = pipapo_clone(m);
- if (IS_ERR(new_clone))
+ if (!priv->clone)
return;
-
- priv->dirty = false;
-
pipapo_free_match(priv->clone);
- priv->clone = new_clone;
+ priv->clone = NULL;
}
/**
@@ -1851,52 +1857,38 @@ static void nft_pipapo_activate(const struct net *net,
}
/**
- * pipapo_deactivate() - Check that element is in set, mark as inactive
+ * nft_pipapo_deactivate() - Search for element and make it inactive
* @net: Network namespace
* @set: nftables API set representation
- * @data: Input key data
- * @ext: nftables API extension pointer, used to check for end element
- *
- * This is a convenience function that can be called from both
- * nft_pipapo_deactivate() and nft_pipapo_flush(), as they are in fact the same
- * operation.
+ * @elem: nftables API element representation containing key data
*
* Return: deactivated element if found, NULL otherwise.
*/
-static void *pipapo_deactivate(const struct net *net, const struct nft_set *set,
- const u8 *data, const struct nft_set_ext *ext)
+static struct nft_elem_priv *
+nft_pipapo_deactivate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
+ struct nft_pipapo_match *m = pipapo_maybe_clone(set);
struct nft_pipapo_elem *e;
- e = pipapo_get(net, set, data, nft_genmask_next(net),
- nft_net_tstamp(net), GFP_KERNEL);
+ /* removal must occur on priv->clone, if we are low on memory
+ * we have no choice and must fail the removal request.
+ */
+ if (!m)
+ return NULL;
+
+ e = pipapo_get(net, set, m, (const u8 *)elem->key.val.data,
+ nft_genmask_next(net), nft_net_tstamp(net), GFP_KERNEL);
if (IS_ERR(e))
return NULL;
nft_set_elem_change_active(net, set, &e->ext);
- return e;
-}
-
-/**
- * nft_pipapo_deactivate() - Call pipapo_deactivate() to make element inactive
- * @net: Network namespace
- * @set: nftables API set representation
- * @elem: nftables API element representation containing key data
- *
- * Return: deactivated element if found, NULL otherwise.
- */
-static struct nft_elem_priv *
-nft_pipapo_deactivate(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
-{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
-
- return pipapo_deactivate(net, set, (const u8 *)elem->key.val.data, ext);
+ return &e->priv;
}
/**
- * nft_pipapo_flush() - Call pipapo_deactivate() to make element inactive
+ * nft_pipapo_flush() - make element inactive
* @net: Network namespace
* @set: nftables API set representation
* @elem_priv: nftables API element representation containing key data
@@ -2093,7 +2085,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
if (last && f->mt[rulemap[i].to].e == e) {
- priv->dirty = true;
pipapo_drop(m, rulemap);
return;
}
@@ -2106,35 +2097,23 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
}
/**
- * nft_pipapo_walk() - Walk over elements
+ * nft_pipapo_do_walk() - Walk over elements in m
* @ctx: nftables API context
* @set: nftables API set representation
+ * @m: matching data pointing to key mapping array
* @iter: Iterator
*
* As elements are referenced in the mapping array for the last field, directly
* scan that array: there's no need to follow rule mappings from the first
- * field.
+ * field. @m is protected either by RCU read lock or by transaction mutex.
*/
-static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
- struct nft_set_iter *iter)
+static void nft_pipapo_do_walk(const struct nft_ctx *ctx, struct nft_set *set,
+ const struct nft_pipapo_match *m,
+ struct nft_set_iter *iter)
{
- struct nft_pipapo *priv = nft_set_priv(set);
- const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
unsigned int i, r;
- WARN_ON_ONCE(iter->type != NFT_ITER_READ &&
- iter->type != NFT_ITER_UPDATE);
-
- rcu_read_lock();
- if (iter->type == NFT_ITER_READ)
- m = rcu_dereference(priv->match);
- else
- m = priv->clone;
-
- if (unlikely(!m))
- goto out;
-
for (i = 0, f = m->f; i < m->field_count - 1; i++, f++)
;
@@ -2151,14 +2130,49 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
iter->err = iter->fn(ctx, set, iter, &e->priv);
if (iter->err < 0)
- goto out;
+ return;
cont:
iter->count++;
}
+}
-out:
- rcu_read_unlock();
+/**
+ * nft_pipapo_walk() - Walk over elements
+ * @ctx: nftables API context
+ * @set: nftables API set representation
+ * @iter: Iterator
+ *
+ * Test if destructive action is needed or not, clone active backend if needed
+ * and call the real function to work on the data.
+ */
+static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ const struct nft_pipapo_match *m;
+
+ switch (iter->type) {
+ case NFT_ITER_UPDATE:
+ m = pipapo_maybe_clone(set);
+ if (!m) {
+ iter->err = -ENOMEM;
+ return;
+ }
+
+ nft_pipapo_do_walk(ctx, set, m, iter);
+ break;
+ case NFT_ITER_READ:
+ rcu_read_lock();
+ m = rcu_dereference(priv->match);
+ nft_pipapo_do_walk(ctx, set, m, iter);
+ rcu_read_unlock();
+ break;
+ default:
+ iter->err = -EINVAL;
+ WARN_ON_ONCE(1);
+ break;
+ }
}
/**
@@ -2267,21 +2281,10 @@ static int nft_pipapo_init(const struct nft_set *set,
f->mt = NULL;
}
- /* Create an initial clone of matching data for next insertion */
- priv->clone = pipapo_clone(m);
- if (IS_ERR(priv->clone)) {
- err = PTR_ERR(priv->clone);
- goto out_free;
- }
-
- priv->dirty = false;
-
rcu_assign_pointer(priv->match, m);
return 0;
-out_free:
- free_percpu(m->scratch);
out_scratch:
kfree(m);
@@ -2326,33 +2329,18 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m;
- int cpu;
m = rcu_dereference_protected(priv->match, true);
- if (m) {
- rcu_barrier();
-
- for_each_possible_cpu(cpu)
- pipapo_free_scratch(m, cpu);
- free_percpu(m->scratch);
- pipapo_free_fields(m);
- kfree(m);
- priv->match = NULL;
- }
if (priv->clone) {
- m = priv->clone;
-
- nft_set_pipapo_match_destroy(ctx, set, m);
-
- for_each_possible_cpu(cpu)
- pipapo_free_scratch(priv->clone, cpu);
- free_percpu(priv->clone->scratch);
-
- pipapo_free_fields(priv->clone);
- kfree(priv->clone);
+ nft_set_pipapo_match_destroy(ctx, set, priv->clone);
+ pipapo_free_match(priv->clone);
priv->clone = NULL;
+ } else {
+ nft_set_pipapo_match_destroy(ctx, set, m);
}
+
+ pipapo_free_match(m);
}
/**
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index 24cd1ff73f98..4a2ff85ce1c4 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -155,14 +155,12 @@ struct nft_pipapo_match {
* @match: Currently in-use matching data
* @clone: Copy where pending insertions and deletions are kept
* @width: Total bytes to be matched for one packet, including padding
- * @dirty: Working copy has pending insertions or deletions
* @last_gc: Timestamp of last garbage collection run, jiffies
*/
struct nft_pipapo {
struct nft_pipapo_match __rcu *match;
struct nft_pipapo_match *clone;
int width;
- bool dirty;
unsigned long last_gc;
};
@@ -280,4 +278,25 @@ static u64 pipapo_estimate_size(const struct nft_set_desc *desc)
return size;
}
+/**
+ * pipapo_resmap_init() - Initialise result map before first use
+ * @m: Matching data, including mapping table
+ * @res_map: Result map
+ *
+ * Initialize all bits covered by the first field to one, so that after
+ * the first step, only the matching bits of the first bit group remain.
+ *
+ * If other fields have a large bitmap, set remainder of res_map to 0.
+ */
+static inline void pipapo_resmap_init(const struct nft_pipapo_match *m, unsigned long *res_map)
+{
+ const struct nft_pipapo_field *f = m->f;
+ int i;
+
+ for (i = 0; i < f->bsize; i++)
+ res_map[i] = ULONG_MAX;
+
+ for (i = f->bsize; i < m->bsize_max; i++)
+ res_map[i] = 0ul;
+}
#endif /* _NFT_SET_PIPAPO_H */
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index d08407d589ea..b8d3c3213efe 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1036,6 +1036,7 @@ nothing:
/**
* nft_pipapo_avx2_lookup_slow() - Fallback function for uncommon field sizes
+ * @mdata: Matching data, including mapping table
* @map: Previous match result, used as initial bitmap
* @fill: Destination bitmap to be filled with current match result
* @f: Field, containing lookup and mapping tables
@@ -1051,7 +1052,8 @@ nothing:
* Return: -1 on no match, rule index of match if @last, otherwise first long
* word index to be checked next (i.e. first filled word).
*/
-static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill,
+static int nft_pipapo_avx2_lookup_slow(const struct nft_pipapo_match *mdata,
+ unsigned long *map, unsigned long *fill,
const struct nft_pipapo_field *f,
int offset, const u8 *pkt,
bool first, bool last)
@@ -1060,7 +1062,7 @@ static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill,
int i, ret = -1, b;
if (first)
- memset(map, 0xff, bsize * sizeof(*map));
+ pipapo_resmap_init(mdata, map);
for (i = offset; i < bsize; i++) {
if (f->bb == 8)
@@ -1137,8 +1139,14 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
bool map_index;
int i, ret = 0;
- if (unlikely(!irq_fpu_usable()))
- return nft_pipapo_lookup(net, set, key, ext);
+ local_bh_disable();
+
+ if (unlikely(!irq_fpu_usable())) {
+ bool fallback_res = nft_pipapo_lookup(net, set, key, ext);
+
+ local_bh_enable();
+ return fallback_res;
+ }
m = rcu_dereference(priv->match);
@@ -1153,6 +1161,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
scratch = *raw_cpu_ptr(m->scratch);
if (unlikely(!scratch)) {
kernel_fpu_end();
+ local_bh_enable();
return false;
}
@@ -1186,7 +1195,7 @@ next_match:
} else if (f->groups == 16) {
NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16);
} else {
- ret = nft_pipapo_avx2_lookup_slow(res, fill, f,
+ ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f,
ret, rp,
first, last);
}
@@ -1202,7 +1211,7 @@ next_match:
} else if (f->groups == 32) {
NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32);
} else {
- ret = nft_pipapo_avx2_lookup_slow(res, fill, f,
+ ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f,
ret, rp,
first, last);
}
@@ -1233,6 +1242,7 @@ out:
if (i % 2)
scratch->map_index = !map_index;
kernel_fpu_end();
+ local_bh_enable();
return ret >= 0;
}
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index f735d79d8be5..60a76e6e348e 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -174,8 +174,8 @@ struct nft_tunnel_opts {
struct erspan_metadata erspan;
u8 data[IP_TUNNEL_OPTS_MAX];
} u;
+ IP_TUNNEL_DECLARE_FLAGS(flags);
u32 len;
- __be16 flags;
};
struct nft_tunnel_obj {
@@ -271,7 +271,8 @@ static int nft_tunnel_obj_vxlan_init(const struct nlattr *attr,
opts->u.vxlan.gbp = ntohl(nla_get_be32(tb[NFTA_TUNNEL_KEY_VXLAN_GBP]));
opts->len = sizeof(struct vxlan_metadata);
- opts->flags = TUNNEL_VXLAN_OPT;
+ ip_tunnel_flags_zero(opts->flags);
+ __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, opts->flags);
return 0;
}
@@ -325,7 +326,8 @@ static int nft_tunnel_obj_erspan_init(const struct nlattr *attr,
opts->u.erspan.version = version;
opts->len = sizeof(struct erspan_metadata);
- opts->flags = TUNNEL_ERSPAN_OPT;
+ ip_tunnel_flags_zero(opts->flags);
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, opts->flags);
return 0;
}
@@ -366,7 +368,8 @@ static int nft_tunnel_obj_geneve_init(const struct nlattr *attr,
opt->length = data_len / 4;
opt->opt_class = nla_get_be16(tb[NFTA_TUNNEL_KEY_GENEVE_CLASS]);
opt->type = nla_get_u8(tb[NFTA_TUNNEL_KEY_GENEVE_TYPE]);
- opts->flags = TUNNEL_GENEVE_OPT;
+ ip_tunnel_flags_zero(opts->flags);
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, opts->flags);
return 0;
}
@@ -385,8 +388,8 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx,
struct nft_tunnel_opts *opts)
{
struct nlattr *nla;
- __be16 type = 0;
int err, rem;
+ u32 type = 0;
err = nla_validate_nested_deprecated(attr, NFTA_TUNNEL_KEY_OPTS_MAX,
nft_tunnel_opts_policy, NULL);
@@ -401,7 +404,7 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx,
err = nft_tunnel_obj_vxlan_init(nla, opts);
if (err)
return err;
- type = TUNNEL_VXLAN_OPT;
+ type = IP_TUNNEL_VXLAN_OPT_BIT;
break;
case NFTA_TUNNEL_KEY_OPTS_ERSPAN:
if (type)
@@ -409,15 +412,15 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx,
err = nft_tunnel_obj_erspan_init(nla, opts);
if (err)
return err;
- type = TUNNEL_ERSPAN_OPT;
+ type = IP_TUNNEL_ERSPAN_OPT_BIT;
break;
case NFTA_TUNNEL_KEY_OPTS_GENEVE:
- if (type && type != TUNNEL_GENEVE_OPT)
+ if (type && type != IP_TUNNEL_GENEVE_OPT_BIT)
return -EINVAL;
err = nft_tunnel_obj_geneve_init(nla, opts);
if (err)
return err;
- type = TUNNEL_GENEVE_OPT;
+ type = IP_TUNNEL_GENEVE_OPT_BIT;
break;
default:
return -EOPNOTSUPP;
@@ -454,7 +457,9 @@ static int nft_tunnel_obj_init(const struct nft_ctx *ctx,
memset(&info, 0, sizeof(info));
info.mode = IP_TUNNEL_INFO_TX;
info.key.tun_id = key32_to_tunnel_id(nla_get_be32(tb[NFTA_TUNNEL_KEY_ID]));
- info.key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
+ __set_bit(IP_TUNNEL_KEY_BIT, info.key.tun_flags);
+ __set_bit(IP_TUNNEL_CSUM_BIT, info.key.tun_flags);
+ __set_bit(IP_TUNNEL_NOCACHE_BIT, info.key.tun_flags);
if (tb[NFTA_TUNNEL_KEY_IP]) {
err = nft_tunnel_obj_ip_init(ctx, tb[NFTA_TUNNEL_KEY_IP], &info);
@@ -483,11 +488,12 @@ static int nft_tunnel_obj_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
if (tun_flags & NFT_TUNNEL_F_ZERO_CSUM_TX)
- info.key.tun_flags &= ~TUNNEL_CSUM;
+ __clear_bit(IP_TUNNEL_CSUM_BIT, info.key.tun_flags);
if (tun_flags & NFT_TUNNEL_F_DONT_FRAGMENT)
- info.key.tun_flags |= TUNNEL_DONT_FRAGMENT;
+ __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT,
+ info.key.tun_flags);
if (tun_flags & NFT_TUNNEL_F_SEQ_NUMBER)
- info.key.tun_flags |= TUNNEL_SEQ;
+ __set_bit(IP_TUNNEL_SEQ_BIT, info.key.tun_flags);
}
if (tb[NFTA_TUNNEL_KEY_TOS])
info.key.tos = nla_get_u8(tb[NFTA_TUNNEL_KEY_TOS]);
@@ -583,7 +589,7 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb,
if (!nest)
return -1;
- if (opts->flags & TUNNEL_VXLAN_OPT) {
+ if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, opts->flags)) {
inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_VXLAN);
if (!inner)
goto failure;
@@ -591,7 +597,7 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb,
htonl(opts->u.vxlan.gbp)))
goto inner_failure;
nla_nest_end(skb, inner);
- } else if (opts->flags & TUNNEL_ERSPAN_OPT) {
+ } else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, opts->flags)) {
inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_ERSPAN);
if (!inner)
goto failure;
@@ -613,7 +619,7 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb,
break;
}
nla_nest_end(skb, inner);
- } else if (opts->flags & TUNNEL_GENEVE_OPT) {
+ } else if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, opts->flags)) {
struct geneve_opt *opt;
int offset = 0;
@@ -658,11 +664,11 @@ static int nft_tunnel_flags_dump(struct sk_buff *skb,
{
u32 flags = 0;
- if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
+ if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, info->key.tun_flags))
flags |= NFT_TUNNEL_F_DONT_FRAGMENT;
- if (!(info->key.tun_flags & TUNNEL_CSUM))
+ if (!test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags))
flags |= NFT_TUNNEL_F_ZERO_CSUM_TX;
- if (info->key.tun_flags & TUNNEL_SEQ)
+ if (test_bit(IP_TUNNEL_SEQ_BIT, info->key.tun_flags))
flags |= NFT_TUNNEL_F_SEQ_NUMBER;
if (nla_put_be32(skb, NFTA_TUNNEL_KEY_FLAGS, htonl(flags)) < 0)
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index ef93e0d3bee0..588a5e6ad899 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -59,9 +59,9 @@ MODULE_PARM_DESC(ip_list_gid, "default owning group of /proc/net/xt_recent/* fil
/* retained for backwards compatibility */
static unsigned int ip_pkt_list_tot __read_mostly;
module_param(ip_pkt_list_tot, uint, 0400);
-MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 255)");
+MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 65535)");
-#define XT_RECENT_MAX_NSTAMPS 256
+#define XT_RECENT_MAX_NSTAMPS 65536
struct recent_entry {
struct list_head list;
@@ -69,7 +69,7 @@ struct recent_entry {
union nf_inet_addr addr;
u_int16_t family;
u_int8_t ttl;
- u_int8_t index;
+ u_int16_t index;
u_int16_t nstamps;
unsigned long stamps[];
};
@@ -80,7 +80,7 @@ struct recent_table {
union nf_inet_addr mask;
unsigned int refcnt;
unsigned int entries;
- u8 nstamps_max_mask;
+ u_int16_t nstamps_max_mask;
struct list_head lru_list;
struct list_head iphash[];
};
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 1ba4f58e1d35..cd9160bbc919 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -965,6 +965,7 @@ int netlbl_enabled(void)
* @sk: the socket to label
* @family: protocol family
* @secattr: the security attributes
+ * @sk_locked: true if caller holds the socket lock
*
* Description:
* Attach the correct label to the given socket using the security attributes
@@ -977,7 +978,8 @@ int netlbl_enabled(void)
*/
int netlbl_sock_setattr(struct sock *sk,
u16 family,
- const struct netlbl_lsm_secattr *secattr)
+ const struct netlbl_lsm_secattr *secattr,
+ bool sk_locked)
{
int ret_val;
struct netlbl_dom_map *dom_entry;
@@ -997,7 +999,7 @@ int netlbl_sock_setattr(struct sock *sk,
case NETLBL_NLTYPE_CIPSOV4:
ret_val = cipso_v4_sock_setattr(sk,
dom_entry->def.cipso,
- secattr);
+ secattr, sk_locked);
break;
case NETLBL_NLTYPE_UNLABELED:
ret_val = 0;
@@ -1091,6 +1093,28 @@ int netlbl_sock_getattr(struct sock *sk,
}
/**
+ * netlbl_sk_lock_check - Check if the socket lock has been acquired.
+ * @sk: the socket to be checked
+ *
+ * Return: true if socket @sk is locked or if lock debugging is disabled at
+ * runtime or compile-time; false otherwise
+ *
+ */
+#ifdef CONFIG_LOCKDEP
+bool netlbl_sk_lock_check(struct sock *sk)
+{
+ if (debug_locks)
+ return lockdep_sock_is_held(sk);
+ return true;
+}
+#else
+bool netlbl_sk_lock_check(struct sock *sk)
+{
+ return true;
+}
+#endif
+
+/**
* netlbl_conn_setattr - Label a connected socket using the correct protocol
* @sk: the socket to label
* @addr: the destination address
@@ -1126,7 +1150,8 @@ int netlbl_conn_setattr(struct sock *sk,
switch (entry->type) {
case NETLBL_NLTYPE_CIPSOV4:
ret_val = cipso_v4_sock_setattr(sk,
- entry->cipso, secattr);
+ entry->cipso, secattr,
+ netlbl_sk_lock_check(sk));
break;
case NETLBL_NLTYPE_UNLABELED:
/* just delete the protocols we support for right now
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 7554803218a2..0b7a89db3ab7 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -59,7 +59,6 @@
#include <linux/rhashtable.h>
#include <asm/cacheflush.h>
#include <linux/hash.h>
-#include <linux/genetlink.h>
#include <linux/net_namespace.h>
#include <linux/nospec.h>
#include <linux/btf_ids.h>
@@ -73,6 +72,7 @@
#include <trace/events/netlink.h>
#include "af_netlink.h"
+#include "genetlink.h"
struct listeners {
struct rcu_head rcu;
@@ -636,8 +636,7 @@ static struct proto netlink_proto = {
};
static int __netlink_create(struct net *net, struct socket *sock,
- struct mutex *dump_cb_mutex, int protocol,
- int kern)
+ int protocol, int kern)
{
struct sock *sk;
struct netlink_sock *nlk;
@@ -655,7 +654,6 @@ static int __netlink_create(struct net *net, struct socket *sock,
lockdep_set_class_and_name(&nlk->nl_cb_mutex,
nlk_cb_mutex_keys + protocol,
nlk_cb_mutex_key_strings[protocol]);
- nlk->dump_cb_mutex = dump_cb_mutex;
init_waitqueue_head(&nlk->wait);
sk->sk_destruct = netlink_sock_destruct;
@@ -667,7 +665,6 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
struct module *module = NULL;
- struct mutex *cb_mutex;
struct netlink_sock *nlk;
int (*bind)(struct net *net, int group);
void (*unbind)(struct net *net, int group);
@@ -696,7 +693,6 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
module = nl_table[protocol].module;
else
err = -EPROTONOSUPPORT;
- cb_mutex = nl_table[protocol].cb_mutex;
bind = nl_table[protocol].bind;
unbind = nl_table[protocol].unbind;
release = nl_table[protocol].release;
@@ -705,7 +701,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
if (err < 0)
goto out;
- err = __netlink_create(net, sock, cb_mutex, protocol, kern);
+ err = __netlink_create(net, sock, protocol, kern);
if (err < 0)
goto out_module;
@@ -2016,7 +2012,6 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
struct sock *sk;
struct netlink_sock *nlk;
struct listeners *listeners = NULL;
- struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
unsigned int groups;
BUG_ON(!nl_table);
@@ -2027,7 +2022,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
return NULL;
- if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0)
+ if (__netlink_create(net, sock, unit, 1) < 0)
goto out_sock_release_nosk;
sk = sock->sk;
@@ -2055,7 +2050,6 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
if (!nl_table[unit].registered) {
nl_table[unit].groups = groups;
rcu_assign_pointer(nl_table[unit].listeners, listeners);
- nl_table[unit].cb_mutex = cb_mutex;
nl_table[unit].module = module;
if (cfg) {
nl_table[unit].bind = cfg->bind;
@@ -2165,6 +2159,69 @@ __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int fla
}
EXPORT_SYMBOL(__nlmsg_put);
+static size_t
+netlink_ack_tlv_len(struct netlink_sock *nlk, int err,
+ const struct netlink_ext_ack *extack)
+{
+ size_t tlvlen;
+
+ if (!extack || !test_bit(NETLINK_F_EXT_ACK, &nlk->flags))
+ return 0;
+
+ tlvlen = 0;
+ if (extack->_msg)
+ tlvlen += nla_total_size(strlen(extack->_msg) + 1);
+ if (extack->cookie_len)
+ tlvlen += nla_total_size(extack->cookie_len);
+
+ /* Following attributes are only reported as error (not warning) */
+ if (!err)
+ return tlvlen;
+
+ if (extack->bad_attr)
+ tlvlen += nla_total_size(sizeof(u32));
+ if (extack->policy)
+ tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy);
+ if (extack->miss_type)
+ tlvlen += nla_total_size(sizeof(u32));
+ if (extack->miss_nest)
+ tlvlen += nla_total_size(sizeof(u32));
+
+ return tlvlen;
+}
+
+static void
+netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb,
+ const struct nlmsghdr *nlh, int err,
+ const struct netlink_ext_ack *extack)
+{
+ if (extack->_msg)
+ WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg));
+ if (extack->cookie_len)
+ WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
+ extack->cookie_len, extack->cookie));
+
+ if (!err)
+ return;
+
+ if (extack->bad_attr &&
+ !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
+ (u8 *)extack->bad_attr >= in_skb->data + in_skb->len))
+ WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
+ (u8 *)extack->bad_attr - (const u8 *)nlh));
+ if (extack->policy)
+ netlink_policy_dump_write_attr(skb, extack->policy,
+ NLMSGERR_ATTR_POLICY);
+ if (extack->miss_type)
+ WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_TYPE,
+ extack->miss_type));
+ if (extack->miss_nest &&
+ !WARN_ON((u8 *)extack->miss_nest < in_skb->data ||
+ (u8 *)extack->miss_nest > in_skb->data + in_skb->len))
+ WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_NEST,
+ (u8 *)extack->miss_nest - (const u8 *)nlh));
+}
+
/*
* It looks a bit ugly.
* It would be better to create kernel thread.
@@ -2175,6 +2232,7 @@ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb,
struct netlink_ext_ack *extack)
{
struct nlmsghdr *nlh;
+ size_t extack_len;
nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(nlk->dump_done_errno),
NLM_F_MULTI | cb->answer_flags);
@@ -2184,10 +2242,14 @@ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb,
nl_dump_check_consistent(cb, nlh);
memcpy(nlmsg_data(nlh), &nlk->dump_done_errno, sizeof(nlk->dump_done_errno));
- if (extack->_msg && test_bit(NETLINK_F_EXT_ACK, &nlk->flags)) {
+ extack_len = netlink_ack_tlv_len(nlk, nlk->dump_done_errno, extack);
+ if (extack_len) {
nlh->nlmsg_flags |= NLM_F_ACK_TLVS;
- if (!nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg))
+ if (skb_tailroom(skb) >= extack_len) {
+ netlink_ack_tlv_fill(cb->skb, skb, cb->nlh,
+ nlk->dump_done_errno, extack);
nlmsg_end(skb, nlh);
+ }
}
return 0;
@@ -2258,17 +2320,9 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
netlink_skb_set_owner_r(skb, sk);
if (nlk->dump_done_errno > 0) {
- struct mutex *extra_mutex = nlk->dump_cb_mutex;
-
cb->extack = &extack;
- if (cb->flags & RTNL_FLAG_DUMP_UNLOCKED)
- extra_mutex = NULL;
- if (extra_mutex)
- mutex_lock(extra_mutex);
nlk->dump_done_errno = cb->dump(skb, cb);
- if (extra_mutex)
- mutex_unlock(extra_mutex);
/* EMSGSIZE plus something already in the skb means
* that there's more to dump but current skb has filled up.
@@ -2406,69 +2460,6 @@ error_free:
}
EXPORT_SYMBOL(__netlink_dump_start);
-static size_t
-netlink_ack_tlv_len(struct netlink_sock *nlk, int err,
- const struct netlink_ext_ack *extack)
-{
- size_t tlvlen;
-
- if (!extack || !test_bit(NETLINK_F_EXT_ACK, &nlk->flags))
- return 0;
-
- tlvlen = 0;
- if (extack->_msg)
- tlvlen += nla_total_size(strlen(extack->_msg) + 1);
- if (extack->cookie_len)
- tlvlen += nla_total_size(extack->cookie_len);
-
- /* Following attributes are only reported as error (not warning) */
- if (!err)
- return tlvlen;
-
- if (extack->bad_attr)
- tlvlen += nla_total_size(sizeof(u32));
- if (extack->policy)
- tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy);
- if (extack->miss_type)
- tlvlen += nla_total_size(sizeof(u32));
- if (extack->miss_nest)
- tlvlen += nla_total_size(sizeof(u32));
-
- return tlvlen;
-}
-
-static void
-netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb,
- struct nlmsghdr *nlh, int err,
- const struct netlink_ext_ack *extack)
-{
- if (extack->_msg)
- WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg));
- if (extack->cookie_len)
- WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
- extack->cookie_len, extack->cookie));
-
- if (!err)
- return;
-
- if (extack->bad_attr &&
- !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
- (u8 *)extack->bad_attr >= in_skb->data + in_skb->len))
- WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
- (u8 *)extack->bad_attr - (u8 *)nlh));
- if (extack->policy)
- netlink_policy_dump_write_attr(skb, extack->policy,
- NLMSGERR_ATTR_POLICY);
- if (extack->miss_type)
- WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_TYPE,
- extack->miss_type));
- if (extack->miss_nest &&
- !WARN_ON((u8 *)extack->miss_nest < in_skb->data ||
- (u8 *)extack->miss_nest > in_skb->data + in_skb->len))
- WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_NEST,
- (u8 *)extack->miss_nest - (u8 *)nlh));
-}
-
void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
const struct netlink_ext_ack *extack)
{
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 3b7666944b11..feb54c63a116 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -22,6 +22,8 @@
#include <net/sock.h>
#include <net/genetlink.h>
+#include "genetlink.h"
+
static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */
static DECLARE_RWSEM(cb_lock);
diff --git a/net/netlink/genetlink.h b/net/netlink/genetlink.h
new file mode 100644
index 000000000000..89bd9d2631c3
--- /dev/null
+++ b/net/netlink/genetlink.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NET_GENETLINK_H
+#define __NET_GENETLINK_H
+
+#include <linux/wait.h>
+
+/* for synchronisation between af_netlink and genetlink */
+extern atomic_t genl_sk_destructing_cnt;
+extern wait_queue_head_t genl_sk_destructing_waitq;
+
+#endif /* __LINUX_GENERIC_NETLINK_H */
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 104a80b75477..6ee148f0e6d0 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -772,8 +772,8 @@ out_release:
return err;
}
-static int nr_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int nr_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sk_buff *skb;
struct sock *newsk;
@@ -805,7 +805,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags,
if (skb)
break;
- if (flags & O_NONBLOCK) {
+ if (arg->flags & O_NONBLOCK) {
err = -EWOULDBLOCK;
break;
}
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 70480869ad1c..bd2b17b219ae 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -285,22 +285,14 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
return 0;
}
-static inline void __nr_remove_node(struct nr_node *nr_node)
+static void nr_remove_node_locked(struct nr_node *nr_node)
{
+ lockdep_assert_held(&nr_node_list_lock);
+
hlist_del_init(&nr_node->node_node);
nr_node_put(nr_node);
}
-#define nr_remove_node_locked(__node) \
- __nr_remove_node(__node)
-
-static void nr_remove_node(struct nr_node *nr_node)
-{
- spin_lock_bh(&nr_node_list_lock);
- __nr_remove_node(nr_node);
- spin_unlock_bh(&nr_node_list_lock);
-}
-
static inline void __nr_remove_neigh(struct nr_neigh *nr_neigh)
{
hlist_del_init(&nr_neigh->neigh_node);
@@ -339,6 +331,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n
return -EINVAL;
}
+ spin_lock_bh(&nr_node_list_lock);
nr_node_lock(nr_node);
for (i = 0; i < nr_node->count; i++) {
if (nr_node->routes[i].neighbour == nr_neigh) {
@@ -352,7 +345,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n
nr_node->count--;
if (nr_node->count == 0) {
- nr_remove_node(nr_node);
+ nr_remove_node_locked(nr_node);
} else {
switch (i) {
case 0:
@@ -367,12 +360,14 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n
nr_node_put(nr_node);
}
nr_node_unlock(nr_node);
+ spin_unlock_bh(&nr_node_list_lock);
return 0;
}
}
nr_neigh_put(nr_neigh);
nr_node_unlock(nr_node);
+ spin_unlock_bh(&nr_node_list_lock);
nr_node_put(nr_node);
return -EINVAL;
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index 4e7c968cde2d..5e3ca068f04e 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -121,7 +121,8 @@ static void nr_heartbeat_expiry(struct timer_list *t)
is accepted() it isn't 'dead' so doesn't get removed. */
if (sock_flag(sk, SOCK_DESTROY) ||
(sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
- sock_hold(sk);
+ if (sk->sk_state == TCP_LISTEN)
+ sock_hold(sk);
bh_unlock_sock(sk);
nr_destroy_socket(sk);
goto out;
diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c
index 79fb2d3f477b..7dc0fa628f2e 100644
--- a/net/netrom/sysctl_net_netrom.c
+++ b/net/netrom/sysctl_net_netrom.c
@@ -140,7 +140,6 @@ static struct ctl_table nr_table[] = {
.extra1 = &min_reset,
.extra2 = &max_reset
},
- { }
};
int __init nr_register_sysctl(void)
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index d5344563e525..57a2f97004e1 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -447,7 +447,7 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent,
}
static int llcp_sock_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
DECLARE_WAITQUEUE(wait, current);
struct sock *sk = sock->sk, *new_sk;
@@ -463,7 +463,7 @@ static int llcp_sock_accept(struct socket *sock, struct socket *newsock,
goto error;
}
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
/* Wait for an incoming connection. */
add_wait_queue_exclusive(sk_sleep(sk), &wait);
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 0d26c8ec9993..f456a5911e7d 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1463,6 +1463,19 @@ int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode,
ndev->ops->n_core_ops);
}
+static bool nci_valid_size(struct sk_buff *skb)
+{
+ BUILD_BUG_ON(NCI_CTRL_HDR_SIZE != NCI_DATA_HDR_SIZE);
+ unsigned int hdr_size = NCI_CTRL_HDR_SIZE;
+
+ if (skb->len < hdr_size ||
+ !nci_plen(skb->data) ||
+ skb->len < hdr_size + nci_plen(skb->data)) {
+ return false;
+ }
+ return true;
+}
+
/* ---- NCI TX Data worker thread ---- */
static void nci_tx_work(struct work_struct *work)
@@ -1516,9 +1529,9 @@ static void nci_rx_work(struct work_struct *work)
nfc_send_to_raw_sock(ndev->nfc_dev, skb,
RAW_PAYLOAD_NCI, NFC_DIRECTION_RX);
- if (!nci_plen(skb->data)) {
+ if (!nci_valid_size(skb)) {
kfree_skb(skb);
- break;
+ continue;
}
/* Process frame */
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index aa1dbf654c3e..dd2ce73a24fb 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -969,8 +969,7 @@ static int nfc_genl_dep_link_down(struct sk_buff *skb, struct genl_info *info)
int rc;
u32 idx;
- if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
- !info->attrs[NFC_ATTR_TARGET_INDEX])
+ if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
return -EINVAL;
idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
@@ -1018,8 +1017,7 @@ static int nfc_genl_llc_get_params(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *msg = NULL;
u32 idx;
- if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
- !info->attrs[NFC_ATTR_FIRMWARE_NAME])
+ if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
return -EINVAL;
idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 29a7081858cd..2535f3f9f462 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -10,6 +10,7 @@ config OPENVSWITCH
(NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \
(!NF_NAT || NF_NAT) && \
(!NETFILTER_CONNCOUNT || NETFILTER_CONNCOUNT)))
+ depends on PSAMPLE || !PSAMPLE
select LIBCRC32C
select MPLS
select NET_MPLS_GSO
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 6fcd7e2ca81f..101f9a23792c 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -24,6 +24,11 @@
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/mpls.h>
+
+#if IS_ENABLED(CONFIG_PSAMPLE)
+#include <net/psample.h>
+#endif
+
#include <net/sctp/checksum.h>
#include "datapath.h"
@@ -936,6 +941,12 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
pskb_trim(skb, ovs_mac_header_len(key));
}
+ /* Need to set the pkt_type to involve the routing layer. The
+ * packet movement through the OVS datapath doesn't generally
+ * use routing, but this is needed for tunnel cases.
+ */
+ skb->pkt_type = PACKET_OUTGOING;
+
if (likely(!mru ||
(skb->len <= mru + vport->dev->hard_header_len))) {
ovs_vport_send(vport, skb, ovs_key_mac_proto(key));
@@ -1037,12 +1048,15 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
struct nlattr *sample_arg;
int rem = nla_len(attr);
const struct sample_arg *arg;
+ u32 init_probability;
bool clone_flow_key;
+ int err;
/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
sample_arg = nla_data(attr);
arg = nla_data(sample_arg);
actions = nla_next(sample_arg, &rem);
+ init_probability = OVS_CB(skb)->probability;
if ((arg->probability != U32_MAX) &&
(!arg->probability || get_random_u32() > arg->probability)) {
@@ -1051,9 +1065,16 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
return 0;
}
+ OVS_CB(skb)->probability = arg->probability;
+
clone_flow_key = !arg->exec;
- return clone_execute(dp, skb, key, 0, actions, rem, last,
- clone_flow_key);
+ err = clone_execute(dp, skb, key, 0, actions, rem, last,
+ clone_flow_key);
+
+ if (!last)
+ OVS_CB(skb)->probability = init_probability;
+
+ return err;
}
/* When 'last' is true, clone() should always consume the 'skb'.
@@ -1293,6 +1314,44 @@ static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
return 0;
}
+#if IS_ENABLED(CONFIG_PSAMPLE)
+static void execute_psample(struct datapath *dp, struct sk_buff *skb,
+ const struct nlattr *attr)
+{
+ struct psample_group psample_group = {};
+ struct psample_metadata md = {};
+ const struct nlattr *a;
+ u32 rate;
+ int rem;
+
+ nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) {
+ switch (nla_type(a)) {
+ case OVS_PSAMPLE_ATTR_GROUP:
+ psample_group.group_num = nla_get_u32(a);
+ break;
+
+ case OVS_PSAMPLE_ATTR_COOKIE:
+ md.user_cookie = nla_data(a);
+ md.user_cookie_len = nla_len(a);
+ break;
+ }
+ }
+
+ psample_group.net = ovs_dp_get_net(dp);
+ md.in_ifindex = OVS_CB(skb)->input_vport->dev->ifindex;
+ md.trunc_size = skb->len - OVS_CB(skb)->cutlen;
+ md.rate_as_probability = 1;
+
+ rate = OVS_CB(skb)->probability ? OVS_CB(skb)->probability : U32_MAX;
+
+ psample_sample_packet(&psample_group, skb, rate, &md);
+}
+#else
+static void execute_psample(struct datapath *dp, struct sk_buff *skb,
+ const struct nlattr *attr)
+{}
+#endif
+
/* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key,
@@ -1496,6 +1555,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
ovs_kfree_skb_reason(skb, reason);
return 0;
}
+
+ case OVS_ACTION_ATTR_PSAMPLE:
+ execute_psample(dp, skb, a);
+ OVS_CB(skb)->cutlen = 0;
+ if (nla_is_last(a, rem)) {
+ consume_skb(skb);
+ return 0;
+ }
+ break;
}
if (unlikely(err)) {
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 2928c142a2dd..8eb1d644b741 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -168,8 +168,13 @@ static u32 ovs_ct_get_mark(const struct nf_conn *ct)
static void ovs_ct_get_labels(const struct nf_conn *ct,
struct ovs_key_ct_labels *labels)
{
- struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL;
+ struct nf_conn_labels *cl = NULL;
+ if (ct) {
+ if (ct->master && !nf_ct_is_confirmed(ct))
+ ct = ct->master;
+ cl = nf_ct_labels_find(ct);
+ }
if (cl)
memcpy(labels, cl->bits, OVS_CT_LABELS_LEN);
else
@@ -674,6 +679,8 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
action |= BIT(NF_NAT_MANIP_DST);
err = nf_ct_nat(skb, ct, ctinfo, &action, &info->range, info->commit);
+ if (err != NF_ACCEPT)
+ return err;
if (action & BIT(NF_NAT_MANIP_SRC))
ovs_nat_update_key(key, skb, NF_NAT_MANIP_SRC);
@@ -692,6 +699,22 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
}
#endif
+static int verdict_to_errno(unsigned int verdict)
+{
+ switch (verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ return 0;
+ case NF_DROP:
+ return -EINVAL;
+ case NF_STOLEN:
+ return -EINPROGRESS;
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
+
/* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if
* not done already. Update key with new CT state after passing the packet
* through conntrack.
@@ -730,7 +753,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
err = nf_conntrack_in(skb, &state);
if (err != NF_ACCEPT)
- return -ENOENT;
+ return verdict_to_errno(err);
/* Clear CT state NAT flags to mark that we have not yet done
* NAT after the nf_conntrack_in() call. We can actually clear
@@ -757,9 +780,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
* the key->ct_state.
*/
if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) &&
- (nf_ct_is_confirmed(ct) || info->commit) &&
- ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) {
- return -EINVAL;
+ (nf_ct_is_confirmed(ct) || info->commit)) {
+ int err = ovs_ct_nat(net, key, info, skb, ct, ctinfo);
+
+ err = verdict_to_errno(err);
+ if (err)
+ return err;
}
/* Userspace may decide to perform a ct lookup without a helper
@@ -790,9 +816,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
* - When committing an unconfirmed connection.
*/
if ((nf_ct_is_confirmed(ct) ? !cached || add_helper :
- info->commit) &&
- nf_ct_helper(skb, ct, ctinfo, info->family) != NF_ACCEPT) {
- return -EINVAL;
+ info->commit)) {
+ int err = nf_ct_helper(skb, ct, ctinfo, info->family);
+
+ err = verdict_to_errno(err);
+ if (err)
+ return err;
}
if (nf_ct_protonum(ct) == IPPROTO_TCP &&
@@ -996,10 +1025,9 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
/* This will take care of sending queued events even if the connection
* is already confirmed.
*/
- if (nf_conntrack_confirm(skb) != NF_ACCEPT)
- return -EINVAL;
+ err = nf_conntrack_confirm(skb);
- return 0;
+ return verdict_to_errno(err);
}
/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
@@ -1034,6 +1062,10 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
else
err = ovs_ct_lookup(net, key, info, skb);
+ /* conntrack core returned NF_STOLEN */
+ if (err == -EINPROGRESS)
+ return err;
+
skb_push_rcsum(skb, nh_ofs);
if (err)
ovs_kfree_skb_reason(skb, OVS_DROP_CONNTRACK);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 11c69415c605..99d72543abd3 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -15,7 +15,6 @@
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
-#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 0cd29971a907..9ca6231ea647 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -115,12 +115,15 @@ struct datapath {
* fragmented.
* @acts_origlen: The netlink size of the flow actions applied to this skb.
* @cutlen: The number of bytes from the packet end to be removed.
+ * @probability: The sampling probability that was applied to this skb; 0 means
+ * no sampling has occurred; U32_MAX means 100% probability.
*/
struct ovs_skb_cb {
struct vport *input_vport;
u16 mru;
u16 acts_origlen;
u32 cutlen;
+ u32 probability;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 33b21a0c0548..8a848ce72e29 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -561,7 +561,6 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
*/
key->tp.src = htons(icmp->icmp6_type);
key->tp.dst = htons(icmp->icmp6_code);
- memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd));
if (icmp->icmp6_code == 0 &&
(icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
@@ -570,6 +569,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
struct nd_msg *nd;
int offset;
+ memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd));
+
/* In order to process neighbor discovery options, we need the
* entire packet.
*/
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index ebc5728aab4e..c92bdc4dfe19 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -64,6 +64,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
case OVS_ACTION_ATTR_TRUNC:
case OVS_ACTION_ATTR_USERSPACE:
case OVS_ACTION_ATTR_DROP:
+ case OVS_ACTION_ATTR_PSAMPLE:
break;
case OVS_ACTION_ATTR_CT:
@@ -152,6 +153,13 @@ static void update_range(struct sw_flow_match *match,
sizeof((match)->key->field)); \
} while (0)
+#define SW_FLOW_KEY_BITMAP_COPY(match, field, value_p, nbits, is_mask) ({ \
+ update_range(match, offsetof(struct sw_flow_key, field), \
+ bitmap_size(nbits), is_mask); \
+ bitmap_copy(is_mask ? (match)->mask->key.field : (match)->key->field, \
+ value_p, nbits); \
+})
+
static bool match_validate(const struct sw_flow_match *match,
u64 key_attrs, u64 mask_attrs, bool log)
{
@@ -670,8 +678,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
bool log)
{
bool ttl = false, ipv4 = false, ipv6 = false;
+ IP_TUNNEL_DECLARE_FLAGS(tun_flags) = { };
bool info_bridge_mode = false;
- __be16 tun_flags = 0;
int opts_type = 0;
struct nlattr *a;
int rem;
@@ -697,7 +705,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
case OVS_TUNNEL_KEY_ATTR_ID:
SW_FLOW_KEY_PUT(match, tun_key.tun_id,
nla_get_be64(a), is_mask);
- tun_flags |= TUNNEL_KEY;
+ __set_bit(IP_TUNNEL_KEY_BIT, tun_flags);
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
@@ -729,10 +737,10 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
ttl = true;
break;
case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
- tun_flags |= TUNNEL_DONT_FRAGMENT;
+ __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, tun_flags);
break;
case OVS_TUNNEL_KEY_ATTR_CSUM:
- tun_flags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, tun_flags);
break;
case OVS_TUNNEL_KEY_ATTR_TP_SRC:
SW_FLOW_KEY_PUT(match, tun_key.tp_src,
@@ -743,7 +751,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
nla_get_be16(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_OAM:
- tun_flags |= TUNNEL_OAM;
+ __set_bit(IP_TUNNEL_OAM_BIT, tun_flags);
break;
case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
if (opts_type) {
@@ -755,7 +763,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
if (err)
return err;
- tun_flags |= TUNNEL_GENEVE_OPT;
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_flags);
opts_type = type;
break;
case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
@@ -768,7 +776,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
if (err)
return err;
- tun_flags |= TUNNEL_VXLAN_OPT;
+ __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_flags);
opts_type = type;
break;
case OVS_TUNNEL_KEY_ATTR_PAD:
@@ -784,7 +792,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
if (err)
return err;
- tun_flags |= TUNNEL_ERSPAN_OPT;
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_flags);
opts_type = type;
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE:
@@ -798,7 +806,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
}
}
- SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
+ SW_FLOW_KEY_BITMAP_COPY(match, tun_key.tun_flags, tun_flags,
+ __IP_TUNNEL_FLAG_NUM, is_mask);
if (is_mask)
SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
else
@@ -823,13 +832,15 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
}
if (ipv4) {
if (info_bridge_mode) {
+ __clear_bit(IP_TUNNEL_KEY_BIT, tun_flags);
+
if (match->key->tun_key.u.ipv4.src ||
match->key->tun_key.u.ipv4.dst ||
match->key->tun_key.tp_src ||
match->key->tun_key.tp_dst ||
match->key->tun_key.ttl ||
match->key->tun_key.tos ||
- tun_flags & ~TUNNEL_KEY) {
+ !ip_tunnel_flags_empty(tun_flags)) {
OVS_NLERR(log, "IPv4 tun info is not correct");
return -EINVAL;
}
@@ -874,7 +885,7 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
const void *tun_opts, int swkey_tun_opts_len,
unsigned short tun_proto, u8 mode)
{
- if (output->tun_flags & TUNNEL_KEY &&
+ if (test_bit(IP_TUNNEL_KEY_BIT, output->tun_flags) &&
nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id,
OVS_TUNNEL_KEY_ATTR_PAD))
return -EMSGSIZE;
@@ -910,10 +921,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
return -EMSGSIZE;
if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
return -EMSGSIZE;
- if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
+ if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, output->tun_flags) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
return -EMSGSIZE;
- if ((output->tun_flags & TUNNEL_CSUM) &&
+ if (test_bit(IP_TUNNEL_CSUM_BIT, output->tun_flags) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
return -EMSGSIZE;
if (output->tp_src &&
@@ -922,18 +933,20 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
if (output->tp_dst &&
nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
return -EMSGSIZE;
- if ((output->tun_flags & TUNNEL_OAM) &&
+ if (test_bit(IP_TUNNEL_OAM_BIT, output->tun_flags) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
return -EMSGSIZE;
if (swkey_tun_opts_len) {
- if (output->tun_flags & TUNNEL_GENEVE_OPT &&
+ if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, output->tun_flags) &&
nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
swkey_tun_opts_len, tun_opts))
return -EMSGSIZE;
- else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
+ else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT,
+ output->tun_flags) &&
vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
return -EMSGSIZE;
- else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
+ else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
+ output->tun_flags) &&
nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
swkey_tun_opts_len, tun_opts))
return -EMSGSIZE;
@@ -2029,7 +2042,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
if ((swkey->tun_proto || is_mask)) {
const void *opts = NULL;
- if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
+ if (ip_tunnel_is_options_present(output->tun_key.tun_flags))
opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
@@ -2397,7 +2410,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
/* Whenever new actions are added, the need to update this
* function should be considered.
*/
- BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24);
+ BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 25);
if (!actions)
return;
@@ -2752,7 +2765,8 @@ static int validate_geneve_opts(struct sw_flow_key *key)
opts_len -= len;
}
- key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
+ if (crit_opt)
+ __set_bit(IP_TUNNEL_CRIT_OPT_BIT, key->tun_key.tun_flags);
return 0;
}
@@ -2760,6 +2774,7 @@ static int validate_geneve_opts(struct sw_flow_key *key)
static int validate_and_copy_set_tun(const struct nlattr *attr,
struct sw_flow_actions **sfa, bool log)
{
+ IP_TUNNEL_DECLARE_FLAGS(dst_opt_type) = { };
struct sw_flow_match match;
struct sw_flow_key key;
struct metadata_dst *tun_dst;
@@ -2767,9 +2782,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
struct ovs_tunnel_info *ovs_tun;
struct nlattr *a;
int err = 0, start, opts_type;
- __be16 dst_opt_type;
- dst_opt_type = 0;
ovs_match_init(&match, &key, true, NULL);
opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
if (opts_type < 0)
@@ -2781,13 +2794,14 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
err = validate_geneve_opts(&key);
if (err < 0)
return err;
- dst_opt_type = TUNNEL_GENEVE_OPT;
+
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, dst_opt_type);
break;
case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
- dst_opt_type = TUNNEL_VXLAN_OPT;
+ __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, dst_opt_type);
break;
case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
- dst_opt_type = TUNNEL_ERSPAN_OPT;
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, dst_opt_type);
break;
}
}
@@ -3144,6 +3158,28 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return 0;
}
+static int validate_psample(const struct nlattr *attr)
+{
+ static const struct nla_policy policy[OVS_PSAMPLE_ATTR_MAX + 1] = {
+ [OVS_PSAMPLE_ATTR_GROUP] = { .type = NLA_U32 },
+ [OVS_PSAMPLE_ATTR_COOKIE] = {
+ .type = NLA_BINARY,
+ .len = OVS_PSAMPLE_COOKIE_MAX_SIZE,
+ },
+ };
+ struct nlattr *a[OVS_PSAMPLE_ATTR_MAX + 1];
+ int err;
+
+ if (!IS_ENABLED(CONFIG_PSAMPLE))
+ return -EOPNOTSUPP;
+
+ err = nla_parse_nested(a, OVS_PSAMPLE_ATTR_MAX, attr, policy, NULL);
+ if (err)
+ return err;
+
+ return a[OVS_PSAMPLE_ATTR_GROUP] ? 0 : -EINVAL;
+}
+
static int copy_action(const struct nlattr *from,
struct sw_flow_actions **sfa, bool log)
{
@@ -3199,6 +3235,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls),
[OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
[OVS_ACTION_ATTR_DROP] = sizeof(u32),
+ [OVS_ACTION_ATTR_PSAMPLE] = (u32)-1,
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -3477,6 +3514,12 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return -EINVAL;
break;
+ case OVS_ACTION_ATTR_PSAMPLE:
+ err = validate_psample(a);
+ if (err)
+ return err;
+ break;
+
default:
OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL;
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
index ed11cd12b512..8bbf983cd244 100644
--- a/net/openvswitch/meter.h
+++ b/net/openvswitch/meter.h
@@ -11,7 +11,6 @@
#include <linux/kernel.h>
#include <linux/netlink.h>
#include <linux/openvswitch.h>
-#include <linux/genetlink.h>
#include <linux/skbuff.h>
#include <linux/bits.h>
diff --git a/net/openvswitch/openvswitch_trace.h b/net/openvswitch/openvswitch_trace.h
index 3eb35d9eb700..74d75aaebef4 100644
--- a/net/openvswitch/openvswitch_trace.h
+++ b/net/openvswitch/openvswitch_trace.h
@@ -43,8 +43,8 @@ TRACE_EVENT(ovs_do_execute_action,
TP_fast_assign(
__entry->dpaddr = dp;
- __assign_str(dp_name, ovs_dp_name(dp));
- __assign_str(dev_name, skb->dev->name);
+ __assign_str(dp_name);
+ __assign_str(dev_name);
__entry->skbaddr = skb;
__entry->len = skb->len;
__entry->data_len = skb->data_len;
@@ -113,8 +113,8 @@ TRACE_EVENT(ovs_dp_upcall,
TP_fast_assign(
__entry->dpaddr = dp;
- __assign_str(dp_name, ovs_dp_name(dp));
- __assign_str(dev_name, skb->dev->name);
+ __assign_str(dp_name);
+ __assign_str(dev_name);
__entry->skbaddr = skb;
__entry->len = skb->len;
__entry->data_len = skb->data_len;
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 74c88a6baa43..4b33133cbdff 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -85,7 +85,6 @@ static const struct net_device_ops internal_dev_netdev_ops = {
.ndo_stop = internal_dev_stop,
.ndo_start_xmit = internal_dev_xmit,
.ndo_set_mac_address = eth_mac_addr,
- .ndo_get_stats64 = dev_get_tstats64,
};
static struct rtnl_link_ops internal_dev_link_ops __read_mostly = {
@@ -140,11 +139,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
err = -ENOMEM;
goto error_free_vport;
}
- vport->dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!vport->dev->tstats) {
- err = -ENOMEM;
- goto error_free_netdev;
- }
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
dev->ifindex = parms->desired_ifindex;
@@ -169,8 +164,6 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
error_unlock:
rtnl_unlock();
- free_percpu(dev->tstats);
-error_free_netdev:
free_netdev(dev);
error_free_vport:
ovs_vport_free(vport);
@@ -186,7 +179,6 @@ static void internal_dev_destroy(struct vport *vport)
/* unregister_netdevice() waits for an RCU grace period. */
unregister_netdevice(vport->dev);
- free_percpu(vport->dev->tstats);
rtnl_unlock();
}
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 903537a5da22..91a11067e458 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -82,6 +82,13 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name)
err = -ENODEV;
goto error_free_vport;
}
+ /* Ensure that the device exists and that the provided
+ * name is not one of its aliases.
+ */
+ if (strcmp(name, ovs_vport_name(vport))) {
+ err = -ENODEV;
+ goto error_put;
+ }
netdev_tracker_alloc(vport->dev, &vport->dev_tracker, GFP_KERNEL);
if (vport->dev->flags & IFF_LOOPBACK ||
(vport->dev->type != ARPHRD_ETHER &&
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 972ae01a70f7..8732f6e51ae5 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -500,6 +500,7 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
OVS_CB(skb)->input_vport = vport;
OVS_CB(skb)->mru = 0;
OVS_CB(skb)->cutlen = 0;
+ OVS_CB(skb)->probability = 0;
if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) {
u32 mark;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 18f616f487ea..4a364cdd445e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -538,6 +538,61 @@ static void *packet_current_frame(struct packet_sock *po,
return packet_lookup_frame(po, rb, rb->head, status);
}
+static u16 vlan_get_tci(struct sk_buff *skb, struct net_device *dev)
+{
+ u8 *skb_orig_data = skb->data;
+ int skb_orig_len = skb->len;
+ struct vlan_hdr vhdr, *vh;
+ unsigned int header_len;
+
+ if (!dev)
+ return 0;
+
+ /* In the SOCK_DGRAM scenario, skb data starts at the network
+ * protocol, which is after the VLAN headers. The outer VLAN
+ * header is at the hard_header_len offset in non-variable
+ * length link layer headers. If it's a VLAN device, the
+ * min_header_len should be used to exclude the VLAN header
+ * size.
+ */
+ if (dev->min_header_len == dev->hard_header_len)
+ header_len = dev->hard_header_len;
+ else if (is_vlan_dev(dev))
+ header_len = dev->min_header_len;
+ else
+ return 0;
+
+ skb_push(skb, skb->data - skb_mac_header(skb));
+ vh = skb_header_pointer(skb, header_len, sizeof(vhdr), &vhdr);
+ if (skb_orig_data != skb->data) {
+ skb->data = skb_orig_data;
+ skb->len = skb_orig_len;
+ }
+ if (unlikely(!vh))
+ return 0;
+
+ return ntohs(vh->h_vlan_TCI);
+}
+
+static __be16 vlan_get_protocol_dgram(struct sk_buff *skb)
+{
+ __be16 proto = skb->protocol;
+
+ if (unlikely(eth_type_vlan(proto))) {
+ u8 *skb_orig_data = skb->data;
+ int skb_orig_len = skb->len;
+
+ skb_push(skb, skb->data - skb_mac_header(skb));
+ proto = __vlan_get_protocol(skb, proto, NULL);
+ if (skb_orig_data != skb->data) {
+ skb->data = skb_orig_data;
+ skb->len = skb_orig_len;
+ }
+ }
+
+ return proto;
+}
+
static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
{
del_timer_sync(&pkc->retire_blk_timer);
@@ -1007,10 +1062,16 @@ static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
struct tpacket3_hdr *ppd)
{
+ struct packet_sock *po = container_of(pkc, struct packet_sock, rx_ring.prb_bdqc);
+
if (skb_vlan_tag_present(pkc->skb)) {
ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb);
ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto);
ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
+ } else if (unlikely(po->sk.sk_type == SOCK_DGRAM && eth_type_vlan(pkc->skb->protocol))) {
+ ppd->hv1.tp_vlan_tci = vlan_get_tci(pkc->skb, pkc->skb->dev);
+ ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->protocol);
+ ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
} else {
ppd->hv1.tp_vlan_tci = 0;
ppd->hv1.tp_vlan_tpid = 0;
@@ -2056,8 +2117,7 @@ retry:
skb->dev = dev;
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = READ_ONCE(sk->sk_mark);
- skb->tstamp = sockc.transmit_time;
-
+ skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid);
skb_setup_tx_timestamp(skb, sockc.tsflags);
if (unlikely(extra_len == 4))
@@ -2122,7 +2182,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
enum skb_drop_reason drop_reason = SKB_CONSUMED;
- struct sock *sk;
+ struct sock *sk = NULL;
struct sockaddr_ll *sll;
struct packet_sock *po;
u8 *skb_head = skb->data;
@@ -2227,7 +2287,7 @@ drop_n_restore:
skb->len = skb_len;
}
drop:
- kfree_skb_reason(skb, drop_reason);
+ sk_skb_reason_drop(sk, skb, drop_reason);
return 0;
}
@@ -2235,7 +2295,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
enum skb_drop_reason drop_reason = SKB_CONSUMED;
- struct sock *sk;
+ struct sock *sk = NULL;
struct packet_sock *po;
struct sockaddr_ll *sll;
union tpacket_uhdr h;
@@ -2428,6 +2488,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
h.h2->tp_vlan_tci = skb_vlan_tag_get(skb);
h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto);
status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
+ } else if (unlikely(sk->sk_type == SOCK_DGRAM && eth_type_vlan(skb->protocol))) {
+ h.h2->tp_vlan_tci = vlan_get_tci(skb, skb->dev);
+ h.h2->tp_vlan_tpid = ntohs(skb->protocol);
+ status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
} else {
h.h2->tp_vlan_tci = 0;
h.h2->tp_vlan_tpid = 0;
@@ -2457,7 +2521,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
sll->sll_family = AF_PACKET;
sll->sll_hatype = dev->type;
- sll->sll_protocol = skb->protocol;
+ sll->sll_protocol = (sk->sk_type == SOCK_DGRAM) ?
+ vlan_get_protocol_dgram(skb) : skb->protocol;
sll->sll_pkttype = skb->pkt_type;
if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
sll->sll_ifindex = orig_dev->ifindex;
@@ -2495,7 +2560,7 @@ drop_n_restore:
skb->len = skb_len;
}
drop:
- kfree_skb_reason(skb, drop_reason);
+ sk_skb_reason_drop(sk, skb, drop_reason);
return 0;
drop_n_account:
@@ -2504,7 +2569,7 @@ drop_n_account:
drop_reason = SKB_DROP_REASON_PACKET_SOCK_ERROR;
sk->sk_data_ready(sk);
- kfree_skb_reason(copy_skb, drop_reason);
+ sk_skb_reason_drop(sk, copy_skb, drop_reason);
goto drop_n_restore;
}
@@ -2522,8 +2587,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
ts = __packet_set_timestamp(po, ph, skb);
__packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
- if (!packet_read_pending(&po->tx_ring))
- complete(&po->skb_completion);
+ complete(&po->skb_completion);
}
sock_wfree(skb);
@@ -2585,7 +2649,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb->dev = dev;
skb->priority = READ_ONCE(po->sk.sk_priority);
skb->mark = READ_ONCE(po->sk.sk_mark);
- skb->tstamp = sockc->transmit_time;
+ skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, po->sk.sk_clockid);
skb_setup_tx_timestamp(skb, sockc->tsflags);
skb_zcopy_set_nouarg(skb, ph.raw);
@@ -3063,7 +3127,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
skb->dev = dev;
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc.mark;
- skb->tstamp = sockc.transmit_time;
+ skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid);
if (unlikely(extra_len == 4))
skb->no_fcs = 1;
@@ -3483,7 +3547,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
/* Original length was stored in sockaddr_ll fields */
origlen = PACKET_SKB_CB(skb)->sa.origlen;
sll->sll_family = AF_PACKET;
- sll->sll_protocol = skb->protocol;
+ sll->sll_protocol = (sock->type == SOCK_DGRAM) ?
+ vlan_get_protocol_dgram(skb) : skb->protocol;
}
sock_recv_cmsgs(msg, sk, skb);
@@ -3540,6 +3605,21 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
aux.tp_vlan_tci = skb_vlan_tag_get(skb);
aux.tp_vlan_tpid = ntohs(skb->vlan_proto);
aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
+ } else if (unlikely(sock->type == SOCK_DGRAM && eth_type_vlan(skb->protocol))) {
+ struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(sock_net(sk), sll->sll_ifindex);
+ if (dev) {
+ aux.tp_vlan_tci = vlan_get_tci(skb, dev);
+ aux.tp_vlan_tpid = ntohs(skb->protocol);
+ aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
+ } else {
+ aux.tp_vlan_tci = 0;
+ aux.tp_vlan_tpid = 0;
+ }
+ rcu_read_unlock();
} else {
aux.tp_vlan_tci = 0;
aux.tp_vlan_tpid = 0;
@@ -3800,28 +3880,30 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
case PACKET_TX_RING:
{
union tpacket_req_u req_u;
- int len;
+ ret = -EINVAL;
lock_sock(sk);
switch (po->tp_version) {
case TPACKET_V1:
case TPACKET_V2:
- len = sizeof(req_u.req);
+ if (optlen < sizeof(req_u.req))
+ break;
+ ret = copy_from_sockptr(&req_u.req, optval,
+ sizeof(req_u.req)) ?
+ -EINVAL : 0;
break;
case TPACKET_V3:
default:
- len = sizeof(req_u.req3);
+ if (optlen < sizeof(req_u.req3))
+ break;
+ ret = copy_from_sockptr(&req_u.req3, optval,
+ sizeof(req_u.req3)) ?
+ -EINVAL : 0;
break;
}
- if (optlen < len) {
- ret = -EINVAL;
- } else {
- if (copy_from_sockptr(&req_u.req, optval, len))
- ret = -EFAULT;
- else
- ret = packet_set_ring(sk, &req_u, 0,
- optname == PACKET_TX_RING);
- }
+ if (!ret)
+ ret = packet_set_ring(sk, &req_u, 0,
+ optname == PACKET_TX_RING);
release_sock(sk);
return ret;
}
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 3dd5f52bc1b5..53a858478e22 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -759,8 +759,8 @@ static void pep_sock_close(struct sock *sk, long timeout)
sock_put(sk);
}
-static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
- bool kern)
+static struct sock *pep_sock_accept(struct sock *sk,
+ struct proto_accept_arg *arg)
{
struct pep_sock *pn = pep_sk(sk), *newpn;
struct sock *newsk = NULL;
@@ -772,8 +772,8 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
u8 pipe_handle, enabled, n_sb;
u8 aligned = 0;
- skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
- errp);
+ skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
+ &arg->err);
if (!skb)
return NULL;
@@ -836,7 +836,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
/* Create a new to-be-accepted sock */
newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot,
- kern);
+ arg->kern);
if (!newsk) {
pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL);
err = -ENOBUFS;
@@ -878,7 +878,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
drop:
release_sock(sk);
kfree_skb(skb);
- *errp = err;
+ arg->err = err;
return newsk;
}
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 59aebe296890..7008d402499d 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -178,7 +178,7 @@ static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst,
rtm->rtm_type = RTN_UNICAST;
rtm->rtm_flags = 0;
if (nla_put_u8(skb, RTA_DST, dst) ||
- nla_put_u32(skb, RTA_OIF, dev->ifindex))
+ nla_put_u32(skb, RTA_OIF, READ_ONCE(dev->ifindex)))
goto nla_put_failure;
nlmsg_end(skb, nlh);
return 0;
@@ -193,7 +193,7 @@ void rtm_phonet_notify(int event, struct net_device *dev, u8 dst)
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) +
+ skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct rtmsg)) +
nla_total_size(1) + nla_total_size(4), GFP_KERNEL);
if (skb == NULL)
goto errout;
@@ -263,6 +263,7 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
+ int err = 0;
u8 addr;
rcu_read_lock();
@@ -272,16 +273,16 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
if (!dev)
continue;
- if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, RTM_NEWROUTE) < 0)
- goto out;
+ err = fill_route(skb, dev, addr << 2,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, RTM_NEWROUTE);
+ if (err < 0)
+ break;
}
-
-out:
rcu_read_unlock();
cb->args[0] = addr;
- return skb->len;
+ return err;
}
int __init phonet_netlink_register(void)
@@ -301,6 +302,6 @@ int __init phonet_netlink_register(void)
rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_DELROUTE,
route_doit, NULL, 0);
rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_GETROUTE,
- NULL, route_dumpit, 0);
+ NULL, route_dumpit, RTNL_FLAG_DUMP_UNLOCKED);
return 0;
}
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 1018340d89a7..5ce0b3ee5def 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -292,18 +292,17 @@ out:
}
static int pn_socket_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
struct sock *sk = sock->sk;
struct sock *newsk;
- int err;
if (unlikely(sk->sk_state != TCP_LISTEN))
return -EINVAL;
- newsk = sk->sk_prot->accept(sk, flags, &err, kern);
+ newsk = sk->sk_prot->accept(sk, arg);
if (!newsk)
- return err;
+ return arg->err;
lock_sock(newsk);
sock_graft(newsk, newsock);
diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c
index 0d0bf41381c2..463a74a27d3e 100644
--- a/net/phonet/sysctl.c
+++ b/net/phonet/sysctl.c
@@ -48,7 +48,7 @@ void phonet_get_local_port_range(int *min, int *max)
} while (read_seqretry(&local_port_range_lock, seq));
}
-static int proc_local_port_range(struct ctl_table *table, int write,
+static int proc_local_port_range(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
@@ -81,7 +81,6 @@ static struct ctl_table phonet_table[] = {
.mode = 0644,
.proc_handler = proc_local_port_range,
},
- { }
};
int __init phonet_sysctl_init(void)
diff --git a/net/psample/psample.c b/net/psample/psample.c
index ddd211a151d0..a0ddae8a65f9 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -221,7 +221,7 @@ static int __psample_ip_tun_to_nlattr(struct sk_buff *skb,
const struct ip_tunnel_key *tun_key = &tun_info->key;
int tun_opts_len = tun_info->options_len;
- if (tun_key->tun_flags & TUNNEL_KEY &&
+ if (test_bit(IP_TUNNEL_KEY_BIT, tun_key->tun_flags) &&
nla_put_be64(skb, PSAMPLE_TUNNEL_KEY_ATTR_ID, tun_key->tun_id,
PSAMPLE_TUNNEL_KEY_ATTR_PAD))
return -EMSGSIZE;
@@ -257,10 +257,10 @@ static int __psample_ip_tun_to_nlattr(struct sk_buff *skb,
return -EMSGSIZE;
if (nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TTL, tun_key->ttl))
return -EMSGSIZE;
- if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) &&
+ if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, tun_key->tun_flags) &&
nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
return -EMSGSIZE;
- if ((tun_key->tun_flags & TUNNEL_CSUM) &&
+ if (test_bit(IP_TUNNEL_CSUM_BIT, tun_key->tun_flags) &&
nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_CSUM))
return -EMSGSIZE;
if (tun_key->tp_src &&
@@ -269,15 +269,16 @@ static int __psample_ip_tun_to_nlattr(struct sk_buff *skb,
if (tun_key->tp_dst &&
nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_DST, tun_key->tp_dst))
return -EMSGSIZE;
- if ((tun_key->tun_flags & TUNNEL_OAM) &&
+ if (test_bit(IP_TUNNEL_OAM_BIT, tun_key->tun_flags) &&
nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_OAM))
return -EMSGSIZE;
if (tun_opts_len) {
- if (tun_key->tun_flags & TUNNEL_GENEVE_OPT &&
+ if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_key->tun_flags) &&
nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_GENEVE_OPTS,
tun_opts_len, tun_opts))
return -EMSGSIZE;
- else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT &&
+ else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
+ tun_key->tun_flags) &&
nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
tun_opts_len, tun_opts))
return -EMSGSIZE;
@@ -314,7 +315,7 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info)
int tun_opts_len = tun_info->options_len;
int sum = nla_total_size(0); /* PSAMPLE_ATTR_TUNNEL */
- if (tun_key->tun_flags & TUNNEL_KEY)
+ if (test_bit(IP_TUNNEL_KEY_BIT, tun_key->tun_flags))
sum += nla_total_size_64bit(sizeof(u64));
if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE)
@@ -337,20 +338,21 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info)
if (tun_key->tos)
sum += nla_total_size(sizeof(u8));
sum += nla_total_size(sizeof(u8)); /* TTL */
- if (tun_key->tun_flags & TUNNEL_DONT_FRAGMENT)
+ if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, tun_key->tun_flags))
sum += nla_total_size(0);
- if (tun_key->tun_flags & TUNNEL_CSUM)
+ if (test_bit(IP_TUNNEL_CSUM_BIT, tun_key->tun_flags))
sum += nla_total_size(0);
if (tun_key->tp_src)
sum += nla_total_size(sizeof(u16));
if (tun_key->tp_dst)
sum += nla_total_size(sizeof(u16));
- if (tun_key->tun_flags & TUNNEL_OAM)
+ if (test_bit(IP_TUNNEL_OAM_BIT, tun_key->tun_flags))
sum += nla_total_size(0);
if (tun_opts_len) {
- if (tun_key->tun_flags & TUNNEL_GENEVE_OPT)
+ if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_key->tun_flags))
sum += nla_total_size(tun_opts_len);
- else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT)
+ else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
+ tun_key->tun_flags))
sum += nla_total_size(tun_opts_len);
}
@@ -358,8 +360,9 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info)
}
#endif
-void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
- u32 sample_rate, const struct psample_metadata *md)
+void psample_sample_packet(struct psample_group *group,
+ const struct sk_buff *skb, u32 sample_rate,
+ const struct psample_metadata *md)
{
ktime_t tstamp = ktime_get_real();
int out_ifindex = md->out_ifindex;
@@ -374,6 +377,10 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
void *data;
int ret;
+ if (!genl_has_listeners(&psample_nl_family, group->net,
+ PSAMPLE_NL_MCGRP_SAMPLE))
+ return;
+
meta_len = (in_ifindex ? nla_total_size(sizeof(u16)) : 0) +
(out_ifindex ? nla_total_size(sizeof(u16)) : 0) +
(md->out_tc_valid ? nla_total_size(sizeof(u16)) : 0) +
@@ -384,7 +391,9 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
nla_total_size(sizeof(u32)) + /* group_num */
nla_total_size(sizeof(u32)) + /* seq */
nla_total_size_64bit(sizeof(u64)) + /* timestamp */
- nla_total_size(sizeof(u16)); /* protocol */
+ nla_total_size(sizeof(u16)) + /* protocol */
+ (md->user_cookie_len ?
+ nla_total_size(md->user_cookie_len) : 0); /* user cookie */
#ifdef CONFIG_INET
tun_info = skb_tunnel_info(skb);
@@ -484,6 +493,14 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
}
#endif
+ if (md->user_cookie && md->user_cookie_len &&
+ nla_put(nl_skb, PSAMPLE_ATTR_USER_COOKIE, md->user_cookie_len,
+ md->user_cookie))
+ goto error;
+
+ if (md->rate_as_probability)
+ nla_put_flag(nl_skb, PSAMPLE_ATTR_SAMPLE_PROBABILITY);
+
genlmsg_end(nl_skb, data);
genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0,
PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC);
diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c
index 9ced13c0627a..69f53625a049 100644
--- a/net/qrtr/mhi.c
+++ b/net/qrtr/mhi.c
@@ -118,6 +118,51 @@ static const struct mhi_device_id qcom_mhi_qrtr_id_table[] = {
};
MODULE_DEVICE_TABLE(mhi, qcom_mhi_qrtr_id_table);
+static int __maybe_unused qcom_mhi_qrtr_pm_suspend_late(struct device *dev)
+{
+ struct mhi_device *mhi_dev = container_of(dev, struct mhi_device, dev);
+ enum mhi_state state;
+
+ state = mhi_get_mhi_state(mhi_dev->mhi_cntrl);
+ /*
+ * If the device is in suspend state, then no need for the
+ * client driver to unprepare the channels.
+ */
+ if (state == MHI_STATE_M3)
+ return 0;
+
+ mhi_unprepare_from_transfer(mhi_dev);
+
+ return 0;
+}
+
+static int __maybe_unused qcom_mhi_qrtr_pm_resume_early(struct device *dev)
+{
+ struct mhi_device *mhi_dev = container_of(dev, struct mhi_device, dev);
+ enum mhi_state state;
+ int rc;
+
+ state = mhi_get_mhi_state(mhi_dev->mhi_cntrl);
+ /*
+ * If the device is in suspend state, we won't unprepare channels
+ * in suspend callback, therefore no need to prepare channels when
+ * resume.
+ */
+ if (state == MHI_STATE_M3)
+ return 0;
+
+ rc = mhi_prepare_for_transfer_autoqueue(mhi_dev);
+ if (rc)
+ dev_err(dev, "failed to prepare for autoqueue transfer %d\n", rc);
+
+ return rc;
+}
+
+static const struct dev_pm_ops qcom_mhi_qrtr_pm_ops = {
+ SET_LATE_SYSTEM_SLEEP_PM_OPS(qcom_mhi_qrtr_pm_suspend_late,
+ qcom_mhi_qrtr_pm_resume_early)
+};
+
static struct mhi_driver qcom_mhi_qrtr_driver = {
.probe = qcom_mhi_qrtr_probe,
.remove = qcom_mhi_qrtr_remove,
@@ -126,6 +171,7 @@ static struct mhi_driver qcom_mhi_qrtr_driver = {
.id_table = qcom_mhi_qrtr_id_table,
.driver = {
.name = "qcom_mhi_qrtr",
+ .pm = &qcom_mhi_qrtr_pm_ops,
},
};
diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index abb0c70ffc8b..3de9350cbf30 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -132,8 +132,8 @@ static int service_announce_new(struct sockaddr_qrtr *dest,
return kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt));
}
-static int service_announce_del(struct sockaddr_qrtr *dest,
- struct qrtr_server *srv)
+static void service_announce_del(struct sockaddr_qrtr *dest,
+ struct qrtr_server *srv)
{
struct qrtr_ctrl_pkt pkt;
struct msghdr msg = { };
@@ -157,10 +157,10 @@ static int service_announce_del(struct sockaddr_qrtr *dest,
msg.msg_namelen = sizeof(*dest);
ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt));
- if (ret < 0)
+ if (ret < 0 && ret != -ENODEV)
pr_err("failed to announce del service\n");
- return ret;
+ return;
}
static void lookup_notify(struct sockaddr_qrtr *to, struct qrtr_server *srv,
@@ -188,7 +188,7 @@ static void lookup_notify(struct sockaddr_qrtr *to, struct qrtr_server *srv,
msg.msg_namelen = sizeof(*to);
ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt));
- if (ret < 0)
+ if (ret < 0 && ret != -ENODEV)
pr_err("failed to send lookup notification\n");
}
@@ -207,6 +207,9 @@ static int announce_servers(struct sockaddr_qrtr *sq)
xa_for_each(&node->servers, index, srv) {
ret = service_announce_new(sq, srv);
if (ret < 0) {
+ if (ret == -ENODEV)
+ continue;
+
pr_err("failed to announce new service\n");
return ret;
}
@@ -369,7 +372,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
msg.msg_namelen = sizeof(sq);
ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt));
- if (ret < 0) {
+ if (ret < 0 && ret != -ENODEV) {
pr_err("failed to send bye cmd\n");
return ret;
}
@@ -443,7 +446,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
msg.msg_namelen = sizeof(sq);
ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt));
- if (ret < 0) {
+ if (ret < 0 && ret != -ENODEV) {
pr_err("failed to send del client cmd\n");
return ret;
}
@@ -725,6 +728,24 @@ int qrtr_ns_init(void)
if (ret < 0)
goto err_wq;
+ /* As the qrtr ns socket owner and creator is the same module, we have
+ * to decrease the qrtr module reference count to guarantee that it
+ * remains zero after the ns socket is created, otherwise, executing
+ * "rmmod" command is unable to make the qrtr module deleted after the
+ * qrtr module is inserted successfully.
+ *
+ * However, the reference count is increased twice in
+ * sock_create_kern(): one is to increase the reference count of owner
+ * of qrtr socket's proto_ops struct; another is to increment the
+ * reference count of owner of qrtr proto struct. Therefore, we must
+ * decrement the module reference count twice to ensure that it keeps
+ * zero after server's listening socket is created. Of course, we
+ * must bump the module reference count twice as well before the socket
+ * is closed.
+ */
+ module_put(qrtr_ns.sock->ops->owner);
+ module_put(qrtr_ns.sock->sk->sk_prot_creator->owner);
+
return 0;
err_wq:
@@ -739,6 +760,15 @@ void qrtr_ns_remove(void)
{
cancel_work_sync(&qrtr_ns.work);
destroy_workqueue(qrtr_ns.workqueue);
+
+ /* sock_release() expects the two references that were put during
+ * qrtr_ns_init(). This function is only called during module remove,
+ * so try_stop_module() has already set the refcnt to 0. Use
+ * __module_get() instead of try_module_get() to successfully take two
+ * references.
+ */
+ __module_get(qrtr_ns.sock->ops->owner);
+ __module_get(qrtr_ns.sock->sk->sk_prot_creator->owner);
sock_release(qrtr_ns.sock);
}
EXPORT_SYMBOL_GPL(qrtr_ns_remove);
diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c
index e4e41b3afce7..2af678e71e3c 100644
--- a/net/rds/ib_sysctl.c
+++ b/net/rds/ib_sysctl.c
@@ -103,7 +103,6 @@ static struct ctl_table rds_ib_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
void rds_ib_sysctl_exit(void)
diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c
index e381bbcd9cc1..025f518a4349 100644
--- a/net/rds/sysctl.c
+++ b/net/rds/sysctl.c
@@ -89,7 +89,6 @@ static struct ctl_table rds_sysctl_rds_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
void rds_sysctl_exit(void)
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 2dba7505b414..351ac1747224 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -61,7 +61,7 @@ static atomic_t rds_tcp_unloading = ATOMIC_INIT(0);
static struct kmem_cache *rds_tcp_conn_slab;
-static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
+static int rds_tcp_skbuf_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *fpos);
static int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF;
@@ -86,7 +86,6 @@ static struct ctl_table rds_tcp_sysctl_table[] = {
.proc_handler = rds_tcp_skbuf_handler,
.extra1 = &rds_tcp_min_rcvbuf,
},
- { }
};
u32 rds_tcp_write_seq(struct rds_tcp_connection *tc)
@@ -683,7 +682,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
spin_unlock_irq(&rds_tcp_conn_lock);
}
-static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
+static int rds_tcp_skbuf_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *fpos)
{
struct net *net = current->nsproxy->net_ns;
@@ -720,9 +719,7 @@ static int __init rds_tcp_init(void)
{
int ret;
- rds_tcp_conn_slab = kmem_cache_create("rds_tcp_connection",
- sizeof(struct rds_tcp_connection),
- 0, 0, NULL);
+ rds_tcp_conn_slab = KMEM_CACHE(rds_tcp_connection, 0);
if (!rds_tcp_conn_slab) {
ret = -ENOMEM;
goto out;
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 05008ce5c421..d89bd8d0c354 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -105,6 +105,10 @@ int rds_tcp_accept_one(struct socket *sock)
int conn_state;
struct rds_conn_path *cp;
struct in6_addr *my_addr, *peer_addr;
+ struct proto_accept_arg arg = {
+ .flags = O_NONBLOCK,
+ .kern = true,
+ };
#if !IS_ENABLED(CONFIG_IPV6)
struct in6_addr saddr, daddr;
#endif
@@ -119,7 +123,7 @@ int rds_tcp_accept_one(struct socket *sock)
if (ret)
goto out;
- ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true);
+ ret = sock->ops->accept(sock, new_sock, &arg);
if (ret < 0)
goto out;
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index c00f04a1a534..7997a19d1da3 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -337,9 +337,7 @@ out:
int rds_tcp_recv_init(void)
{
- rds_tcp_incoming_slab = kmem_cache_create("rds_tcp_incoming",
- sizeof(struct rds_tcp_incoming),
- 0, 0, NULL);
+ rds_tcp_incoming_slab = KMEM_CACHE(rds_tcp_incoming, 0);
if (!rds_tcp_incoming_slab)
return -ENOMEM;
return 0;
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index c3feb4f49d09..7a5367628c05 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -546,10 +546,10 @@ bool rfkill_set_hw_state_reason(struct rfkill *rfkill,
BUG_ON(!rfkill);
- if (WARN(reason &
- ~(RFKILL_HARD_BLOCK_SIGNAL | RFKILL_HARD_BLOCK_NOT_OWNER),
- "hw_state reason not supported: 0x%lx", reason))
- return blocked;
+ if (WARN(reason & ~(RFKILL_HARD_BLOCK_SIGNAL |
+ RFKILL_HARD_BLOCK_NOT_OWNER),
+ "hw_state reason not supported: 0x%lx", reason))
+ return rfkill_blocked(rfkill);
spin_lock_irqsave(&rfkill->lock, flags);
prev = !!(rfkill->hard_block_reasons & reason);
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 4e32d659524e..84529886c2e6 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -156,14 +156,12 @@ err_destroy:
return ret;
}
-static int rfkill_gpio_remove(struct platform_device *pdev)
+static void rfkill_gpio_remove(struct platform_device *pdev)
{
struct rfkill_gpio_data *rfkill = platform_get_drvdata(pdev);
rfkill_unregister(rfkill->rfkill_dev);
rfkill_destroy(rfkill->rfkill_dev);
-
- return 0;
}
#ifdef CONFIG_ACPI
@@ -183,7 +181,7 @@ MODULE_DEVICE_TABLE(of, rfkill_of_match);
static struct platform_driver rfkill_gpio_driver = {
.probe = rfkill_gpio_probe,
- .remove = rfkill_gpio_remove,
+ .remove_new = rfkill_gpio_remove,
.driver = {
.name = "rfkill_gpio",
.acpi_match_table = ACPI_PTR(rfkill_acpi_match),
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index ef81d019b20f..59050caab65c 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -919,8 +919,8 @@ out_release:
return err;
}
-static int rose_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int rose_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sk_buff *skb;
struct sock *newsk;
@@ -953,7 +953,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags,
if (skb)
break;
- if (flags & O_NONBLOCK) {
+ if (arg->flags & O_NONBLOCK) {
err = -EWOULDBLOCK;
break;
}
diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c
index d391d7758f52..d801315b7083 100644
--- a/net/rose/sysctl_net_rose.c
+++ b/net/rose/sysctl_net_rose.c
@@ -112,7 +112,6 @@ static struct ctl_table rose_table[] = {
.extra1 = &min_window,
.extra2 = &max_window
},
- { }
};
void __init rose_register_sysctl(void)
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 5222bc97d192..f4844683e120 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -65,7 +65,7 @@ static void rxrpc_write_space(struct sock *sk)
if (skwq_has_sleeper(wq))
wake_up_interruptible(&wq->wait);
- sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
rcu_read_unlock();
}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 08c0a32db8c7..08de24658f4f 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -697,7 +697,7 @@ struct rxrpc_call {
* packets) rather than bytes.
*/
#define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN
-#define RXRPC_MIN_CWND (RXRPC_TX_SMSS > 2190 ? 2 : RXRPC_TX_SMSS > 1095 ? 3 : 4)
+#define RXRPC_MIN_CWND 4
u8 cong_cwnd; /* Congestion window size */
u8 cong_extra; /* Extra to send for congestion management */
u8 cong_ssthresh; /* Slow-start threshold */
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 01fa71e8b1f7..f9e983a12c14 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -174,12 +174,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
call->rx_winsize = rxrpc_rx_window_size;
call->tx_winsize = 16;
- if (RXRPC_TX_SMSS > 2190)
- call->cong_cwnd = 2;
- else if (RXRPC_TX_SMSS > 1095)
- call->cong_cwnd = 3;
- else
- call->cong_cwnd = 4;
+ call->cong_cwnd = RXRPC_MIN_CWND;
call->cong_ssthresh = RXRPC_TX_MAX_WINDOW;
call->rxnet = rxnet;
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 3dedb8c0618c..16d49a861dbb 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -9,6 +9,17 @@
#include "ar-internal.h"
+/* Override priority when generating ACKs for received DATA */
+static const u8 rxrpc_ack_priority[RXRPC_ACK__INVALID] = {
+ [RXRPC_ACK_IDLE] = 1,
+ [RXRPC_ACK_DELAY] = 2,
+ [RXRPC_ACK_REQUESTED] = 3,
+ [RXRPC_ACK_DUPLICATE] = 4,
+ [RXRPC_ACK_EXCEEDS_WINDOW] = 5,
+ [RXRPC_ACK_NOSPACE] = 6,
+ [RXRPC_ACK_OUT_OF_SEQUENCE] = 7,
+};
+
static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq,
enum rxrpc_abort_reason why)
{
@@ -365,7 +376,7 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb,
* Process a DATA packet.
*/
static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
- bool *_notify)
+ bool *_notify, rxrpc_serial_t *_ack_serial, int *_ack_reason)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct sk_buff *oos;
@@ -418,8 +429,6 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
/* Send an immediate ACK if we fill in a hole */
else if (!skb_queue_empty(&call->rx_oos_queue))
ack_reason = RXRPC_ACK_DELAY;
- else
- call->ackr_nr_unacked++;
window++;
if (after(window, wtop)) {
@@ -497,12 +506,16 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
}
send_ack:
- if (ack_reason >= 0)
- rxrpc_send_ACK(call, ack_reason, serial,
- rxrpc_propose_ack_input_data);
- else
- rxrpc_propose_delay_ACK(call, serial,
- rxrpc_propose_ack_input_data);
+ if (ack_reason >= 0) {
+ if (rxrpc_ack_priority[ack_reason] > rxrpc_ack_priority[*_ack_reason]) {
+ *_ack_serial = serial;
+ *_ack_reason = ack_reason;
+ } else if (rxrpc_ack_priority[ack_reason] == rxrpc_ack_priority[*_ack_reason] &&
+ ack_reason == RXRPC_ACK_REQUESTED) {
+ *_ack_serial = serial;
+ *_ack_reason = ack_reason;
+ }
+ }
}
/*
@@ -513,9 +526,11 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
struct rxrpc_jumbo_header jhdr;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb), *jsp;
struct sk_buff *jskb;
+ rxrpc_serial_t ack_serial = 0;
unsigned int offset = sizeof(struct rxrpc_wire_header);
unsigned int len = skb->len - offset;
bool notify = false;
+ int ack_reason = 0;
while (sp->hdr.flags & RXRPC_JUMBO_PACKET) {
if (len < RXRPC_JUMBO_SUBPKTLEN)
@@ -535,7 +550,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
jsp = rxrpc_skb(jskb);
jsp->offset = offset;
jsp->len = RXRPC_JUMBO_DATALEN;
- rxrpc_input_data_one(call, jskb, &notify);
+ rxrpc_input_data_one(call, jskb, &notify, &ack_serial, &ack_reason);
rxrpc_free_skb(jskb, rxrpc_skb_put_jumbo_subpacket);
sp->hdr.flags = jhdr.flags;
@@ -548,7 +563,16 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
sp->offset = offset;
sp->len = len;
- rxrpc_input_data_one(call, skb, &notify);
+ rxrpc_input_data_one(call, skb, &notify, &ack_serial, &ack_reason);
+
+ if (ack_reason > 0) {
+ rxrpc_send_ACK(call, ack_reason, ack_serial,
+ rxrpc_propose_ack_input_data);
+ } else {
+ call->ackr_nr_unacked++;
+ rxrpc_propose_delay_ACK(call, sp->hdr.serial,
+ rxrpc_propose_ack_input_data);
+ }
if (notify) {
trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial);
rxrpc_notify_socket(call);
@@ -685,9 +709,6 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb
call->tx_winsize = rwind;
}
- if (call->cong_ssthresh > rwind)
- call->cong_ssthresh = rwind;
-
mtu = min(ntohl(trailer->maxMTU), ntohl(trailer->ifMTU));
peer = call->peer;
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index c9bedd0e2d86..9bf9a1f6e4cb 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -127,7 +127,6 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.extra1 = (void *)SYSCTL_ONE,
.extra2 = (void *)&four,
},
- { }
};
int __init rxrpc_sysctl_init(void)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 9ee622fb1160..2714c4ed928e 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -62,7 +62,7 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie,
{
struct tc_cookie *old;
- old = xchg((__force struct tc_cookie **)old_cookie, new_cookie);
+ old = unrcu_pointer(xchg(old_cookie, RCU_INITIALIZER(new_cookie)));
if (old)
call_rcu(&old->rcu, tcf_free_cookie_rcu);
}
@@ -830,7 +830,6 @@ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
u32 max;
if (*index) {
-again:
rcu_read_lock();
p = idr_find(&idrinfo->action_idr, *index);
@@ -839,7 +838,7 @@ again:
* index but did not assign the pointer yet.
*/
rcu_read_unlock();
- goto again;
+ return -EAGAIN;
}
if (!p) {
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 0e3cf11ae5fc..396b576390d0 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -54,8 +54,8 @@ TC_INDIRECT_SCOPE int tcf_bpf_act(struct sk_buff *skb,
bpf_compute_data_pointers(skb);
filter_res = bpf_prog_run(filter, skb);
}
- if (unlikely(!skb->tstamp && skb->mono_delivery_time))
- skb->mono_delivery_time = 0;
+ if (unlikely(!skb->tstamp && skb->tstamp_type))
+ skb->tstamp_type = SKB_CLOCK_REALTIME;
if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK)
skb_orphan(skb);
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index baac083fd8f1..3ba8e7e739b5 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -41,21 +41,28 @@ static struct workqueue_struct *act_ct_wq;
static struct rhashtable zones_ht;
static DEFINE_MUTEX(zones_mutex);
+struct zones_ht_key {
+ struct net *net;
+ u16 zone;
+ /* Note : pad[] must be the last field. */
+ u8 pad[];
+};
+
struct tcf_ct_flow_table {
struct rhash_head node; /* In zones tables */
struct rcu_work rwork;
struct nf_flowtable nf_ft;
refcount_t ref;
- u16 zone;
+ struct zones_ht_key key;
bool dying;
};
static const struct rhashtable_params zones_params = {
.head_offset = offsetof(struct tcf_ct_flow_table, node),
- .key_offset = offsetof(struct tcf_ct_flow_table, zone),
- .key_len = sizeof_field(struct tcf_ct_flow_table, zone),
+ .key_offset = offsetof(struct tcf_ct_flow_table, key),
+ .key_len = offsetof(struct zones_ht_key, pad),
.automatic_shrinking = true,
};
@@ -316,11 +323,12 @@ static struct nf_flowtable_type flowtable_ct = {
static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params)
{
+ struct zones_ht_key key = { .net = net, .zone = params->zone };
struct tcf_ct_flow_table *ct_ft;
int err = -ENOMEM;
mutex_lock(&zones_mutex);
- ct_ft = rhashtable_lookup_fast(&zones_ht, &params->zone, zones_params);
+ ct_ft = rhashtable_lookup_fast(&zones_ht, &key, zones_params);
if (ct_ft && refcount_inc_not_zero(&ct_ft->ref))
goto out_unlock;
@@ -329,7 +337,7 @@ static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params)
goto err_alloc;
refcount_set(&ct_ft->ref, 1);
- ct_ft->zone = params->zone;
+ ct_ft->key = key;
err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params);
if (err)
goto err_insert;
@@ -938,6 +946,8 @@ static int tcf_ct_act_nat(struct sk_buff *skb,
action |= BIT(NF_NAT_MANIP_DST);
err = nf_ct_nat(skb, ct, ctinfo, &action, range, commit);
+ if (err != NF_ACCEPT)
+ return err & NF_VERDICT_MASK;
if (action & BIT(NF_NAT_MANIP_SRC))
tc_skb_cb(skb)->post_ct_snat = 1;
@@ -1029,7 +1039,7 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
state.pf = family;
err = nf_conntrack_in(skb, &state);
if (err != NF_ACCEPT)
- goto out_push;
+ goto nf_error;
}
do_nat:
@@ -1041,7 +1051,7 @@ do_nat:
err = tcf_ct_act_nat(skb, ct, ctinfo, p->ct_action, &p->range, commit);
if (err != NF_ACCEPT)
- goto drop;
+ goto nf_error;
if (!nf_ct_is_confirmed(ct) && commit && p->helper && !nfct_help(ct)) {
err = __nf_ct_try_assign_helper(ct, p->tmpl, GFP_ATOMIC);
@@ -1055,8 +1065,9 @@ do_nat:
}
if (nf_ct_is_confirmed(ct) ? ((!cached && !skip_add) || add_helper) : commit) {
- if (nf_ct_helper(skb, ct, ctinfo, family) != NF_ACCEPT)
- goto drop;
+ err = nf_ct_helper(skb, ct, ctinfo, family);
+ if (err != NF_ACCEPT)
+ goto nf_error;
}
if (commit) {
@@ -1069,8 +1080,17 @@ do_nat:
/* This will take care of sending queued events
* even if the connection is already confirmed.
*/
- if (nf_conntrack_confirm(skb) != NF_ACCEPT)
- goto drop;
+ err = nf_conntrack_confirm(skb);
+ if (err != NF_ACCEPT)
+ goto nf_error;
+
+ /* The ct may be dropped if a clash has been resolved,
+ * so it's necessary to retrieve it from skb again to
+ * prevent UAF.
+ */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ skip_add = true;
}
if (!skip_add)
@@ -1094,6 +1114,21 @@ out_frag:
drop:
tcf_action_inc_drop_qstats(&c->common);
return TC_ACT_SHOT;
+
+nf_error:
+ /* some verdicts store extra data in upper bits, such
+ * as errno or queue number.
+ */
+ switch (err & NF_VERDICT_MASK) {
+ case NF_DROP:
+ goto drop;
+ case NF_STOLEN:
+ tcf_action_inc_drop_qstats(&c->common);
+ return TC_ACT_CONSUMED;
+ default:
+ DEBUG_NET_WARN_ON_ONCE(1);
+ goto drop;
+ }
}
static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = {
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index a69b53d54039..2ceb4d141b71 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -167,7 +167,9 @@ TC_INDIRECT_SCOPE int tcf_sample_act(struct sk_buff *skb,
{
struct tcf_sample *s = to_sample(a);
struct psample_group *psample_group;
+ u8 cookie_data[TC_COOKIE_MAX_SIZE];
struct psample_metadata md = {};
+ struct tc_cookie *user_cookie;
int retval;
tcf_lastuse_update(&s->tcf_tm);
@@ -189,6 +191,16 @@ TC_INDIRECT_SCOPE int tcf_sample_act(struct sk_buff *skb,
if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev))
skb_push(skb, skb->mac_len);
+ rcu_read_lock();
+ user_cookie = rcu_dereference(a->user_cookie);
+ if (user_cookie) {
+ memcpy(cookie_data, user_cookie->data,
+ user_cookie->len);
+ md.user_cookie = cookie_data;
+ md.user_cookie_len = user_cookie->len;
+ }
+ rcu_read_unlock();
+
md.trunc_size = s->truncate ? s->trunc_size : skb->len;
psample_sample_packet(psample_group, skb, s->rate, &md);
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index cd0accaf844a..dc0229693461 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -246,7 +246,7 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
memset(&opt, 0, sizeof(opt));
opt.index = d->tcf_index;
- opt.refcnt = refcount_read(&d->tcf_refcnt) - ref,
+ opt.refcnt = refcount_read(&d->tcf_refcnt) - ref;
opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind;
spin_lock_bh(&d->tcf_lock);
opt.action = d->tcf_action;
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 1536f8b16f1b..af7c99845948 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -230,7 +230,7 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst,
nla_for_each_attr(attr, head, len, rem) {
switch (nla_type(attr)) {
case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
- if (type && type != TUNNEL_GENEVE_OPT) {
+ if (type && type != IP_TUNNEL_GENEVE_OPT_BIT) {
NL_SET_ERR_MSG(extack, "Duplicate type for geneve options");
return -EINVAL;
}
@@ -247,7 +247,7 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst,
dst_len -= opt_len;
dst += opt_len;
}
- type = TUNNEL_GENEVE_OPT;
+ type = IP_TUNNEL_GENEVE_OPT_BIT;
break;
case TCA_TUNNEL_KEY_ENC_OPTS_VXLAN:
if (type) {
@@ -259,7 +259,7 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst,
if (opt_len < 0)
return opt_len;
opts_len += opt_len;
- type = TUNNEL_VXLAN_OPT;
+ type = IP_TUNNEL_VXLAN_OPT_BIT;
break;
case TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN:
if (type) {
@@ -271,7 +271,7 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst,
if (opt_len < 0)
return opt_len;
opts_len += opt_len;
- type = TUNNEL_ERSPAN_OPT;
+ type = IP_TUNNEL_ERSPAN_OPT_BIT;
break;
}
}
@@ -302,7 +302,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info,
switch (nla_type(nla_data(nla))) {
case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
#if IS_ENABLED(CONFIG_INET)
- info->key.tun_flags |= TUNNEL_GENEVE_OPT;
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags);
return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
opts_len, extack);
#else
@@ -310,7 +310,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info,
#endif
case TCA_TUNNEL_KEY_ENC_OPTS_VXLAN:
#if IS_ENABLED(CONFIG_INET)
- info->key.tun_flags |= TUNNEL_VXLAN_OPT;
+ __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags);
return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
opts_len, extack);
#else
@@ -318,7 +318,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info,
#endif
case TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN:
#if IS_ENABLED(CONFIG_INET)
- info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags);
return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
opts_len, extack);
#else
@@ -363,6 +363,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
bool bind = act_flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
struct tcf_tunnel_key_params *params_new;
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
struct metadata_dst *metadata = NULL;
struct tcf_chain *goto_ch = NULL;
struct tc_tunnel_key *parm;
@@ -371,7 +372,6 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
__be16 dst_port = 0;
__be64 key_id = 0;
int opts_len = 0;
- __be16 flags = 0;
u8 tos, ttl;
int ret = 0;
u32 index;
@@ -412,16 +412,16 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
key32 = nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]);
key_id = key32_to_tunnel_id(key32);
- flags = TUNNEL_KEY;
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
}
- flags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, flags);
if (tb[TCA_TUNNEL_KEY_NO_CSUM] &&
nla_get_u8(tb[TCA_TUNNEL_KEY_NO_CSUM]))
- flags &= ~TUNNEL_CSUM;
+ __clear_bit(IP_TUNNEL_CSUM_BIT, flags);
if (nla_get_flag(tb[TCA_TUNNEL_KEY_NO_FRAG]))
- flags |= TUNNEL_DONT_FRAGMENT;
+ __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, flags);
if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT])
dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]);
@@ -663,15 +663,15 @@ static int tunnel_key_opts_dump(struct sk_buff *skb,
if (!start)
return -EMSGSIZE;
- if (info->key.tun_flags & TUNNEL_GENEVE_OPT) {
+ if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) {
err = tunnel_key_geneve_opts_dump(skb, info);
if (err)
goto err_out;
- } else if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
+ } else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags)) {
err = tunnel_key_vxlan_opts_dump(skb, info);
if (err)
goto err_out;
- } else if (info->key.tun_flags & TUNNEL_ERSPAN_OPT) {
+ } else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags)) {
err = tunnel_key_erspan_opts_dump(skb, info);
if (err)
goto err_out;
@@ -741,7 +741,7 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
struct ip_tunnel_key *key = &info->key;
__be32 key_id = tunnel_id_to_key32(key->tun_id);
- if (((key->tun_flags & TUNNEL_KEY) &&
+ if ((test_bit(IP_TUNNEL_KEY_BIT, key->tun_flags) &&
nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id)) ||
tunnel_key_dump_addresses(skb,
&params->tcft_enc_metadata->u.tun_info) ||
@@ -749,8 +749,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT,
key->tp_dst)) ||
nla_put_u8(skb, TCA_TUNNEL_KEY_NO_CSUM,
- !(key->tun_flags & TUNNEL_CSUM)) ||
- ((key->tun_flags & TUNNEL_DONT_FRAGMENT) &&
+ !test_bit(IP_TUNNEL_CSUM_BIT, key->tun_flags)) ||
+ (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) &&
nla_put_flag(skb, TCA_TUNNEL_KEY_NO_FRAG)) ||
tunnel_key_opts_dump(skb, info))
goto nla_put_failure;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index ca5676b2668e..17d97bbe890f 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -410,12 +410,48 @@ static void tcf_proto_get(struct tcf_proto *tp)
refcount_inc(&tp->refcnt);
}
+static void tcf_maintain_bypass(struct tcf_block *block)
+{
+ int filtercnt = atomic_read(&block->filtercnt);
+ int skipswcnt = atomic_read(&block->skipswcnt);
+ bool bypass_wanted = filtercnt > 0 && filtercnt == skipswcnt;
+
+ if (bypass_wanted != block->bypass_wanted) {
+#ifdef CONFIG_NET_CLS_ACT
+ if (bypass_wanted)
+ static_branch_inc(&tcf_bypass_check_needed_key);
+ else
+ static_branch_dec(&tcf_bypass_check_needed_key);
+#endif
+ block->bypass_wanted = bypass_wanted;
+ }
+}
+
+static void tcf_block_filter_cnt_update(struct tcf_block *block, bool *counted, bool add)
+{
+ lockdep_assert_not_held(&block->cb_lock);
+
+ down_write(&block->cb_lock);
+ if (*counted != add) {
+ if (add) {
+ atomic_inc(&block->filtercnt);
+ *counted = true;
+ } else {
+ atomic_dec(&block->filtercnt);
+ *counted = false;
+ }
+ }
+ tcf_maintain_bypass(block);
+ up_write(&block->cb_lock);
+}
+
static void tcf_chain_put(struct tcf_chain *chain);
static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
bool sig_destroy, struct netlink_ext_ack *extack)
{
tp->ops->destroy(tp, rtnl_held, extack);
+ tcf_block_filter_cnt_update(tp->chain->block, &tp->counted, false);
if (sig_destroy)
tcf_proto_signal_destroyed(tp->chain, tp);
tcf_chain_put(tp->chain);
@@ -2367,6 +2403,7 @@ replay:
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
RTM_NEWTFILTER, false, rtnl_held, extack);
tfilter_put(tp, fh);
+ tcf_block_filter_cnt_update(block, &tp->counted, true);
/* q pointer is NULL for shared blocks */
if (q)
q->flags &= ~TCQ_F_CAN_BYPASS;
@@ -3483,6 +3520,8 @@ static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
if (*flags & TCA_CLS_FLAGS_IN_HW)
return;
*flags |= TCA_CLS_FLAGS_IN_HW;
+ if (tc_skip_sw(*flags))
+ atomic_inc(&block->skipswcnt);
atomic_inc(&block->offloadcnt);
}
@@ -3491,6 +3530,8 @@ static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
if (!(*flags & TCA_CLS_FLAGS_IN_HW))
return;
*flags &= ~TCA_CLS_FLAGS_IN_HW;
+ if (tc_skip_sw(*flags))
+ atomic_dec(&block->skipswcnt);
atomic_dec(&block->offloadcnt);
}
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 5e83e890f6a4..1941ebec23ff 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -104,8 +104,8 @@ TC_INDIRECT_SCOPE int cls_bpf_classify(struct sk_buff *skb,
bpf_compute_data_pointers(skb);
filter_res = bpf_prog_run(prog->filter, skb);
}
- if (unlikely(!skb->tstamp && skb->mono_delivery_time))
- skb->mono_delivery_time = 0;
+ if (unlikely(!skb->tstamp && skb->tstamp_type))
+ skb->tstamp_type = SKB_CLOCK_REALTIME;
if (prog->exts_integrated) {
res->class = 0;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index e1314674b4a9..e280c27cb9f9 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -28,6 +28,7 @@
#include <net/vxlan.h>
#include <net/erspan.h>
#include <net/gtp.h>
+#include <net/pfcp.h>
#include <net/tc_wrapper.h>
#include <net/dst.h>
@@ -40,6 +41,16 @@
#define TCA_FLOWER_KEY_CT_FLAGS_MASK \
(TCA_FLOWER_KEY_CT_FLAGS_MAX - 1)
+#define TCA_FLOWER_KEY_FLAGS_POLICY_MASK \
+ (TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT | \
+ TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST)
+
+#define TCA_FLOWER_KEY_ENC_FLAGS_POLICY_MASK \
+ (TCA_FLOWER_KEY_FLAGS_TUNNEL_CSUM | \
+ TCA_FLOWER_KEY_FLAGS_TUNNEL_DONT_FRAGMENT | \
+ TCA_FLOWER_KEY_FLAGS_TUNNEL_OAM | \
+ TCA_FLOWER_KEY_FLAGS_TUNNEL_CRIT_OPT)
+
struct fl_flow_key {
struct flow_dissector_key_meta meta;
struct flow_dissector_key_control control;
@@ -668,8 +679,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK] = { .type = NLA_U16 },
- [TCA_FLOWER_KEY_FLAGS] = { .type = NLA_U32 },
- [TCA_FLOWER_KEY_FLAGS_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_FLAGS] = NLA_POLICY_MASK(NLA_BE32,
+ TCA_FLOWER_KEY_FLAGS_POLICY_MASK),
+ [TCA_FLOWER_KEY_FLAGS_MASK] = NLA_POLICY_MASK(NLA_BE32,
+ TCA_FLOWER_KEY_FLAGS_POLICY_MASK),
[TCA_FLOWER_KEY_ICMPV4_TYPE] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_ICMPV4_CODE] = { .type = NLA_U8 },
@@ -731,6 +744,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_SPI_MASK] = { .type = NLA_U32 },
[TCA_FLOWER_L2_MISS] = NLA_POLICY_MAX(NLA_U8, 1),
[TCA_FLOWER_KEY_CFM] = { .type = NLA_NESTED },
+ [TCA_FLOWER_KEY_ENC_FLAGS] = NLA_POLICY_MASK(NLA_BE32,
+ TCA_FLOWER_KEY_ENC_FLAGS_POLICY_MASK),
+ [TCA_FLOWER_KEY_ENC_FLAGS_MASK] = NLA_POLICY_MASK(NLA_BE32,
+ TCA_FLOWER_KEY_ENC_FLAGS_POLICY_MASK),
};
static const struct nla_policy
@@ -741,6 +758,7 @@ enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = {
[TCA_FLOWER_KEY_ENC_OPTS_VXLAN] = { .type = NLA_NESTED },
[TCA_FLOWER_KEY_ENC_OPTS_ERSPAN] = { .type = NLA_NESTED },
[TCA_FLOWER_KEY_ENC_OPTS_GTP] = { .type = NLA_NESTED },
+ [TCA_FLOWER_KEY_ENC_OPTS_PFCP] = { .type = NLA_NESTED },
};
static const struct nla_policy
@@ -771,6 +789,12 @@ gtp_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GTP_MAX + 1] = {
};
static const struct nla_policy
+pfcp_opt_policy[TCA_FLOWER_KEY_ENC_OPT_PFCP_MAX + 1] = {
+ [TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ENC_OPT_PFCP_SEID] = { .type = NLA_U64 },
+};
+
+static const struct nla_policy
mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = {
[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL] = { .type = NLA_U8 },
@@ -1147,19 +1171,29 @@ static void fl_set_key_flag(u32 flower_key, u32 flower_mask,
}
}
-static int fl_set_key_flags(struct nlattr **tb, u32 *flags_key,
- u32 *flags_mask, struct netlink_ext_ack *extack)
+static int fl_set_key_flags(struct nlattr *tca_opts, struct nlattr **tb,
+ bool encap, u32 *flags_key, u32 *flags_mask,
+ struct netlink_ext_ack *extack)
{
+ int fl_key, fl_mask;
u32 key, mask;
+ if (encap) {
+ fl_key = TCA_FLOWER_KEY_ENC_FLAGS;
+ fl_mask = TCA_FLOWER_KEY_ENC_FLAGS_MASK;
+ } else {
+ fl_key = TCA_FLOWER_KEY_FLAGS;
+ fl_mask = TCA_FLOWER_KEY_FLAGS_MASK;
+ }
+
/* mask is mandatory for flags */
- if (!tb[TCA_FLOWER_KEY_FLAGS_MASK]) {
+ if (NL_REQ_ATTR_CHECK(extack, tca_opts, tb, fl_mask)) {
NL_SET_ERR_MSG(extack, "Missing flags mask");
return -EINVAL;
}
- key = be32_to_cpu(nla_get_be32(tb[TCA_FLOWER_KEY_FLAGS]));
- mask = be32_to_cpu(nla_get_be32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));
+ key = be32_to_cpu(nla_get_be32(tb[fl_key]));
+ mask = be32_to_cpu(nla_get_be32(tb[fl_mask]));
*flags_key = 0;
*flags_mask = 0;
@@ -1170,6 +1204,21 @@ static int fl_set_key_flags(struct nlattr **tb, u32 *flags_key,
TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
FLOW_DIS_FIRST_FRAG);
+ fl_set_key_flag(key, mask, flags_key, flags_mask,
+ TCA_FLOWER_KEY_FLAGS_TUNNEL_CSUM,
+ FLOW_DIS_F_TUNNEL_CSUM);
+
+ fl_set_key_flag(key, mask, flags_key, flags_mask,
+ TCA_FLOWER_KEY_FLAGS_TUNNEL_DONT_FRAGMENT,
+ FLOW_DIS_F_TUNNEL_DONT_FRAGMENT);
+
+ fl_set_key_flag(key, mask, flags_key, flags_mask,
+ TCA_FLOWER_KEY_FLAGS_TUNNEL_OAM, FLOW_DIS_F_TUNNEL_OAM);
+
+ fl_set_key_flag(key, mask, flags_key, flags_mask,
+ TCA_FLOWER_KEY_FLAGS_TUNNEL_CRIT_OPT,
+ FLOW_DIS_F_TUNNEL_CRIT_OPT);
+
return 0;
}
@@ -1419,6 +1468,44 @@ static int fl_set_gtp_opt(const struct nlattr *nla, struct fl_flow_key *key,
return sizeof(*sinfo);
}
+static int fl_set_pfcp_opt(const struct nlattr *nla, struct fl_flow_key *key,
+ int depth, int option_len,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_MAX + 1];
+ struct pfcp_metadata *md;
+ int err;
+
+ md = (struct pfcp_metadata *)&key->enc_opts.data[key->enc_opts.len];
+ memset(md, 0xff, sizeof(*md));
+
+ if (!depth)
+ return sizeof(*md);
+
+ if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_PFCP) {
+ NL_SET_ERR_MSG_MOD(extack, "Non-pfcp option type for mask");
+ return -EINVAL;
+ }
+
+ err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_PFCP_MAX, nla,
+ pfcp_opt_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!option_len && !tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE]) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing tunnel key pfcp option type");
+ return -EINVAL;
+ }
+
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE])
+ md->type = nla_get_u8(tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE]);
+
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_SEID])
+ md->seid = nla_get_be64(tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_SEID]);
+
+ return sizeof(*md);
+}
+
static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
struct fl_flow_key *mask,
struct netlink_ext_ack *extack)
@@ -1454,12 +1541,13 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
switch (nla_type(nla_opt_key)) {
case TCA_FLOWER_KEY_ENC_OPTS_GENEVE:
if (key->enc_opts.dst_opt_type &&
- key->enc_opts.dst_opt_type != TUNNEL_GENEVE_OPT) {
+ key->enc_opts.dst_opt_type !=
+ IP_TUNNEL_GENEVE_OPT_BIT) {
NL_SET_ERR_MSG(extack, "Duplicate type for geneve options");
return -EINVAL;
}
option_len = 0;
- key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
+ key->enc_opts.dst_opt_type = IP_TUNNEL_GENEVE_OPT_BIT;
option_len = fl_set_geneve_opt(nla_opt_key, key,
key_depth, option_len,
extack);
@@ -1470,7 +1558,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
/* At the same time we need to parse through the mask
* in order to verify exact and mask attribute lengths.
*/
- mask->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
+ mask->enc_opts.dst_opt_type = IP_TUNNEL_GENEVE_OPT_BIT;
option_len = fl_set_geneve_opt(nla_opt_msk, mask,
msk_depth, option_len,
extack);
@@ -1489,7 +1577,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
return -EINVAL;
}
option_len = 0;
- key->enc_opts.dst_opt_type = TUNNEL_VXLAN_OPT;
+ key->enc_opts.dst_opt_type = IP_TUNNEL_VXLAN_OPT_BIT;
option_len = fl_set_vxlan_opt(nla_opt_key, key,
key_depth, option_len,
extack);
@@ -1500,7 +1588,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
/* At the same time we need to parse through the mask
* in order to verify exact and mask attribute lengths.
*/
- mask->enc_opts.dst_opt_type = TUNNEL_VXLAN_OPT;
+ mask->enc_opts.dst_opt_type = IP_TUNNEL_VXLAN_OPT_BIT;
option_len = fl_set_vxlan_opt(nla_opt_msk, mask,
msk_depth, option_len,
extack);
@@ -1519,7 +1607,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
return -EINVAL;
}
option_len = 0;
- key->enc_opts.dst_opt_type = TUNNEL_ERSPAN_OPT;
+ key->enc_opts.dst_opt_type = IP_TUNNEL_ERSPAN_OPT_BIT;
option_len = fl_set_erspan_opt(nla_opt_key, key,
key_depth, option_len,
extack);
@@ -1530,7 +1618,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
/* At the same time we need to parse through the mask
* in order to verify exact and mask attribute lengths.
*/
- mask->enc_opts.dst_opt_type = TUNNEL_ERSPAN_OPT;
+ mask->enc_opts.dst_opt_type = IP_TUNNEL_ERSPAN_OPT_BIT;
option_len = fl_set_erspan_opt(nla_opt_msk, mask,
msk_depth, option_len,
extack);
@@ -1550,7 +1638,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
return -EINVAL;
}
option_len = 0;
- key->enc_opts.dst_opt_type = TUNNEL_GTP_OPT;
+ key->enc_opts.dst_opt_type = IP_TUNNEL_GTP_OPT_BIT;
option_len = fl_set_gtp_opt(nla_opt_key, key,
key_depth, option_len,
extack);
@@ -1561,7 +1649,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
/* At the same time we need to parse through the mask
* in order to verify exact and mask attribute lengths.
*/
- mask->enc_opts.dst_opt_type = TUNNEL_GTP_OPT;
+ mask->enc_opts.dst_opt_type = IP_TUNNEL_GTP_OPT_BIT;
option_len = fl_set_gtp_opt(nla_opt_msk, mask,
msk_depth, option_len,
extack);
@@ -1575,6 +1663,36 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
return -EINVAL;
}
break;
+ case TCA_FLOWER_KEY_ENC_OPTS_PFCP:
+ if (key->enc_opts.dst_opt_type) {
+ NL_SET_ERR_MSG_MOD(extack, "Duplicate type for pfcp options");
+ return -EINVAL;
+ }
+ option_len = 0;
+ key->enc_opts.dst_opt_type = IP_TUNNEL_PFCP_OPT_BIT;
+ option_len = fl_set_pfcp_opt(nla_opt_key, key,
+ key_depth, option_len,
+ extack);
+ if (option_len < 0)
+ return option_len;
+
+ key->enc_opts.len += option_len;
+ /* At the same time we need to parse through the mask
+ * in order to verify exact and mask attribute lengths.
+ */
+ mask->enc_opts.dst_opt_type = IP_TUNNEL_PFCP_OPT_BIT;
+ option_len = fl_set_pfcp_opt(nla_opt_msk, mask,
+ msk_depth, option_len,
+ extack);
+ if (option_len < 0)
+ return option_len;
+
+ mask->enc_opts.len += option_len;
+ if (key->enc_opts.len != mask->enc_opts.len) {
+ NL_SET_ERR_MSG_MOD(extack, "Key and mask miss aligned");
+ return -EINVAL;
+ }
+ break;
default:
NL_SET_ERR_MSG(extack, "Unknown tunnel option type");
return -EINVAL;
@@ -1748,9 +1866,9 @@ static int fl_set_key_cfm(struct nlattr **tb,
return 0;
}
-static int fl_set_key(struct net *net, struct nlattr **tb,
- struct fl_flow_key *key, struct fl_flow_key *mask,
- struct netlink_ext_ack *extack)
+static int fl_set_key(struct net *net, struct nlattr *tca_opts,
+ struct nlattr **tb, struct fl_flow_key *key,
+ struct fl_flow_key *mask, struct netlink_ext_ack *extack)
{
__be16 ethertype;
int ret = 0;
@@ -1982,9 +2100,18 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
if (ret)
return ret;
- if (tb[TCA_FLOWER_KEY_FLAGS])
- ret = fl_set_key_flags(tb, &key->control.flags,
+ if (tb[TCA_FLOWER_KEY_FLAGS]) {
+ ret = fl_set_key_flags(tca_opts, tb, false,
+ &key->control.flags,
&mask->control.flags, extack);
+ if (ret)
+ return ret;
+ }
+
+ if (tb[TCA_FLOWER_KEY_ENC_FLAGS])
+ ret = fl_set_key_flags(tca_opts, tb, true,
+ &key->enc_control.flags,
+ &mask->enc_control.flags, extack);
return ret;
}
@@ -2075,7 +2202,8 @@ static void fl_init_dissector(struct flow_dissector *dissector,
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, enc_ipv6);
if (FL_KEY_IS_MASKED(mask, enc_ipv4) ||
- FL_KEY_IS_MASKED(mask, enc_ipv6))
+ FL_KEY_IS_MASKED(mask, enc_ipv6) ||
+ FL_KEY_IS_MASKED(mask, enc_control))
FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_ENC_CONTROL,
enc_control);
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
@@ -2233,6 +2361,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
{
struct cls_fl_head *head = fl_head_dereference(tp);
bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL);
+ struct nlattr *tca_opts = tca[TCA_OPTIONS];
struct cls_fl_filter *fold = *arg;
bool bound_to_filter = false;
struct cls_fl_filter *fnew;
@@ -2241,7 +2370,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
bool in_ht;
int err;
- if (!tca[TCA_OPTIONS]) {
+ if (!tca_opts) {
err = -EINVAL;
goto errout_fold;
}
@@ -2259,7 +2388,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
}
err = nla_parse_nested_deprecated(tb, TCA_FLOWER_MAX,
- tca[TCA_OPTIONS], fl_policy, NULL);
+ tca_opts, fl_policy, NULL);
if (err < 0)
goto errout_tb;
@@ -2335,7 +2464,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
bound_to_filter = true;
}
- err = fl_set_key(net, tb, &fnew->key, &mask->key, extack);
+ err = fl_set_key(net, tca_opts, tb, &fnew->key, &mask->key, extack);
if (err)
goto unbind_filter;
@@ -2675,18 +2804,19 @@ static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
struct nlattr **tca,
struct netlink_ext_ack *extack)
{
+ struct nlattr *tca_opts = tca[TCA_OPTIONS];
struct fl_flow_tmplt *tmplt;
struct nlattr **tb;
int err;
- if (!tca[TCA_OPTIONS])
+ if (!tca_opts)
return ERR_PTR(-EINVAL);
tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
if (!tb)
return ERR_PTR(-ENOBUFS);
err = nla_parse_nested_deprecated(tb, TCA_FLOWER_MAX,
- tca[TCA_OPTIONS], fl_policy, NULL);
+ tca_opts, fl_policy, NULL);
if (err)
goto errout_tb;
@@ -2696,7 +2826,8 @@ static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
goto errout_tb;
}
tmplt->chain = chain;
- err = fl_set_key(net, tb, &tmplt->dummy_key, &tmplt->mask, extack);
+ err = fl_set_key(net, tca_opts, tb, &tmplt->dummy_key,
+ &tmplt->mask, extack);
if (err)
goto errout_tmplt;
@@ -2972,12 +3103,22 @@ static void fl_get_key_flag(u32 dissector_key, u32 dissector_mask,
}
}
-static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
+static int fl_dump_key_flags(struct sk_buff *skb, bool encap,
+ u32 flags_key, u32 flags_mask)
{
- u32 key, mask;
+ int fl_key, fl_mask;
__be32 _key, _mask;
+ u32 key, mask;
int err;
+ if (encap) {
+ fl_key = TCA_FLOWER_KEY_ENC_FLAGS;
+ fl_mask = TCA_FLOWER_KEY_ENC_FLAGS_MASK;
+ } else {
+ fl_key = TCA_FLOWER_KEY_FLAGS;
+ fl_mask = TCA_FLOWER_KEY_FLAGS_MASK;
+ }
+
if (!memchr_inv(&flags_mask, 0, sizeof(flags_mask)))
return 0;
@@ -2990,14 +3131,29 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
FLOW_DIS_FIRST_FRAG);
+ fl_get_key_flag(flags_key, flags_mask, &key, &mask,
+ TCA_FLOWER_KEY_FLAGS_TUNNEL_CSUM,
+ FLOW_DIS_F_TUNNEL_CSUM);
+
+ fl_get_key_flag(flags_key, flags_mask, &key, &mask,
+ TCA_FLOWER_KEY_FLAGS_TUNNEL_DONT_FRAGMENT,
+ FLOW_DIS_F_TUNNEL_DONT_FRAGMENT);
+
+ fl_get_key_flag(flags_key, flags_mask, &key, &mask,
+ TCA_FLOWER_KEY_FLAGS_TUNNEL_OAM, FLOW_DIS_F_TUNNEL_OAM);
+
+ fl_get_key_flag(flags_key, flags_mask, &key, &mask,
+ TCA_FLOWER_KEY_FLAGS_TUNNEL_CRIT_OPT,
+ FLOW_DIS_F_TUNNEL_CRIT_OPT);
+
_key = cpu_to_be32(key);
_mask = cpu_to_be32(mask);
- err = nla_put(skb, TCA_FLOWER_KEY_FLAGS, 4, &_key);
+ err = nla_put(skb, fl_key, 4, &_key);
if (err)
return err;
- return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
+ return nla_put(skb, fl_mask, 4, &_mask);
}
static int fl_dump_key_geneve_opt(struct sk_buff *skb,
@@ -3117,6 +3273,32 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int fl_dump_key_pfcp_opt(struct sk_buff *skb,
+ struct flow_dissector_key_enc_opts *enc_opts)
+{
+ struct pfcp_metadata *md;
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, TCA_FLOWER_KEY_ENC_OPTS_PFCP);
+ if (!nest)
+ goto nla_put_failure;
+
+ md = (struct pfcp_metadata *)&enc_opts->data[0];
+ if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE, md->type))
+ goto nla_put_failure;
+
+ if (nla_put_be64(skb, TCA_FLOWER_KEY_ENC_OPT_PFCP_SEID,
+ md->seid, 0))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
static int fl_dump_key_ct(struct sk_buff *skb,
struct flow_dissector_key_ct *key,
struct flow_dissector_key_ct *mask)
@@ -3202,26 +3384,31 @@ static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type,
goto nla_put_failure;
switch (enc_opts->dst_opt_type) {
- case TUNNEL_GENEVE_OPT:
+ case IP_TUNNEL_GENEVE_OPT_BIT:
err = fl_dump_key_geneve_opt(skb, enc_opts);
if (err)
goto nla_put_failure;
break;
- case TUNNEL_VXLAN_OPT:
+ case IP_TUNNEL_VXLAN_OPT_BIT:
err = fl_dump_key_vxlan_opt(skb, enc_opts);
if (err)
goto nla_put_failure;
break;
- case TUNNEL_ERSPAN_OPT:
+ case IP_TUNNEL_ERSPAN_OPT_BIT:
err = fl_dump_key_erspan_opt(skb, enc_opts);
if (err)
goto nla_put_failure;
break;
- case TUNNEL_GTP_OPT:
+ case IP_TUNNEL_GTP_OPT_BIT:
err = fl_dump_key_gtp_opt(skb, enc_opts);
if (err)
goto nla_put_failure;
break;
+ case IP_TUNNEL_PFCP_OPT_BIT:
+ err = fl_dump_key_pfcp_opt(skb, enc_opts);
+ if (err)
+ goto nla_put_failure;
+ break;
default:
goto nla_put_failure;
}
@@ -3473,7 +3660,8 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
if (fl_dump_key_ct(skb, &key->ct, &mask->ct))
goto nla_put_failure;
- if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
+ if (fl_dump_key_flags(skb, false, key->control.flags,
+ mask->control.flags))
goto nla_put_failure;
if (fl_dump_key_val(skb, &key->hash.hash, TCA_FLOWER_KEY_HASH,
@@ -3484,6 +3672,10 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
if (fl_dump_key_cfm(skb, &key->cfm, &mask->cfm))
goto nla_put_failure;
+ if (fl_dump_key_flags(skb, true, key->enc_control.flags,
+ mask->enc_control.flags))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 60239378d43f..74afc210527d 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1334,7 +1334,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
* before again attaching a qdisc.
*/
if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
- dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
+ WRITE_ONCE(dev->tx_queue_len, DEFAULT_TX_QUEUE_LEN);
netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
}
@@ -1389,6 +1389,7 @@ err_out4:
ops->destroy(sch);
qdisc_put_stab(rtnl_dereference(sch->stab));
err_out3:
+ lockdep_unregister_key(&sch->root_lock_key);
netdev_put(dev, &sch->dev_tracker);
qdisc_free(sch);
err_out2:
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index edee926ccde8..9602dafe32e6 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -1512,7 +1512,7 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
if (!q->overflow_timeout) {
int i;
/* Build fresh max-heap */
- for (i = CAKE_MAX_TINS * CAKE_QUEUES / 2; i >= 0; i--)
+ for (i = CAKE_MAX_TINS * CAKE_QUEUES / 2 - 1; i >= 0; i--)
cake_heapify(q, i);
}
q->overflow_timeout = 65535;
@@ -2572,6 +2572,8 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
{
struct cake_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_CAKE_MAX + 1];
+ u16 rate_flags;
+ u8 flow_mode;
int err;
err = nla_parse_nested_deprecated(tb, TCA_CAKE_MAX, opt, cake_policy,
@@ -2579,10 +2581,11 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
if (err < 0)
return err;
+ flow_mode = q->flow_mode;
if (tb[TCA_CAKE_NAT]) {
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
- q->flow_mode &= ~CAKE_FLOW_NAT_FLAG;
- q->flow_mode |= CAKE_FLOW_NAT_FLAG *
+ flow_mode &= ~CAKE_FLOW_NAT_FLAG;
+ flow_mode |= CAKE_FLOW_NAT_FLAG *
!!nla_get_u32(tb[TCA_CAKE_NAT]);
#else
NL_SET_ERR_MSG_ATTR(extack, tb[TCA_CAKE_NAT],
@@ -2592,29 +2595,34 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
}
if (tb[TCA_CAKE_BASE_RATE64])
- q->rate_bps = nla_get_u64(tb[TCA_CAKE_BASE_RATE64]);
+ WRITE_ONCE(q->rate_bps,
+ nla_get_u64(tb[TCA_CAKE_BASE_RATE64]));
if (tb[TCA_CAKE_DIFFSERV_MODE])
- q->tin_mode = nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE]);
+ WRITE_ONCE(q->tin_mode,
+ nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE]));
+ rate_flags = q->rate_flags;
if (tb[TCA_CAKE_WASH]) {
if (!!nla_get_u32(tb[TCA_CAKE_WASH]))
- q->rate_flags |= CAKE_FLAG_WASH;
+ rate_flags |= CAKE_FLAG_WASH;
else
- q->rate_flags &= ~CAKE_FLAG_WASH;
+ rate_flags &= ~CAKE_FLAG_WASH;
}
if (tb[TCA_CAKE_FLOW_MODE])
- q->flow_mode = ((q->flow_mode & CAKE_FLOW_NAT_FLAG) |
+ flow_mode = ((flow_mode & CAKE_FLOW_NAT_FLAG) |
(nla_get_u32(tb[TCA_CAKE_FLOW_MODE]) &
CAKE_FLOW_MASK));
if (tb[TCA_CAKE_ATM])
- q->atm_mode = nla_get_u32(tb[TCA_CAKE_ATM]);
+ WRITE_ONCE(q->atm_mode,
+ nla_get_u32(tb[TCA_CAKE_ATM]));
if (tb[TCA_CAKE_OVERHEAD]) {
- q->rate_overhead = nla_get_s32(tb[TCA_CAKE_OVERHEAD]);
- q->rate_flags |= CAKE_FLAG_OVERHEAD;
+ WRITE_ONCE(q->rate_overhead,
+ nla_get_s32(tb[TCA_CAKE_OVERHEAD]));
+ rate_flags |= CAKE_FLAG_OVERHEAD;
q->max_netlen = 0;
q->max_adjlen = 0;
@@ -2623,7 +2631,7 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
}
if (tb[TCA_CAKE_RAW]) {
- q->rate_flags &= ~CAKE_FLAG_OVERHEAD;
+ rate_flags &= ~CAKE_FLAG_OVERHEAD;
q->max_netlen = 0;
q->max_adjlen = 0;
@@ -2632,54 +2640,58 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
}
if (tb[TCA_CAKE_MPU])
- q->rate_mpu = nla_get_u32(tb[TCA_CAKE_MPU]);
+ WRITE_ONCE(q->rate_mpu,
+ nla_get_u32(tb[TCA_CAKE_MPU]));
if (tb[TCA_CAKE_RTT]) {
- q->interval = nla_get_u32(tb[TCA_CAKE_RTT]);
+ u32 interval = nla_get_u32(tb[TCA_CAKE_RTT]);
- if (!q->interval)
- q->interval = 1;
+ WRITE_ONCE(q->interval, max(interval, 1U));
}
if (tb[TCA_CAKE_TARGET]) {
- q->target = nla_get_u32(tb[TCA_CAKE_TARGET]);
+ u32 target = nla_get_u32(tb[TCA_CAKE_TARGET]);
- if (!q->target)
- q->target = 1;
+ WRITE_ONCE(q->target, max(target, 1U));
}
if (tb[TCA_CAKE_AUTORATE]) {
if (!!nla_get_u32(tb[TCA_CAKE_AUTORATE]))
- q->rate_flags |= CAKE_FLAG_AUTORATE_INGRESS;
+ rate_flags |= CAKE_FLAG_AUTORATE_INGRESS;
else
- q->rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS;
+ rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS;
}
if (tb[TCA_CAKE_INGRESS]) {
if (!!nla_get_u32(tb[TCA_CAKE_INGRESS]))
- q->rate_flags |= CAKE_FLAG_INGRESS;
+ rate_flags |= CAKE_FLAG_INGRESS;
else
- q->rate_flags &= ~CAKE_FLAG_INGRESS;
+ rate_flags &= ~CAKE_FLAG_INGRESS;
}
if (tb[TCA_CAKE_ACK_FILTER])
- q->ack_filter = nla_get_u32(tb[TCA_CAKE_ACK_FILTER]);
+ WRITE_ONCE(q->ack_filter,
+ nla_get_u32(tb[TCA_CAKE_ACK_FILTER]));
if (tb[TCA_CAKE_MEMORY])
- q->buffer_config_limit = nla_get_u32(tb[TCA_CAKE_MEMORY]);
+ WRITE_ONCE(q->buffer_config_limit,
+ nla_get_u32(tb[TCA_CAKE_MEMORY]));
if (tb[TCA_CAKE_SPLIT_GSO]) {
if (!!nla_get_u32(tb[TCA_CAKE_SPLIT_GSO]))
- q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
+ rate_flags |= CAKE_FLAG_SPLIT_GSO;
else
- q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
+ rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
}
if (tb[TCA_CAKE_FWMARK]) {
- q->fwmark_mask = nla_get_u32(tb[TCA_CAKE_FWMARK]);
- q->fwmark_shft = q->fwmark_mask ? __ffs(q->fwmark_mask) : 0;
+ WRITE_ONCE(q->fwmark_mask, nla_get_u32(tb[TCA_CAKE_FWMARK]));
+ WRITE_ONCE(q->fwmark_shft,
+ q->fwmark_mask ? __ffs(q->fwmark_mask) : 0);
}
+ WRITE_ONCE(q->rate_flags, rate_flags);
+ WRITE_ONCE(q->flow_mode, flow_mode);
if (q->tins) {
sch_tree_lock(sch);
cake_reconfigure(sch);
@@ -2774,68 +2786,72 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct cake_sched_data *q = qdisc_priv(sch);
struct nlattr *opts;
+ u16 rate_flags;
+ u8 flow_mode;
opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (!opts)
goto nla_put_failure;
- if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64, q->rate_bps,
- TCA_CAKE_PAD))
+ if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64,
+ READ_ONCE(q->rate_bps), TCA_CAKE_PAD))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE,
- q->flow_mode & CAKE_FLOW_MASK))
+ flow_mode = READ_ONCE(q->flow_mode);
+ if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE, flow_mode & CAKE_FLOW_MASK))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_RTT, q->interval))
+ if (nla_put_u32(skb, TCA_CAKE_RTT, READ_ONCE(q->interval)))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_TARGET, q->target))
+ if (nla_put_u32(skb, TCA_CAKE_TARGET, READ_ONCE(q->target)))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_MEMORY, q->buffer_config_limit))
+ if (nla_put_u32(skb, TCA_CAKE_MEMORY,
+ READ_ONCE(q->buffer_config_limit)))
goto nla_put_failure;
+ rate_flags = READ_ONCE(q->rate_flags);
if (nla_put_u32(skb, TCA_CAKE_AUTORATE,
- !!(q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS)))
+ !!(rate_flags & CAKE_FLAG_AUTORATE_INGRESS)))
goto nla_put_failure;
if (nla_put_u32(skb, TCA_CAKE_INGRESS,
- !!(q->rate_flags & CAKE_FLAG_INGRESS)))
+ !!(rate_flags & CAKE_FLAG_INGRESS)))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, q->ack_filter))
+ if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, READ_ONCE(q->ack_filter)))
goto nla_put_failure;
if (nla_put_u32(skb, TCA_CAKE_NAT,
- !!(q->flow_mode & CAKE_FLOW_NAT_FLAG)))
+ !!(flow_mode & CAKE_FLOW_NAT_FLAG)))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, q->tin_mode))
+ if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, READ_ONCE(q->tin_mode)))
goto nla_put_failure;
if (nla_put_u32(skb, TCA_CAKE_WASH,
- !!(q->rate_flags & CAKE_FLAG_WASH)))
+ !!(rate_flags & CAKE_FLAG_WASH)))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, q->rate_overhead))
+ if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, READ_ONCE(q->rate_overhead)))
goto nla_put_failure;
- if (!(q->rate_flags & CAKE_FLAG_OVERHEAD))
+ if (!(rate_flags & CAKE_FLAG_OVERHEAD))
if (nla_put_u32(skb, TCA_CAKE_RAW, 0))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_ATM, q->atm_mode))
+ if (nla_put_u32(skb, TCA_CAKE_ATM, READ_ONCE(q->atm_mode)))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_MPU, q->rate_mpu))
+ if (nla_put_u32(skb, TCA_CAKE_MPU, READ_ONCE(q->rate_mpu)))
goto nla_put_failure;
if (nla_put_u32(skb, TCA_CAKE_SPLIT_GSO,
- !!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
+ !!(rate_flags & CAKE_FLAG_SPLIT_GSO)))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_FWMARK, q->fwmark_mask))
+ if (nla_put_u32(skb, TCA_CAKE_FWMARK, READ_ONCE(q->fwmark_mask)))
goto nla_put_failure;
return nla_nest_end(skb, opts);
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index 69001eff0315..939425da1895 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -389,11 +389,11 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt,
}
/* Everything went OK, save the parameters used. */
- q->hicredit = qopt->hicredit;
- q->locredit = qopt->locredit;
- q->idleslope = qopt->idleslope * BYTES_PER_KBIT;
- q->sendslope = qopt->sendslope * BYTES_PER_KBIT;
- q->offload = qopt->offload;
+ WRITE_ONCE(q->hicredit, qopt->hicredit);
+ WRITE_ONCE(q->locredit, qopt->locredit);
+ WRITE_ONCE(q->idleslope, qopt->idleslope * BYTES_PER_KBIT);
+ WRITE_ONCE(q->sendslope, qopt->sendslope * BYTES_PER_KBIT);
+ WRITE_ONCE(q->offload, qopt->offload);
return 0;
}
@@ -459,11 +459,11 @@ static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
if (!nest)
goto nla_put_failure;
- opt.hicredit = q->hicredit;
- opt.locredit = q->locredit;
- opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT);
- opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT);
- opt.offload = q->offload;
+ opt.hicredit = READ_ONCE(q->hicredit);
+ opt.locredit = READ_ONCE(q->locredit);
+ opt.sendslope = div64_s64(READ_ONCE(q->sendslope), BYTES_PER_KBIT);
+ opt.idleslope = div64_s64(READ_ONCE(q->idleslope), BYTES_PER_KBIT);
+ opt.offload = READ_ONCE(q->offload);
if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index ea108030c6b4..91072010923d 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -405,8 +405,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt,
} else
sch_tree_lock(sch);
- q->flags = ctl->flags;
- q->limit = ctl->limit;
+ WRITE_ONCE(q->flags, ctl->flags);
+ WRITE_ONCE(q->limit, ctl->limit);
red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
ctl->Plog, ctl->Scell_log,
@@ -431,15 +431,16 @@ static int choke_init(struct Qdisc *sch, struct nlattr *opt,
static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct choke_sched_data *q = qdisc_priv(sch);
+ u8 Wlog = READ_ONCE(q->parms.Wlog);
struct nlattr *opts = NULL;
struct tc_red_qopt opt = {
- .limit = q->limit,
- .flags = q->flags,
- .qth_min = q->parms.qth_min >> q->parms.Wlog,
- .qth_max = q->parms.qth_max >> q->parms.Wlog,
- .Wlog = q->parms.Wlog,
- .Plog = q->parms.Plog,
- .Scell_log = q->parms.Scell_log,
+ .limit = READ_ONCE(q->limit),
+ .flags = READ_ONCE(q->flags),
+ .qth_min = READ_ONCE(q->parms.qth_min) >> Wlog,
+ .qth_max = READ_ONCE(q->parms.qth_max) >> Wlog,
+ .Wlog = Wlog,
+ .Plog = READ_ONCE(q->parms.Plog),
+ .Scell_log = READ_ONCE(q->parms.Scell_log),
};
opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
@@ -447,7 +448,7 @@ static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
goto nla_put_failure;
if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
- nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
+ nla_put_u32(skb, TCA_CHOKE_MAX_P, READ_ONCE(q->parms.max_P)))
goto nla_put_failure;
return nla_nest_end(skb, opts);
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index ecb3f164bb25..3e8d4fe4d91e 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -118,26 +118,31 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_CODEL_TARGET]) {
u32 target = nla_get_u32(tb[TCA_CODEL_TARGET]);
- q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
+ WRITE_ONCE(q->params.target,
+ ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT);
}
if (tb[TCA_CODEL_CE_THRESHOLD]) {
u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]);
- q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+ WRITE_ONCE(q->params.ce_threshold,
+ (val * NSEC_PER_USEC) >> CODEL_SHIFT);
}
if (tb[TCA_CODEL_INTERVAL]) {
u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
- q->params.interval = ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT;
+ WRITE_ONCE(q->params.interval,
+ ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT);
}
if (tb[TCA_CODEL_LIMIT])
- sch->limit = nla_get_u32(tb[TCA_CODEL_LIMIT]);
+ WRITE_ONCE(sch->limit,
+ nla_get_u32(tb[TCA_CODEL_LIMIT]));
if (tb[TCA_CODEL_ECN])
- q->params.ecn = !!nla_get_u32(tb[TCA_CODEL_ECN]);
+ WRITE_ONCE(q->params.ecn,
+ !!nla_get_u32(tb[TCA_CODEL_ECN]));
qlen = sch->q.qlen;
while (sch->q.qlen > sch->limit) {
@@ -183,6 +188,7 @@ static int codel_init(struct Qdisc *sch, struct nlattr *opt,
static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct codel_sched_data *q = qdisc_priv(sch);
+ codel_time_t ce_threshold;
struct nlattr *opts;
opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
@@ -190,17 +196,18 @@ static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
goto nla_put_failure;
if (nla_put_u32(skb, TCA_CODEL_TARGET,
- codel_time_to_us(q->params.target)) ||
+ codel_time_to_us(READ_ONCE(q->params.target))) ||
nla_put_u32(skb, TCA_CODEL_LIMIT,
- sch->limit) ||
+ READ_ONCE(sch->limit)) ||
nla_put_u32(skb, TCA_CODEL_INTERVAL,
- codel_time_to_us(q->params.interval)) ||
+ codel_time_to_us(READ_ONCE(q->params.interval))) ||
nla_put_u32(skb, TCA_CODEL_ECN,
- q->params.ecn))
+ READ_ONCE(q->params.ecn)))
goto nla_put_failure;
- if (q->params.ce_threshold != CODEL_DISABLED_THRESHOLD &&
+ ce_threshold = READ_ONCE(q->params.ce_threshold);
+ if (ce_threshold != CODEL_DISABLED_THRESHOLD &&
nla_put_u32(skb, TCA_CODEL_CE_THRESHOLD,
- codel_time_to_us(q->params.ce_threshold)))
+ codel_time_to_us(ce_threshold)))
goto nla_put_failure;
return nla_nest_end(skb, opts);
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index 2e4bef713b6a..c74d778c32a1 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -467,15 +467,15 @@ static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
if (!nest)
goto nla_put_failure;
- opt.delta = q->delta;
- opt.clockid = q->clockid;
- if (q->offload)
+ opt.delta = READ_ONCE(q->delta);
+ opt.clockid = READ_ONCE(q->clockid);
+ if (READ_ONCE(q->offload))
opt.flags |= TC_ETF_OFFLOAD_ON;
- if (q->deadline_mode)
+ if (READ_ONCE(q->deadline_mode))
opt.flags |= TC_ETF_DEADLINE_MODE_ON;
- if (q->skip_sock_check)
+ if (READ_ONCE(q->skip_sock_check))
opt.flags |= TC_ETF_SKIP_SOCK_CHECK;
if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index 835b4460b448..f80bc05d4c5a 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -646,7 +646,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_lock(sch);
- q->nbands = nbands;
+ WRITE_ONCE(q->nbands, nbands);
for (i = nstrict; i < q->nstrict; i++) {
if (q->classes[i].qdisc->q.qlen) {
list_add_tail(&q->classes[i].alist, &q->active);
@@ -658,11 +658,11 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
list_del(&q->classes[i].alist);
qdisc_tree_flush_backlog(q->classes[i].qdisc);
}
- q->nstrict = nstrict;
+ WRITE_ONCE(q->nstrict, nstrict);
memcpy(q->prio2band, priomap, sizeof(priomap));
for (i = 0; i < q->nbands; i++)
- q->classes[i].quantum = quanta[i];
+ WRITE_ONCE(q->classes[i].quantum, quanta[i]);
for (i = oldbands; i < q->nbands; i++) {
q->classes[i].qdisc = queues[i];
@@ -676,7 +676,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
for (i = q->nbands; i < oldbands; i++) {
qdisc_put(q->classes[i].qdisc);
q->classes[i].qdisc = NULL;
- q->classes[i].quantum = 0;
+ WRITE_ONCE(q->classes[i].quantum, 0);
q->classes[i].deficit = 0;
gnet_stats_basic_sync_init(&q->classes[i].bstats);
memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats));
@@ -733,6 +733,7 @@ static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
struct ets_sched *q = qdisc_priv(sch);
struct nlattr *opts;
struct nlattr *nest;
+ u8 nbands, nstrict;
int band;
int prio;
int err;
@@ -745,21 +746,22 @@ static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
if (!opts)
goto nla_err;
- if (nla_put_u8(skb, TCA_ETS_NBANDS, q->nbands))
+ nbands = READ_ONCE(q->nbands);
+ if (nla_put_u8(skb, TCA_ETS_NBANDS, nbands))
goto nla_err;
- if (q->nstrict &&
- nla_put_u8(skb, TCA_ETS_NSTRICT, q->nstrict))
+ nstrict = READ_ONCE(q->nstrict);
+ if (nstrict && nla_put_u8(skb, TCA_ETS_NSTRICT, nstrict))
goto nla_err;
- if (q->nbands > q->nstrict) {
+ if (nbands > nstrict) {
nest = nla_nest_start(skb, TCA_ETS_QUANTA);
if (!nest)
goto nla_err;
- for (band = q->nstrict; band < q->nbands; band++) {
+ for (band = nstrict; band < nbands; band++) {
if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND,
- q->classes[band].quantum))
+ READ_ONCE(q->classes[band].quantum)))
goto nla_err;
}
@@ -771,7 +773,8 @@ static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
goto nla_err;
for (prio = 0; prio <= TC_PRIO_MAX; prio++) {
- if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND, q->prio2band[prio]))
+ if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND,
+ READ_ONCE(q->prio2band[prio])))
goto nla_err;
}
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 450f5c67ac49..b50b2c2cc09b 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -19,7 +19,8 @@
static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
- if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit))
+ if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <=
+ READ_ONCE(sch->limit)))
return qdisc_enqueue_tail(skb, sch);
return qdisc_drop(skb, sch, to_free);
@@ -28,7 +29,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
- if (likely(sch->q.qlen < sch->limit))
+ if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
return qdisc_enqueue_tail(skb, sch);
return qdisc_drop(skb, sch, to_free);
@@ -39,7 +40,7 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
{
unsigned int prev_backlog;
- if (likely(sch->q.qlen < sch->limit))
+ if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
return qdisc_enqueue_tail(skb, sch);
prev_backlog = sch->qstats.backlog;
@@ -105,14 +106,14 @@ static int __fifo_init(struct Qdisc *sch, struct nlattr *opt,
if (is_bfifo)
limit *= psched_mtu(qdisc_dev(sch));
- sch->limit = limit;
+ WRITE_ONCE(sch->limit, limit);
} else {
struct tc_fifo_qopt *ctl = nla_data(opt);
if (nla_len(opt) < sizeof(*ctl))
return -EINVAL;
- sch->limit = ctl->limit;
+ WRITE_ONCE(sch->limit, ctl->limit);
}
if (is_bfifo)
@@ -154,7 +155,7 @@ static void fifo_destroy(struct Qdisc *sch)
static int __fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
- struct tc_fifo_qopt opt = { .limit = sch->limit };
+ struct tc_fifo_qopt opt = { .limit = READ_ONCE(sch->limit) };
if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
goto nla_put_failure;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index cdf23ff16f40..238974725679 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -106,6 +106,8 @@ struct fq_perband_flows {
int quantum; /* based on band nr : 576KB, 192KB, 64KB */
};
+#define FQ_PRIO2BAND_CRUMB_SIZE ((TC_PRIO_MAX + 1) >> 2)
+
struct fq_sched_data {
/* Read mostly cache line */
@@ -122,7 +124,7 @@ struct fq_sched_data {
u8 rate_enable;
u8 fq_trees_log;
u8 horizon_drop;
- u8 prio2band[(TC_PRIO_MAX + 1) >> 2];
+ u8 prio2band[FQ_PRIO2BAND_CRUMB_SIZE];
u32 timer_slack; /* hrtimer slack in ns */
/* Read/Write fields. */
@@ -159,7 +161,7 @@ struct fq_sched_data {
/* return the i-th 2-bit value ("crumb") */
static u8 fq_prio2band(const u8 *prio2band, unsigned int prio)
{
- return (prio2band[prio / 4] >> (2 * (prio & 0x3))) & 0x3;
+ return (READ_ONCE(prio2band[prio / 4]) >> (2 * (prio & 0x3))) & 0x3;
}
/*
@@ -888,7 +890,7 @@ static int fq_resize(struct Qdisc *sch, u32 log)
fq_rehash(q, old_fq_root, q->fq_trees_log, array, log);
q->fq_root = array;
- q->fq_trees_log = log;
+ WRITE_ONCE(q->fq_trees_log, log);
sch_tree_unlock(sch);
@@ -927,11 +929,15 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
static void fq_prio2band_compress_crumb(const u8 *in, u8 *out)
{
const int num_elems = TC_PRIO_MAX + 1;
+ u8 tmp[FQ_PRIO2BAND_CRUMB_SIZE];
int i;
- memset(out, 0, num_elems / 4);
+ memset(tmp, 0, sizeof(tmp));
for (i = 0; i < num_elems; i++)
- out[i / 4] |= in[i] << (2 * (i & 0x3));
+ tmp[i / 4] |= in[i] << (2 * (i & 0x3));
+
+ for (i = 0; i < FQ_PRIO2BAND_CRUMB_SIZE; i++)
+ WRITE_ONCE(out[i], tmp[i]);
}
static void fq_prio2band_decompress_crumb(const u8 *in, u8 *out)
@@ -958,7 +964,7 @@ static int fq_load_weights(struct fq_sched_data *q,
}
}
for (i = 0; i < FQ_BANDS; i++)
- q->band_flows[i].quantum = weights[i];
+ WRITE_ONCE(q->band_flows[i].quantum, weights[i]);
return 0;
}
@@ -1011,16 +1017,18 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
err = -EINVAL;
}
if (tb[TCA_FQ_PLIMIT])
- sch->limit = nla_get_u32(tb[TCA_FQ_PLIMIT]);
+ WRITE_ONCE(sch->limit,
+ nla_get_u32(tb[TCA_FQ_PLIMIT]));
if (tb[TCA_FQ_FLOW_PLIMIT])
- q->flow_plimit = nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]);
+ WRITE_ONCE(q->flow_plimit,
+ nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]));
if (tb[TCA_FQ_QUANTUM]) {
u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
if (quantum > 0 && quantum <= (1 << 20)) {
- q->quantum = quantum;
+ WRITE_ONCE(q->quantum, quantum);
} else {
NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
err = -EINVAL;
@@ -1028,7 +1036,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
}
if (tb[TCA_FQ_INITIAL_QUANTUM])
- q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
+ WRITE_ONCE(q->initial_quantum,
+ nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]));
if (tb[TCA_FQ_FLOW_DEFAULT_RATE])
pr_warn_ratelimited("sch_fq: defrate %u ignored.\n",
@@ -1037,17 +1046,19 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_FQ_FLOW_MAX_RATE]) {
u32 rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
- q->flow_max_rate = (rate == ~0U) ? ~0UL : rate;
+ WRITE_ONCE(q->flow_max_rate,
+ (rate == ~0U) ? ~0UL : rate);
}
if (tb[TCA_FQ_LOW_RATE_THRESHOLD])
- q->low_rate_threshold =
- nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]);
+ WRITE_ONCE(q->low_rate_threshold,
+ nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]));
if (tb[TCA_FQ_RATE_ENABLE]) {
u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]);
if (enable <= 1)
- q->rate_enable = enable;
+ WRITE_ONCE(q->rate_enable,
+ enable);
else
err = -EINVAL;
}
@@ -1055,7 +1066,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_FQ_FLOW_REFILL_DELAY]) {
u32 usecs_delay = nla_get_u32(tb[TCA_FQ_FLOW_REFILL_DELAY]) ;
- q->flow_refill_delay = usecs_to_jiffies(usecs_delay);
+ WRITE_ONCE(q->flow_refill_delay,
+ usecs_to_jiffies(usecs_delay));
}
if (!err && tb[TCA_FQ_PRIOMAP])
@@ -1065,21 +1077,26 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
err = fq_load_weights(q, tb[TCA_FQ_WEIGHTS], extack);
if (tb[TCA_FQ_ORPHAN_MASK])
- q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]);
+ WRITE_ONCE(q->orphan_mask,
+ nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]));
if (tb[TCA_FQ_CE_THRESHOLD])
- q->ce_threshold = (u64)NSEC_PER_USEC *
- nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]);
+ WRITE_ONCE(q->ce_threshold,
+ (u64)NSEC_PER_USEC *
+ nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]));
if (tb[TCA_FQ_TIMER_SLACK])
- q->timer_slack = nla_get_u32(tb[TCA_FQ_TIMER_SLACK]);
+ WRITE_ONCE(q->timer_slack,
+ nla_get_u32(tb[TCA_FQ_TIMER_SLACK]));
if (tb[TCA_FQ_HORIZON])
- q->horizon = (u64)NSEC_PER_USEC *
- nla_get_u32(tb[TCA_FQ_HORIZON]);
+ WRITE_ONCE(q->horizon,
+ (u64)NSEC_PER_USEC *
+ nla_get_u32(tb[TCA_FQ_HORIZON]));
if (tb[TCA_FQ_HORIZON_DROP])
- q->horizon_drop = nla_get_u8(tb[TCA_FQ_HORIZON_DROP]);
+ WRITE_ONCE(q->horizon_drop,
+ nla_get_u8(tb[TCA_FQ_HORIZON_DROP]));
if (!err) {
@@ -1160,13 +1177,13 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct fq_sched_data *q = qdisc_priv(sch);
- u64 ce_threshold = q->ce_threshold;
struct tc_prio_qopt prio = {
.bands = FQ_BANDS,
};
- u64 horizon = q->horizon;
struct nlattr *opts;
+ u64 ce_threshold;
s32 weights[3];
+ u64 horizon;
opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (opts == NULL)
@@ -1174,35 +1191,48 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
/* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */
+ ce_threshold = READ_ONCE(q->ce_threshold);
do_div(ce_threshold, NSEC_PER_USEC);
+
+ horizon = READ_ONCE(q->horizon);
do_div(horizon, NSEC_PER_USEC);
- if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
- nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
- nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
- nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
- nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
+ if (nla_put_u32(skb, TCA_FQ_PLIMIT,
+ READ_ONCE(sch->limit)) ||
+ nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT,
+ READ_ONCE(q->flow_plimit)) ||
+ nla_put_u32(skb, TCA_FQ_QUANTUM,
+ READ_ONCE(q->quantum)) ||
+ nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM,
+ READ_ONCE(q->initial_quantum)) ||
+ nla_put_u32(skb, TCA_FQ_RATE_ENABLE,
+ READ_ONCE(q->rate_enable)) ||
nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE,
- min_t(unsigned long, q->flow_max_rate, ~0U)) ||
+ min_t(unsigned long,
+ READ_ONCE(q->flow_max_rate), ~0U)) ||
nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
- jiffies_to_usecs(q->flow_refill_delay)) ||
- nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
+ jiffies_to_usecs(READ_ONCE(q->flow_refill_delay))) ||
+ nla_put_u32(skb, TCA_FQ_ORPHAN_MASK,
+ READ_ONCE(q->orphan_mask)) ||
nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
- q->low_rate_threshold) ||
+ READ_ONCE(q->low_rate_threshold)) ||
nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) ||
- nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log) ||
- nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack) ||
+ nla_put_u32(skb, TCA_FQ_BUCKETS_LOG,
+ READ_ONCE(q->fq_trees_log)) ||
+ nla_put_u32(skb, TCA_FQ_TIMER_SLACK,
+ READ_ONCE(q->timer_slack)) ||
nla_put_u32(skb, TCA_FQ_HORIZON, (u32)horizon) ||
- nla_put_u8(skb, TCA_FQ_HORIZON_DROP, q->horizon_drop))
+ nla_put_u8(skb, TCA_FQ_HORIZON_DROP,
+ READ_ONCE(q->horizon_drop)))
goto nla_put_failure;
fq_prio2band_decompress_crumb(q->prio2band, prio.priomap);
if (nla_put(skb, TCA_FQ_PRIOMAP, sizeof(prio), &prio))
goto nla_put_failure;
- weights[0] = q->band_flows[0].quantum;
- weights[1] = q->band_flows[1].quantum;
- weights[2] = q->band_flows[2].quantum;
+ weights[0] = READ_ONCE(q->band_flows[0].quantum);
+ weights[1] = READ_ONCE(q->band_flows[1].quantum);
+ weights[2] = READ_ONCE(q->band_flows[2].quantum);
if (nla_put(skb, TCA_FQ_WEIGHTS, sizeof(weights), &weights))
goto nla_put_failure;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 79f9d6de6c85..4f908c11ba95 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -396,40 +396,49 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_FQ_CODEL_TARGET]) {
u64 target = nla_get_u32(tb[TCA_FQ_CODEL_TARGET]);
- q->cparams.target = (target * NSEC_PER_USEC) >> CODEL_SHIFT;
+ WRITE_ONCE(q->cparams.target,
+ (target * NSEC_PER_USEC) >> CODEL_SHIFT);
}
if (tb[TCA_FQ_CODEL_CE_THRESHOLD]) {
u64 val = nla_get_u32(tb[TCA_FQ_CODEL_CE_THRESHOLD]);
- q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+ WRITE_ONCE(q->cparams.ce_threshold,
+ (val * NSEC_PER_USEC) >> CODEL_SHIFT);
}
if (tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR])
- q->cparams.ce_threshold_selector = nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR]);
+ WRITE_ONCE(q->cparams.ce_threshold_selector,
+ nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR]));
if (tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK])
- q->cparams.ce_threshold_mask = nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK]);
+ WRITE_ONCE(q->cparams.ce_threshold_mask,
+ nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK]));
if (tb[TCA_FQ_CODEL_INTERVAL]) {
u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]);
- q->cparams.interval = (interval * NSEC_PER_USEC) >> CODEL_SHIFT;
+ WRITE_ONCE(q->cparams.interval,
+ (interval * NSEC_PER_USEC) >> CODEL_SHIFT);
}
if (tb[TCA_FQ_CODEL_LIMIT])
- sch->limit = nla_get_u32(tb[TCA_FQ_CODEL_LIMIT]);
+ WRITE_ONCE(sch->limit,
+ nla_get_u32(tb[TCA_FQ_CODEL_LIMIT]));
if (tb[TCA_FQ_CODEL_ECN])
- q->cparams.ecn = !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]);
+ WRITE_ONCE(q->cparams.ecn,
+ !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]));
if (quantum)
- q->quantum = quantum;
+ WRITE_ONCE(q->quantum, quantum);
if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])
- q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
+ WRITE_ONCE(q->drop_batch_size,
+ max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])));
if (tb[TCA_FQ_CODEL_MEMORY_LIMIT])
- q->memory_limit = min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT]));
+ WRITE_ONCE(q->memory_limit,
+ min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT])));
while (sch->q.qlen > sch->limit ||
q->memory_usage > q->memory_limit) {
@@ -522,6 +531,7 @@ init_failure:
static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
+ codel_time_t ce_threshold;
struct nlattr *opts;
opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
@@ -529,30 +539,33 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
goto nla_put_failure;
if (nla_put_u32(skb, TCA_FQ_CODEL_TARGET,
- codel_time_to_us(q->cparams.target)) ||
+ codel_time_to_us(READ_ONCE(q->cparams.target))) ||
nla_put_u32(skb, TCA_FQ_CODEL_LIMIT,
- sch->limit) ||
+ READ_ONCE(sch->limit)) ||
nla_put_u32(skb, TCA_FQ_CODEL_INTERVAL,
- codel_time_to_us(q->cparams.interval)) ||
+ codel_time_to_us(READ_ONCE(q->cparams.interval))) ||
nla_put_u32(skb, TCA_FQ_CODEL_ECN,
- q->cparams.ecn) ||
+ READ_ONCE(q->cparams.ecn)) ||
nla_put_u32(skb, TCA_FQ_CODEL_QUANTUM,
- q->quantum) ||
+ READ_ONCE(q->quantum)) ||
nla_put_u32(skb, TCA_FQ_CODEL_DROP_BATCH_SIZE,
- q->drop_batch_size) ||
+ READ_ONCE(q->drop_batch_size)) ||
nla_put_u32(skb, TCA_FQ_CODEL_MEMORY_LIMIT,
- q->memory_limit) ||
+ READ_ONCE(q->memory_limit)) ||
nla_put_u32(skb, TCA_FQ_CODEL_FLOWS,
- q->flows_cnt))
+ READ_ONCE(q->flows_cnt)))
goto nla_put_failure;
- if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD) {
+ ce_threshold = READ_ONCE(q->cparams.ce_threshold);
+ if (ce_threshold != CODEL_DISABLED_THRESHOLD) {
if (nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD,
- codel_time_to_us(q->cparams.ce_threshold)))
+ codel_time_to_us(ce_threshold)))
goto nla_put_failure;
- if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR, q->cparams.ce_threshold_selector))
+ if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR,
+ READ_ONCE(q->cparams.ce_threshold_selector)))
goto nla_put_failure;
- if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_MASK, q->cparams.ce_threshold_mask))
+ if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_MASK,
+ READ_ONCE(q->cparams.ce_threshold_mask)))
goto nla_put_failure;
}
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index 358cf304f4c9..c38f33ff80bd 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -299,8 +299,8 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_FQ_PIE_LIMIT]) {
u32 limit = nla_get_u32(tb[TCA_FQ_PIE_LIMIT]);
- q->p_params.limit = limit;
- sch->limit = limit;
+ WRITE_ONCE(q->p_params.limit, limit);
+ WRITE_ONCE(sch->limit, limit);
}
if (tb[TCA_FQ_PIE_FLOWS]) {
if (q->flows) {
@@ -322,39 +322,45 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
u32 target = nla_get_u32(tb[TCA_FQ_PIE_TARGET]);
/* convert to pschedtime */
- q->p_params.target =
- PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC);
+ WRITE_ONCE(q->p_params.target,
+ PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC));
}
/* tupdate is in jiffies */
if (tb[TCA_FQ_PIE_TUPDATE])
- q->p_params.tupdate =
- usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE]));
+ WRITE_ONCE(q->p_params.tupdate,
+ usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE])));
if (tb[TCA_FQ_PIE_ALPHA])
- q->p_params.alpha = nla_get_u32(tb[TCA_FQ_PIE_ALPHA]);
+ WRITE_ONCE(q->p_params.alpha,
+ nla_get_u32(tb[TCA_FQ_PIE_ALPHA]));
if (tb[TCA_FQ_PIE_BETA])
- q->p_params.beta = nla_get_u32(tb[TCA_FQ_PIE_BETA]);
+ WRITE_ONCE(q->p_params.beta,
+ nla_get_u32(tb[TCA_FQ_PIE_BETA]));
if (tb[TCA_FQ_PIE_QUANTUM])
- q->quantum = nla_get_u32(tb[TCA_FQ_PIE_QUANTUM]);
+ WRITE_ONCE(q->quantum, nla_get_u32(tb[TCA_FQ_PIE_QUANTUM]));
if (tb[TCA_FQ_PIE_MEMORY_LIMIT])
- q->memory_limit = nla_get_u32(tb[TCA_FQ_PIE_MEMORY_LIMIT]);
+ WRITE_ONCE(q->memory_limit,
+ nla_get_u32(tb[TCA_FQ_PIE_MEMORY_LIMIT]));
if (tb[TCA_FQ_PIE_ECN_PROB])
- q->ecn_prob = nla_get_u32(tb[TCA_FQ_PIE_ECN_PROB]);
+ WRITE_ONCE(q->ecn_prob,
+ nla_get_u32(tb[TCA_FQ_PIE_ECN_PROB]));
if (tb[TCA_FQ_PIE_ECN])
- q->p_params.ecn = nla_get_u32(tb[TCA_FQ_PIE_ECN]);
+ WRITE_ONCE(q->p_params.ecn,
+ nla_get_u32(tb[TCA_FQ_PIE_ECN]));
if (tb[TCA_FQ_PIE_BYTEMODE])
- q->p_params.bytemode = nla_get_u32(tb[TCA_FQ_PIE_BYTEMODE]);
+ WRITE_ONCE(q->p_params.bytemode,
+ nla_get_u32(tb[TCA_FQ_PIE_BYTEMODE]));
if (tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR])
- q->p_params.dq_rate_estimator =
- nla_get_u32(tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]);
+ WRITE_ONCE(q->p_params.dq_rate_estimator,
+ nla_get_u32(tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]));
/* Drop excess packets if new limit is lower */
while (sch->q.qlen > sch->limit) {
@@ -471,22 +477,23 @@ static int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb)
return -EMSGSIZE;
/* convert target from pschedtime to us */
- if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, sch->limit) ||
- nla_put_u32(skb, TCA_FQ_PIE_FLOWS, q->flows_cnt) ||
+ if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, READ_ONCE(sch->limit)) ||
+ nla_put_u32(skb, TCA_FQ_PIE_FLOWS, READ_ONCE(q->flows_cnt)) ||
nla_put_u32(skb, TCA_FQ_PIE_TARGET,
- ((u32)PSCHED_TICKS2NS(q->p_params.target)) /
+ ((u32)PSCHED_TICKS2NS(READ_ONCE(q->p_params.target))) /
NSEC_PER_USEC) ||
nla_put_u32(skb, TCA_FQ_PIE_TUPDATE,
- jiffies_to_usecs(q->p_params.tupdate)) ||
- nla_put_u32(skb, TCA_FQ_PIE_ALPHA, q->p_params.alpha) ||
- nla_put_u32(skb, TCA_FQ_PIE_BETA, q->p_params.beta) ||
- nla_put_u32(skb, TCA_FQ_PIE_QUANTUM, q->quantum) ||
- nla_put_u32(skb, TCA_FQ_PIE_MEMORY_LIMIT, q->memory_limit) ||
- nla_put_u32(skb, TCA_FQ_PIE_ECN_PROB, q->ecn_prob) ||
- nla_put_u32(skb, TCA_FQ_PIE_ECN, q->p_params.ecn) ||
- nla_put_u32(skb, TCA_FQ_PIE_BYTEMODE, q->p_params.bytemode) ||
+ jiffies_to_usecs(READ_ONCE(q->p_params.tupdate))) ||
+ nla_put_u32(skb, TCA_FQ_PIE_ALPHA, READ_ONCE(q->p_params.alpha)) ||
+ nla_put_u32(skb, TCA_FQ_PIE_BETA, READ_ONCE(q->p_params.beta)) ||
+ nla_put_u32(skb, TCA_FQ_PIE_QUANTUM, READ_ONCE(q->quantum)) ||
+ nla_put_u32(skb, TCA_FQ_PIE_MEMORY_LIMIT,
+ READ_ONCE(q->memory_limit)) ||
+ nla_put_u32(skb, TCA_FQ_PIE_ECN_PROB, READ_ONCE(q->ecn_prob)) ||
+ nla_put_u32(skb, TCA_FQ_PIE_ECN, READ_ONCE(q->p_params.ecn)) ||
+ nla_put_u32(skb, TCA_FQ_PIE_BYTEMODE, READ_ONCE(q->p_params.bytemode)) ||
nla_put_u32(skb, TCA_FQ_PIE_DQ_RATE_ESTIMATOR,
- q->p_params.dq_rate_estimator))
+ READ_ONCE(q->p_params.dq_rate_estimator)))
goto nla_put_failure;
return nla_nest_end(skb, opts);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 4a2c763e2d11..2af24547a82c 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -506,19 +506,22 @@ static void dev_watchdog(struct timer_list *t)
unsigned int timedout_ms = 0;
unsigned int i;
unsigned long trans_start;
+ unsigned long oldest_start = jiffies;
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq;
txq = netdev_get_tx_queue(dev, i);
trans_start = READ_ONCE(txq->trans_start);
- if (netif_xmit_stopped(txq) &&
- time_after(jiffies, (trans_start +
- dev->watchdog_timeo))) {
+ if (!netif_xmit_stopped(txq))
+ continue;
+ if (time_after(jiffies, trans_start + dev->watchdog_timeo)) {
timedout_ms = jiffies_to_msecs(jiffies - trans_start);
atomic_long_inc(&txq->trans_timeout);
break;
}
+ if (time_after(oldest_start, trans_start))
+ oldest_start = trans_start;
}
if (unlikely(timedout_ms)) {
@@ -531,7 +534,7 @@ static void dev_watchdog(struct timer_list *t)
netif_unfreeze_queues(dev);
}
if (!mod_timer(&dev->watchdog_timer,
- round_jiffies(jiffies +
+ round_jiffies(oldest_start +
dev->watchdog_timeo)))
release = false;
}
@@ -630,6 +633,7 @@ EXPORT_SYMBOL_GPL(netif_carrier_event);
static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
struct sk_buff **to_free)
{
+ dev_core_stats_tx_dropped_inc(skb->dev);
__qdisc_drop(skb, to_free);
return NET_XMIT_CN;
}
@@ -673,6 +677,7 @@ struct Qdisc noop_qdisc = {
.qlen = 0,
.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.skb_bad_txq.lock),
},
+ .owner = -1,
};
EXPORT_SYMBOL(noop_qdisc);
@@ -945,7 +950,9 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
__skb_queue_head_init(&sch->gso_skb);
__skb_queue_head_init(&sch->skb_bad_txq);
gnet_stats_basic_sync_init(&sch->bstats);
+ lockdep_register_key(&sch->root_lock_key);
spin_lock_init(&sch->q.lock);
+ lockdep_set_class(&sch->q.lock, &sch->root_lock_key);
if (ops->static_flags & TCQ_F_CPUSTATS) {
sch->cpu_bstats =
@@ -980,6 +987,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
return sch;
errout1:
+ lockdep_unregister_key(&sch->root_lock_key);
kfree(sch);
errout:
return ERR_PTR(err);
@@ -1068,6 +1076,7 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
if (ops->destroy)
ops->destroy(qdisc);
+ lockdep_unregister_key(&qdisc->root_lock_key);
module_put(ops->owner);
netdev_put(dev, &qdisc->dev_tracker);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 4e626df742d7..c287bf8423b4 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1174,7 +1174,8 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
}
/* classification failed, try default class */
- cl = hfsc_find_class(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
+ cl = hfsc_find_class(TC_H_MAKE(TC_H_MAJ(sch->handle),
+ READ_ONCE(q->defcls)), sch);
if (cl == NULL || cl->level > 0)
return NULL;
@@ -1443,9 +1444,7 @@ hfsc_change_qdisc(struct Qdisc *sch, struct nlattr *opt,
return -EINVAL;
qopt = nla_data(opt);
- sch_tree_lock(sch);
- q->defcls = qopt->defcls;
- sch_tree_unlock(sch);
+ WRITE_ONCE(q->defcls, qopt->defcls);
return 0;
}
@@ -1525,7 +1524,7 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
unsigned char *b = skb_tail_pointer(skb);
struct tc_hfsc_qopt qopt;
- qopt.defcls = q->defcls;
+ qopt.defcls = READ_ONCE(q->defcls);
if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 3f906df1435b..44d9efe1a96a 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -534,27 +534,31 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_lock(sch);
if (tb[TCA_HHF_BACKLOG_LIMIT])
- sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]);
+ WRITE_ONCE(sch->limit, nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]));
- q->quantum = new_quantum;
- q->hhf_non_hh_weight = new_hhf_non_hh_weight;
+ WRITE_ONCE(q->quantum, new_quantum);
+ WRITE_ONCE(q->hhf_non_hh_weight, new_hhf_non_hh_weight);
if (tb[TCA_HHF_HH_FLOWS_LIMIT])
- q->hh_flows_limit = nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT]);
+ WRITE_ONCE(q->hh_flows_limit,
+ nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT]));
if (tb[TCA_HHF_RESET_TIMEOUT]) {
u32 us = nla_get_u32(tb[TCA_HHF_RESET_TIMEOUT]);
- q->hhf_reset_timeout = usecs_to_jiffies(us);
+ WRITE_ONCE(q->hhf_reset_timeout,
+ usecs_to_jiffies(us));
}
if (tb[TCA_HHF_ADMIT_BYTES])
- q->hhf_admit_bytes = nla_get_u32(tb[TCA_HHF_ADMIT_BYTES]);
+ WRITE_ONCE(q->hhf_admit_bytes,
+ nla_get_u32(tb[TCA_HHF_ADMIT_BYTES]));
if (tb[TCA_HHF_EVICT_TIMEOUT]) {
u32 us = nla_get_u32(tb[TCA_HHF_EVICT_TIMEOUT]);
- q->hhf_evict_timeout = usecs_to_jiffies(us);
+ WRITE_ONCE(q->hhf_evict_timeout,
+ usecs_to_jiffies(us));
}
qlen = sch->q.qlen;
@@ -657,15 +661,18 @@ static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb)
if (opts == NULL)
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, sch->limit) ||
- nla_put_u32(skb, TCA_HHF_QUANTUM, q->quantum) ||
- nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT, q->hh_flows_limit) ||
+ if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, READ_ONCE(sch->limit)) ||
+ nla_put_u32(skb, TCA_HHF_QUANTUM, READ_ONCE(q->quantum)) ||
+ nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT,
+ READ_ONCE(q->hh_flows_limit)) ||
nla_put_u32(skb, TCA_HHF_RESET_TIMEOUT,
- jiffies_to_usecs(q->hhf_reset_timeout)) ||
- nla_put_u32(skb, TCA_HHF_ADMIT_BYTES, q->hhf_admit_bytes) ||
+ jiffies_to_usecs(READ_ONCE(q->hhf_reset_timeout))) ||
+ nla_put_u32(skb, TCA_HHF_ADMIT_BYTES,
+ READ_ONCE(q->hhf_admit_bytes)) ||
nla_put_u32(skb, TCA_HHF_EVICT_TIMEOUT,
- jiffies_to_usecs(q->hhf_evict_timeout)) ||
- nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT, q->hhf_non_hh_weight))
+ jiffies_to_usecs(READ_ONCE(q->hhf_evict_timeout))) ||
+ nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT,
+ READ_ONCE(q->hhf_non_hh_weight)))
goto nla_put_failure;
return nla_nest_end(skb, opts);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 93e6fb56f3b5..ff3de37874e4 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1039,13 +1039,6 @@ static void htb_work_func(struct work_struct *work)
rcu_read_unlock();
}
-static void htb_set_lockdep_class_child(struct Qdisc *q)
-{
- static struct lock_class_key child_key;
-
- lockdep_set_class(qdisc_lock(q), &child_key);
-}
-
static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
{
return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
@@ -1132,7 +1125,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
return -ENOMEM;
}
- htb_set_lockdep_class_child(qdisc);
q->direct_qdiscs[ntx] = qdisc;
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
}
@@ -1468,7 +1460,6 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
}
if (q->offload) {
- htb_set_lockdep_class_child(new);
/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
qdisc_refcount_inc(new);
old_q = htb_graft_helper(dev_queue, new);
@@ -1733,11 +1724,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg,
new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
cl->parent->common.classid,
NULL);
- if (q->offload) {
- if (new_q)
- htb_set_lockdep_class_child(new_q);
+ if (q->offload)
htb_parent_to_leaf_offload(sch, dev_queue, new_q);
- }
}
sch_tree_lock(sch);
@@ -1947,13 +1935,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
classid, NULL);
if (q->offload) {
- if (new_q) {
- htb_set_lockdep_class_child(new_q);
- /* One ref for cl->leaf.q, the other for
- * dev_queue->qdisc.
- */
+ /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
+ if (new_q)
qdisc_refcount_inc(new_q);
- }
old_q = htb_graft_helper(dev_queue, new_q);
/* No qdisc_put needed. */
WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index c2ef9dcf91d2..cc6051d4f2ef 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -91,7 +91,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
entry = tcx_entry_fetch_or_create(dev, true, &created);
if (!entry)
return -ENOMEM;
- tcx_miniq_set_active(entry, true);
+ tcx_miniq_inc(entry);
mini_qdisc_pair_init(&q->miniqp, sch, &tcx_entry(entry)->miniq);
if (created)
tcx_entry_update(dev, entry, true);
@@ -121,7 +121,7 @@ static void ingress_destroy(struct Qdisc *sch)
tcf_block_put_ext(q->block, sch, &q->block_info);
if (entry) {
- tcx_miniq_set_active(entry, false);
+ tcx_miniq_dec(entry);
if (!tcx_entry_is_active(entry)) {
tcx_entry_update(dev, NULL, true);
tcx_entry_free(entry);
@@ -257,7 +257,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
entry = tcx_entry_fetch_or_create(dev, true, &created);
if (!entry)
return -ENOMEM;
- tcx_miniq_set_active(entry, true);
+ tcx_miniq_inc(entry);
mini_qdisc_pair_init(&q->miniqp_ingress, sch, &tcx_entry(entry)->miniq);
if (created)
tcx_entry_update(dev, entry, true);
@@ -276,7 +276,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
entry = tcx_entry_fetch_or_create(dev, false, &created);
if (!entry)
return -ENOMEM;
- tcx_miniq_set_active(entry, true);
+ tcx_miniq_inc(entry);
mini_qdisc_pair_init(&q->miniqp_egress, sch, &tcx_entry(entry)->miniq);
if (created)
tcx_entry_update(dev, entry, false);
@@ -302,7 +302,7 @@ static void clsact_destroy(struct Qdisc *sch)
tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
if (ingress_entry) {
- tcx_miniq_set_active(ingress_entry, false);
+ tcx_miniq_dec(ingress_entry);
if (!tcx_entry_is_active(ingress_entry)) {
tcx_entry_update(dev, NULL, true);
tcx_entry_free(ingress_entry);
@@ -310,7 +310,7 @@ static void clsact_destroy(struct Qdisc *sch)
}
if (egress_entry) {
- tcx_miniq_set_active(egress_entry, false);
+ tcx_miniq_dec(egress_entry);
if (!tcx_entry_is_active(egress_entry)) {
tcx_entry_update(dev, NULL, false);
tcx_entry_free(egress_entry);
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 225353fbb3f1..51d4013b6121 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -215,10 +215,8 @@ static int mqprio_parse_tc_entries(struct Qdisc *sch, struct nlattr *nlattr_opt,
for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
fp[tc] = priv->fp[tc];
- nla_for_each_attr(n, nlattr_opt, nlattr_opt_len, rem) {
- if (nla_type(n) != TCA_MQPRIO_TC_ENTRY)
- continue;
-
+ nla_for_each_attr_type(n, TCA_MQPRIO_TC_ENTRY, nlattr_opt,
+ nlattr_opt_len, rem) {
err = mqprio_parse_tc_entry(fp, n, &seen_tcs, extack);
if (err)
goto out;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 79e93a19d5fa..06e03f5cd7ce 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -185,7 +185,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
qopt->bands = qdisc_dev(sch)->real_num_tx_queues;
- removed = kmalloc(sizeof(*removed) * (q->max_bands - q->bands),
+ removed = kmalloc(sizeof(*removed) * (q->max_bands - qopt->bands),
GFP_KERNEL);
if (!removed)
return -ENOMEM;
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 1764059b0635..b3dcb845b327 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -156,36 +156,38 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt,
u32 target = nla_get_u32(tb[TCA_PIE_TARGET]);
/* convert to pschedtime */
- q->params.target = PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC);
+ WRITE_ONCE(q->params.target,
+ PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC));
}
/* tupdate is in jiffies */
if (tb[TCA_PIE_TUPDATE])
- q->params.tupdate =
- usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE]));
+ WRITE_ONCE(q->params.tupdate,
+ usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE])));
if (tb[TCA_PIE_LIMIT]) {
u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]);
- q->params.limit = limit;
- sch->limit = limit;
+ WRITE_ONCE(q->params.limit, limit);
+ WRITE_ONCE(sch->limit, limit);
}
if (tb[TCA_PIE_ALPHA])
- q->params.alpha = nla_get_u32(tb[TCA_PIE_ALPHA]);
+ WRITE_ONCE(q->params.alpha, nla_get_u32(tb[TCA_PIE_ALPHA]));
if (tb[TCA_PIE_BETA])
- q->params.beta = nla_get_u32(tb[TCA_PIE_BETA]);
+ WRITE_ONCE(q->params.beta, nla_get_u32(tb[TCA_PIE_BETA]));
if (tb[TCA_PIE_ECN])
- q->params.ecn = nla_get_u32(tb[TCA_PIE_ECN]);
+ WRITE_ONCE(q->params.ecn, nla_get_u32(tb[TCA_PIE_ECN]));
if (tb[TCA_PIE_BYTEMODE])
- q->params.bytemode = nla_get_u32(tb[TCA_PIE_BYTEMODE]);
+ WRITE_ONCE(q->params.bytemode,
+ nla_get_u32(tb[TCA_PIE_BYTEMODE]));
if (tb[TCA_PIE_DQ_RATE_ESTIMATOR])
- q->params.dq_rate_estimator =
- nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR]);
+ WRITE_ONCE(q->params.dq_rate_estimator,
+ nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR]));
/* Drop excess packets if new limit is lower */
qlen = sch->q.qlen;
@@ -469,17 +471,18 @@ static int pie_dump(struct Qdisc *sch, struct sk_buff *skb)
/* convert target from pschedtime to us */
if (nla_put_u32(skb, TCA_PIE_TARGET,
- ((u32)PSCHED_TICKS2NS(q->params.target)) /
+ ((u32)PSCHED_TICKS2NS(READ_ONCE(q->params.target))) /
NSEC_PER_USEC) ||
- nla_put_u32(skb, TCA_PIE_LIMIT, sch->limit) ||
+ nla_put_u32(skb, TCA_PIE_LIMIT, READ_ONCE(sch->limit)) ||
nla_put_u32(skb, TCA_PIE_TUPDATE,
- jiffies_to_usecs(q->params.tupdate)) ||
- nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) ||
- nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) ||
+ jiffies_to_usecs(READ_ONCE(q->params.tupdate))) ||
+ nla_put_u32(skb, TCA_PIE_ALPHA, READ_ONCE(q->params.alpha)) ||
+ nla_put_u32(skb, TCA_PIE_BETA, READ_ONCE(q->params.beta)) ||
nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) ||
- nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode) ||
+ nla_put_u32(skb, TCA_PIE_BYTEMODE,
+ READ_ONCE(q->params.bytemode)) ||
nla_put_u32(skb, TCA_PIE_DQ_RATE_ESTIMATOR,
- q->params.dq_rate_estimator))
+ READ_ONCE(q->params.dq_rate_estimator)))
goto nla_put_failure;
return nla_nest_end(skb, opts);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index e66f4afb920d..3b9245a3c767 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -608,6 +608,7 @@ static void sfq_perturbation(struct timer_list *t)
struct Qdisc *sch = q->sch;
spinlock_t *root_lock;
siphash_key_t nkey;
+ int period;
get_random_bytes(&nkey, sizeof(nkey));
rcu_read_lock();
@@ -618,8 +619,12 @@ static void sfq_perturbation(struct timer_list *t)
sfq_rehash(sch);
spin_unlock(root_lock);
- if (q->perturb_period)
- mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
+ /* q->perturb_period can change under us from
+ * sfq_change() and sfq_destroy().
+ */
+ period = READ_ONCE(q->perturb_period);
+ if (period)
+ mod_timer(&q->perturb_timer, jiffies + period);
rcu_read_unlock();
}
@@ -662,7 +667,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
q->quantum = ctl->quantum;
q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
}
- q->perturb_period = ctl->perturb_period * HZ;
+ WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ);
if (ctl->flows)
q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
if (ctl->divisor) {
@@ -724,7 +729,7 @@ static void sfq_destroy(struct Qdisc *sch)
struct sfq_sched_data *q = qdisc_priv(sch);
tcf_block_put(q->block);
- q->perturb_period = 0;
+ WRITE_ONCE(q->perturb_period, 0);
del_timer_sync(&q->perturb_timer);
sfq_free(q->ht);
sfq_free(q->slots);
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
index b4dd626c309c..20ff7386b74b 100644
--- a/net/sched/sch_skbprio.c
+++ b/net/sched/sch_skbprio.c
@@ -79,7 +79,9 @@ static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
prio = min(skb->priority, max_priority);
qdisc = &q->qdiscs[prio];
- if (sch->q.qlen < sch->limit) {
+
+ /* sch->limit can change under us from skbprio_change() */
+ if (sch->q.qlen < READ_ONCE(sch->limit)) {
__skb_queue_tail(qdisc, skb);
qdisc_qstats_backlog_inc(sch, skb);
q->qstats[prio].backlog += qdisc_pkt_len(skb);
@@ -172,7 +174,7 @@ static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
if (opt->nla_len != nla_attr_size(sizeof(*ctl)))
return -EINVAL;
- sch->limit = ctl->limit;
+ WRITE_ONCE(sch->limit, ctl->limit);
return 0;
}
@@ -200,7 +202,7 @@ static int skbprio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct tc_skbprio_qopt opt;
- opt.limit = sch->limit;
+ opt.limit = READ_ONCE(sch->limit);
if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
return -1;
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index a0d54b422186..cc2df9f8c14a 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1151,11 +1151,6 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
list_for_each_entry(entry, &new->entries, list)
cycle = ktime_add_ns(cycle, entry->interval);
- if (!cycle) {
- NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0");
- return -EINVAL;
- }
-
if (cycle < 0 || cycle > INT_MAX) {
NL_SET_ERR_MSG(extack, "'cycle_time' is too big");
return -EINVAL;
@@ -1164,6 +1159,11 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
new->cycle_time = cycle;
}
+ if (new->cycle_time < new->num_entries * length_to_duration(q, ETH_ZLEN)) {
+ NL_SET_ERR_MSG(extack, "'cycle_time' is too small");
+ return -EINVAL;
+ }
+
taprio_calculate_gate_durations(q, new);
return 0;
@@ -1176,16 +1176,13 @@ static int taprio_parse_mqprio_opt(struct net_device *dev,
{
bool allow_overlapping_txqs = TXTIME_ASSIST_IS_ENABLED(taprio_flags);
- if (!qopt && !dev->num_tc) {
- NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
- return -EINVAL;
- }
-
- /* If num_tc is already set, it means that the user already
- * configured the mqprio part
- */
- if (dev->num_tc)
+ if (!qopt) {
+ if (!dev->num_tc) {
+ NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
+ return -EINVAL;
+ }
return 0;
+ }
/* taprio imposes that traffic classes map 1:n to tx queues */
if (qopt->num_tc > dev->num_tx_queues) {
@@ -1613,7 +1610,7 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
const struct ethtool_ops *ops = dev->ethtool_ops;
- struct ethtool_ts_info info = {
+ struct kernel_ethtool_ts_info info = {
.cmd = ETHTOOL_GET_TS_INFO,
.phc_index = -1,
};
@@ -1752,10 +1749,7 @@ static int taprio_parse_tc_entries(struct Qdisc *sch,
fp[tc] = q->fp[tc];
}
- nla_for_each_nested(n, opt, rem) {
- if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY)
- continue;
-
+ nla_for_each_nested_type(n, TCA_TAPRIO_ATTR_TC_ENTRY, opt, rem) {
err = taprio_parse_tc_entry(sch, n, max_sdu, fp, &seen_tcs,
extack);
if (err)
@@ -1851,6 +1845,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
}
q->flags = taprio_flags;
+ /* Needed for length_to_duration() during netlink attribute parsing */
+ taprio_set_picos_per_byte(dev, q);
+
err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
if (err < 0)
return err;
@@ -1910,7 +1907,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
if (err < 0)
goto free_sched;
- taprio_set_picos_per_byte(dev, q);
taprio_update_queue_max_sdu(q, new_admin, stab);
if (FULL_OFFLOAD_IS_ENABLED(q->flags))
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 59304611dc00..8badec6d82a2 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -78,7 +78,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
struct net_device *dev = qdisc_dev(sch);
struct teql_sched_data *q = qdisc_priv(sch);
- if (q->q.qlen < dev->tx_queue_len) {
+ if (q->q.qlen < READ_ONCE(dev->tx_queue_len)) {
__skb_queue_tail(&q->q, skb);
return NET_XMIT_SUCCESS;
}
@@ -424,7 +424,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu)
} while ((q = NEXT_SLAVE(q)) != m->slaves);
}
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 17fcaa9b0df9..a8a254a5008e 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -735,15 +735,19 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep)
struct sock *sk = ep->base.sk;
struct net *net = sock_net(sk);
struct sctp_hashbucket *head;
+ int err = 0;
ep->hashent = sctp_ep_hashfn(net, ep->base.bind_addr.port);
head = &sctp_ep_hashtable[ep->hashent];
+ write_lock(&head->lock);
if (sk->sk_reuseport) {
bool any = sctp_is_ep_boundall(sk);
struct sctp_endpoint *ep2;
struct list_head *list;
- int cnt = 0, err = 1;
+ int cnt = 0;
+
+ err = 1;
list_for_each(list, &ep->base.bind_addr.address_list)
cnt++;
@@ -761,24 +765,24 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep)
if (!err) {
err = reuseport_add_sock(sk, sk2, any);
if (err)
- return err;
+ goto out;
break;
} else if (err < 0) {
- return err;
+ goto out;
}
}
if (err) {
err = reuseport_alloc(sk, any);
if (err)
- return err;
+ goto out;
}
}
- write_lock(&head->lock);
hlist_add_head(&ep->node, &head->chain);
+out:
write_unlock(&head->lock);
- return 0;
+ return err;
}
/* Add an endpoint to the hash. Local BH-safe. */
@@ -803,10 +807,9 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep)
head = &sctp_ep_hashtable[ep->hashent];
+ write_lock(&head->lock);
if (rcu_access_pointer(sk->sk_reuseport_cb))
reuseport_detach_sock(sk);
-
- write_lock(&head->lock);
hlist_del_init(&ep->node);
write_unlock(&head->lock);
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 24368f755ab1..f7b809c0d142 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -415,7 +415,7 @@ out:
if (!IS_ERR_OR_NULL(dst)) {
struct rt6_info *rt;
- rt = (struct rt6_info *)dst;
+ rt = dst_rt6_info(dst);
t->dst_cookie = rt6_get_cookie(rt);
pr_debug("rt6_dst:%pI6/%d rt6_src:%pI6\n",
&rt->rt6i_dst.addr, rt->rt6i_dst.plen,
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e849f368ed91..5a7436a13b74 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -552,7 +552,7 @@ static void sctp_v4_get_saddr(struct sctp_sock *sk,
struct flowi *fl)
{
union sctp_addr *saddr = &t->saddr;
- struct rtable *rt = (struct rtable *)t->dst;
+ struct rtable *rt = dst_rtable(t->dst);
if (rt) {
saddr->v4.sin_family = AF_INET;
@@ -1085,7 +1085,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, struct sctp_transport *t)
skb_reset_inner_mac_header(skb);
skb_reset_inner_transport_header(skb);
skb_set_inner_ipproto(skb, IPPROTO_SCTP);
- udp_tunnel_xmit_skb((struct rtable *)dst, sk, skb, fl4->saddr,
+ udp_tunnel_xmit_skb(dst_rtable(dst), sk, skb, fl4->saddr,
fl4->daddr, dscp, ip4_dst_hoplimit(dst), df,
sctp_sk(sk)->udp_port, t->encap_port, false, false);
return 0;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 08fdf1251f46..5adf0c0a6c1a 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -38,6 +38,7 @@
#include <linux/inet.h>
#include <linux/slab.h>
#include <net/sock.h>
+#include <net/proto_memory.h>
#include <net/inet_ecn.h>
#include <linux/skbuff.h>
#include <net/sctp/sctp.h>
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index c67679a41044..32f76f1298da 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4834,10 +4834,14 @@ int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr,
return sctp_connect(sock->sk, uaddr, addr_len, flags);
}
-/* FIXME: Write comments. */
+/* Only called when shutdown a listening SCTP socket. */
static int sctp_disconnect(struct sock *sk, int flags)
{
- return -EOPNOTSUPP; /* STUB */
+ if (!sctp_style(sk, TCP))
+ return -EOPNOTSUPP;
+
+ sk->sk_shutdown |= RCV_SHUTDOWN;
+ return 0;
}
/* 4.1.4 accept() - TCP Style Syntax
@@ -4847,7 +4851,7 @@ static int sctp_disconnect(struct sock *sk, int flags)
* descriptor will be returned from accept() to represent the newly
* formed association.
*/
-static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
+static struct sock *sctp_accept(struct sock *sk, struct proto_accept_arg *arg)
{
struct sctp_sock *sp;
struct sctp_endpoint *ep;
@@ -4866,12 +4870,13 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
goto out;
}
- if (!sctp_sstate(sk, LISTENING)) {
+ if (!sctp_sstate(sk, LISTENING) ||
+ (sk->sk_shutdown & RCV_SHUTDOWN)) {
error = -EINVAL;
goto out;
}
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
error = sctp_wait_for_accept(sk, timeo);
if (error)
@@ -4882,7 +4887,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
*/
asoc = list_entry(ep->asocs.next, struct sctp_association, asocs);
- newsk = sp->pf->create_accept_sk(sk, asoc, kern);
+ newsk = sp->pf->create_accept_sk(sk, asoc, arg->kern);
if (!newsk) {
error = -ENOMEM;
goto out;
@@ -4899,7 +4904,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
out:
release_sock(sk);
- *err = error;
+ arg->err = error;
return newsk;
}
@@ -7119,6 +7124,7 @@ static int sctp_getsockopt_assoc_ids(struct sock *sk, int len,
struct sctp_sock *sp = sctp_sk(sk);
struct sctp_association *asoc;
struct sctp_assoc_ids *ids;
+ size_t ids_size;
u32 num = 0;
if (sctp_style(sk, TCP))
@@ -7131,11 +7137,11 @@ static int sctp_getsockopt_assoc_ids(struct sock *sk, int len,
num++;
}
- if (len < sizeof(struct sctp_assoc_ids) + sizeof(sctp_assoc_t) * num)
+ ids_size = struct_size(ids, gaids_assoc_id, num);
+ if (len < ids_size)
return -EINVAL;
- len = sizeof(struct sctp_assoc_ids) + sizeof(sctp_assoc_t) * num;
-
+ len = ids_size;
ids = kmalloc(len, GFP_USER | __GFP_NOWARN);
if (unlikely(!ids))
return -ENOMEM;
@@ -9276,7 +9282,7 @@ void sctp_data_ready(struct sock *sk)
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN |
EPOLLRDNORM | EPOLLRDBAND);
- sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+ sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
}
@@ -9392,7 +9398,8 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo)
}
err = -EINVAL;
- if (!sctp_sstate(sk, LISTENING))
+ if (!sctp_sstate(sk, LISTENING) ||
+ (sk->sk_shutdown & RCV_SHUTDOWN))
break;
err = 0;
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index f65d6f92afcb..e5a5af343c4c 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -43,19 +43,19 @@ static unsigned long max_autoclose_max =
(MAX_SCHEDULE_TIMEOUT / HZ > UINT_MAX)
? UINT_MAX : MAX_SCHEDULE_TIMEOUT / HZ;
-static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write,
+static int proc_sctp_do_hmac_alg(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
-static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write,
+static int proc_sctp_do_rto_min(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
-static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, void *buffer,
+static int proc_sctp_do_rto_max(const struct ctl_table *ctl, int write, void *buffer,
size_t *lenp, loff_t *ppos);
-static int proc_sctp_do_udp_port(struct ctl_table *ctl, int write, void *buffer,
+static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write, void *buffer,
size_t *lenp, loff_t *ppos);
-static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write,
+static int proc_sctp_do_alpha_beta(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
-static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
+static int proc_sctp_do_auth(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
-static int proc_sctp_do_probe_interval(struct ctl_table *ctl, int write,
+static int proc_sctp_do_probe_interval(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
static struct ctl_table sctp_table[] = {
@@ -80,8 +80,6 @@ static struct ctl_table sctp_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
-
- { /* sentinel */ }
};
/* The following index defines are used in sctp_sysctl_net_register().
@@ -384,11 +382,9 @@ static struct ctl_table sctp_net_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = &pf_expose_max,
},
-
- { /* sentinel */ }
};
-static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write,
+static int proc_sctp_do_hmac_alg(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = current->nsproxy->net_ns;
@@ -433,7 +429,7 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write,
return ret;
}
-static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write,
+static int proc_sctp_do_rto_min(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = current->nsproxy->net_ns;
@@ -461,7 +457,7 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write,
return ret;
}
-static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write,
+static int proc_sctp_do_rto_max(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = current->nsproxy->net_ns;
@@ -489,7 +485,7 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write,
return ret;
}
-static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write,
+static int proc_sctp_do_alpha_beta(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
if (write)
@@ -499,7 +495,7 @@ static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write,
return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
}
-static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
+static int proc_sctp_do_auth(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = current->nsproxy->net_ns;
@@ -528,7 +524,7 @@ static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
return ret;
}
-static int proc_sctp_do_udp_port(struct ctl_table *ctl, int write,
+static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = current->nsproxy->net_ns;
@@ -569,7 +565,7 @@ static int proc_sctp_do_udp_port(struct ctl_table *ctl, int write,
return ret;
}
-static int proc_sctp_do_probe_interval(struct ctl_table *ctl, int write,
+static int proc_sctp_do_probe_interval(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = current->nsproxy->net_ns;
@@ -597,6 +593,7 @@ static int proc_sctp_do_probe_interval(struct ctl_table *ctl, int write,
int sctp_sysctl_net_register(struct net *net)
{
+ size_t table_size = ARRAY_SIZE(sctp_net_table);
struct ctl_table *table;
int i;
@@ -604,7 +601,7 @@ int sctp_sysctl_net_register(struct net *net)
if (!table)
return -ENOMEM;
- for (i = 0; table[i].data; i++)
+ for (i = 0; i < table_size; i++)
table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp;
table[SCTP_RTO_MIN_IDX].extra2 = &net->sctp.rto_max;
@@ -613,8 +610,7 @@ int sctp_sysctl_net_register(struct net *net)
table[SCTP_PS_RETRANS_IDX].extra1 = &net->sctp.pf_retrans;
net->sctp.sysctl_header = register_net_sysctl_sz(net, "net/sctp",
- table,
- ARRAY_SIZE(sctp_net_table));
+ table, table_size);
if (net->sctp.sysctl_header == NULL) {
kfree(table);
return -ENOMEM;
@@ -624,7 +620,7 @@ int sctp_sysctl_net_register(struct net *net)
void sctp_sysctl_net_unregister(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
table = net->sctp.sysctl_header->ctl_table_arg;
unregister_net_sysctl_table(net->sctp.sysctl_header);
diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index 746be3996768..ba5e6a2dd2fd 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -20,3 +20,16 @@ config SMC_DIAG
smcss.
if unsure, say Y.
+
+config SMC_LO
+ bool "SMC intra-OS shortcut with loopback-ism"
+ depends on SMC
+ default n
+ help
+ SMC_LO enables the creation of an Emulated-ISM device named
+ loopback-ism in SMC and makes use of it for transferring data
+ when communication occurs within the same OS. This helps in
+ convenient testing of SMC-D since loopback-ism is independent
+ of architecture or hardware.
+
+ if unsure, say N.
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 875efcd126a2..60f1c87d5212 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -4,5 +4,6 @@ obj-$(CONFIG_SMC) += smc.o
obj-$(CONFIG_SMC_DIAG) += smc_diag.o
smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
-smc-y += smc_tracepoint.o
+smc-y += smc_tracepoint.o smc_inet.o
smc-$(CONFIG_SYSCTL) += smc_sysctl.o
+smc-$(CONFIG_SMC_LO) += smc_loopback.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 4b52b3b159c0..8e3093938cd2 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -53,6 +53,8 @@
#include "smc_stats.h"
#include "smc_tracepoint.h"
#include "smc_sysctl.h"
+#include "smc_loopback.h"
+#include "smc_inet.h"
static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
* creation on server
@@ -169,11 +171,11 @@ static bool smc_hs_congested(const struct sock *sk)
return false;
}
-static struct smc_hashinfo smc_v4_hashinfo = {
+struct smc_hashinfo smc_v4_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};
-static struct smc_hashinfo smc_v6_hashinfo = {
+struct smc_hashinfo smc_v6_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};
@@ -191,7 +193,6 @@ int smc_hash_sk(struct sock *sk)
return 0;
}
-EXPORT_SYMBOL_GPL(smc_hash_sk);
void smc_unhash_sk(struct sock *sk)
{
@@ -202,13 +203,12 @@ void smc_unhash_sk(struct sock *sk)
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
write_unlock_bh(&h->lock);
}
-EXPORT_SYMBOL_GPL(smc_unhash_sk);
/* This will be called before user really release sock_lock. So do the
* work which we didn't do because of user hold the sock_lock in the
* BH context
*/
-static void smc_release_cb(struct sock *sk)
+void smc_release_cb(struct sock *sk)
{
struct smc_sock *smc = smc_sk(sk);
@@ -308,7 +308,7 @@ static int __smc_release(struct smc_sock *smc)
return rc;
}
-static int smc_release(struct socket *sock)
+int smc_release(struct socket *sock)
{
struct sock *sk = sock->sk;
struct smc_sock *smc;
@@ -362,25 +362,15 @@ static void smc_destruct(struct sock *sk)
return;
}
-static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
- int protocol)
+void smc_sk_init(struct net *net, struct sock *sk, int protocol)
{
- struct smc_sock *smc;
- struct proto *prot;
- struct sock *sk;
-
- prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
- sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
- if (!sk)
- return NULL;
+ struct smc_sock *smc = smc_sk(sk);
- sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
sk->sk_state = SMC_INIT;
sk->sk_destruct = smc_destruct;
sk->sk_protocol = protocol;
WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
- smc = smc_sk(sk);
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_WORK(&smc->connect_work, smc_connect_work);
INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
@@ -390,12 +380,30 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
sk->sk_prot->hash(sk);
mutex_init(&smc->clcsock_release_lock);
smc_init_saved_callbacks(smc);
+ smc->limit_smc_hs = net->smc.limit_smc_hs;
+ smc->use_fallback = false; /* assume rdma capability first */
+ smc->fallback_rsn = 0;
+}
+
+static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
+ int protocol)
+{
+ struct proto *prot;
+ struct sock *sk;
+
+ prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
+ sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
+ if (!sk)
+ return NULL;
+
+ sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
+ smc_sk_init(net, sk, protocol);
return sk;
}
-static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
- int addr_len)
+int smc_bind(struct socket *sock, struct sockaddr *uaddr,
+ int addr_len)
{
struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct sock *sk = sock->sk;
@@ -460,29 +468,11 @@ out:
static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk,
unsigned long mask)
{
- struct net *nnet = sock_net(nsk);
-
nsk->sk_userlocks = osk->sk_userlocks;
- if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) {
+ if (osk->sk_userlocks & SOCK_SNDBUF_LOCK)
nsk->sk_sndbuf = osk->sk_sndbuf;
- } else {
- if (mask == SK_FLAGS_SMC_TO_CLC)
- WRITE_ONCE(nsk->sk_sndbuf,
- READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1]));
- else
- WRITE_ONCE(nsk->sk_sndbuf,
- 2 * READ_ONCE(nnet->smc.sysctl_wmem));
- }
- if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) {
+ if (osk->sk_userlocks & SOCK_RCVBUF_LOCK)
nsk->sk_rcvbuf = osk->sk_rcvbuf;
- } else {
- if (mask == SK_FLAGS_SMC_TO_CLC)
- WRITE_ONCE(nsk->sk_rcvbuf,
- READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1]));
- else
- WRITE_ONCE(nsk->sk_rcvbuf,
- 2 * READ_ONCE(nnet->smc.sysctl_rmem));
- }
}
static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
@@ -1437,6 +1427,14 @@ static int smc_connect_ism(struct smc_sock *smc,
}
smc_conn_save_peer_info(smc, aclc);
+
+ if (smc_ism_support_dmb_nocopy(smc->conn.lgr->smcd)) {
+ rc = smcd_buf_attach(smc);
+ if (rc) {
+ rc = SMC_CLC_DECL_MEM; /* try to fallback */
+ goto connect_abort;
+ }
+ }
smc_close_init(smc);
smc_rx_init(smc);
smc_tx_init(smc);
@@ -1634,8 +1632,8 @@ out:
release_sock(&smc->sk);
}
-static int smc_connect(struct socket *sock, struct sockaddr *addr,
- int alen, int flags)
+int smc_connect(struct socket *sock, struct sockaddr *addr,
+ int alen, int flags)
{
struct sock *sk = sock->sk;
struct smc_sock *smc;
@@ -2541,6 +2539,14 @@ static void smc_listen_work(struct work_struct *work)
mutex_unlock(&smc_server_lgr_pending);
}
smc_conn_save_peer_info(new_smc, cclc);
+
+ if (ini->is_smcd &&
+ smc_ism_support_dmb_nocopy(new_smc->conn.lgr->smcd)) {
+ rc = smcd_buf_attach(new_smc);
+ if (rc)
+ goto out_decl;
+ }
+
smc_listen_out_connected(new_smc);
SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini);
goto out_free;
@@ -2608,7 +2614,7 @@ out:
read_unlock_bh(&listen_clcsock->sk_callback_lock);
}
-static int smc_listen(struct socket *sock, int backlog)
+int smc_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
struct smc_sock *smc;
@@ -2673,8 +2679,8 @@ out:
return rc;
}
-static int smc_accept(struct socket *sock, struct socket *new_sock,
- int flags, bool kern)
+int smc_accept(struct socket *sock, struct socket *new_sock,
+ struct proto_accept_arg *arg)
{
struct sock *sk = sock->sk, *nsk;
DECLARE_WAITQUEUE(wait, current);
@@ -2693,7 +2699,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
}
/* Wait for an incoming connection */
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
add_wait_queue_exclusive(sk_sleep(sk), &wait);
while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
set_current_state(TASK_INTERRUPTIBLE);
@@ -2720,7 +2726,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
if (rc)
goto out;
- if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
+ if (lsmc->sockopt_defer_accept && !(arg->flags & O_NONBLOCK)) {
/* wait till data arrives on the socket */
timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
MSEC_PER_SEC);
@@ -2743,8 +2749,8 @@ out:
return rc;
}
-static int smc_getname(struct socket *sock, struct sockaddr *addr,
- int peer)
+int smc_getname(struct socket *sock, struct sockaddr *addr,
+ int peer)
{
struct smc_sock *smc;
@@ -2757,7 +2763,7 @@ static int smc_getname(struct socket *sock, struct sockaddr *addr,
return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
}
-static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
struct smc_sock *smc;
@@ -2795,8 +2801,8 @@ out:
return rc;
}
-static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
- int flags)
+int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags)
{
struct sock *sk = sock->sk;
struct smc_sock *smc;
@@ -2845,8 +2851,8 @@ static __poll_t smc_accept_poll(struct sock *parent)
return mask;
}
-static __poll_t smc_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+__poll_t smc_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
{
struct sock *sk = sock->sk;
struct smc_sock *smc;
@@ -2898,7 +2904,7 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
return mask;
}
-static int smc_shutdown(struct socket *sock, int how)
+int smc_shutdown(struct socket *sock, int how)
{
struct sock *sk = sock->sk;
bool do_shutdown = true;
@@ -3038,8 +3044,8 @@ static int __smc_setsockopt(struct socket *sock, int level, int optname,
return rc;
}
-static int smc_setsockopt(struct socket *sock, int level, int optname,
- sockptr_t optval, unsigned int optlen)
+int smc_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct smc_sock *smc;
@@ -3125,8 +3131,8 @@ out:
return rc;
}
-static int smc_getsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int __user *optlen)
+int smc_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *optlen)
{
struct smc_sock *smc;
int rc;
@@ -3151,8 +3157,8 @@ static int smc_getsockopt(struct socket *sock, int level, int optname,
return rc;
}
-static int smc_ioctl(struct socket *sock, unsigned int cmd,
- unsigned long arg)
+int smc_ioctl(struct socket *sock, unsigned int cmd,
+ unsigned long arg)
{
union smc_host_cursor cons, urg;
struct smc_connection *conn;
@@ -3238,9 +3244,9 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
* Note that subsequent recv() calls have to wait till all splice() processing
* completed.
*/
-static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t len,
- unsigned int flags)
+ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
{
struct sock *sk = sock->sk;
struct smc_sock *smc;
@@ -3306,6 +3312,29 @@ static const struct proto_ops smc_sock_ops = {
.splice_read = smc_splice_read,
};
+int smc_create_clcsk(struct net *net, struct sock *sk, int family)
+{
+ struct smc_sock *smc = smc_sk(sk);
+ int rc;
+
+ rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
+ &smc->clcsock);
+ if (rc)
+ return rc;
+
+ /* smc_clcsock_release() does not wait smc->clcsock->sk's
+ * destruction; its sk_state might not be TCP_CLOSE after
+ * smc->sk is close()d, and TCP timers can be fired later,
+ * which need net ref.
+ */
+ sk = smc->clcsock->sk;
+ __netns_tracker_free(net, &sk->ns_tracker, false);
+ sk->sk_net_refcnt = 1;
+ get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
+ sock_inuse_add(net, 1);
+ return 0;
+}
+
static int __smc_create(struct net *net, struct socket *sock, int protocol,
int kern, struct socket *clcsock)
{
@@ -3331,35 +3360,15 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
/* create internal TCP socket for CLC handshake and fallback */
smc = smc_sk(sk);
- smc->use_fallback = false; /* assume rdma capability first */
- smc->fallback_rsn = 0;
-
- /* default behavior from limit_smc_hs in every net namespace */
- smc->limit_smc_hs = net->smc.limit_smc_hs;
rc = 0;
- if (!clcsock) {
- rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
- &smc->clcsock);
- if (rc) {
- sk_common_release(sk);
- goto out;
- }
-
- /* smc_clcsock_release() does not wait smc->clcsock->sk's
- * destruction; its sk_state might not be TCP_CLOSE after
- * smc->sk is close()d, and TCP timers can be fired later,
- * which need net ref.
- */
- sk = smc->clcsock->sk;
- __netns_tracker_free(net, &sk->ns_tracker, false);
- sk->sk_net_refcnt = 1;
- get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
- sock_inuse_add(net, 1);
- } else {
+ if (clcsock)
smc->clcsock = clcsock;
- }
+ else
+ rc = smc_create_clcsk(net, sk, family);
+ if (rc)
+ sk_common_release(sk);
out:
return rc;
}
@@ -3557,15 +3566,28 @@ static int __init smc_init(void)
goto out_sock;
}
- rc = tcp_register_ulp(&smc_ulp_ops);
+ rc = smc_loopback_init();
if (rc) {
- pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc);
+ pr_err("%s: smc_loopback_init fails with %d\n", __func__, rc);
goto out_ib;
}
+ rc = tcp_register_ulp(&smc_ulp_ops);
+ if (rc) {
+ pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc);
+ goto out_lo;
+ }
+ rc = smc_inet_init();
+ if (rc) {
+ pr_err("%s: smc_inet_init fails with %d\n", __func__, rc);
+ goto out_ulp;
+ }
static_branch_enable(&tcp_have_smc);
return 0;
-
+out_ulp:
+ tcp_unregister_ulp(&smc_ulp_ops);
+out_lo:
+ smc_loopback_exit();
out_ib:
smc_ib_unregister_client();
out_sock:
@@ -3600,9 +3622,11 @@ out_pernet_subsys:
static void __exit smc_exit(void)
{
static_branch_disable(&tcp_have_smc);
+ smc_inet_exit();
tcp_unregister_ulp(&smc_ulp_ops);
sock_unregister(PF_SMC);
smc_core_exit();
+ smc_loopback_exit();
smc_ib_unregister_client();
smc_ism_exit();
destroy_workqueue(smc_close_wq);
@@ -3626,4 +3650,9 @@ MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);
MODULE_ALIAS_TCP_ULP("smc");
+/* 256 for IPPROTO_SMC and 1 for SOCK_STREAM */
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 256, 1);
+#if IS_ENABLED(CONFIG_IPV6)
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 256, 1);
+#endif /* CONFIG_IPV6 */
MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 18c8b7870198..34b781e463c4 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -34,6 +34,44 @@
extern struct proto smc_proto;
extern struct proto smc_proto6;
+extern struct smc_hashinfo smc_v4_hashinfo;
+extern struct smc_hashinfo smc_v6_hashinfo;
+
+int smc_hash_sk(struct sock *sk);
+void smc_unhash_sk(struct sock *sk);
+void smc_release_cb(struct sock *sk);
+
+int smc_release(struct socket *sock);
+int smc_bind(struct socket *sock, struct sockaddr *uaddr,
+ int addr_len);
+int smc_connect(struct socket *sock, struct sockaddr *addr,
+ int alen, int flags);
+int smc_accept(struct socket *sock, struct socket *new_sock,
+ struct proto_accept_arg *arg);
+int smc_getname(struct socket *sock, struct sockaddr *addr,
+ int peer);
+__poll_t smc_poll(struct file *file, struct socket *sock,
+ poll_table *wait);
+int smc_ioctl(struct socket *sock, unsigned int cmd,
+ unsigned long arg);
+int smc_listen(struct socket *sock, int backlog);
+int smc_shutdown(struct socket *sock, int how);
+int smc_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int optlen);
+int smc_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *optlen);
+int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len);
+int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags);
+ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags);
+
+/* smc sock initialization */
+void smc_sk_init(struct net *net, struct sock *sk, int protocol);
+/* clcsock initialization */
+int smc_create_clcsk(struct net *net, struct sock *sk, int family);
+
#ifdef ATOMIC64_INIT
#define KERNEL_HAS_ATOMIC64
#endif
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 3c06625ceb20..619b3bab3824 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -18,6 +18,7 @@
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"
+#include "smc_ism.h"
/********************************** send *************************************/
@@ -255,6 +256,14 @@ int smcd_cdc_msg_send(struct smc_connection *conn)
return rc;
smc_curs_copy(&conn->rx_curs_confirmed, &curs, conn);
conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0;
+
+ if (smc_ism_support_dmb_nocopy(conn->lgr->smcd))
+ /* if local sndbuf shares the same memory region with
+ * peer DMB, then don't update the tx_curs_fin
+ * and sndbuf_space until peer has consumed the data.
+ */
+ return 0;
+
/* Calculate transmitted data and increment free send buffer space */
diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin,
&conn->tx_curs_sent);
@@ -266,7 +275,7 @@ int smcd_cdc_msg_send(struct smc_connection *conn)
smc_curs_copy(&conn->tx_curs_fin, &conn->tx_curs_sent, conn);
smc_tx_sndbuf_nonfull(smc);
- return rc;
+ return 0;
}
/********************************* receive ***********************************/
@@ -323,7 +332,7 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
{
union smc_host_cursor cons_old, prod_old;
struct smc_connection *conn = &smc->conn;
- int diff_cons, diff_prod;
+ int diff_cons, diff_prod, diff_tx;
smc_curs_copy(&prod_old, &conn->local_rx_ctrl.prod, conn);
smc_curs_copy(&cons_old, &conn->local_rx_ctrl.cons, conn);
@@ -339,6 +348,29 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
atomic_add(diff_cons, &conn->peer_rmbe_space);
/* guarantee 0 <= peer_rmbe_space <= peer_rmbe_size */
smp_mb__after_atomic();
+
+ /* if local sndbuf shares the same memory region with
+ * peer RMB, then update tx_curs_fin and sndbuf_space
+ * here since peer has already consumed the data.
+ */
+ if (conn->lgr->is_smcd &&
+ smc_ism_support_dmb_nocopy(conn->lgr->smcd)) {
+ /* Calculate consumed data and
+ * increment free send buffer space.
+ */
+ diff_tx = smc_curs_diff(conn->sndbuf_desc->len,
+ &conn->tx_curs_fin,
+ &conn->local_rx_ctrl.cons);
+ /* increase local sndbuf space and fin_curs */
+ smp_mb__before_atomic();
+ atomic_add(diff_tx, &conn->sndbuf_space);
+ /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
+ smp_mb__after_atomic();
+ smc_curs_copy(&conn->tx_curs_fin,
+ &conn->local_rx_ctrl.cons, conn);
+
+ smc_tx_sndbuf_nonfull(smc);
+ }
}
diff_prod = smc_curs_diff(conn->rmb_desc->len, &prod_old,
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index e55026c7529c..33fa787c28eb 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -853,8 +853,10 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
pclc_smcd = &pclc->pclc_smcd;
pclc_prfx = &pclc->pclc_prfx;
ipv6_prfx = pclc->pclc_prfx_ipv6;
- v2_ext = &pclc->pclc_v2_ext;
- smcd_v2_ext = &pclc->pclc_smcd_v2_ext;
+ v2_ext = container_of(&pclc->pclc_v2_ext,
+ struct smc_clc_v2_extension, fixed);
+ smcd_v2_ext = container_of(&pclc->pclc_smcd_v2_ext,
+ struct smc_clc_smcd_v2_extension, fixed);
gidchids = pclc->pclc_gidchids;
trl = &pclc->pclc_trl;
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 7cc7070b9772..467effb50cd6 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -134,12 +134,15 @@ struct smc_clc_smcd_gid_chid {
*/
struct smc_clc_v2_extension {
- struct smc_clnt_opts_area_hdr hdr;
- u8 roce[16]; /* RoCEv2 GID */
- u8 max_conns;
- u8 max_links;
- __be16 feature_mask;
- u8 reserved[12];
+ /* New members must be added within the struct_group() macro below. */
+ struct_group_tagged(smc_clc_v2_extension_fixed, fixed,
+ struct smc_clnt_opts_area_hdr hdr;
+ u8 roce[16]; /* RoCEv2 GID */
+ u8 max_conns;
+ u8 max_links;
+ __be16 feature_mask;
+ u8 reserved[12];
+ );
u8 user_eids[][SMC_MAX_EID_LEN];
};
@@ -159,8 +162,11 @@ struct smc_clc_msg_smcd { /* SMC-D GID information */
};
struct smc_clc_smcd_v2_extension {
- u8 system_eid[SMC_MAX_EID_LEN];
- u8 reserved[16];
+ /* New members must be added within the struct_group() macro below. */
+ struct_group_tagged(smc_clc_smcd_v2_extension_fixed, fixed,
+ u8 system_eid[SMC_MAX_EID_LEN];
+ u8 reserved[16];
+ );
struct smc_clc_smcd_gid_chid gidchid[];
};
@@ -183,9 +189,9 @@ struct smc_clc_msg_proposal_area {
struct smc_clc_msg_smcd pclc_smcd;
struct smc_clc_msg_proposal_prefix pclc_prfx;
struct smc_clc_ipv6_prefix pclc_prfx_ipv6[SMC_CLC_MAX_V6_PREFIX];
- struct smc_clc_v2_extension pclc_v2_ext;
+ struct smc_clc_v2_extension_fixed pclc_v2_ext;
u8 user_eids[SMC_CLC_MAX_UEID][SMC_MAX_EID_LEN];
- struct smc_clc_smcd_v2_extension pclc_smcd_v2_ext;
+ struct smc_clc_smcd_v2_extension_fixed pclc_smcd_v2_ext;
struct smc_clc_smcd_gid_chid
pclc_gidchids[SMCD_CLC_MAX_V2_GID_ENTRIES];
struct smc_clc_msg_trail pclc_trl;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 9b84d5897aa5..3b95828d9976 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1149,6 +1149,20 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
}
}
+static void smcd_buf_detach(struct smc_connection *conn)
+{
+ struct smcd_dev *smcd = conn->lgr->smcd;
+ u64 peer_token = conn->peer_token;
+
+ if (!conn->sndbuf_desc)
+ return;
+
+ smc_ism_detach_dmb(smcd, peer_token);
+
+ kfree(conn->sndbuf_desc);
+ conn->sndbuf_desc = NULL;
+}
+
static void smc_buf_unuse(struct smc_connection *conn,
struct smc_link_group *lgr)
{
@@ -1192,6 +1206,8 @@ void smc_conn_free(struct smc_connection *conn)
if (lgr->is_smcd) {
if (!list_empty(&lgr->list))
smc_ism_unset_conn(conn);
+ if (smc_ism_support_dmb_nocopy(lgr->smcd))
+ smcd_buf_detach(conn);
tasklet_kill(&conn->rx_tsklet);
} else {
smc_cdc_wait_pend_tx_wr(conn);
@@ -1445,6 +1461,8 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft)
smc_sk_wake_ups(smc);
if (conn->lgr->is_smcd) {
smc_ism_unset_conn(conn);
+ if (smc_ism_support_dmb_nocopy(conn->lgr->smcd))
+ smcd_buf_detach(conn);
if (soft)
tasklet_kill(&conn->rx_tsklet);
else
@@ -1988,7 +2006,7 @@ out:
}
#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
-#define SMCR_RMBE_SIZES 5 /* 0 -> 16KB, 1 -> 32KB, .. 5 -> 512KB */
+#define SMCR_RMBE_SIZES 15 /* 0 -> 16KB, 1 -> 32KB, .. 15 -> 512MB */
/* convert the RMB size into the compressed notation (minimum 16K, see
* SMCD/R_DMBE_SIZES.
@@ -1997,7 +2015,6 @@ out:
*/
static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb)
{
- const unsigned int max_scat = SG_MAX_SINGLE_ALLOC * PAGE_SIZE;
u8 compressed;
if (size <= SMC_BUF_MIN_SIZE)
@@ -2007,9 +2024,11 @@ static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb)
compressed = min_t(u8, ilog2(size) + 1,
is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES);
+#ifdef CONFIG_ARCH_NO_SG_CHAIN
if (!is_smcd && is_rmb)
/* RMBs are backed by & limited to max size of scatterlists */
- compressed = min_t(u8, compressed, ilog2(max_scat >> 14));
+ compressed = min_t(u8, compressed, ilog2((SG_MAX_SINGLE_ALLOC * PAGE_SIZE) >> 14));
+#endif
return compressed;
}
@@ -2464,12 +2483,18 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
int rc;
/* create send buffer */
+ if (is_smcd &&
+ smc_ism_support_dmb_nocopy(smc->conn.lgr->smcd))
+ goto create_rmb;
+
rc = __smc_buf_create(smc, is_smcd, false);
if (rc)
return rc;
+
+create_rmb:
/* create rmb */
rc = __smc_buf_create(smc, is_smcd, true);
- if (rc) {
+ if (rc && smc->conn.sndbuf_desc) {
down_write(&smc->conn.lgr->sndbufs_lock);
list_del(&smc->conn.sndbuf_desc->list);
up_write(&smc->conn.lgr->sndbufs_lock);
@@ -2479,6 +2504,41 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
return rc;
}
+int smcd_buf_attach(struct smc_sock *smc)
+{
+ struct smc_connection *conn = &smc->conn;
+ struct smcd_dev *smcd = conn->lgr->smcd;
+ u64 peer_token = conn->peer_token;
+ struct smc_buf_desc *buf_desc;
+ int rc;
+
+ buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
+ if (!buf_desc)
+ return -ENOMEM;
+
+ /* The ghost sndbuf_desc describes the same memory region as
+ * peer RMB. Its lifecycle is consistent with the connection's
+ * and it will be freed with the connections instead of the
+ * link group.
+ */
+ rc = smc_ism_attach_dmb(smcd, peer_token, buf_desc);
+ if (rc)
+ goto free;
+
+ smc->sk.sk_sndbuf = buf_desc->len;
+ buf_desc->cpu_addr =
+ (u8 *)buf_desc->cpu_addr + sizeof(struct smcd_cdc_msg);
+ buf_desc->len -= sizeof(struct smcd_cdc_msg);
+ conn->sndbuf_desc = buf_desc;
+ conn->sndbuf_desc->used = 1;
+ atomic_set(&conn->sndbuf_space, conn->sndbuf_desc->len);
+ return 0;
+
+free:
+ kfree(buf_desc);
+ return rc;
+}
+
static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
int i;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 1f175376037b..d93cf51dbd7c 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -557,6 +557,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid,
void smc_smcd_terminate_all(struct smcd_dev *dev);
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev);
int smc_buf_create(struct smc_sock *smc, bool is_smcd);
+int smcd_buf_attach(struct smc_sock *smc);
int smc_uncompress_bufsize(u8 compressed);
int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link,
struct smc_clc_msg_accept_confirm *clc);
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 97704a9e84c7..9297dc20bfe2 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -209,13 +209,18 @@ int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
if (IS_ERR(rt))
goto out;
if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET)
- goto out;
- neigh = rt->dst.ops->neigh_lookup(&rt->dst, NULL, &fl4.daddr);
- if (neigh) {
- memcpy(nexthop_mac, neigh->ha, ETH_ALEN);
- *uses_gateway = rt->rt_uses_gateway;
- return 0;
- }
+ goto out_rt;
+ neigh = dst_neigh_lookup(&rt->dst, &fl4.daddr);
+ if (!neigh)
+ goto out_rt;
+ memcpy(nexthop_mac, neigh->ha, ETH_ALEN);
+ *uses_gateway = rt->rt_uses_gateway;
+ neigh_release(neigh);
+ ip_rt_put(rt);
+ return 0;
+
+out_rt:
+ ip_rt_put(rt);
out:
return -ENOENT;
}
diff --git a/net/smc/smc_inet.c b/net/smc/smc_inet.c
new file mode 100644
index 000000000000..bece346dd8e9
--- /dev/null
+++ b/net/smc/smc_inet.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * Definitions for the IPPROTO_SMC (socket related)
+ *
+ * Copyright IBM Corp. 2016, 2018
+ * Copyright (c) 2024, Alibaba Inc.
+ *
+ * Author: D. Wythe <alibuda@linux.alibaba.com>
+ */
+
+#include <net/protocol.h>
+#include <net/sock.h>
+
+#include "smc_inet.h"
+#include "smc.h"
+
+static int smc_inet_init_sock(struct sock *sk);
+
+static struct proto smc_inet_prot = {
+ .name = "INET_SMC",
+ .owner = THIS_MODULE,
+ .init = smc_inet_init_sock,
+ .hash = smc_hash_sk,
+ .unhash = smc_unhash_sk,
+ .release_cb = smc_release_cb,
+ .obj_size = sizeof(struct smc_sock),
+ .h.smc_hash = &smc_v4_hashinfo,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
+};
+
+static const struct proto_ops smc_inet_stream_ops = {
+ .family = PF_INET,
+ .owner = THIS_MODULE,
+ .release = smc_release,
+ .bind = smc_bind,
+ .connect = smc_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = smc_accept,
+ .getname = smc_getname,
+ .poll = smc_poll,
+ .ioctl = smc_ioctl,
+ .listen = smc_listen,
+ .shutdown = smc_shutdown,
+ .setsockopt = smc_setsockopt,
+ .getsockopt = smc_getsockopt,
+ .sendmsg = smc_sendmsg,
+ .recvmsg = smc_recvmsg,
+ .mmap = sock_no_mmap,
+ .splice_read = smc_splice_read,
+};
+
+static struct inet_protosw smc_inet_protosw = {
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_SMC,
+ .prot = &smc_inet_prot,
+ .ops = &smc_inet_stream_ops,
+ .flags = INET_PROTOSW_ICSK,
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+static struct proto smc_inet6_prot = {
+ .name = "INET6_SMC",
+ .owner = THIS_MODULE,
+ .init = smc_inet_init_sock,
+ .hash = smc_hash_sk,
+ .unhash = smc_unhash_sk,
+ .release_cb = smc_release_cb,
+ .obj_size = sizeof(struct smc_sock),
+ .h.smc_hash = &smc_v6_hashinfo,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
+};
+
+static const struct proto_ops smc_inet6_stream_ops = {
+ .family = PF_INET6,
+ .owner = THIS_MODULE,
+ .release = smc_release,
+ .bind = smc_bind,
+ .connect = smc_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = smc_accept,
+ .getname = smc_getname,
+ .poll = smc_poll,
+ .ioctl = smc_ioctl,
+ .listen = smc_listen,
+ .shutdown = smc_shutdown,
+ .setsockopt = smc_setsockopt,
+ .getsockopt = smc_getsockopt,
+ .sendmsg = smc_sendmsg,
+ .recvmsg = smc_recvmsg,
+ .mmap = sock_no_mmap,
+ .splice_read = smc_splice_read,
+};
+
+static struct inet_protosw smc_inet6_protosw = {
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_SMC,
+ .prot = &smc_inet6_prot,
+ .ops = &smc_inet6_stream_ops,
+ .flags = INET_PROTOSW_ICSK,
+};
+#endif /* CONFIG_IPV6 */
+
+static int smc_inet_init_sock(struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+
+ /* init common smc sock */
+ smc_sk_init(net, sk, IPPROTO_SMC);
+ /* create clcsock */
+ return smc_create_clcsk(net, sk, sk->sk_family);
+}
+
+int __init smc_inet_init(void)
+{
+ int rc;
+
+ rc = proto_register(&smc_inet_prot, 1);
+ if (rc) {
+ pr_err("%s: proto_register smc_inet_prot fails with %d\n",
+ __func__, rc);
+ return rc;
+ }
+ /* no return value */
+ inet_register_protosw(&smc_inet_protosw);
+
+#if IS_ENABLED(CONFIG_IPV6)
+ rc = proto_register(&smc_inet6_prot, 1);
+ if (rc) {
+ pr_err("%s: proto_register smc_inet6_prot fails with %d\n",
+ __func__, rc);
+ goto out_inet6_prot;
+ }
+ rc = inet6_register_protosw(&smc_inet6_protosw);
+ if (rc) {
+ pr_err("%s: inet6_register_protosw smc_inet6_protosw fails with %d\n",
+ __func__, rc);
+ goto out_inet6_protosw;
+ }
+ return rc;
+out_inet6_protosw:
+ proto_unregister(&smc_inet6_prot);
+out_inet6_prot:
+ inet_unregister_protosw(&smc_inet_protosw);
+ proto_unregister(&smc_inet_prot);
+#endif /* CONFIG_IPV6 */
+ return rc;
+}
+
+void smc_inet_exit(void)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ inet6_unregister_protosw(&smc_inet6_protosw);
+ proto_unregister(&smc_inet6_prot);
+#endif /* CONFIG_IPV6 */
+ inet_unregister_protosw(&smc_inet_protosw);
+ proto_unregister(&smc_inet_prot);
+}
diff --git a/net/smc/smc_inet.h b/net/smc/smc_inet.h
new file mode 100644
index 000000000000..a489c8a2b8ef
--- /dev/null
+++ b/net/smc/smc_inet.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * Definitions for the IPPROTO_SMC (socket related)
+
+ * Copyright IBM Corp. 2016
+ * Copyright (c) 2024, Alibaba Inc.
+ *
+ * Author: D. Wythe <alibuda@linux.alibaba.com>
+ */
+#ifndef __INET_SMC
+#define __INET_SMC
+
+/* Initialize protocol registration on IPPROTO_SMC,
+ * @return 0 on success
+ */
+int smc_inet_init(void);
+
+void smc_inet_exit(void);
+
+#endif /* __INET_SMC */
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index ac88de2a06a0..84f98e18c7db 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -91,6 +91,11 @@ bool smc_ism_is_v2_capable(void)
return smc_ism_v2_capable;
}
+void smc_ism_set_v2_capable(void)
+{
+ smc_ism_v2_capable = true;
+}
+
/* Set a connection using this DMBE. */
void smc_ism_set_conn(struct smc_connection *conn)
{
@@ -126,6 +131,8 @@ int smc_ism_get_vlan(struct smcd_dev *smcd, unsigned short vlanid)
if (!vlanid) /* No valid vlan id */
return -EINVAL;
+ if (!smcd->ops->add_vlan_id)
+ return -EOPNOTSUPP;
/* create new vlan entry, in case we need it */
new_vlan = kzalloc(sizeof(*new_vlan), GFP_KERNEL);
@@ -171,6 +178,8 @@ int smc_ism_put_vlan(struct smcd_dev *smcd, unsigned short vlanid)
if (!vlanid) /* No valid vlan id */
return -EINVAL;
+ if (!smcd->ops->del_vlan_id)
+ return -EOPNOTSUPP;
spin_lock_irqsave(&smcd->lock, flags);
list_for_each_entry(vlan, &smcd->vlan, list) {
@@ -222,7 +231,6 @@ int smc_ism_unregister_dmb(struct smcd_dev *smcd, struct smc_buf_desc *dmb_desc)
int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
struct smc_buf_desc *dmb_desc)
{
-#if IS_ENABLED(CONFIG_ISM)
struct smcd_dmb dmb;
int rc;
@@ -231,7 +239,7 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
dmb.sba_idx = dmb_desc->sba_idx;
dmb.vlan_id = lgr->vlan_id;
dmb.rgid = lgr->peer_gid.gid;
- rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb, &smc_ism_client);
+ rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb, lgr->smcd->client);
if (!rc) {
dmb_desc->sba_idx = dmb.sba_idx;
dmb_desc->token = dmb.dmb_tok;
@@ -240,9 +248,46 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
dmb_desc->len = dmb.dmb_len;
}
return rc;
-#else
- return 0;
-#endif
+}
+
+bool smc_ism_support_dmb_nocopy(struct smcd_dev *smcd)
+{
+ /* for now only loopback-ism supports
+ * merging sndbuf with peer DMB to avoid
+ * data copies between them.
+ */
+ return (smcd->ops->support_dmb_nocopy &&
+ smcd->ops->support_dmb_nocopy(smcd));
+}
+
+int smc_ism_attach_dmb(struct smcd_dev *dev, u64 token,
+ struct smc_buf_desc *dmb_desc)
+{
+ struct smcd_dmb dmb;
+ int rc = 0;
+
+ if (!dev->ops->attach_dmb)
+ return -EINVAL;
+
+ memset(&dmb, 0, sizeof(dmb));
+ dmb.dmb_tok = token;
+ rc = dev->ops->attach_dmb(dev, &dmb);
+ if (!rc) {
+ dmb_desc->sba_idx = dmb.sba_idx;
+ dmb_desc->token = dmb.dmb_tok;
+ dmb_desc->cpu_addr = dmb.cpu_addr;
+ dmb_desc->dma_addr = dmb.dma_addr;
+ dmb_desc->len = dmb.dmb_len;
+ }
+ return rc;
+}
+
+int smc_ism_detach_dmb(struct smcd_dev *dev, u64 token)
+{
+ if (!dev->ops->detach_dmb)
+ return -EINVAL;
+
+ return dev->ops->detach_dmb(dev, token);
}
static int smc_nl_handle_smcd_dev(struct smcd_dev *smcd,
@@ -322,6 +367,8 @@ static void smc_nl_prep_smcd_dev(struct smcd_dev_list *dev_list,
list_for_each_entry(smcd, &dev_list->list, list) {
if (num < snum)
goto next;
+ if (smc_ism_is_loopback(smcd))
+ goto next;
if (smc_nl_handle_smcd_dev(smcd, skb, cb))
goto errout;
next:
@@ -372,7 +419,8 @@ static void smcd_handle_sw_event(struct smc_ism_event_work *wrk)
smc_smcd_terminate(wrk->smcd, &peer_gid, ev_info.vlan_id);
break;
case ISM_EVENT_CODE_TESTLINK: /* Activity timer */
- if (ev_info.code == ISM_EVENT_REQUEST) {
+ if (ev_info.code == ISM_EVENT_REQUEST &&
+ wrk->smcd->ops->signal_event) {
ev_info.code = ISM_EVENT_RESPONSE;
wrk->smcd->ops->signal_event(wrk->smcd,
&peer_gid,
@@ -436,7 +484,7 @@ static struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
static void smcd_register_dev(struct ism_dev *ism)
{
const struct smcd_ops *ops = ism_get_smcd_ops();
- struct smcd_dev *smcd;
+ struct smcd_dev *smcd, *fentry;
if (!ops)
return;
@@ -446,20 +494,28 @@ static void smcd_register_dev(struct ism_dev *ism)
if (!smcd)
return;
smcd->priv = ism;
+ smcd->client = &smc_ism_client;
ism_set_priv(ism, &smc_ism_client, smcd);
if (smc_pnetid_by_dev_port(&ism->pdev->dev, 0, smcd->pnetid))
smc_pnetid_by_table_smcd(smcd);
+ if (smcd->ops->supports_v2())
+ smc_ism_set_v2_capable();
mutex_lock(&smcd_dev_list.mutex);
- if (list_empty(&smcd_dev_list.list)) {
- if (smcd->ops->supports_v2())
- smc_ism_v2_capable = true;
- }
- /* sort list: devices without pnetid before devices with pnetid */
- if (smcd->pnetid[0])
+ /* sort list:
+ * - devices without pnetid before devices with pnetid;
+ * - loopback-ism always at the very beginning;
+ */
+ if (!smcd->pnetid[0]) {
+ fentry = list_first_entry_or_null(&smcd_dev_list.list,
+ struct smcd_dev, list);
+ if (fentry && smc_ism_is_loopback(fentry))
+ list_add(&smcd->list, &fentry->list);
+ else
+ list_add(&smcd->list, &smcd_dev_list.list);
+ } else {
list_add_tail(&smcd->list, &smcd_dev_list.list);
- else
- list_add(&smcd->list, &smcd_dev_list.list);
+ }
mutex_unlock(&smcd_dev_list.mutex);
pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n",
@@ -541,6 +597,8 @@ int smc_ism_signal_shutdown(struct smc_link_group *lgr)
if (lgr->peer_shutdown)
return 0;
+ if (!lgr->smcd->ops->signal_event)
+ return 0;
memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE);
ev_info.vlan_id = lgr->vlan_id;
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index 165cd013404b..6763133dd8d0 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -48,10 +48,15 @@ int smc_ism_put_vlan(struct smcd_dev *dev, unsigned short vlan_id);
int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size,
struct smc_buf_desc *dmb_desc);
int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc);
+bool smc_ism_support_dmb_nocopy(struct smcd_dev *smcd);
+int smc_ism_attach_dmb(struct smcd_dev *dev, u64 token,
+ struct smc_buf_desc *dmb_desc);
+int smc_ism_detach_dmb(struct smcd_dev *dev, u64 token);
int smc_ism_signal_shutdown(struct smc_link_group *lgr);
void smc_ism_get_system_eid(u8 **eid);
u16 smc_ism_get_chid(struct smcd_dev *dev);
bool smc_ism_is_v2_capable(void);
+void smc_ism_set_v2_capable(void);
int smc_ism_init(void);
void smc_ism_exit(void);
int smcd_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
@@ -84,4 +89,9 @@ static inline bool smc_ism_is_emulated(struct smcd_dev *smcd)
return __smc_ism_is_emulated(chid);
}
+static inline bool smc_ism_is_loopback(struct smcd_dev *smcd)
+{
+ return (smcd->ops->get_chid(smcd) == 0xFFFF);
+}
+
#endif
diff --git a/net/smc/smc_loopback.c b/net/smc/smc_loopback.c
new file mode 100644
index 000000000000..3c5f64ca4115
--- /dev/null
+++ b/net/smc/smc_loopback.c
@@ -0,0 +1,427 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shared Memory Communications Direct over loopback-ism device.
+ *
+ * Functions for loopback-ism device.
+ *
+ * Copyright (c) 2024, Alibaba Inc.
+ *
+ * Author: Wen Gu <guwen@linux.alibaba.com>
+ * Tony Lu <tonylu@linux.alibaba.com>
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/types.h>
+#include <net/smc.h>
+
+#include "smc_cdc.h"
+#include "smc_ism.h"
+#include "smc_loopback.h"
+
+#define SMC_LO_V2_CAPABLE 0x1 /* loopback-ism acts as ISMv2 */
+#define SMC_LO_SUPPORT_NOCOPY 0x1
+#define SMC_DMA_ADDR_INVALID (~(dma_addr_t)0)
+
+static const char smc_lo_dev_name[] = "loopback-ism";
+static struct smc_lo_dev *lo_dev;
+
+static void smc_lo_generate_ids(struct smc_lo_dev *ldev)
+{
+ struct smcd_gid *lgid = &ldev->local_gid;
+ uuid_t uuid;
+
+ uuid_gen(&uuid);
+ memcpy(&lgid->gid, &uuid, sizeof(lgid->gid));
+ memcpy(&lgid->gid_ext, (u8 *)&uuid + sizeof(lgid->gid),
+ sizeof(lgid->gid_ext));
+
+ ldev->chid = SMC_LO_RESERVED_CHID;
+}
+
+static int smc_lo_query_rgid(struct smcd_dev *smcd, struct smcd_gid *rgid,
+ u32 vid_valid, u32 vid)
+{
+ struct smc_lo_dev *ldev = smcd->priv;
+
+ /* rgid should be the same as lgid */
+ if (!ldev || rgid->gid != ldev->local_gid.gid ||
+ rgid->gid_ext != ldev->local_gid.gid_ext)
+ return -ENETUNREACH;
+ return 0;
+}
+
+static int smc_lo_register_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb,
+ void *client_priv)
+{
+ struct smc_lo_dmb_node *dmb_node, *tmp_node;
+ struct smc_lo_dev *ldev = smcd->priv;
+ int sba_idx, rc;
+
+ /* check space for new dmb */
+ for_each_clear_bit(sba_idx, ldev->sba_idx_mask, SMC_LO_MAX_DMBS) {
+ if (!test_and_set_bit(sba_idx, ldev->sba_idx_mask))
+ break;
+ }
+ if (sba_idx == SMC_LO_MAX_DMBS)
+ return -ENOSPC;
+
+ dmb_node = kzalloc(sizeof(*dmb_node), GFP_KERNEL);
+ if (!dmb_node) {
+ rc = -ENOMEM;
+ goto err_bit;
+ }
+
+ dmb_node->sba_idx = sba_idx;
+ dmb_node->len = dmb->dmb_len;
+ dmb_node->cpu_addr = kzalloc(dmb_node->len, GFP_KERNEL |
+ __GFP_NOWARN | __GFP_NORETRY |
+ __GFP_NOMEMALLOC);
+ if (!dmb_node->cpu_addr) {
+ rc = -ENOMEM;
+ goto err_node;
+ }
+ dmb_node->dma_addr = SMC_DMA_ADDR_INVALID;
+ refcount_set(&dmb_node->refcnt, 1);
+
+again:
+ /* add new dmb into hash table */
+ get_random_bytes(&dmb_node->token, sizeof(dmb_node->token));
+ write_lock_bh(&ldev->dmb_ht_lock);
+ hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb_node->token) {
+ if (tmp_node->token == dmb_node->token) {
+ write_unlock_bh(&ldev->dmb_ht_lock);
+ goto again;
+ }
+ }
+ hash_add(ldev->dmb_ht, &dmb_node->list, dmb_node->token);
+ write_unlock_bh(&ldev->dmb_ht_lock);
+ atomic_inc(&ldev->dmb_cnt);
+
+ dmb->sba_idx = dmb_node->sba_idx;
+ dmb->dmb_tok = dmb_node->token;
+ dmb->cpu_addr = dmb_node->cpu_addr;
+ dmb->dma_addr = dmb_node->dma_addr;
+ dmb->dmb_len = dmb_node->len;
+
+ return 0;
+
+err_node:
+ kfree(dmb_node);
+err_bit:
+ clear_bit(sba_idx, ldev->sba_idx_mask);
+ return rc;
+}
+
+static void __smc_lo_unregister_dmb(struct smc_lo_dev *ldev,
+ struct smc_lo_dmb_node *dmb_node)
+{
+ /* remove dmb from hash table */
+ write_lock_bh(&ldev->dmb_ht_lock);
+ hash_del(&dmb_node->list);
+ write_unlock_bh(&ldev->dmb_ht_lock);
+
+ clear_bit(dmb_node->sba_idx, ldev->sba_idx_mask);
+ kvfree(dmb_node->cpu_addr);
+ kfree(dmb_node);
+
+ if (atomic_dec_and_test(&ldev->dmb_cnt))
+ wake_up(&ldev->ldev_release);
+}
+
+static int smc_lo_unregister_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb)
+{
+ struct smc_lo_dmb_node *dmb_node = NULL, *tmp_node;
+ struct smc_lo_dev *ldev = smcd->priv;
+
+ /* find dmb from hash table */
+ read_lock_bh(&ldev->dmb_ht_lock);
+ hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb->dmb_tok) {
+ if (tmp_node->token == dmb->dmb_tok) {
+ dmb_node = tmp_node;
+ break;
+ }
+ }
+ if (!dmb_node) {
+ read_unlock_bh(&ldev->dmb_ht_lock);
+ return -EINVAL;
+ }
+ read_unlock_bh(&ldev->dmb_ht_lock);
+
+ if (refcount_dec_and_test(&dmb_node->refcnt))
+ __smc_lo_unregister_dmb(ldev, dmb_node);
+ return 0;
+}
+
+static int smc_lo_support_dmb_nocopy(struct smcd_dev *smcd)
+{
+ return SMC_LO_SUPPORT_NOCOPY;
+}
+
+static int smc_lo_attach_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb)
+{
+ struct smc_lo_dmb_node *dmb_node = NULL, *tmp_node;
+ struct smc_lo_dev *ldev = smcd->priv;
+
+ /* find dmb_node according to dmb->dmb_tok */
+ read_lock_bh(&ldev->dmb_ht_lock);
+ hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb->dmb_tok) {
+ if (tmp_node->token == dmb->dmb_tok) {
+ dmb_node = tmp_node;
+ break;
+ }
+ }
+ if (!dmb_node) {
+ read_unlock_bh(&ldev->dmb_ht_lock);
+ return -EINVAL;
+ }
+ read_unlock_bh(&ldev->dmb_ht_lock);
+
+ if (!refcount_inc_not_zero(&dmb_node->refcnt))
+ /* the dmb is being unregistered, but has
+ * not been removed from the hash table.
+ */
+ return -EINVAL;
+
+ /* provide dmb information */
+ dmb->sba_idx = dmb_node->sba_idx;
+ dmb->dmb_tok = dmb_node->token;
+ dmb->cpu_addr = dmb_node->cpu_addr;
+ dmb->dma_addr = dmb_node->dma_addr;
+ dmb->dmb_len = dmb_node->len;
+ return 0;
+}
+
+static int smc_lo_detach_dmb(struct smcd_dev *smcd, u64 token)
+{
+ struct smc_lo_dmb_node *dmb_node = NULL, *tmp_node;
+ struct smc_lo_dev *ldev = smcd->priv;
+
+ /* find dmb_node according to dmb->dmb_tok */
+ read_lock_bh(&ldev->dmb_ht_lock);
+ hash_for_each_possible(ldev->dmb_ht, tmp_node, list, token) {
+ if (tmp_node->token == token) {
+ dmb_node = tmp_node;
+ break;
+ }
+ }
+ if (!dmb_node) {
+ read_unlock_bh(&ldev->dmb_ht_lock);
+ return -EINVAL;
+ }
+ read_unlock_bh(&ldev->dmb_ht_lock);
+
+ if (refcount_dec_and_test(&dmb_node->refcnt))
+ __smc_lo_unregister_dmb(ldev, dmb_node);
+ return 0;
+}
+
+static int smc_lo_move_data(struct smcd_dev *smcd, u64 dmb_tok,
+ unsigned int idx, bool sf, unsigned int offset,
+ void *data, unsigned int size)
+{
+ struct smc_lo_dmb_node *rmb_node = NULL, *tmp_node;
+ struct smc_lo_dev *ldev = smcd->priv;
+ struct smc_connection *conn;
+
+ if (!sf)
+ /* since sndbuf is merged with peer DMB, there is
+ * no need to copy data from sndbuf to peer DMB.
+ */
+ return 0;
+
+ read_lock_bh(&ldev->dmb_ht_lock);
+ hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb_tok) {
+ if (tmp_node->token == dmb_tok) {
+ rmb_node = tmp_node;
+ break;
+ }
+ }
+ if (!rmb_node) {
+ read_unlock_bh(&ldev->dmb_ht_lock);
+ return -EINVAL;
+ }
+ memcpy((char *)rmb_node->cpu_addr + offset, data, size);
+ read_unlock_bh(&ldev->dmb_ht_lock);
+
+ conn = smcd->conn[rmb_node->sba_idx];
+ if (!conn || conn->killed)
+ return -EPIPE;
+ tasklet_schedule(&conn->rx_tsklet);
+ return 0;
+}
+
+static int smc_lo_supports_v2(void)
+{
+ return SMC_LO_V2_CAPABLE;
+}
+
+static void smc_lo_get_local_gid(struct smcd_dev *smcd,
+ struct smcd_gid *smcd_gid)
+{
+ struct smc_lo_dev *ldev = smcd->priv;
+
+ smcd_gid->gid = ldev->local_gid.gid;
+ smcd_gid->gid_ext = ldev->local_gid.gid_ext;
+}
+
+static u16 smc_lo_get_chid(struct smcd_dev *smcd)
+{
+ return ((struct smc_lo_dev *)smcd->priv)->chid;
+}
+
+static struct device *smc_lo_get_dev(struct smcd_dev *smcd)
+{
+ return &((struct smc_lo_dev *)smcd->priv)->dev;
+}
+
+static const struct smcd_ops lo_ops = {
+ .query_remote_gid = smc_lo_query_rgid,
+ .register_dmb = smc_lo_register_dmb,
+ .unregister_dmb = smc_lo_unregister_dmb,
+ .support_dmb_nocopy = smc_lo_support_dmb_nocopy,
+ .attach_dmb = smc_lo_attach_dmb,
+ .detach_dmb = smc_lo_detach_dmb,
+ .add_vlan_id = NULL,
+ .del_vlan_id = NULL,
+ .set_vlan_required = NULL,
+ .reset_vlan_required = NULL,
+ .signal_event = NULL,
+ .move_data = smc_lo_move_data,
+ .supports_v2 = smc_lo_supports_v2,
+ .get_local_gid = smc_lo_get_local_gid,
+ .get_chid = smc_lo_get_chid,
+ .get_dev = smc_lo_get_dev,
+};
+
+static struct smcd_dev *smcd_lo_alloc_dev(const struct smcd_ops *ops,
+ int max_dmbs)
+{
+ struct smcd_dev *smcd;
+
+ smcd = kzalloc(sizeof(*smcd), GFP_KERNEL);
+ if (!smcd)
+ return NULL;
+
+ smcd->conn = kcalloc(max_dmbs, sizeof(struct smc_connection *),
+ GFP_KERNEL);
+ if (!smcd->conn)
+ goto out_smcd;
+
+ smcd->ops = ops;
+
+ spin_lock_init(&smcd->lock);
+ spin_lock_init(&smcd->lgr_lock);
+ INIT_LIST_HEAD(&smcd->vlan);
+ INIT_LIST_HEAD(&smcd->lgr_list);
+ init_waitqueue_head(&smcd->lgrs_deleted);
+ return smcd;
+
+out_smcd:
+ kfree(smcd);
+ return NULL;
+}
+
+static int smcd_lo_register_dev(struct smc_lo_dev *ldev)
+{
+ struct smcd_dev *smcd;
+
+ smcd = smcd_lo_alloc_dev(&lo_ops, SMC_LO_MAX_DMBS);
+ if (!smcd)
+ return -ENOMEM;
+ ldev->smcd = smcd;
+ smcd->priv = ldev;
+ smc_ism_set_v2_capable();
+ mutex_lock(&smcd_dev_list.mutex);
+ list_add(&smcd->list, &smcd_dev_list.list);
+ mutex_unlock(&smcd_dev_list.mutex);
+ pr_warn_ratelimited("smc: adding smcd device %s\n",
+ dev_name(&ldev->dev));
+ return 0;
+}
+
+static void smcd_lo_unregister_dev(struct smc_lo_dev *ldev)
+{
+ struct smcd_dev *smcd = ldev->smcd;
+
+ pr_warn_ratelimited("smc: removing smcd device %s\n",
+ dev_name(&ldev->dev));
+ smcd->going_away = 1;
+ smc_smcd_terminate_all(smcd);
+ mutex_lock(&smcd_dev_list.mutex);
+ list_del_init(&smcd->list);
+ mutex_unlock(&smcd_dev_list.mutex);
+ kfree(smcd->conn);
+ kfree(smcd);
+}
+
+static int smc_lo_dev_init(struct smc_lo_dev *ldev)
+{
+ smc_lo_generate_ids(ldev);
+ rwlock_init(&ldev->dmb_ht_lock);
+ hash_init(ldev->dmb_ht);
+ atomic_set(&ldev->dmb_cnt, 0);
+ init_waitqueue_head(&ldev->ldev_release);
+
+ return smcd_lo_register_dev(ldev);
+}
+
+static void smc_lo_dev_exit(struct smc_lo_dev *ldev)
+{
+ smcd_lo_unregister_dev(ldev);
+ if (atomic_read(&ldev->dmb_cnt))
+ wait_event(ldev->ldev_release, !atomic_read(&ldev->dmb_cnt));
+}
+
+static void smc_lo_dev_release(struct device *dev)
+{
+ struct smc_lo_dev *ldev =
+ container_of(dev, struct smc_lo_dev, dev);
+
+ kfree(ldev);
+}
+
+static int smc_lo_dev_probe(void)
+{
+ struct smc_lo_dev *ldev;
+ int ret;
+
+ ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+ if (!ldev)
+ return -ENOMEM;
+
+ ldev->dev.parent = NULL;
+ ldev->dev.release = smc_lo_dev_release;
+ device_initialize(&ldev->dev);
+ dev_set_name(&ldev->dev, smc_lo_dev_name);
+
+ ret = smc_lo_dev_init(ldev);
+ if (ret)
+ goto free_dev;
+
+ lo_dev = ldev; /* global loopback device */
+ return 0;
+
+free_dev:
+ put_device(&ldev->dev);
+ return ret;
+}
+
+static void smc_lo_dev_remove(void)
+{
+ if (!lo_dev)
+ return;
+
+ smc_lo_dev_exit(lo_dev);
+ put_device(&lo_dev->dev); /* device_initialize in smc_lo_dev_probe */
+}
+
+int smc_loopback_init(void)
+{
+ return smc_lo_dev_probe();
+}
+
+void smc_loopback_exit(void)
+{
+ smc_lo_dev_remove();
+}
diff --git a/net/smc/smc_loopback.h b/net/smc/smc_loopback.h
new file mode 100644
index 000000000000..6dd4292dae56
--- /dev/null
+++ b/net/smc/smc_loopback.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shared Memory Communications Direct over loopback-ism device.
+ *
+ * SMC-D loopback-ism device structure definitions.
+ *
+ * Copyright (c) 2024, Alibaba Inc.
+ *
+ * Author: Wen Gu <guwen@linux.alibaba.com>
+ * Tony Lu <tonylu@linux.alibaba.com>
+ *
+ */
+
+#ifndef _SMC_LOOPBACK_H
+#define _SMC_LOOPBACK_H
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <net/smc.h>
+
+#if IS_ENABLED(CONFIG_SMC_LO)
+#define SMC_LO_MAX_DMBS 5000
+#define SMC_LO_DMBS_HASH_BITS 12
+#define SMC_LO_RESERVED_CHID 0xFFFF
+
+struct smc_lo_dmb_node {
+ struct hlist_node list;
+ u64 token;
+ u32 len;
+ u32 sba_idx;
+ void *cpu_addr;
+ dma_addr_t dma_addr;
+ refcount_t refcnt;
+};
+
+struct smc_lo_dev {
+ struct smcd_dev *smcd;
+ struct device dev;
+ u16 chid;
+ struct smcd_gid local_gid;
+ atomic_t dmb_cnt;
+ rwlock_t dmb_ht_lock;
+ DECLARE_BITMAP(sba_idx_mask, SMC_LO_MAX_DMBS);
+ DECLARE_HASHTABLE(dmb_ht, SMC_LO_DMBS_HASH_BITS);
+ wait_queue_head_t ldev_release;
+};
+
+int smc_loopback_init(void);
+void smc_loopback_exit(void);
+#else
+static inline int smc_loopback_init(void)
+{
+ return 0;
+}
+
+static inline void smc_loopback_exit(void)
+{
+}
+#endif
+
+#endif /* _SMC_LOOPBACK_H */
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 9a2f3638d161..f0cbe77a80b4 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -42,10 +42,10 @@ static void smc_rx_wake_up(struct sock *sk)
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
EPOLLRDNORM | EPOLLRDBAND);
- sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+ sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
(sk->sk_state == SMC_CLOSED))
- sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
+ sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_HUP);
rcu_read_unlock();
}
diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h
index 9d32058db2b5..e19177ce4092 100644
--- a/net/smc/smc_stats.h
+++ b/net/smc/smc_stats.h
@@ -19,7 +19,7 @@
#include "smc_clc.h"
-#define SMC_MAX_FBACK_RSN_CNT 30
+#define SMC_MAX_FBACK_RSN_CNT 36
enum {
SMC_BUF_8K,
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
index a5946d1b9d60..13f2bc092db1 100644
--- a/net/smc/smc_sysctl.c
+++ b/net/smc/smc_sysctl.c
@@ -90,11 +90,11 @@ static struct ctl_table smc_table[] = {
.extra1 = &conns_per_lgr_min,
.extra2 = &conns_per_lgr_max,
},
- { }
};
int __net_init smc_sysctl_net_init(struct net *net)
{
+ size_t table_size = ARRAY_SIZE(smc_table);
struct ctl_table *table;
table = smc_table;
@@ -105,12 +105,12 @@ int __net_init smc_sysctl_net_init(struct net *net)
if (!table)
goto err_alloc;
- for (i = 0; i < ARRAY_SIZE(smc_table) - 1; i++)
+ for (i = 0; i < table_size; i++)
table[i].data += (void *)net - (void *)&init_net;
}
net->smc.smc_hdr = register_net_sysctl_sz(net, "net/smc", table,
- ARRAY_SIZE(smc_table));
+ table_size);
if (!net->smc.smc_hdr)
goto err_reg;
@@ -133,7 +133,7 @@ err_alloc:
void __net_exit smc_sysctl_net_exit(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
table = net->smc.smc_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->smc.smc_hdr);
diff --git a/net/smc/smc_tracepoint.h b/net/smc/smc_tracepoint.h
index 9fc5e586d24a..a9a6e3c1113a 100644
--- a/net/smc/smc_tracepoint.h
+++ b/net/smc/smc_tracepoint.h
@@ -60,7 +60,7 @@ DECLARE_EVENT_CLASS(smc_msg_event,
__entry->smc = smc;
__entry->net_cookie = sock_net(sk)->net_cookie;
__entry->len = len;
- __assign_str(name, smc->conn.lnk->ibname);
+ __assign_str(name);
),
TP_printk("smc=%p net=%llu len=%zu dev=%s",
@@ -104,7 +104,7 @@ TRACE_EVENT(smcr_link_down,
__entry->lgr = lgr;
__entry->net_cookie = lgr->net->net_cookie;
__entry->state = lnk->state;
- __assign_str(name, lnk->ibname);
+ __assign_str(name);
__entry->location = location;
),
diff --git a/net/socket.c b/net/socket.c
index e5f3af49a8b6..fcbdd5bc47ac 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -88,7 +88,7 @@
#include <linux/xattr.h>
#include <linux/nospec.h>
#include <linux/indirect_call_wrapper.h>
-#include <linux/io_uring.h>
+#include <linux/io_uring/net.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -1822,6 +1822,20 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
return __sys_socketpair(family, type, protocol, usockvec);
}
+int __sys_bind_socket(struct socket *sock, struct sockaddr_storage *address,
+ int addrlen)
+{
+ int err;
+
+ err = security_socket_bind(sock, (struct sockaddr *)address,
+ addrlen);
+ if (!err)
+ err = READ_ONCE(sock->ops)->bind(sock,
+ (struct sockaddr *)address,
+ addrlen);
+ return err;
+}
+
/*
* Bind a name to a socket. Nothing much to do here since it's
* the protocol's responsibility to handle the local address.
@@ -1839,15 +1853,8 @@ int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock) {
err = move_addr_to_kernel(umyaddr, addrlen, &address);
- if (!err) {
- err = security_socket_bind(sock,
- (struct sockaddr *)&address,
- addrlen);
- if (!err)
- err = READ_ONCE(sock->ops)->bind(sock,
- (struct sockaddr *)
- &address, addrlen);
- }
+ if (!err)
+ err = __sys_bind_socket(sock, &address, addrlen);
fput_light(sock->file, fput_needed);
}
return err;
@@ -1863,23 +1870,28 @@ SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
* necessary for a listen, and if that works, we mark the socket as
* ready for listening.
*/
+int __sys_listen_socket(struct socket *sock, int backlog)
+{
+ int somaxconn, err;
+
+ somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
+ if ((unsigned int)backlog > somaxconn)
+ backlog = somaxconn;
+
+ err = security_socket_listen(sock, backlog);
+ if (!err)
+ err = READ_ONCE(sock->ops)->listen(sock, backlog);
+ return err;
+}
int __sys_listen(int fd, int backlog)
{
struct socket *sock;
int err, fput_needed;
- int somaxconn;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock) {
- somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
- if ((unsigned int)backlog > somaxconn)
- backlog = somaxconn;
-
- err = security_socket_listen(sock, backlog);
- if (!err)
- err = READ_ONCE(sock->ops)->listen(sock, backlog);
-
+ err = __sys_listen_socket(sock, backlog);
fput_light(sock->file, fput_needed);
}
return err;
@@ -1890,7 +1902,7 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog)
return __sys_listen(fd, backlog);
}
-struct file *do_accept(struct file *file, unsigned file_flags,
+struct file *do_accept(struct file *file, struct proto_accept_arg *arg,
struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags)
{
@@ -1926,8 +1938,8 @@ struct file *do_accept(struct file *file, unsigned file_flags,
if (err)
goto out_fd;
- err = ops->accept(sock, newsock, sock->file->f_flags | file_flags,
- false);
+ arg->flags |= sock->file->f_flags;
+ err = ops->accept(sock, newsock, arg);
if (err < 0)
goto out_fd;
@@ -1953,6 +1965,7 @@ out_fd:
static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags)
{
+ struct proto_accept_arg arg = { };
struct file *newfile;
int newfd;
@@ -1966,7 +1979,7 @@ static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_s
if (unlikely(newfd < 0))
return newfd;
- newfile = do_accept(file, 0, upeer_sockaddr, upeer_addrlen,
+ newfile = do_accept(file, &arg, upeer_sockaddr, upeer_addrlen,
flags);
if (IS_ERR(newfile)) {
put_unused_fd(newfd);
@@ -3580,6 +3593,10 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
{
struct sock *sk = sock->sk;
const struct proto_ops *ops = READ_ONCE(sock->ops);
+ struct proto_accept_arg arg = {
+ .flags = flags,
+ .kern = true,
+ };
int err;
err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
@@ -3587,7 +3604,7 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
if (err < 0)
goto done;
- err = ops->accept(sock, *newsock, flags, true);
+ err = ops->accept(sock, *newsock, &arg);
if (err < 0) {
sock_release(*newsock);
*newsock = NULL;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index c7af0220f82f..369310909fc9 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1875,8 +1875,10 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages);
/* slack space should prevent this ever happening: */
- if (unlikely(snd_buf->len > snd_buf->buflen))
+ if (unlikely(snd_buf->len > snd_buf->buflen)) {
+ status = -EIO;
goto wrap_failed;
+ }
/* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was
* done anyway, so it's safe to put the request on the wire: */
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
diff --git a/net/sunrpc/auth_gss/auth_gss_internal.h b/net/sunrpc/auth_gss/auth_gss_internal.h
index c53b329092d4..4ebc1b7043d9 100644
--- a/net/sunrpc/auth_gss/auth_gss_internal.h
+++ b/net/sunrpc/auth_gss/auth_gss_internal.h
@@ -23,7 +23,7 @@ simple_get_bytes(const void *p, const void *end, void *res, size_t len)
}
static inline const void *
-simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
+simple_get_netobj_noprof(const void *p, const void *end, struct xdr_netobj *dest)
{
const void *q;
unsigned int len;
@@ -35,7 +35,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
if (unlikely(q > end || q < p))
return ERR_PTR(-EFAULT);
if (len) {
- dest->data = kmemdup(p, len, GFP_KERNEL);
+ dest->data = kmemdup_noprof(p, len, GFP_KERNEL);
if (unlikely(dest->data == NULL))
return ERR_PTR(-ENOMEM);
} else
@@ -43,3 +43,5 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
dest->len = len;
return q;
}
+
+#define simple_get_netobj(...) alloc_hooks(simple_get_netobj_noprof(__VA_ARGS__))
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index 06d8ee0db000..4eb19c3a54c7 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -168,7 +168,7 @@ static int krb5_DK(const struct gss_krb5_enctype *gk5e,
goto err_return;
blocksize = crypto_sync_skcipher_blocksize(cipher);
if (crypto_sync_skcipher_setkey(cipher, inkey->data, inkey->len))
- goto err_return;
+ goto err_free_cipher;
ret = -ENOMEM;
inblockdata = kmalloc(blocksize, gfp_mask);
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 24de94184700..73a90ad873fb 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1033,17 +1033,11 @@ null_verifier:
static void gss_free_in_token_pages(struct gssp_in_token *in_token)
{
- u32 inlen;
int i;
i = 0;
- inlen = in_token->page_len;
- while (inlen) {
- if (in_token->pages[i])
- put_page(in_token->pages[i]);
- inlen -= inlen > PAGE_SIZE ? PAGE_SIZE : inlen;
- }
-
+ while (in_token->pages[i])
+ put_page(in_token->pages[i++]);
kfree(in_token->pages);
in_token->pages = NULL;
}
@@ -1075,7 +1069,7 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp,
goto out_denied_free;
pages = DIV_ROUND_UP(inlen, PAGE_SIZE);
- in_token->pages = kcalloc(pages, sizeof(struct page *), GFP_KERNEL);
+ in_token->pages = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL);
if (!in_token->pages)
goto out_denied_free;
in_token->page_base = 0;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 28f3749f6dc6..09f29a95f2bc 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1071,6 +1071,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
.authflavor = old->cl_auth->au_flavor,
.cred = old->cl_cred,
.stats = old->cl_stats,
+ .timeout = old->cl_timeout,
};
struct rpc_clnt *clnt;
int err;
@@ -2325,12 +2326,13 @@ call_transmit_status(struct rpc_task *task)
task->tk_action = call_transmit;
task->tk_status = 0;
break;
- case -ECONNREFUSED:
case -EHOSTDOWN:
case -ENETDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
case -EPERM:
+ break;
+ case -ECONNREFUSED:
if (RPC_IS_SOFTCONN(task)) {
if (!task->tk_msg.rpc_proc->p_proc)
trace_xprt_ping(task->tk_xprt,
@@ -2698,8 +2700,19 @@ rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr)
goto out_msg_denied;
error = rpcauth_checkverf(task, xdr);
- if (error)
+ if (error) {
+ struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+
+ if (!test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) {
+ rpcauth_invalcred(task);
+ if (!task->tk_cred_retry)
+ goto out_err;
+ task->tk_cred_retry--;
+ trace_rpc__stale_creds(task);
+ return -EKEYREJECTED;
+ }
goto out_verifier;
+ }
p = xdr_inline_decode(xdr, sizeof(*p));
if (!p)
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 6debf4fd42d4..cef623ea1506 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -369,8 +369,10 @@ static void rpc_make_runnable(struct workqueue_struct *wq,
if (RPC_IS_ASYNC(task)) {
INIT_WORK(&task->u.tk_work, rpc_async_schedule);
queue_work(wq, &task->u.tk_work);
- } else
+ } else {
+ smp_mb__after_atomic();
wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED);
+ }
}
/*
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index b33e429336fb..88a59cfa5583 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -72,57 +72,100 @@ static struct svc_pool_map svc_pool_map = {
static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
static int
-param_set_pool_mode(const char *val, const struct kernel_param *kp)
+__param_set_pool_mode(const char *val, struct svc_pool_map *m)
{
- int *ip = (int *)kp->arg;
- struct svc_pool_map *m = &svc_pool_map;
- int err;
+ int err, mode;
mutex_lock(&svc_pool_map_mutex);
- err = -EBUSY;
- if (m->count)
- goto out;
-
err = 0;
if (!strncmp(val, "auto", 4))
- *ip = SVC_POOL_AUTO;
+ mode = SVC_POOL_AUTO;
else if (!strncmp(val, "global", 6))
- *ip = SVC_POOL_GLOBAL;
+ mode = SVC_POOL_GLOBAL;
else if (!strncmp(val, "percpu", 6))
- *ip = SVC_POOL_PERCPU;
+ mode = SVC_POOL_PERCPU;
else if (!strncmp(val, "pernode", 7))
- *ip = SVC_POOL_PERNODE;
+ mode = SVC_POOL_PERNODE;
else
err = -EINVAL;
+ if (err)
+ goto out;
+
+ if (m->count == 0)
+ m->mode = mode;
+ else if (mode != m->mode)
+ err = -EBUSY;
out:
mutex_unlock(&svc_pool_map_mutex);
return err;
}
static int
-param_get_pool_mode(char *buf, const struct kernel_param *kp)
+param_set_pool_mode(const char *val, const struct kernel_param *kp)
+{
+ struct svc_pool_map *m = kp->arg;
+
+ return __param_set_pool_mode(val, m);
+}
+
+int sunrpc_set_pool_mode(const char *val)
+{
+ return __param_set_pool_mode(val, &svc_pool_map);
+}
+EXPORT_SYMBOL(sunrpc_set_pool_mode);
+
+/**
+ * sunrpc_get_pool_mode - get the current pool_mode for the host
+ * @buf: where to write the current pool_mode
+ * @size: size of @buf
+ *
+ * Grab the current pool_mode from the svc_pool_map and write
+ * the resulting string to @buf. Returns the number of characters
+ * written to @buf (a'la snprintf()).
+ */
+int
+sunrpc_get_pool_mode(char *buf, size_t size)
{
- int *ip = (int *)kp->arg;
+ struct svc_pool_map *m = &svc_pool_map;
- switch (*ip)
+ switch (m->mode)
{
case SVC_POOL_AUTO:
- return sysfs_emit(buf, "auto\n");
+ return snprintf(buf, size, "auto");
case SVC_POOL_GLOBAL:
- return sysfs_emit(buf, "global\n");
+ return snprintf(buf, size, "global");
case SVC_POOL_PERCPU:
- return sysfs_emit(buf, "percpu\n");
+ return snprintf(buf, size, "percpu");
case SVC_POOL_PERNODE:
- return sysfs_emit(buf, "pernode\n");
+ return snprintf(buf, size, "pernode");
default:
- return sysfs_emit(buf, "%d\n", *ip);
+ return snprintf(buf, size, "%d", m->mode);
}
}
+EXPORT_SYMBOL(sunrpc_get_pool_mode);
+
+static int
+param_get_pool_mode(char *buf, const struct kernel_param *kp)
+{
+ char str[16];
+ int len;
+
+ len = sunrpc_get_pool_mode(str, ARRAY_SIZE(str));
+
+ /* Ensure we have room for newline and NUL */
+ len = min_t(int, len, ARRAY_SIZE(str) - 2);
+
+ /* tack on the newline */
+ str[len] = '\n';
+ str[len + 1] = '\0';
+
+ return sysfs_emit(buf, "%s", str);
+}
module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode,
- &svc_pool_map.mode, 0644);
+ &svc_pool_map, 0644);
/*
* Detect best pool mapping mode heuristically,
@@ -250,10 +293,8 @@ svc_pool_map_get(void)
int npools = -1;
mutex_lock(&svc_pool_map_mutex);
-
if (m->count++) {
mutex_unlock(&svc_pool_map_mutex);
- WARN_ON_ONCE(m->npools <= 1);
return m->npools;
}
@@ -275,32 +316,21 @@ svc_pool_map_get(void)
m->mode = SVC_POOL_GLOBAL;
}
m->npools = npools;
-
- if (npools == 1)
- /* service is unpooled, so doesn't hold a reference */
- m->count--;
-
mutex_unlock(&svc_pool_map_mutex);
return npools;
}
/*
- * Drop a reference to the global map of cpus to pools, if
- * pools were in use, i.e. if npools > 1.
+ * Drop a reference to the global map of cpus to pools.
* When the last reference is dropped, the map data is
- * freed; this allows the sysadmin to change the pool
- * mode using the pool_mode module option without
- * rebooting or re-loading sunrpc.ko.
+ * freed; this allows the sysadmin to change the pool.
*/
static void
-svc_pool_map_put(int npools)
+svc_pool_map_put(void)
{
struct svc_pool_map *m = &svc_pool_map;
- if (npools <= 1)
- return;
mutex_lock(&svc_pool_map_mutex);
-
if (!--m->count) {
kfree(m->to_pool);
m->to_pool = NULL;
@@ -308,7 +338,6 @@ svc_pool_map_put(int npools)
m->pool_to = NULL;
m->npools = 0;
}
-
mutex_unlock(&svc_pool_map_mutex);
}
@@ -553,9 +582,10 @@ struct svc_serv *svc_create_pooled(struct svc_program *prog,
serv = __svc_create(prog, stats, bufsize, npools, threadfn);
if (!serv)
goto out_err;
+ serv->sv_is_pooled = true;
return serv;
out_err:
- svc_pool_map_put(npools);
+ svc_pool_map_put();
return NULL;
}
EXPORT_SYMBOL_GPL(svc_create_pooled);
@@ -585,7 +615,8 @@ svc_destroy(struct svc_serv **servp)
cache_clean_deferred(serv);
- svc_pool_map_put(serv->sv_nrpools);
+ if (serv->sv_is_pooled)
+ svc_pool_map_put();
for (i = 0; i < serv->sv_nrpools; i++) {
struct svc_pool *pool = &serv->sv_pools[i];
@@ -1265,8 +1296,6 @@ svc_generic_init_request(struct svc_rqst *rqstp,
if (rqstp->rq_proc >= versp->vs_nproc)
goto err_bad_proc;
rqstp->rq_procinfo = procp = &versp->vs_proc[rqstp->rq_proc];
- if (!procp)
- goto err_bad_proc;
/* Initialize storage for argp and resp */
memset(rqstp->rq_argp, 0, procp->pc_argzero);
@@ -1559,9 +1588,11 @@ out_drop:
*/
void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
{
+ struct rpc_timeout timeout = {
+ .to_increment = 0,
+ };
struct rpc_task *task;
int proc_error;
- struct rpc_timeout timeout;
/* Build the svc_rqst used by the common processing routine */
rqstp->rq_xid = req->rq_xid;
@@ -1614,6 +1645,7 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
timeout.to_initval = req->rq_xprt->timeout->to_initval;
timeout.to_retries = req->rq_xprt->timeout->to_retries;
}
+ timeout.to_maxval = timeout.to_initval;
memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
task = rpc_run_bc_task(req, &timeout);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index b4a85a227bd7..d3735ab3e6d1 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -46,7 +46,6 @@ static LIST_HEAD(svc_xprt_class_list);
/* SMP locking strategy:
*
- * svc_pool->sp_lock protects most of the fields of that pool.
* svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
* when both need to be taken (rare), svc_serv->sv_lock is first.
* The "service mutex" protects svc_serv->sv_nrthread.
@@ -158,6 +157,7 @@ int svc_print_xprts(char *buf, int maxlen)
*/
void svc_xprt_deferred_close(struct svc_xprt *xprt)
{
+ trace_svc_xprt_close(xprt);
if (!test_and_set_bit(XPT_CLOSE, &xprt->xpt_flags))
svc_xprt_enqueue(xprt);
}
@@ -211,51 +211,6 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
}
EXPORT_SYMBOL_GPL(svc_xprt_init);
-static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
- struct svc_serv *serv,
- struct net *net,
- const int family,
- const unsigned short port,
- int flags)
-{
- struct sockaddr_in sin = {
- .sin_family = AF_INET,
- .sin_addr.s_addr = htonl(INADDR_ANY),
- .sin_port = htons(port),
- };
-#if IS_ENABLED(CONFIG_IPV6)
- struct sockaddr_in6 sin6 = {
- .sin6_family = AF_INET6,
- .sin6_addr = IN6ADDR_ANY_INIT,
- .sin6_port = htons(port),
- };
-#endif
- struct svc_xprt *xprt;
- struct sockaddr *sap;
- size_t len;
-
- switch (family) {
- case PF_INET:
- sap = (struct sockaddr *)&sin;
- len = sizeof(sin);
- break;
-#if IS_ENABLED(CONFIG_IPV6)
- case PF_INET6:
- sap = (struct sockaddr *)&sin6;
- len = sizeof(sin6);
- break;
-#endif
- default:
- return ERR_PTR(-EAFNOSUPPORT);
- }
-
- xprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
- if (IS_ERR(xprt))
- trace_svc_xprt_create_err(serv->sv_program->pg_name,
- xcl->xcl_name, sap, len, xprt);
- return xprt;
-}
-
/**
* svc_xprt_received - start next receiver thread
* @xprt: controlling transport
@@ -294,9 +249,8 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
}
static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
- struct net *net, const int family,
- const unsigned short port, int flags,
- const struct cred *cred)
+ struct net *net, struct sockaddr *sap,
+ size_t len, int flags, const struct cred *cred)
{
struct svc_xprt_class *xcl;
@@ -312,8 +266,11 @@ static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
goto err;
spin_unlock(&svc_xprt_class_lock);
- newxprt = __svc_xpo_create(xcl, serv, net, family, port, flags);
+ newxprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
if (IS_ERR(newxprt)) {
+ trace_svc_xprt_create_err(serv->sv_program->pg_name,
+ xcl->xcl_name, sap, len,
+ newxprt);
module_put(xcl->xcl_owner);
return PTR_ERR(newxprt);
}
@@ -330,6 +287,48 @@ static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
}
/**
+ * svc_xprt_create_from_sa - Add a new listener to @serv from socket address
+ * @serv: target RPC service
+ * @xprt_name: transport class name
+ * @net: network namespace
+ * @sap: socket address pointer
+ * @flags: SVC_SOCK flags
+ * @cred: credential to bind to this transport
+ *
+ * Return local xprt port on success or %-EPROTONOSUPPORT on failure
+ */
+int svc_xprt_create_from_sa(struct svc_serv *serv, const char *xprt_name,
+ struct net *net, struct sockaddr *sap,
+ int flags, const struct cred *cred)
+{
+ size_t len;
+ int err;
+
+ switch (sap->sa_family) {
+ case AF_INET:
+ len = sizeof(struct sockaddr_in);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ len = sizeof(struct sockaddr_in6);
+ break;
+#endif
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ err = _svc_xprt_create(serv, xprt_name, net, sap, len, flags, cred);
+ if (err == -EPROTONOSUPPORT) {
+ request_module("svc%s", xprt_name);
+ err = _svc_xprt_create(serv, xprt_name, net, sap, len, flags,
+ cred);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(svc_xprt_create_from_sa);
+
+/**
* svc_xprt_create - Add a new listener to @serv
* @serv: target RPC service
* @xprt_name: transport class name
@@ -339,23 +338,41 @@ static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
* @flags: SVC_SOCK flags
* @cred: credential to bind to this transport
*
- * Return values:
- * %0: New listener added successfully
- * %-EPROTONOSUPPORT: Requested transport type not supported
+ * Return local xprt port on success or %-EPROTONOSUPPORT on failure
*/
int svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
struct net *net, const int family,
const unsigned short port, int flags,
const struct cred *cred)
{
- int err;
+ struct sockaddr_in sin = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_ANY),
+ .sin_port = htons(port),
+ };
+#if IS_ENABLED(CONFIG_IPV6)
+ struct sockaddr_in6 sin6 = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = IN6ADDR_ANY_INIT,
+ .sin6_port = htons(port),
+ };
+#endif
+ struct sockaddr *sap;
- err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
- if (err == -EPROTONOSUPPORT) {
- request_module("svc%s", xprt_name);
- err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
+ switch (family) {
+ case PF_INET:
+ sap = (struct sockaddr *)&sin;
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case PF_INET6:
+ sap = (struct sockaddr *)&sin6;
+ break;
+#endif
+ default:
+ return -EAFNOSUPPORT;
}
- return err;
+
+ return svc_xprt_create_from_sa(serv, xprt_name, net, sap, flags, cred);
}
EXPORT_SYMBOL_GPL(svc_xprt_create);
@@ -1260,6 +1277,40 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
}
/**
+ * svc_find_listener - find an RPC transport instance
+ * @serv: pointer to svc_serv to search
+ * @xcl_name: C string containing transport's class name
+ * @net: owner net pointer
+ * @sa: sockaddr containing address
+ *
+ * Return the transport instance pointer for the endpoint accepting
+ * connections/peer traffic from the specified transport class,
+ * and matching sockaddr.
+ */
+struct svc_xprt *svc_find_listener(struct svc_serv *serv, const char *xcl_name,
+ struct net *net, const struct sockaddr *sa)
+{
+ struct svc_xprt *xprt;
+ struct svc_xprt *found = NULL;
+
+ spin_lock_bh(&serv->sv_lock);
+ list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) {
+ if (xprt->xpt_net != net)
+ continue;
+ if (strcmp(xprt->xpt_class->xcl_name, xcl_name))
+ continue;
+ if (!rpc_cmp_addr_port(sa, (struct sockaddr *)&xprt->xpt_local))
+ continue;
+ found = xprt;
+ svc_xprt_get(xprt);
+ break;
+ }
+ spin_unlock_bh(&serv->sv_lock);
+ return found;
+}
+EXPORT_SYMBOL_GPL(svc_find_listener);
+
+/**
* svc_find_xprt - find an RPC transport instance
* @serv: pointer to svc_serv to search
* @xcl_name: C string containing transport's class name
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 93941ab12549..bdb587a72422 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -40,7 +40,7 @@ EXPORT_SYMBOL_GPL(nlm_debug);
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-static int proc_do_xprt(struct ctl_table *table, int write,
+static int proc_do_xprt(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
char tmpbuf[256];
@@ -62,7 +62,7 @@ static int proc_do_xprt(struct ctl_table *table, int write,
}
static int
-proc_dodebug(struct ctl_table *table, int write, void *buffer, size_t *lenp,
+proc_dodebug(const struct ctl_table *table, int write, void *buffer, size_t *lenp,
loff_t *ppos)
{
char tmpbuf[20], *s = NULL;
@@ -160,7 +160,6 @@ static struct ctl_table debug_table[] = {
.mode = 0444,
.proc_handler = proc_do_xprt,
},
- { }
};
void
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index 55b21bae866d..3232aa23cdb4 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
-rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o \
+rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o ib_client.o \
svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \
svc_rdma_pcl.o module.o
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index ffbf99894970..31434aeb8e29 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -54,7 +54,7 @@ static void frwr_cid_init(struct rpcrdma_ep *ep,
cid->ci_completion_id = mr->mr_ibmr->res.id;
}
-static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
+static void frwr_mr_unmap(struct rpcrdma_mr *mr)
{
if (mr->mr_device) {
trace_xprtrdma_mr_unmap(mr);
@@ -73,7 +73,7 @@ void frwr_mr_release(struct rpcrdma_mr *mr)
{
int rc;
- frwr_mr_unmap(mr->mr_xprt, mr);
+ frwr_mr_unmap(mr);
rc = ib_dereg_mr(mr->mr_ibmr);
if (rc)
@@ -84,7 +84,7 @@ void frwr_mr_release(struct rpcrdma_mr *mr)
static void frwr_mr_put(struct rpcrdma_mr *mr)
{
- frwr_mr_unmap(mr->mr_xprt, mr);
+ frwr_mr_unmap(mr);
/* The MR is returned to the req's MR free list instead
* of to the xprt's MR free list. No spinlock is needed.
@@ -92,7 +92,8 @@ static void frwr_mr_put(struct rpcrdma_mr *mr)
rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
}
-/* frwr_reset - Place MRs back on the free list
+/**
+ * frwr_reset - Place MRs back on @req's free list
* @req: request to reset
*
* Used after a failed marshal. For FRWR, this means the MRs
diff --git a/net/sunrpc/xprtrdma/ib_client.c b/net/sunrpc/xprtrdma/ib_client.c
new file mode 100644
index 000000000000..a938c19c3490
--- /dev/null
+++ b/net/sunrpc/xprtrdma/ib_client.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright (c) 2024 Oracle. All rights reserved.
+ */
+
+/* #include <linux/module.h>
+#include <linux/slab.h> */
+#include <linux/xarray.h>
+#include <linux/types.h>
+#include <linux/kref.h>
+#include <linux/completion.h>
+
+#include <linux/sunrpc/svc_rdma.h>
+#include <linux/sunrpc/rdma_rn.h>
+
+#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
+
+/* Per-ib_device private data for rpcrdma */
+struct rpcrdma_device {
+ struct kref rd_kref;
+ unsigned long rd_flags;
+ struct ib_device *rd_device;
+ struct xarray rd_xa;
+ struct completion rd_done;
+};
+
+#define RPCRDMA_RD_F_REMOVING (0)
+
+static struct ib_client rpcrdma_ib_client;
+
+/*
+ * Listeners have no associated device, so we never register them.
+ * Note that ib_get_client_data() does not check if @device is
+ * NULL for us.
+ */
+static struct rpcrdma_device *rpcrdma_get_client_data(struct ib_device *device)
+{
+ if (!device)
+ return NULL;
+ return ib_get_client_data(device, &rpcrdma_ib_client);
+}
+
+/**
+ * rpcrdma_rn_register - register to get device removal notifications
+ * @device: device to monitor
+ * @rn: notification object that wishes to be notified
+ * @done: callback to notify caller of device removal
+ *
+ * Returns zero on success. The callback in rn_done is guaranteed
+ * to be invoked when the device is removed, unless this notification
+ * is unregistered first.
+ *
+ * On failure, a negative errno is returned.
+ */
+int rpcrdma_rn_register(struct ib_device *device,
+ struct rpcrdma_notification *rn,
+ void (*done)(struct rpcrdma_notification *rn))
+{
+ struct rpcrdma_device *rd = rpcrdma_get_client_data(device);
+
+ if (!rd || test_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags))
+ return -ENETUNREACH;
+
+ kref_get(&rd->rd_kref);
+ if (xa_alloc(&rd->rd_xa, &rn->rn_index, rn, xa_limit_32b, GFP_KERNEL) < 0)
+ return -ENOMEM;
+ rn->rn_done = done;
+ return 0;
+}
+
+static void rpcrdma_rn_release(struct kref *kref)
+{
+ struct rpcrdma_device *rd = container_of(kref, struct rpcrdma_device,
+ rd_kref);
+
+ trace_rpcrdma_client_completion(rd->rd_device);
+ complete(&rd->rd_done);
+}
+
+/**
+ * rpcrdma_rn_unregister - stop device removal notifications
+ * @device: monitored device
+ * @rn: notification object that no longer wishes to be notified
+ */
+void rpcrdma_rn_unregister(struct ib_device *device,
+ struct rpcrdma_notification *rn)
+{
+ struct rpcrdma_device *rd = rpcrdma_get_client_data(device);
+
+ if (!rd)
+ return;
+
+ xa_erase(&rd->rd_xa, rn->rn_index);
+ kref_put(&rd->rd_kref, rpcrdma_rn_release);
+}
+
+/**
+ * rpcrdma_add_one - ib_client device insertion callback
+ * @device: device about to be inserted
+ *
+ * Returns zero on success. xprtrdma private data has been allocated
+ * for this device. On failure, a negative errno is returned.
+ */
+static int rpcrdma_add_one(struct ib_device *device)
+{
+ struct rpcrdma_device *rd;
+
+ rd = kzalloc(sizeof(*rd), GFP_KERNEL);
+ if (!rd)
+ return -ENOMEM;
+
+ kref_init(&rd->rd_kref);
+ xa_init_flags(&rd->rd_xa, XA_FLAGS_ALLOC1);
+ rd->rd_device = device;
+ init_completion(&rd->rd_done);
+ ib_set_client_data(device, &rpcrdma_ib_client, rd);
+
+ trace_rpcrdma_client_add_one(device);
+ return 0;
+}
+
+/**
+ * rpcrdma_remove_one - ib_client device removal callback
+ * @device: device about to be removed
+ * @client_data: this module's private per-device data
+ *
+ * Upon return, all transports associated with @device have divested
+ * themselves from IB hardware resources.
+ */
+static void rpcrdma_remove_one(struct ib_device *device,
+ void *client_data)
+{
+ struct rpcrdma_device *rd = client_data;
+ struct rpcrdma_notification *rn;
+ unsigned long index;
+
+ trace_rpcrdma_client_remove_one(device);
+
+ set_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags);
+ xa_for_each(&rd->rd_xa, index, rn)
+ rn->rn_done(rn);
+
+ /*
+ * Wait only if there are still outstanding notification
+ * registrants for this device.
+ */
+ if (!refcount_dec_and_test(&rd->rd_kref.refcount)) {
+ trace_rpcrdma_client_wait_on(device);
+ wait_for_completion(&rd->rd_done);
+ }
+
+ trace_rpcrdma_client_remove_one_done(device);
+ kfree(rd);
+}
+
+static struct ib_client rpcrdma_ib_client = {
+ .name = "rpcrdma",
+ .add = rpcrdma_add_one,
+ .remove = rpcrdma_remove_one,
+};
+
+/**
+ * rpcrdma_ib_client_unregister - unregister ib_client for xprtrdma
+ *
+ * cel: watch for orphaned rpcrdma_device objects on module unload
+ */
+void rpcrdma_ib_client_unregister(void)
+{
+ ib_unregister_client(&rpcrdma_ib_client);
+}
+
+/**
+ * rpcrdma_ib_client_register - register ib_client for rpcrdma
+ *
+ * Returns zero on success, or a negative errno.
+ */
+int rpcrdma_ib_client_register(void)
+{
+ return ib_register_client(&rpcrdma_ib_client);
+}
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
index 45c5b41ac8dc..697f571d4c01 100644
--- a/net/sunrpc/xprtrdma/module.c
+++ b/net/sunrpc/xprtrdma/module.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sunrpc/svc_rdma.h>
+#include <linux/sunrpc/rdma_rn.h>
#include <asm/swab.h>
@@ -30,21 +31,32 @@ static void __exit rpc_rdma_cleanup(void)
{
xprt_rdma_cleanup();
svc_rdma_cleanup();
+ rpcrdma_ib_client_unregister();
}
static int __init rpc_rdma_init(void)
{
int rc;
+ rc = rpcrdma_ib_client_register();
+ if (rc)
+ goto out_rc;
+
rc = svc_rdma_init();
if (rc)
- goto out;
+ goto out_ib_client;
rc = xprt_rdma_init();
if (rc)
- svc_rdma_cleanup();
+ goto out_svc_rdma;
-out:
+ return 0;
+
+out_svc_rdma:
+ svc_rdma_cleanup();
+out_ib_client:
+ rpcrdma_ib_client_unregister();
+out_rc:
return rc;
}
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 190a4de239c8..1478c41c7e9d 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1471,8 +1471,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
credits = 1; /* don't deadlock */
else if (credits > r_xprt->rx_ep->re_max_requests)
credits = r_xprt->rx_ep->re_max_requests;
- rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1),
- false);
+ rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1));
if (buf->rb_credits != credits)
rpcrdma_update_cwnd(r_xprt, credits);
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index f86970733eb0..58ae6ec4f25b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -74,7 +74,7 @@ enum {
SVCRDMA_COUNTER_BUFSIZ = sizeof(unsigned long long),
};
-static int svcrdma_counter_handler(struct ctl_table *table, int write,
+static int svcrdma_counter_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct percpu_counter *stat = (struct percpu_counter *)table->data;
@@ -209,7 +209,6 @@ static struct ctl_table svcrdma_parm_table[] = {
.extra1 = &zero,
.extra2 = &zero,
},
- { },
};
static void svc_rdma_proc_cleanup(void)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 2b1c16b9547d..f15750cacacf 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -65,6 +65,8 @@
static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
struct net *net, int node);
+static int svc_rdma_listen_handler(struct rdma_cm_id *cma_id,
+ struct rdma_cm_event *event);
static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
struct net *net,
struct sockaddr *sa, int salen,
@@ -122,6 +124,41 @@ static void qp_event_handler(struct ib_event *event, void *context)
}
}
+static struct rdma_cm_id *
+svc_rdma_create_listen_id(struct net *net, struct sockaddr *sap,
+ void *context)
+{
+ struct rdma_cm_id *listen_id;
+ int ret;
+
+ listen_id = rdma_create_id(net, svc_rdma_listen_handler, context,
+ RDMA_PS_TCP, IB_QPT_RC);
+ if (IS_ERR(listen_id))
+ return listen_id;
+
+ /* Allow both IPv4 and IPv6 sockets to bind a single port
+ * at the same time.
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+ ret = rdma_set_afonly(listen_id, 1);
+ if (ret)
+ goto out_destroy;
+#endif
+ ret = rdma_bind_addr(listen_id, sap);
+ if (ret)
+ goto out_destroy;
+
+ ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
+ if (ret)
+ goto out_destroy;
+
+ return listen_id;
+
+out_destroy:
+ rdma_destroy_id(listen_id);
+ return ERR_PTR(ret);
+}
+
static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
struct net *net, int node)
{
@@ -247,17 +284,31 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
*
* Return values:
* %0: Do not destroy @cma_id
- * %1: Destroy @cma_id (never returned here)
+ * %1: Destroy @cma_id
*
* NB: There is never a DEVICE_REMOVAL event for INADDR_ANY listeners.
*/
static int svc_rdma_listen_handler(struct rdma_cm_id *cma_id,
struct rdma_cm_event *event)
{
+ struct sockaddr *sap = (struct sockaddr *)&cma_id->route.addr.src_addr;
+ struct svcxprt_rdma *cma_xprt = cma_id->context;
+ struct svc_xprt *cma_rdma = &cma_xprt->sc_xprt;
+ struct rdma_cm_id *listen_id;
+
switch (event->event) {
case RDMA_CM_EVENT_CONNECT_REQUEST:
handle_connect_req(cma_id, &event->param.conn);
break;
+ case RDMA_CM_EVENT_ADDR_CHANGE:
+ listen_id = svc_rdma_create_listen_id(cma_rdma->xpt_net,
+ sap, cma_xprt);
+ if (IS_ERR(listen_id)) {
+ pr_err("Listener dead, address change failed for device %s\n",
+ cma_id->device->name);
+ } else
+ cma_xprt->sc_cm_id = listen_id;
+ return 1;
default:
break;
}
@@ -307,7 +358,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
{
struct rdma_cm_id *listen_id;
struct svcxprt_rdma *cma_xprt;
- int ret;
if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6)
return ERR_PTR(-EAFNOSUPPORT);
@@ -317,30 +367,13 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener");
- listen_id = rdma_create_id(net, svc_rdma_listen_handler, cma_xprt,
- RDMA_PS_TCP, IB_QPT_RC);
+ listen_id = svc_rdma_create_listen_id(net, sa, cma_xprt);
if (IS_ERR(listen_id)) {
- ret = PTR_ERR(listen_id);
- goto err0;
+ kfree(cma_xprt);
+ return (struct svc_xprt *)listen_id;
}
-
- /* Allow both IPv4 and IPv6 sockets to bind a single port
- * at the same time.
- */
-#if IS_ENABLED(CONFIG_IPV6)
- ret = rdma_set_afonly(listen_id, 1);
- if (ret)
- goto err1;
-#endif
- ret = rdma_bind_addr(listen_id, sa);
- if (ret)
- goto err1;
cma_xprt->sc_cm_id = listen_id;
- ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
- if (ret)
- goto err1;
-
/*
* We need to use the address from the cm_id in case the
* caller specified 0 for the port number.
@@ -349,12 +382,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen);
return &cma_xprt->sc_xprt;
-
- err1:
- rdma_destroy_id(listen_id);
- err0:
- kfree(cma_xprt);
- return ERR_PTR(ret);
}
/*
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 29b0562d62e7..9a8ce5df83ca 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -137,7 +137,6 @@ static struct ctl_table xr_tunables_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { },
};
#endif
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 4f8d7efa469f..63262ef0c2e3 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -49,14 +49,14 @@
* o buffer memory
*/
+#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc_rdma.h>
#include <linux/log2.h>
-#include <asm-generic/barrier.h>
-#include <asm/bitops.h>
+#include <asm/barrier.h>
#include <rdma/ib_cm.h>
@@ -69,13 +69,15 @@ static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_sendctx *sc);
static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt);
-static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep);
static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_ep_get(struct rpcrdma_ep *ep);
static int rpcrdma_ep_put(struct rpcrdma_ep *ep);
static struct rpcrdma_regbuf *
+rpcrdma_regbuf_alloc_node(size_t size, enum dma_data_direction direction,
+ int node);
+static struct rpcrdma_regbuf *
rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction);
static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb);
static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb);
@@ -222,7 +224,6 @@ static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep,
static int
rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
- struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr;
struct rpcrdma_ep *ep = id->context;
might_sleep();
@@ -241,10 +242,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
ep->re_async_rc = -ENETUNREACH;
complete(&ep->re_done);
return 0;
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
- pr_info("rpcrdma: removing device %s for %pISpc\n",
- ep->re_id->device->name, sap);
- fallthrough;
case RDMA_CM_EVENT_ADDR_CHANGE:
ep->re_connect_status = -ENODEV;
goto disconnected;
@@ -280,6 +277,14 @@ disconnected:
return 0;
}
+static void rpcrdma_ep_removal_done(struct rpcrdma_notification *rn)
+{
+ struct rpcrdma_ep *ep = container_of(rn, struct rpcrdma_ep, re_rn);
+
+ trace_xprtrdma_device_removal(ep->re_id);
+ xprt_force_disconnect(ep->re_xprt);
+}
+
static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_ep *ep)
{
@@ -319,6 +324,10 @@ static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt,
if (rc)
goto out;
+ rc = rpcrdma_rn_register(id->device, &ep->re_rn, rpcrdma_ep_removal_done);
+ if (rc)
+ goto out;
+
return id;
out:
@@ -346,6 +355,8 @@ static void rpcrdma_ep_destroy(struct kref *kref)
ib_dealloc_pd(ep->re_pd);
ep->re_pd = NULL;
+ rpcrdma_rn_unregister(ep->re_id->device, &ep->re_rn);
+
kfree(ep);
module_put(THIS_MODULE);
}
@@ -501,7 +512,7 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt)
* outstanding Receives.
*/
rpcrdma_ep_get(ep);
- rpcrdma_post_recvs(r_xprt, 1, true);
+ rpcrdma_post_recvs(r_xprt, 1);
rc = rdma_connect(ep->re_id, &ep->re_remote_cma);
if (rc)
@@ -893,6 +904,8 @@ static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt)
static void rpcrdma_req_reset(struct rpcrdma_req *req)
{
+ struct rpcrdma_mr *mr;
+
/* Credits are valid for only one connection */
req->rl_slot.rq_cong = 0;
@@ -902,7 +915,19 @@ static void rpcrdma_req_reset(struct rpcrdma_req *req)
rpcrdma_regbuf_dma_unmap(req->rl_sendbuf);
rpcrdma_regbuf_dma_unmap(req->rl_recvbuf);
- frwr_reset(req);
+ /* The verbs consumer can't know the state of an MR on the
+ * req->rl_registered list unless a successful completion
+ * has occurred, so they cannot be re-used.
+ */
+ while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
+ struct rpcrdma_buffer *buf = &mr->mr_xprt->rx_buf;
+
+ spin_lock(&buf->rb_lock);
+ list_del(&mr->mr_all);
+ spin_unlock(&buf->rb_lock);
+
+ frwr_mr_release(mr);
+ }
}
/* ASSUMPTION: the rb_allreqs list is stable for the duration,
@@ -920,18 +945,20 @@ static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt)
}
static noinline
-struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
- bool temp)
+struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ struct rpcrdma_ep *ep = r_xprt->rx_ep;
+ struct ib_device *device = ep->re_id->device;
struct rpcrdma_rep *rep;
rep = kzalloc(sizeof(*rep), XPRTRDMA_GFP_FLAGS);
if (rep == NULL)
goto out;
- rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep->re_inline_recv,
- DMA_FROM_DEVICE);
+ rep->rr_rdmabuf = rpcrdma_regbuf_alloc_node(ep->re_inline_recv,
+ DMA_FROM_DEVICE,
+ ibdev_to_node(device));
if (!rep->rr_rdmabuf)
goto out_free;
@@ -946,7 +973,6 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
rep->rr_recv_wr.num_sge = 1;
- rep->rr_temp = temp;
spin_lock(&buf->rb_lock);
list_add(&rep->rr_all, &buf->rb_all_reps);
@@ -965,17 +991,6 @@ static void rpcrdma_rep_free(struct rpcrdma_rep *rep)
kfree(rep);
}
-static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
-{
- struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf;
-
- spin_lock(&buf->rb_lock);
- list_del(&rep->rr_all);
- spin_unlock(&buf->rb_lock);
-
- rpcrdma_rep_free(rep);
-}
-
static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
{
struct llist_node *node;
@@ -1007,10 +1022,8 @@ static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_rep *rep;
- list_for_each_entry(rep, &buf->rb_all_reps, rr_all) {
+ list_for_each_entry(rep, &buf->rb_all_reps, rr_all)
rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf);
- rep->rr_temp = true; /* Mark this rep for destruction */
- }
}
static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf)
@@ -1227,14 +1240,15 @@ void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
* or Replies they may be registered externally via frwr_map.
*/
static struct rpcrdma_regbuf *
-rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction)
+rpcrdma_regbuf_alloc_node(size_t size, enum dma_data_direction direction,
+ int node)
{
struct rpcrdma_regbuf *rb;
- rb = kmalloc(sizeof(*rb), XPRTRDMA_GFP_FLAGS);
+ rb = kmalloc_node(sizeof(*rb), XPRTRDMA_GFP_FLAGS, node);
if (!rb)
return NULL;
- rb->rg_data = kmalloc(size, XPRTRDMA_GFP_FLAGS);
+ rb->rg_data = kmalloc_node(size, XPRTRDMA_GFP_FLAGS, node);
if (!rb->rg_data) {
kfree(rb);
return NULL;
@@ -1246,6 +1260,12 @@ rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction)
return rb;
}
+static struct rpcrdma_regbuf *
+rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction)
+{
+ return rpcrdma_regbuf_alloc_node(size, direction, NUMA_NO_NODE);
+}
+
/**
* rpcrdma_regbuf_realloc - re-allocate a SEND/RECV buffer
* @rb: regbuf to reallocate
@@ -1323,10 +1343,9 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb)
* rpcrdma_post_recvs - Refill the Receive Queue
* @r_xprt: controlling transport instance
* @needed: current credit grant
- * @temp: mark Receive buffers to be deleted after one use
*
*/
-void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
+void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
@@ -1340,8 +1359,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
if (likely(ep->re_receive_count > needed))
goto out;
needed -= ep->re_receive_count;
- if (!temp)
- needed += RPCRDMA_MAX_RECV_BATCH;
+ needed += RPCRDMA_MAX_RECV_BATCH;
if (atomic_inc_return(&ep->re_receiving) > 1)
goto out;
@@ -1350,12 +1368,8 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
wr = NULL;
while (needed) {
rep = rpcrdma_rep_get_locked(buf);
- if (rep && rep->rr_temp) {
- rpcrdma_rep_destroy(rep);
- continue;
- }
if (!rep)
- rep = rpcrdma_rep_create(r_xprt, temp);
+ rep = rpcrdma_rep_create(r_xprt);
if (!rep)
break;
if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) {
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index da409450dfc0..8147d2b41494 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -56,6 +56,7 @@
#include <linux/sunrpc/rpc_rdma_cid.h> /* completion IDs */
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
+#include <linux/sunrpc/rdma_rn.h> /* removal notifications */
#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
@@ -92,6 +93,7 @@ struct rpcrdma_ep {
struct rpcrdma_connect_private
re_cm_private;
struct rdma_conn_param re_remote_cma;
+ struct rpcrdma_notification re_rn;
int re_receive_count;
unsigned int re_max_requests; /* depends on device */
unsigned int re_inline_send; /* negotiated */
@@ -198,7 +200,6 @@ struct rpcrdma_rep {
__be32 rr_proc;
int rr_wc_flags;
u32 rr_inv_rkey;
- bool rr_temp;
struct rpcrdma_regbuf *rr_rdmabuf;
struct rpcrdma_xprt *rr_rxprt;
struct rpc_rqst *rr_rqst;
@@ -466,7 +467,7 @@ void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc);
int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt);
void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt);
-void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp);
+void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed);
/*
* Buffer calls - xprtrdma/verbs.c
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index ce18716491c8..0e1691316f42 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -160,7 +160,6 @@ static struct ctl_table xs_tunables_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { },
};
/*
@@ -2442,6 +2441,13 @@ static void xs_tcp_setup_socket(struct work_struct *work)
transport->srcport = 0;
status = -EAGAIN;
break;
+ case -EPERM:
+ /* Happens, for instance, if a BPF program is preventing
+ * the connect. Remap the error so upper layers can better
+ * deal with it.
+ */
+ status = -ECONNREFUSED;
+ fallthrough;
case -EINVAL:
/* Happens, for instance, if the user specified a link
* local IPv6 address without a scope-id.
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index c9189a970eec..6488ead9e464 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -244,6 +244,99 @@ static int switchdev_port_obj_notify(enum switchdev_notifier_type nt,
return 0;
}
+static void switchdev_obj_id_to_helpful_msg(struct net_device *dev,
+ enum switchdev_obj_id obj_id,
+ int err, bool add)
+{
+ const char *action = add ? "add" : "del";
+ const char *reason = "";
+ const char *problem;
+ const char *obj_str;
+
+ switch (obj_id) {
+ case SWITCHDEV_OBJ_ID_UNDEFINED:
+ obj_str = "Undefined object";
+ problem = "Attempted operation is undefined, indicating a possible programming\n"
+ "error.\n";
+ break;
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ obj_str = "VLAN entry";
+ problem = "Failure in VLAN settings on this port might disrupt network\n"
+ "segmentation or traffic isolation, affecting network partitioning.\n";
+ break;
+ case SWITCHDEV_OBJ_ID_PORT_MDB:
+ obj_str = "Port Multicast Database entry";
+ problem = "Failure in updating the port's Multicast Database could lead to\n"
+ "multicast forwarding issues.\n";
+ break;
+ case SWITCHDEV_OBJ_ID_HOST_MDB:
+ obj_str = "Host Multicast Database entry";
+ problem = "Failure in updating the host's Multicast Database may impact multicast\n"
+ "group memberships or traffic delivery, affecting multicast\n"
+ "communication.\n";
+ break;
+ case SWITCHDEV_OBJ_ID_MRP:
+ obj_str = "Media Redundancy Protocol configuration for port";
+ problem = "Failure to set MRP ring ID on this port prevents communication with\n"
+ "the specified redundancy ring, resulting in an inability to engage\n"
+ "in MRP-based network operations.\n";
+ break;
+ case SWITCHDEV_OBJ_ID_RING_TEST_MRP:
+ obj_str = "MRP Test Frame Operations for port";
+ problem = "Failure to generate/monitor MRP test frames may lead to inability to\n"
+ "assess the ring's operational integrity and fault response, hindering\n"
+ "proactive network management.\n";
+ break;
+ case SWITCHDEV_OBJ_ID_RING_ROLE_MRP:
+ obj_str = "MRP Ring Role Configuration";
+ problem = "Improper MRP ring role configuration may create conflicts in the ring,\n"
+ "disrupting communication for all participants, or isolate the local\n"
+ "system from the ring, hindering its ability to communicate with other\n"
+ "participants.\n";
+ break;
+ case SWITCHDEV_OBJ_ID_RING_STATE_MRP:
+ obj_str = "MRP Ring State Configuration";
+ problem = "Failure to correctly set the MRP ring state can result in network\n"
+ "loops or leave segments without communication. In a Closed state,\n"
+ "it maintains loop prevention by blocking one MRM port, while an Open\n"
+ "state activates in response to failures, changing port states to\n"
+ "preserve network connectivity.\n";
+ break;
+ case SWITCHDEV_OBJ_ID_IN_TEST_MRP:
+ obj_str = "MRP_InTest Frame Generation Configuration";
+ problem = "Failure in managing MRP_InTest frame generation can misjudge the\n"
+ "interconnection ring's state, leading to incorrect blocking or\n"
+ "unblocking of the I/C port. This misconfiguration might result\n"
+ "in unintended network loops or isolate critical network segments,\n"
+ "compromising network integrity and reliability.\n";
+ break;
+ case SWITCHDEV_OBJ_ID_IN_ROLE_MRP:
+ obj_str = "Interconnection Ring Role Configuration";
+ problem = "Failure in incorrect assignment of interconnection ring roles\n"
+ "(MIM/MIC) can impair the formation of the interconnection rings.\n";
+ break;
+ case SWITCHDEV_OBJ_ID_IN_STATE_MRP:
+ obj_str = "Interconnection Ring State Configuration";
+ problem = "Failure in updating the interconnection ring state can lead in\n"
+ "case of Open state to incorrect blocking or unblocking of the\n"
+ "I/C port, resulting in unintended network loops or isolation\n"
+ "of critical network\n";
+ break;
+ default:
+ obj_str = "Unknown object";
+ problem = "Indicating a possible programming error.\n";
+ }
+
+ switch (err) {
+ case -ENOSPC:
+ reason = "Current HW/SW setup lacks sufficient resources.\n";
+ break;
+ }
+
+ netdev_err(dev, "Failed to %s %s (object id=%d) with error: %pe (%d).\n%s%s\n",
+ action, obj_str, obj_id, ERR_PTR(err), err, problem, reason);
+}
+
static void switchdev_port_obj_add_deferred(struct net_device *dev,
const void *data)
{
@@ -254,8 +347,7 @@ static void switchdev_port_obj_add_deferred(struct net_device *dev,
err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
dev, obj, NULL);
if (err && err != -EOPNOTSUPP)
- netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
- err, obj->id);
+ switchdev_obj_id_to_helpful_msg(dev, obj->id, err, true);
if (obj->complete)
obj->complete(dev, err, obj->complete_priv);
}
@@ -304,8 +396,7 @@ static void switchdev_port_obj_del_deferred(struct net_device *dev,
err = switchdev_port_obj_del_now(dev, obj);
if (err && err != -EOPNOTSUPP)
- netdev_err(dev, "failed (err=%d) to del object (id=%d)\n",
- err, obj->id);
+ switchdev_obj_id_to_helpful_msg(dev, obj->id, err, false);
if (obj->complete)
obj->complete(dev, err, obj->complete_priv);
}
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 051ed5f6fc93..19e8048241ba 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -40,7 +40,7 @@ static int is_seen(struct ctl_table_set *set)
/* Return standard mode bits for table entry. */
static int net_ctl_permissions(struct ctl_table_header *head,
- struct ctl_table *table)
+ const struct ctl_table *table)
{
struct net *net = container_of(head->set, struct net, sysctls);
@@ -54,7 +54,6 @@ static int net_ctl_permissions(struct ctl_table_header *head,
}
static void net_ctl_set_ownership(struct ctl_table_header *head,
- struct ctl_table *table,
kuid_t *uid, kgid_t *gid)
{
struct net *net = container_of(head->set, struct net, sysctls);
@@ -128,7 +127,7 @@ static void ensure_safe_net_sysctl(struct net *net, const char *path,
pr_debug("Registering net sysctl (net %p): %s\n", net, path);
ent = table;
- for (size_t i = 0; i < table_size && ent->procname; ent++, i++) {
+ for (size_t i = 0; i < table_size; ent++, i++) {
unsigned long addr;
const char *where;
@@ -166,17 +165,10 @@ struct ctl_table_header *register_net_sysctl_sz(struct net *net,
struct ctl_table *table,
size_t table_size)
{
- int count;
- struct ctl_table *entry;
-
if (!net_eq(net, &init_net))
ensure_safe_net_sysctl(net, path, table, table_size);
- entry = table;
- for (count = 0 ; count < table_size && entry->procname; entry++, count++)
- ;
-
- return __register_sysctl_table(&net->sysctls, path, table, count);
+ return __register_sysctl_table(&net->sysctls, path, table, table_size);
}
EXPORT_SYMBOL_GPL(register_net_sysctl_sz);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 7eccd97e0609..7f3fe3401c45 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -72,7 +72,6 @@ struct tipc_node;
struct tipc_bearer;
struct tipc_bc_base;
struct tipc_link;
-struct tipc_name_table;
struct tipc_topsrv;
struct tipc_monitor;
#ifdef CONFIG_TIPC_CRYPTO
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 0716eb5c8a31..5c2088a469ce 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -241,13 +241,6 @@ enum {
LINK_SYNCHING = 0xc << 24
};
-/* Link FSM state checking routines
- */
-static int link_is_up(struct tipc_link *l)
-{
- return l->state & (LINK_ESTABLISHED | LINK_SYNCHING);
-}
-
static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *xmitq);
static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
@@ -274,7 +267,7 @@ static void tipc_link_update_cwin(struct tipc_link *l, int released,
*/
bool tipc_link_is_up(struct tipc_link *l)
{
- return link_is_up(l);
+ return l->state & (LINK_ESTABLISHED | LINK_SYNCHING);
}
bool tipc_link_peer_is_down(struct tipc_link *l)
@@ -1790,7 +1783,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
rcv_nxt = l->rcv_nxt;
win_lim = rcv_nxt + TIPC_MAX_LINK_WIN;
- if (unlikely(!link_is_up(l))) {
+ if (unlikely(!tipc_link_is_up(l))) {
if (l->state == LINK_ESTABLISHING)
rc = TIPC_LINK_UP_EVT;
kfree_skb(skb);
@@ -1848,7 +1841,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
struct tipc_link *bcl = l->bc_rcvlink;
struct tipc_msg *hdr;
struct sk_buff *skb;
- bool node_up = link_is_up(bcl);
+ bool node_up = tipc_link_is_up(bcl);
u16 glen = 0, bc_rcvgap = 0;
int dlen = 0;
void *data;
@@ -2163,7 +2156,7 @@ bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr)
if (session != curr_session)
return false;
/* Extra sanity check */
- if (!link_is_up(l) && msg_ack(hdr))
+ if (!tipc_link_is_up(l) && msg_ack(hdr))
return false;
if (!(l->peer_caps & TIPC_LINK_PROTO_SEQNO))
return true;
@@ -2261,7 +2254,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
}
/* ACTIVATE_MSG serves as PEER_RESET if link is already down */
- if (mtyp == RESET_MSG || !link_is_up(l))
+ if (mtyp == RESET_MSG || !tipc_link_is_up(l))
rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT);
/* ACTIVATE_MSG takes up link if it was already locally reset */
@@ -2300,7 +2293,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
if (msg_probe(hdr))
l->stats.recv_probes++;
- if (!link_is_up(l)) {
+ if (!tipc_link_is_up(l)) {
if (l->state == LINK_ESTABLISHING)
rc = TIPC_LINK_UP_EVT;
break;
@@ -2387,7 +2380,7 @@ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr)
int mtyp = msg_type(hdr);
u16 peers_snd_nxt = msg_bc_snd_nxt(hdr);
- if (link_is_up(l))
+ if (tipc_link_is_up(l))
return;
if (msg_user(hdr) == BCAST_PROTOCOL) {
@@ -2415,7 +2408,7 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
u16 peers_snd_nxt = msg_bc_snd_nxt(hdr);
int rc = 0;
- if (!link_is_up(l))
+ if (!tipc_link_is_up(l))
return rc;
if (!msg_peer_node_is_up(hdr))
@@ -2475,7 +2468,7 @@ int tipc_link_bc_ack_rcv(struct tipc_link *r, u16 acked, u16 gap,
bool unused = false;
int rc = 0;
- if (!link_is_up(r) || !r->bc_peer_is_up)
+ if (!tipc_link_is_up(r) || !r->bc_peer_is_up)
return 0;
if (gap) {
@@ -2873,7 +2866,7 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
l->tolerance = tol;
if (l->bc_rcvlink)
l->bc_rcvlink->tolerance = tol;
- if (link_is_up(l))
+ if (tipc_link_is_up(l))
tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
}
diff --git a/net/tipc/node.c b/net/tipc/node.c
index c1e890a82434..500320e5ca47 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -2105,6 +2105,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
} else {
n = tipc_node_find_by_id(net, ehdr->id);
}
+ skb_dst_force(skb);
tipc_crypto_rcv(net, (n) ? n->crypto_rx : NULL, &skb, b);
if (!skb)
return;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 7e4135db5816..1a0cd06f0eae 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -146,8 +146,6 @@ static void tipc_data_ready(struct sock *sk);
static void tipc_write_space(struct sock *sk);
static void tipc_sock_destruct(struct sock *sk);
static int tipc_release(struct socket *sock);
-static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
- bool kern);
static void tipc_sk_timeout(struct timer_list *t);
static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua);
static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua);
@@ -659,7 +657,7 @@ static int tipc_release(struct socket *sock)
}
/**
- * __tipc_bind - associate or disassocate TIPC name(s) with a socket
+ * __tipc_bind - associate or disassociate TIPC name(s) with a socket
* @sock: socket structure
* @skaddr: socket address describing name(s) and desired operation
* @alen: size of socket address data structure
@@ -2711,13 +2709,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
* tipc_accept - wait for connection request
* @sock: listening socket
* @new_sock: new socket that is to be connected
- * @flags: file-related flags associated with socket
- * @kern: caused by kernel or by userspace?
+ * @arg: arguments for accept
*
* Return: 0 on success, errno otherwise
*/
-static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
- bool kern)
+static int tipc_accept(struct socket *sock, struct socket *new_sock,
+ struct proto_accept_arg *arg)
{
struct sock *new_sk, *sk = sock->sk;
struct tipc_sock *new_tsock;
@@ -2733,14 +2730,14 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
res = -EINVAL;
goto exit;
}
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
res = tipc_wait_for_accept(sock, timeo);
if (res)
goto exit;
buf = skb_peek(&sk->sk_receive_queue);
- res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern);
+ res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, arg->kern);
if (res)
goto exit;
security_sk_clone(sock->sk, new_sock->sk);
@@ -3565,11 +3562,8 @@ int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
rhashtable_walk_start(iter);
while ((tsk = rhashtable_walk_next(iter)) != NULL) {
if (IS_ERR(tsk)) {
- err = PTR_ERR(tsk);
- if (err == -EAGAIN) {
- err = 0;
+ if (PTR_ERR(tsk) == -EAGAIN)
continue;
- }
break;
}
diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c
index 9fb65c988f7f..30d2e06e3d8c 100644
--- a/net/tipc/sysctl.c
+++ b/net/tipc/sysctl.c
@@ -91,7 +91,6 @@ static struct ctl_table tipc_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
- {}
};
int tipc_register_sysctl(void)
diff --git a/net/tipc/trace.h b/net/tipc/trace.h
index 04af83f0500c..865142ed0ab4 100644
--- a/net/tipc/trace.h
+++ b/net/tipc/trace.h
@@ -145,7 +145,7 @@ DECLARE_EVENT_CLASS(tipc_skb_class,
),
TP_fast_assign(
- __assign_str(header, header);
+ __assign_str(header);
tipc_skb_dump(skb, more, __get_str(buf));
),
@@ -172,7 +172,7 @@ DECLARE_EVENT_CLASS(tipc_list_class,
),
TP_fast_assign(
- __assign_str(header, header);
+ __assign_str(header);
tipc_list_dump(list, more, __get_str(buf));
),
@@ -200,7 +200,7 @@ DECLARE_EVENT_CLASS(tipc_sk_class,
),
TP_fast_assign(
- __assign_str(header, header);
+ __assign_str(header);
__entry->portid = tipc_sock_get_portid(sk);
tipc_sk_dump(sk, dqueues, __get_str(buf));
if (skb)
@@ -254,7 +254,7 @@ DECLARE_EVENT_CLASS(tipc_link_class,
),
TP_fast_assign(
- __assign_str(header, header);
+ __assign_str(header);
memcpy(__entry->name, tipc_link_name(l), TIPC_MAX_LINK_NAME);
tipc_link_dump(l, dqueues, __get_str(buf));
),
@@ -337,7 +337,7 @@ DECLARE_EVENT_CLASS(tipc_node_class,
),
TP_fast_assign(
- __assign_str(header, header);
+ __assign_str(header);
__entry->addr = tipc_node_get_addr(n);
tipc_node_dump(n, more, __get_str(buf));
),
@@ -374,7 +374,7 @@ DECLARE_EVENT_CLASS(tipc_fsm_class,
),
TP_fast_assign(
- __assign_str(name, name);
+ __assign_str(name);
__entry->os = os;
__entry->ns = ns;
__entry->evt = evt;
@@ -409,8 +409,8 @@ TRACE_EVENT(tipc_l2_device_event,
),
TP_fast_assign(
- __assign_str(dev_name, dev->name);
- __assign_str(b_name, b->name);
+ __assign_str(dev_name);
+ __assign_str(b_name);
__entry->evt = evt;
__entry->b_up = test_bit(0, &b->up);
__entry->carrier = netif_carrier_ok(dev);
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index f892b0903dba..439f75539977 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -135,8 +135,11 @@ static int tipc_udp_addr2str(struct tipc_media_addr *a, char *buf, int size)
snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->port));
else if (ntohs(ua->proto) == ETH_P_IPV6)
snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->port));
- else
+ else {
pr_err("Invalid UDP media address\n");
+ return 1;
+ }
+
return 0;
}
@@ -174,7 +177,7 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
local_bh_disable();
ndst = dst_cache_get(cache);
if (dst->proto == htons(ETH_P_IP)) {
- struct rtable *rt = (struct rtable *)ndst;
+ struct rtable *rt = dst_rtable(ndst);
if (!rt) {
struct flowi4 fl = {
diff --git a/net/tls/Kconfig b/net/tls/Kconfig
index 0cdc1f7b6b08..ce8d56a19187 100644
--- a/net/tls/Kconfig
+++ b/net/tls/Kconfig
@@ -20,6 +20,7 @@ config TLS
config TLS_DEVICE
bool "Transport Layer Security HW offload"
depends on TLS
+ select SKB_DECRYPTED
select SOCK_VALIDATE_XMIT
select SOCK_RX_QUEUE_MAPPING
default n
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index bf8ed36b1ad6..dc063c2c7950 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -37,6 +37,7 @@
#include <net/inet_connection_sock.h>
#include <net/tcp.h>
#include <net/tls.h>
+#include <linux/skbuff_ref.h>
#include "tls.h"
#include "trace.h"
@@ -230,14 +231,10 @@ static void tls_device_resync_tx(struct sock *sk, struct tls_context *tls_ctx,
u32 seq)
{
struct net_device *netdev;
- struct sk_buff *skb;
int err = 0;
u8 *rcd_sn;
- skb = tcp_write_queue_tail(sk);
- if (skb)
- TCP_SKB_CB(skb)->eor = 1;
-
+ tcp_write_collapse_fence(sk);
rcd_sn = tls_ctx->tx.rec_seq;
trace_tls_device_tx_resync_send(sk, seq, rcd_sn);
@@ -1066,7 +1063,6 @@ int tls_set_device_offload(struct sock *sk)
struct tls_prot_info *prot;
struct net_device *netdev;
struct tls_context *ctx;
- struct sk_buff *skb;
char *iv, *rec_seq;
int rc;
@@ -1137,9 +1133,7 @@ int tls_set_device_offload(struct sock *sk)
* SKBs where only part of the payload needs to be encrypted.
* So mark the last skb in the write queue as end of record.
*/
- skb = tcp_write_queue_tail(sk);
- if (skb)
- TCP_SKB_CB(skb)->eor = 1;
+ tcp_write_collapse_fence(sk);
/* Avoid offloading if the device is down
* We don't want to offload new flows after
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 4e7228f275fa..f9e3d3d90dcf 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -33,6 +33,7 @@
#include <crypto/aead.h>
#include <crypto/scatterwalk.h>
#include <net/ip6_checksum.h>
+#include <linux/skbuff_ref.h>
#include "tls.h"
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index b4674f03d71a..6b4b9f2749a6 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -616,6 +616,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
struct tls_crypto_info *alt_crypto_info;
struct tls_context *ctx = tls_get_ctx(sk);
const struct tls_cipher_desc *cipher_desc;
+ union tls_crypto_context *crypto_ctx;
int rc = 0;
int conf;
@@ -623,13 +624,15 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
return -EINVAL;
if (tx) {
- crypto_info = &ctx->crypto_send.info;
+ crypto_ctx = &ctx->crypto_send;
alt_crypto_info = &ctx->crypto_recv.info;
} else {
- crypto_info = &ctx->crypto_recv.info;
+ crypto_ctx = &ctx->crypto_recv;
alt_crypto_info = &ctx->crypto_send.info;
}
+ crypto_info = &crypto_ctx->info;
+
/* Currently we don't support set crypto info more than one time */
if (TLS_CRYPTO_INFO_READY(crypto_info))
return -EBUSY;
@@ -710,7 +713,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
return 0;
err_crypto_info:
- memzero_explicit(crypto_info, sizeof(union tls_crypto_context));
+ memzero_explicit(crypto_ctx, sizeof(*crypto_ctx));
return rc;
}
@@ -816,9 +819,17 @@ struct tls_context *tls_ctx_create(struct sock *sk)
return NULL;
mutex_init(&ctx->tx_lock);
- rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
ctx->sk_proto = READ_ONCE(sk->sk_prot);
ctx->sk = sk;
+ /* Release semantic of rcu_assign_pointer() ensures that
+ * ctx->sk_proto is visible before changing sk->sk_prot in
+ * update_sk_prot(), and prevents reading uninitialized value in
+ * tls_{getsockopt, setsockopt}. Note that we do not need a
+ * read barrier in tls_{getsockopt,setsockopt} as there is an
+ * address dependency between sk->sk_proto->{getsockopt,setsockopt}
+ * and ctx->sk_proto.
+ */
+ rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
return ctx;
}
diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
index 5df08d848b5c..77e33e1e340e 100644
--- a/net/tls/tls_strp.c
+++ b/net/tls/tls_strp.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2016 Tom Herbert <tom@herbertland.com> */
#include <linux/skbuff.h>
+#include <linux/skbuff_ref.h>
#include <linux/workqueue.h>
#include <net/strparser.h>
#include <net/tcp.h>
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index b783231668c6..305a412785f5 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2147,7 +2147,6 @@ recv_end:
if (ret) {
if (err >= 0 || err == -EINPROGRESS)
err = ret;
- decrypted = 0;
goto end;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 9a6ad5974dff..0be0dcb07f7b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -126,6 +126,81 @@ static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
* hash table is protected with spinlock.
* each socket state is protected by separate spinlock.
*/
+#ifdef CONFIG_PROVE_LOCKING
+#define cmp_ptr(l, r) (((l) > (r)) - ((l) < (r)))
+
+static int unix_table_lock_cmp_fn(const struct lockdep_map *a,
+ const struct lockdep_map *b)
+{
+ return cmp_ptr(a, b);
+}
+
+static int unix_state_lock_cmp_fn(const struct lockdep_map *_a,
+ const struct lockdep_map *_b)
+{
+ const struct unix_sock *a, *b;
+
+ a = container_of(_a, struct unix_sock, lock.dep_map);
+ b = container_of(_b, struct unix_sock, lock.dep_map);
+
+ if (a->sk.sk_state == TCP_LISTEN) {
+ /* unix_stream_connect(): Before the 2nd unix_state_lock(),
+ *
+ * 1. a is TCP_LISTEN.
+ * 2. b is not a.
+ * 3. concurrent connect(b -> a) must fail.
+ *
+ * Except for 2. & 3., the b's state can be any possible
+ * value due to concurrent connect() or listen().
+ *
+ * 2. is detected in debug_spin_lock_before(), and 3. cannot
+ * be expressed as lock_cmp_fn.
+ */
+ switch (b->sk.sk_state) {
+ case TCP_CLOSE:
+ case TCP_ESTABLISHED:
+ case TCP_LISTEN:
+ return -1;
+ default:
+ /* Invalid case. */
+ return 0;
+ }
+ }
+
+ /* Should never happen. Just to be symmetric. */
+ if (b->sk.sk_state == TCP_LISTEN) {
+ switch (b->sk.sk_state) {
+ case TCP_CLOSE:
+ case TCP_ESTABLISHED:
+ return 1;
+ default:
+ return 0;
+ }
+ }
+
+ /* unix_state_double_lock(): ascending address order. */
+ return cmp_ptr(a, b);
+}
+
+static int unix_recvq_lock_cmp_fn(const struct lockdep_map *_a,
+ const struct lockdep_map *_b)
+{
+ const struct sock *a, *b;
+
+ a = container_of(_a, struct sock, sk_receive_queue.lock.dep_map);
+ b = container_of(_b, struct sock, sk_receive_queue.lock.dep_map);
+
+ /* unix_collect_skb(): listener -> embryo order. */
+ if (a->sk_state == TCP_LISTEN && unix_sk(b)->listener == a)
+ return -1;
+
+ /* Should never happen. Just to be symmetric. */
+ if (b->sk_state == TCP_LISTEN && unix_sk(a)->listener == b)
+ return 1;
+
+ return 0;
+}
+#endif
static unsigned int unix_unbound_hash(struct sock *sk)
{
@@ -168,7 +243,7 @@ static void unix_table_double_lock(struct net *net,
swap(hash1, hash2);
spin_lock(&net->unx.table.locks[hash1]);
- spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
+ spin_lock(&net->unx.table.locks[hash2]);
}
static void unix_table_double_unlock(struct net *net,
@@ -221,15 +296,9 @@ static inline int unix_may_send(struct sock *sk, struct sock *osk)
return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}
-static inline int unix_recvq_full(const struct sock *sk)
-{
- return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
-}
-
static inline int unix_recvq_full_lockless(const struct sock *sk)
{
- return skb_queue_len_lockless(&sk->sk_receive_queue) >
- READ_ONCE(sk->sk_max_ack_backlog);
+ return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}
struct sock *unix_peer_get(struct sock *s)
@@ -530,10 +599,10 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
return 0;
}
-static int unix_writable(const struct sock *sk)
+static int unix_writable(const struct sock *sk, unsigned char state)
{
- return sk->sk_state != TCP_LISTEN &&
- (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
+ return state != TCP_LISTEN &&
+ (refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf);
}
static void unix_write_space(struct sock *sk)
@@ -541,12 +610,12 @@ static void unix_write_space(struct sock *sk)
struct socket_wq *wq;
rcu_read_lock();
- if (unix_writable(sk)) {
+ if (unix_writable(sk, READ_ONCE(sk->sk_state))) {
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait,
EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
- sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
rcu_read_unlock();
}
@@ -570,7 +639,6 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
sk_error_report(other);
}
}
- other->sk_state = TCP_CLOSE;
}
static void unix_sock_destructor(struct sock *sk)
@@ -617,7 +685,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
u->path.dentry = NULL;
u->path.mnt = NULL;
state = sk->sk_state;
- sk->sk_state = TCP_CLOSE;
+ WRITE_ONCE(sk->sk_state, TCP_CLOSE);
skpair = unix_peer(sk);
unix_peer(sk) = NULL;
@@ -638,7 +706,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
unix_state_lock(skpair);
/* No more writes */
WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
- if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion)
WRITE_ONCE(skpair->sk_err, ECONNRESET);
unix_state_unlock(skpair);
skpair->sk_state_change(skpair);
@@ -654,8 +722,8 @@ static void unix_release_sock(struct sock *sk, int embrion)
while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
if (state == TCP_LISTEN)
unix_release_sock(skb->sk, 1);
+
/* passed fds are erased in the kfree_skb hook */
- UNIXCB(skb).consumed = skb->len;
kfree_skb(skb);
}
@@ -683,14 +751,19 @@ static void unix_release_sock(struct sock *sk, int embrion)
static void init_peercred(struct sock *sk)
{
+ sk->sk_peer_pid = get_pid(task_tgid(current));
+ sk->sk_peer_cred = get_current_cred();
+}
+
+static void update_peercred(struct sock *sk)
+{
const struct cred *old_cred;
struct pid *old_pid;
spin_lock(&sk->sk_peer_lock);
old_pid = sk->sk_peer_pid;
old_cred = sk->sk_peer_cred;
- sk->sk_peer_pid = get_pid(task_tgid(current));
- sk->sk_peer_cred = get_current_cred();
+ init_peercred(sk);
spin_unlock(&sk->sk_peer_lock);
put_pid(old_pid);
@@ -699,26 +772,12 @@ static void init_peercred(struct sock *sk)
static void copy_peercred(struct sock *sk, struct sock *peersk)
{
- const struct cred *old_cred;
- struct pid *old_pid;
+ lockdep_assert_held(&unix_sk(peersk)->lock);
- if (sk < peersk) {
- spin_lock(&sk->sk_peer_lock);
- spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
- } else {
- spin_lock(&peersk->sk_peer_lock);
- spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
- }
- old_pid = sk->sk_peer_pid;
- old_cred = sk->sk_peer_cred;
- sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
+ spin_lock(&sk->sk_peer_lock);
+ sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
-
spin_unlock(&sk->sk_peer_lock);
- spin_unlock(&peersk->sk_peer_lock);
-
- put_pid(old_pid);
- put_cred(old_cred);
}
static int unix_listen(struct socket *sock, int backlog)
@@ -731,7 +790,7 @@ static int unix_listen(struct socket *sock, int backlog)
if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
goto out; /* Only stream/seqpacket sockets accept */
err = -EINVAL;
- if (!u->addr)
+ if (!READ_ONCE(u->addr))
goto out; /* No listens on an unbound socket */
unix_state_lock(sk);
if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
@@ -739,9 +798,10 @@ static int unix_listen(struct socket *sock, int backlog)
if (backlog > sk->sk_max_ack_backlog)
wake_up_interruptible_all(&u->peer_wait);
sk->sk_max_ack_backlog = backlog;
- sk->sk_state = TCP_LISTEN;
+ WRITE_ONCE(sk->sk_state, TCP_LISTEN);
+
/* set credentials so connect can copy them */
- init_peercred(sk);
+ update_peercred(sk);
err = 0;
out_unlock:
@@ -755,7 +815,7 @@ static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
-static int unix_accept(struct socket *, struct socket *, int, bool);
+static int unix_accept(struct socket *, struct socket *, struct proto_accept_arg *arg);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
@@ -976,14 +1036,17 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
sk->sk_hash = unix_unbound_hash(sk);
sk->sk_allocation = GFP_KERNEL_ACCOUNT;
sk->sk_write_space = unix_write_space;
- sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
+ sk->sk_max_ack_backlog = READ_ONCE(net->unx.sysctl_max_dgram_qlen);
sk->sk_destruct = unix_sock_destructor;
+ lock_set_cmp_fn(&sk->sk_receive_queue.lock, unix_recvq_lock_cmp_fn, NULL);
+
u = unix_sk(sk);
- u->inflight = 0;
+ u->listener = NULL;
+ u->vertex = NULL;
u->path.dentry = NULL;
u->path.mnt = NULL;
spin_lock_init(&u->lock);
- INIT_LIST_HEAD(&u->link);
+ lock_set_cmp_fn(&u->lock, unix_state_lock_cmp_fn, NULL);
mutex_init(&u->iolock); /* single task reading lock */
mutex_init(&u->bindlock); /* single task binding lock */
init_waitqueue_head(&u->peer_wait);
@@ -1131,8 +1194,8 @@ static struct sock *unix_find_other(struct net *net,
static int unix_autobind(struct sock *sk)
{
- unsigned int new_hash, old_hash = sk->sk_hash;
struct unix_sock *u = unix_sk(sk);
+ unsigned int new_hash, old_hash;
struct net *net = sock_net(sk);
struct unix_address *addr;
u32 lastnum, ordernum;
@@ -1155,6 +1218,7 @@ static int unix_autobind(struct sock *sk)
addr->name->sun_family = AF_UNIX;
refcount_set(&addr->refcnt, 1);
+ old_hash = sk->sk_hash;
ordernum = get_random_u32();
lastnum = ordernum & 0xFFFFF;
retry:
@@ -1195,8 +1259,8 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
{
umode_t mode = S_IFSOCK |
(SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
- unsigned int new_hash, old_hash = sk->sk_hash;
struct unix_sock *u = unix_sk(sk);
+ unsigned int new_hash, old_hash;
struct net *net = sock_net(sk);
struct mnt_idmap *idmap;
struct unix_address *addr;
@@ -1234,6 +1298,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
if (u->addr)
goto out_unlock;
+ old_hash = sk->sk_hash;
new_hash = unix_bsd_hash(d_backing_inode(dentry));
unix_table_double_lock(net, old_hash, new_hash);
u->path.mnt = mntget(parent.mnt);
@@ -1261,8 +1326,8 @@ out:
static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
int addr_len)
{
- unsigned int new_hash, old_hash = sk->sk_hash;
struct unix_sock *u = unix_sk(sk);
+ unsigned int new_hash, old_hash;
struct net *net = sock_net(sk);
struct unix_address *addr;
int err;
@@ -1280,6 +1345,7 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
goto out_mutex;
}
+ old_hash = sk->sk_hash;
new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
unix_table_double_lock(net, old_hash, new_hash);
@@ -1329,11 +1395,12 @@ static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
unix_state_lock(sk1);
return;
}
+
if (sk1 > sk2)
swap(sk1, sk2);
unix_state_lock(sk1);
- unix_state_lock_nested(sk2, U_LOCK_SECOND);
+ unix_state_lock(sk2);
}
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
@@ -1369,7 +1436,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
- !unix_sk(sk)->addr) {
+ !READ_ONCE(unix_sk(sk)->addr)) {
err = unix_autobind(sk);
if (err)
goto out;
@@ -1399,7 +1466,8 @@ restart:
if (err)
goto out_unlock;
- sk->sk_state = other->sk_state = TCP_ESTABLISHED;
+ WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
+ WRITE_ONCE(other->sk_state, TCP_ESTABLISHED);
} else {
/*
* 1003.1g breaking connected state with AF_UNSPEC
@@ -1416,13 +1484,20 @@ restart:
unix_peer(sk) = other;
if (!other)
- sk->sk_state = TCP_CLOSE;
+ WRITE_ONCE(sk->sk_state, TCP_CLOSE);
unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
unix_state_double_unlock(sk, other);
- if (other != old_peer)
+ if (other != old_peer) {
unix_dgram_disconnected(sk, old_peer);
+
+ unix_state_lock(old_peer);
+ if (!unix_peer(old_peer))
+ WRITE_ONCE(old_peer->sk_state, TCP_CLOSE);
+ unix_state_unlock(old_peer);
+ }
+
sock_put(old_peer);
} else {
unix_peer(sk) = other;
@@ -1468,9 +1543,9 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
struct unix_sock *u = unix_sk(sk), *newu, *otheru;
struct net *net = sock_net(sk);
struct sk_buff *skb = NULL;
+ unsigned char state;
long timeo;
int err;
- int st;
err = unix_validate_addr(sunaddr, addr_len);
if (err)
@@ -1481,7 +1556,8 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
goto out;
if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
- test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
+ test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
+ !READ_ONCE(u->addr)) {
err = unix_autobind(sk);
if (err)
goto out;
@@ -1518,7 +1594,6 @@ restart:
goto out;
}
- /* Latch state of peer */
unix_state_lock(other);
/* Apparently VFS overslept socket death. Retry. */
@@ -1534,7 +1609,7 @@ restart:
if (other->sk_shutdown & RCV_SHUTDOWN)
goto out_unlock;
- if (unix_recvq_full(other)) {
+ if (unix_recvq_full_lockless(other)) {
err = -EAGAIN;
if (!timeo)
goto out_unlock;
@@ -1548,39 +1623,21 @@ restart:
goto restart;
}
- /* Latch our state.
-
- It is tricky place. We need to grab our state lock and cannot
- drop lock on peer. It is dangerous because deadlock is
- possible. Connect to self case and simultaneous
- attempt to connect are eliminated by checking socket
- state. other is TCP_LISTEN, if sk is TCP_LISTEN we
- check this before attempt to grab lock.
-
- Well, and we have to recheck the state after socket locked.
+ /* self connect and simultaneous connect are eliminated
+ * by rejecting TCP_LISTEN socket to avoid deadlock.
*/
- st = sk->sk_state;
-
- switch (st) {
- case TCP_CLOSE:
- /* This is ok... continue with connect */
- break;
- case TCP_ESTABLISHED:
- /* Socket is already connected */
- err = -EISCONN;
- goto out_unlock;
- default:
- err = -EINVAL;
+ state = READ_ONCE(sk->sk_state);
+ if (unlikely(state != TCP_CLOSE)) {
+ err = state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
goto out_unlock;
}
- unix_state_lock_nested(sk, U_LOCK_SECOND);
+ unix_state_lock(sk);
- if (sk->sk_state != st) {
+ if (unlikely(sk->sk_state != TCP_CLOSE)) {
+ err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
unix_state_unlock(sk);
- unix_state_unlock(other);
- sock_put(other);
- goto restart;
+ goto out_unlock;
}
err = security_unix_stream_connect(sk, other, newsk);
@@ -1597,6 +1654,7 @@ restart:
newsk->sk_type = sk->sk_type;
init_peercred(newsk);
newu = unix_sk(newsk);
+ newu->listener = other;
RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
otheru = unix_sk(other);
@@ -1628,7 +1686,7 @@ restart:
copy_peercred(sk, other);
sock->state = SS_CONNECTED;
- sk->sk_state = TCP_ESTABLISHED;
+ WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
sock_hold(newsk);
smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
@@ -1688,32 +1746,31 @@ static void unix_sock_inherit_flags(const struct socket *old,
set_bit(SOCK_PASSSEC, &new->flags);
}
-static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int unix_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sock *sk = sock->sk;
- struct sock *tsk;
struct sk_buff *skb;
- int err;
+ struct sock *tsk;
- err = -EOPNOTSUPP;
+ arg->err = -EOPNOTSUPP;
if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
goto out;
- err = -EINVAL;
- if (sk->sk_state != TCP_LISTEN)
+ arg->err = -EINVAL;
+ if (READ_ONCE(sk->sk_state) != TCP_LISTEN)
goto out;
/* If socket state is TCP_LISTEN it cannot change (for now...),
* so that no locks are necessary.
*/
- skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
- &err);
+ skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
+ &arg->err);
if (!skb) {
/* This means receive shutdown. */
- if (err == 0)
- err = -EINVAL;
+ if (arg->err == 0)
+ arg->err = -EINVAL;
goto out;
}
@@ -1723,6 +1780,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
/* attach accepted sock to socket */
unix_state_lock(tsk);
+ unix_update_edges(unix_sk(tsk));
newsock->state = SS_CONNECTED;
unix_sock_inherit_flags(sock, newsock);
sock_graft(tsk, newsock);
@@ -1730,7 +1788,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
return 0;
out:
- return err;
+ return arg->err;
}
@@ -1789,81 +1847,29 @@ static inline bool too_many_unix_fds(struct task_struct *p)
static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
- int i;
-
if (too_many_unix_fds(current))
return -ETOOMANYREFS;
- /* Need to duplicate file references for the sake of garbage
- * collection. Otherwise a socket in the fps might become a
- * candidate for GC while the skb is not yet queued.
- */
- UNIXCB(skb).fp = scm_fp_dup(scm->fp);
- if (!UNIXCB(skb).fp)
- return -ENOMEM;
+ UNIXCB(skb).fp = scm->fp;
+ scm->fp = NULL;
- for (i = scm->fp->count - 1; i >= 0; i--)
- unix_inflight(scm->fp->user, scm->fp->fp[i]);
+ if (unix_prepare_fpl(UNIXCB(skb).fp))
+ return -ENOMEM;
return 0;
}
static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
- int i;
-
scm->fp = UNIXCB(skb).fp;
UNIXCB(skb).fp = NULL;
- for (i = scm->fp->count - 1; i >= 0; i--)
- unix_notinflight(scm->fp->user, scm->fp->fp[i]);
+ unix_destroy_fpl(scm->fp);
}
static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
scm->fp = scm_fp_dup(UNIXCB(skb).fp);
-
- /*
- * Garbage collection of unix sockets starts by selecting a set of
- * candidate sockets which have reference only from being in flight
- * (total_refs == inflight_refs). This condition is checked once during
- * the candidate collection phase, and candidates are marked as such, so
- * that non-candidates can later be ignored. While inflight_refs is
- * protected by unix_gc_lock, total_refs (file count) is not, hence this
- * is an instantaneous decision.
- *
- * Once a candidate, however, the socket must not be reinstalled into a
- * file descriptor while the garbage collection is in progress.
- *
- * If the above conditions are met, then the directed graph of
- * candidates (*) does not change while unix_gc_lock is held.
- *
- * Any operations that changes the file count through file descriptors
- * (dup, close, sendmsg) does not change the graph since candidates are
- * not installed in fds.
- *
- * Dequeing a candidate via recvmsg would install it into an fd, but
- * that takes unix_gc_lock to decrement the inflight count, so it's
- * serialized with garbage collection.
- *
- * MSG_PEEK is special in that it does not change the inflight count,
- * yet does install the socket into an fd. The following lock/unlock
- * pair is to ensure serialization with garbage collection. It must be
- * done between incrementing the file count and installing the file into
- * an fd.
- *
- * If garbage collection starts after the barrier provided by the
- * lock/unlock, then it will see the elevated refcount and not mark this
- * as a candidate. If a garbage collection is already in progress
- * before the file count was incremented, then the lock/unlock pair will
- * ensure that garbage collection is finished before progressing to
- * installing the fd.
- *
- * (*) A -> B where B is on the queue of A or B is on the queue of C
- * which is on the queue of listening socket A.
- */
- spin_lock(&unix_gc_lock);
- spin_unlock(&unix_gc_lock);
}
static void unix_destruct_scm(struct sk_buff *skb)
@@ -1937,8 +1943,10 @@ static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
struct scm_fp_list *fp = UNIXCB(skb).fp;
struct unix_sock *u = unix_sk(sk);
- if (unlikely(fp && fp->count))
+ if (unlikely(fp && fp->count)) {
atomic_add(fp->count, &u->scm_stat.nr_fds);
+ unix_add_edges(fp, u);
+ }
}
static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
@@ -1946,8 +1954,10 @@ static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
struct scm_fp_list *fp = UNIXCB(skb).fp;
struct unix_sock *u = unix_sk(sk);
- if (unlikely(fp && fp->count))
+ if (unlikely(fp && fp->count)) {
atomic_sub(fp->count, &u->scm_stat.nr_fds);
+ unix_del_edges(fp);
+ }
}
/*
@@ -1997,14 +2007,15 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
}
if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
- test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
+ test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
+ !READ_ONCE(u->addr)) {
err = unix_autobind(sk);
if (err)
goto out;
}
err = -EMSGSIZE;
- if (len > sk->sk_sndbuf - 32)
+ if (len > READ_ONCE(sk->sk_sndbuf) - 32)
goto out;
if (len > SKB_MAX_ALLOC) {
@@ -2086,7 +2097,7 @@ restart_locked:
unix_peer(sk) = NULL;
unix_dgram_peer_wake_disconnect_wakeup(sk, other);
- sk->sk_state = TCP_CLOSE;
+ WRITE_ONCE(sk->sk_state, TCP_CLOSE);
unix_state_unlock(sk);
unix_dgram_disconnected(sk, other);
@@ -2217,13 +2228,15 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other
maybe_add_creds(skb, sock, other);
skb_get(skb);
+ scm_stat_add(other, skb);
+
+ spin_lock(&other->sk_receive_queue.lock);
if (ousk->oob_skb)
consume_skb(ousk->oob_skb);
-
WRITE_ONCE(ousk->oob_skb, skb);
+ __skb_queue_tail(&other->sk_receive_queue, skb);
+ spin_unlock(&other->sk_receive_queue.lock);
- scm_stat_add(other, skb);
- skb_queue_tail(&other->sk_receive_queue, skb);
sk_send_sigurg(other);
unix_state_unlock(other);
other->sk_data_ready(other);
@@ -2261,7 +2274,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
}
if (msg->msg_namelen) {
- err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
+ err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
goto out_err;
} else {
err = -ENOTCONN;
@@ -2270,7 +2283,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
goto out_err;
}
- if (sk->sk_shutdown & SEND_SHUTDOWN)
+ if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
goto pipe_err;
while (sent < len) {
@@ -2282,7 +2295,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
&err, 0);
} else {
/* Keep two messages in the pipe so it schedules better */
- size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
+ size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64);
/* allow fallback to order-0 allocations */
size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
@@ -2375,7 +2388,7 @@ static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
if (err)
return err;
- if (sk->sk_state != TCP_ESTABLISHED)
+ if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
return -ENOTCONN;
if (msg->msg_namelen)
@@ -2389,7 +2402,7 @@ static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
{
struct sock *sk = sock->sk;
- if (sk->sk_state != TCP_ESTABLISHED)
+ if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
return -ENOTCONN;
return unix_dgram_recvmsg(sock, msg, size, flags);
@@ -2614,8 +2627,10 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
mutex_lock(&u->iolock);
unix_state_lock(sk);
+ spin_lock(&sk->sk_receive_queue.lock);
if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
+ spin_unlock(&sk->sk_receive_queue.lock);
unix_state_unlock(sk);
mutex_unlock(&u->iolock);
return -EINVAL;
@@ -2627,6 +2642,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
WRITE_ONCE(u->oob_skb, NULL);
else
skb_get(oob_skb);
+
+ spin_unlock(&sk->sk_receive_queue.lock);
unix_state_unlock(sk);
chunk = state->recv_actor(oob_skb, 0, chunk, state);
@@ -2650,29 +2667,53 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
{
struct unix_sock *u = unix_sk(sk);
- if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
- skb_unlink(skb, &sk->sk_receive_queue);
- consume_skb(skb);
- skb = NULL;
+ if (!unix_skb_len(skb)) {
+ struct sk_buff *unlinked_skb = NULL;
+
+ spin_lock(&sk->sk_receive_queue.lock);
+
+ if (copied && (!u->oob_skb || skb == u->oob_skb)) {
+ skb = NULL;
+ } else if (flags & MSG_PEEK) {
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
+ } else {
+ unlinked_skb = skb;
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
+ __skb_unlink(unlinked_skb, &sk->sk_receive_queue);
+ }
+
+ spin_unlock(&sk->sk_receive_queue.lock);
+
+ consume_skb(unlinked_skb);
} else {
+ struct sk_buff *unlinked_skb = NULL;
+
+ spin_lock(&sk->sk_receive_queue.lock);
+
if (skb == u->oob_skb) {
if (copied) {
skb = NULL;
- } else if (sock_flag(sk, SOCK_URGINLINE)) {
- if (!(flags & MSG_PEEK)) {
+ } else if (!(flags & MSG_PEEK)) {
+ if (sock_flag(sk, SOCK_URGINLINE)) {
WRITE_ONCE(u->oob_skb, NULL);
consume_skb(skb);
+ } else {
+ __skb_unlink(skb, &sk->sk_receive_queue);
+ WRITE_ONCE(u->oob_skb, NULL);
+ unlinked_skb = skb;
+ skb = skb_peek(&sk->sk_receive_queue);
}
- } else if (flags & MSG_PEEK) {
- skb = NULL;
- } else {
- skb_unlink(skb, &sk->sk_receive_queue);
- WRITE_ONCE(u->oob_skb, NULL);
- if (!WARN_ON_ONCE(skb_unref(skb)))
- kfree_skb(skb);
- skb = skb_peek(&sk->sk_receive_queue);
+ } else if (!sock_flag(sk, SOCK_URGINLINE)) {
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
}
}
+
+ spin_unlock(&sk->sk_receive_queue.lock);
+
+ if (unlinked_skb) {
+ WARN_ON_ONCE(skb_unref(unlinked_skb));
+ kfree_skb(unlinked_skb);
+ }
}
return skb;
}
@@ -2680,10 +2721,49 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
- if (unlikely(sk->sk_state != TCP_ESTABLISHED))
+ struct unix_sock *u = unix_sk(sk);
+ struct sk_buff *skb;
+ int err;
+
+ if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED))
return -ENOTCONN;
- return unix_read_skb(sk, recv_actor);
+ mutex_lock(&u->iolock);
+ skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
+ mutex_unlock(&u->iolock);
+ if (!skb)
+ return err;
+
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (unlikely(skb == READ_ONCE(u->oob_skb))) {
+ bool drop = false;
+
+ unix_state_lock(sk);
+
+ if (sock_flag(sk, SOCK_DEAD)) {
+ unix_state_unlock(sk);
+ kfree_skb(skb);
+ return -ECONNRESET;
+ }
+
+ spin_lock(&sk->sk_receive_queue.lock);
+ if (likely(skb == u->oob_skb)) {
+ WRITE_ONCE(u->oob_skb, NULL);
+ drop = true;
+ }
+ spin_unlock(&sk->sk_receive_queue.lock);
+
+ unix_state_unlock(sk);
+
+ if (drop) {
+ WARN_ON_ONCE(skb_unref(skb));
+ kfree_skb(skb);
+ return -EAGAIN;
+ }
+ }
+#endif
+
+ return recv_actor(sk, skb);
}
static int unix_stream_read_generic(struct unix_stream_read_state *state,
@@ -2704,7 +2784,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
size_t size = state->size;
unsigned int last_len;
- if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
+ if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) {
err = -EINVAL;
goto out;
}
@@ -2730,9 +2810,8 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
skip = max(sk_peek_offset(sk, flags), 0);
do {
- int chunk;
- bool drop_skb;
struct sk_buff *skb, *last;
+ int chunk;
redo:
unix_state_lock(sk);
@@ -2828,11 +2907,7 @@ unlock:
}
chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
- skb_get(skb);
chunk = state->recv_actor(skb, skip, chunk, state);
- drop_skb = !unix_skb_len(skb);
- /* skb is only safe to use if !drop_skb */
- consume_skb(skb);
if (chunk < 0) {
if (copied == 0)
copied = -EFAULT;
@@ -2841,18 +2916,6 @@ unlock:
copied += chunk;
size -= chunk;
- if (drop_skb) {
- /* the skb was touched by a concurrent reader;
- * we should not expect anything from this skb
- * anymore and assume it invalid - we can be
- * sure it was dropped from the socket queue
- *
- * let's report a short read
- */
- err = 0;
- break;
- }
-
/* Mark read part of skb as used */
if (!(flags & MSG_PEEK)) {
UNIXCB(skb).consumed += chunk;
@@ -3035,7 +3098,7 @@ long unix_inq_len(struct sock *sk)
struct sk_buff *skb;
long amount = 0;
- if (sk->sk_state == TCP_LISTEN)
+ if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
return -EINVAL;
spin_lock(&sk->sk_receive_queue.lock);
@@ -3120,12 +3183,23 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
case SIOCATMARK:
{
+ struct unix_sock *u = unix_sk(sk);
struct sk_buff *skb;
int answ = 0;
+ mutex_lock(&u->iolock);
+
skb = skb_peek(&sk->sk_receive_queue);
- if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
- answ = 1;
+ if (skb) {
+ struct sk_buff *oob_skb = READ_ONCE(u->oob_skb);
+
+ if (skb == oob_skb ||
+ (!oob_skb && !unix_skb_len(skb)))
+ answ = 1;
+ }
+
+ mutex_unlock(&u->iolock);
+
err = put_user(answ, (int __user *)arg);
}
break;
@@ -3147,12 +3221,14 @@ static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon
static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
+ unsigned char state;
__poll_t mask;
u8 shutdown;
sock_poll_wait(file, sock, wait);
mask = 0;
shutdown = READ_ONCE(sk->sk_shutdown);
+ state = READ_ONCE(sk->sk_state);
/* exceptional events? */
if (READ_ONCE(sk->sk_err))
@@ -3174,14 +3250,14 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
/* Connection-based need to check for termination and startup */
if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
- sk->sk_state == TCP_CLOSE)
+ state == TCP_CLOSE)
mask |= EPOLLHUP;
/*
* we set writable also when the other side has shut down the
* connection. This prevents stuck sockets.
*/
- if (unix_writable(sk))
+ if (unix_writable(sk, state))
mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
return mask;
@@ -3192,12 +3268,14 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
{
struct sock *sk = sock->sk, *other;
unsigned int writable;
+ unsigned char state;
__poll_t mask;
u8 shutdown;
sock_poll_wait(file, sock, wait);
mask = 0;
shutdown = READ_ONCE(sk->sk_shutdown);
+ state = READ_ONCE(sk->sk_state);
/* exceptional events? */
if (READ_ONCE(sk->sk_err) ||
@@ -3217,19 +3295,14 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based need to check for termination and startup */
- if (sk->sk_type == SOCK_SEQPACKET) {
- if (sk->sk_state == TCP_CLOSE)
- mask |= EPOLLHUP;
- /* connection hasn't started yet? */
- if (sk->sk_state == TCP_SYN_SENT)
- return mask;
- }
+ if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE)
+ mask |= EPOLLHUP;
/* No write status requested, avoid expensive OUT tests. */
if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
return mask;
- writable = unix_writable(sk);
+ writable = unix_writable(sk, state);
if (writable) {
unix_state_lock(sk);
@@ -3623,6 +3696,7 @@ static int __net_init unix_net_init(struct net *net)
for (i = 0; i < UNIX_HASH_SIZE; i++) {
spin_lock_init(&net->unx.table.locks[i]);
+ lock_set_cmp_fn(&net->unx.table.locks[i], unix_table_lock_cmp_fn, NULL);
INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
}
diff --git a/net/unix/diag.c b/net/unix/diag.c
index ae39538c5042..9138af8b465e 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -47,9 +47,7 @@ static int sk_diag_dump_peer(struct sock *sk, struct sk_buff *nlskb)
peer = unix_peer_get(sk);
if (peer) {
- unix_state_lock(peer);
ino = sock_i_ino(peer);
- unix_state_unlock(peer);
sock_put(peer);
return nla_put_u32(nlskb, UNIX_DIAG_PEER, ino);
@@ -65,7 +63,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
u32 *buf;
int i;
- if (sk->sk_state == TCP_LISTEN) {
+ if (READ_ONCE(sk->sk_state) == TCP_LISTEN) {
spin_lock(&sk->sk_receive_queue.lock);
attr = nla_reserve(nlskb, UNIX_DIAG_ICONS,
@@ -75,20 +73,9 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
buf = nla_data(attr);
i = 0;
- skb_queue_walk(&sk->sk_receive_queue, skb) {
- struct sock *req, *peer;
-
- req = skb->sk;
- /*
- * The state lock is outer for the same sk's
- * queue lock. With the other's queue locked it's
- * OK to lock the state.
- */
- unix_state_lock_nested(req, U_LOCK_DIAG);
- peer = unix_sk(req)->peer;
- buf[i++] = (peer ? sock_i_ino(peer) : 0);
- unix_state_unlock(req);
- }
+ skb_queue_walk(&sk->sk_receive_queue, skb)
+ buf[i++] = sock_i_ino(unix_peer(skb->sk));
+
spin_unlock(&sk->sk_receive_queue.lock);
}
@@ -103,8 +90,8 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
{
struct unix_diag_rqlen rql;
- if (sk->sk_state == TCP_LISTEN) {
- rql.udiag_rqueue = sk->sk_receive_queue.qlen;
+ if (READ_ONCE(sk->sk_state) == TCP_LISTEN) {
+ rql.udiag_rqueue = skb_queue_len_lockless(&sk->sk_receive_queue);
rql.udiag_wqueue = sk->sk_max_ack_backlog;
} else {
rql.udiag_rqueue = (u32) unix_inq_len(sk);
@@ -136,7 +123,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
rep = nlmsg_data(nlh);
rep->udiag_family = AF_UNIX;
rep->udiag_type = sk->sk_type;
- rep->udiag_state = sk->sk_state;
+ rep->udiag_state = READ_ONCE(sk->sk_state);
rep->pad = 0;
rep->udiag_ino = sk_ino;
sock_diag_save_cookie(sk, rep->udiag_cookie);
@@ -165,7 +152,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO))
goto out_nlmsg_trim;
- if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown))
+ if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, READ_ONCE(sk->sk_shutdown)))
goto out_nlmsg_trim;
if ((req->udiag_show & UDIAG_SHOW_UID) &&
@@ -180,22 +167,6 @@ out_nlmsg_trim:
return -EMSGSIZE;
}
-static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
- struct user_namespace *user_ns,
- u32 portid, u32 seq, u32 flags)
-{
- int sk_ino;
-
- unix_state_lock(sk);
- sk_ino = sock_i_ino(sk);
- unix_state_unlock(sk);
-
- if (!sk_ino)
- return 0;
-
- return sk_diag_fill(sk, skb, req, user_ns, portid, seq, flags, sk_ino);
-}
-
static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
@@ -213,14 +184,22 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
num = 0;
spin_lock(&net->unx.table.locks[slot]);
sk_for_each(sk, &net->unx.table.buckets[slot]) {
+ int sk_ino;
+
if (num < s_num)
goto next;
- if (!(req->udiag_states & (1 << sk->sk_state)))
+
+ if (!(req->udiag_states & (1 << READ_ONCE(sk->sk_state))))
goto next;
- if (sk_diag_dump(sk, skb, req, sk_user_ns(skb->sk),
+
+ sk_ino = sock_i_ino(sk);
+ if (!sk_ino)
+ goto next;
+
+ if (sk_diag_fill(sk, skb, req, sk_user_ns(skb->sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- NLM_F_MULTI) < 0) {
+ NLM_F_MULTI, sk_ino) < 0) {
spin_unlock(&net->unx.table.locks[slot]);
goto done;
}
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 0104be9d4704..06d94ad999e9 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -101,277 +101,493 @@ struct unix_sock *unix_get_socket(struct file *filp)
return NULL;
}
-DEFINE_SPINLOCK(unix_gc_lock);
+static struct unix_vertex *unix_edge_successor(struct unix_edge *edge)
+{
+ /* If an embryo socket has a fd,
+ * the listener indirectly holds the fd's refcnt.
+ */
+ if (edge->successor->listener)
+ return unix_sk(edge->successor->listener)->vertex;
+
+ return edge->successor->vertex;
+}
+
+static bool unix_graph_maybe_cyclic;
+static bool unix_graph_grouped;
+
+static void unix_update_graph(struct unix_vertex *vertex)
+{
+ /* If the receiver socket is not inflight, no cyclic
+ * reference could be formed.
+ */
+ if (!vertex)
+ return;
+
+ unix_graph_maybe_cyclic = true;
+ unix_graph_grouped = false;
+}
+
+static LIST_HEAD(unix_unvisited_vertices);
+
+enum unix_vertex_index {
+ UNIX_VERTEX_INDEX_MARK1,
+ UNIX_VERTEX_INDEX_MARK2,
+ UNIX_VERTEX_INDEX_START,
+};
+
+static unsigned long unix_vertex_unvisited_index = UNIX_VERTEX_INDEX_MARK1;
+
+static void unix_add_edge(struct scm_fp_list *fpl, struct unix_edge *edge)
+{
+ struct unix_vertex *vertex = edge->predecessor->vertex;
+
+ if (!vertex) {
+ vertex = list_first_entry(&fpl->vertices, typeof(*vertex), entry);
+ vertex->index = unix_vertex_unvisited_index;
+ vertex->out_degree = 0;
+ INIT_LIST_HEAD(&vertex->edges);
+ INIT_LIST_HEAD(&vertex->scc_entry);
+
+ list_move_tail(&vertex->entry, &unix_unvisited_vertices);
+ edge->predecessor->vertex = vertex;
+ }
+
+ vertex->out_degree++;
+ list_add_tail(&edge->vertex_entry, &vertex->edges);
+
+ unix_update_graph(unix_edge_successor(edge));
+}
+
+static void unix_del_edge(struct scm_fp_list *fpl, struct unix_edge *edge)
+{
+ struct unix_vertex *vertex = edge->predecessor->vertex;
+
+ if (!fpl->dead)
+ unix_update_graph(unix_edge_successor(edge));
+
+ list_del(&edge->vertex_entry);
+ vertex->out_degree--;
+
+ if (!vertex->out_degree) {
+ edge->predecessor->vertex = NULL;
+ list_move_tail(&vertex->entry, &fpl->vertices);
+ }
+}
+
+static void unix_free_vertices(struct scm_fp_list *fpl)
+{
+ struct unix_vertex *vertex, *next_vertex;
+
+ list_for_each_entry_safe(vertex, next_vertex, &fpl->vertices, entry) {
+ list_del(&vertex->entry);
+ kfree(vertex);
+ }
+}
+
+static DEFINE_SPINLOCK(unix_gc_lock);
unsigned int unix_tot_inflight;
-static LIST_HEAD(gc_candidates);
-static LIST_HEAD(gc_inflight_list);
-/* Keep the number of times in flight count for the file
- * descriptor if it is for an AF_UNIX socket.
- */
-void unix_inflight(struct user_struct *user, struct file *filp)
+void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver)
{
- struct unix_sock *u = unix_get_socket(filp);
+ int i = 0, j = 0;
spin_lock(&unix_gc_lock);
- if (u) {
- if (!u->inflight) {
- WARN_ON_ONCE(!list_empty(&u->link));
- list_add_tail(&u->link, &gc_inflight_list);
- } else {
- WARN_ON_ONCE(list_empty(&u->link));
- }
- u->inflight++;
+ if (!fpl->count_unix)
+ goto out;
- /* Paired with READ_ONCE() in wait_for_unix_gc() */
- WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
- }
+ do {
+ struct unix_sock *inflight = unix_get_socket(fpl->fp[j++]);
+ struct unix_edge *edge;
+
+ if (!inflight)
+ continue;
- WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);
+ edge = fpl->edges + i++;
+ edge->predecessor = inflight;
+ edge->successor = receiver;
+
+ unix_add_edge(fpl, edge);
+ } while (i < fpl->count_unix);
+
+ receiver->scm_stat.nr_unix_fds += fpl->count_unix;
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + fpl->count_unix);
+out:
+ WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight + fpl->count);
spin_unlock(&unix_gc_lock);
+
+ fpl->inflight = true;
+
+ unix_free_vertices(fpl);
}
-void unix_notinflight(struct user_struct *user, struct file *filp)
+void unix_del_edges(struct scm_fp_list *fpl)
{
- struct unix_sock *u = unix_get_socket(filp);
+ struct unix_sock *receiver;
+ int i = 0;
spin_lock(&unix_gc_lock);
- if (u) {
- WARN_ON_ONCE(!u->inflight);
- WARN_ON_ONCE(list_empty(&u->link));
+ if (!fpl->count_unix)
+ goto out;
- u->inflight--;
- if (!u->inflight)
- list_del_init(&u->link);
+ do {
+ struct unix_edge *edge = fpl->edges + i++;
- /* Paired with READ_ONCE() in wait_for_unix_gc() */
- WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
- }
+ unix_del_edge(fpl, edge);
+ } while (i < fpl->count_unix);
- WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1);
+ if (!fpl->dead) {
+ receiver = fpl->edges[0].successor;
+ receiver->scm_stat.nr_unix_fds -= fpl->count_unix;
+ }
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - fpl->count_unix);
+out:
+ WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight - fpl->count);
spin_unlock(&unix_gc_lock);
+
+ fpl->inflight = false;
}
-static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
- struct sk_buff_head *hitlist)
+void unix_update_edges(struct unix_sock *receiver)
{
- struct sk_buff *skb;
- struct sk_buff *next;
-
- spin_lock(&x->sk_receive_queue.lock);
- skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
- /* Do we have file descriptors ? */
- if (UNIXCB(skb).fp) {
- bool hit = false;
- /* Process the descriptors of this socket */
- int nfd = UNIXCB(skb).fp->count;
- struct file **fp = UNIXCB(skb).fp->fp;
-
- while (nfd--) {
- /* Get the socket the fd matches if it indeed does so */
- struct unix_sock *u = unix_get_socket(*fp++);
-
- /* Ignore non-candidates, they could have been added
- * to the queues after starting the garbage collection
- */
- if (u && test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
- hit = true;
-
- func(u);
- }
- }
- if (hit && hitlist != NULL) {
- __skb_unlink(skb, &x->sk_receive_queue);
- __skb_queue_tail(hitlist, skb);
- }
- }
+ /* nr_unix_fds is only updated under unix_state_lock().
+ * If it's 0 here, the embryo socket is not part of the
+ * inflight graph, and GC will not see it, so no lock needed.
+ */
+ if (!receiver->scm_stat.nr_unix_fds) {
+ receiver->listener = NULL;
+ } else {
+ spin_lock(&unix_gc_lock);
+ unix_update_graph(unix_sk(receiver->listener)->vertex);
+ receiver->listener = NULL;
+ spin_unlock(&unix_gc_lock);
}
- spin_unlock(&x->sk_receive_queue.lock);
}
-static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
- struct sk_buff_head *hitlist)
+int unix_prepare_fpl(struct scm_fp_list *fpl)
{
- if (x->sk_state != TCP_LISTEN) {
- scan_inflight(x, func, hitlist);
- } else {
- struct sk_buff *skb;
- struct sk_buff *next;
- struct unix_sock *u;
- LIST_HEAD(embryos);
+ struct unix_vertex *vertex;
+ int i;
- /* For a listening socket collect the queued embryos
- * and perform a scan on them as well.
- */
- spin_lock(&x->sk_receive_queue.lock);
- skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
- u = unix_sk(skb->sk);
+ if (!fpl->count_unix)
+ return 0;
- /* An embryo cannot be in-flight, so it's safe
- * to use the list link.
- */
- WARN_ON_ONCE(!list_empty(&u->link));
- list_add_tail(&u->link, &embryos);
- }
- spin_unlock(&x->sk_receive_queue.lock);
+ for (i = 0; i < fpl->count_unix; i++) {
+ vertex = kmalloc(sizeof(*vertex), GFP_KERNEL);
+ if (!vertex)
+ goto err;
- while (!list_empty(&embryos)) {
- u = list_entry(embryos.next, struct unix_sock, link);
- scan_inflight(&u->sk, func, hitlist);
- list_del_init(&u->link);
- }
+ list_add(&vertex->entry, &fpl->vertices);
}
+
+ fpl->edges = kvmalloc_array(fpl->count_unix, sizeof(*fpl->edges),
+ GFP_KERNEL_ACCOUNT);
+ if (!fpl->edges)
+ goto err;
+
+ return 0;
+
+err:
+ unix_free_vertices(fpl);
+ return -ENOMEM;
}
-static void dec_inflight(struct unix_sock *usk)
+void unix_destroy_fpl(struct scm_fp_list *fpl)
{
- usk->inflight--;
+ if (fpl->inflight)
+ unix_del_edges(fpl);
+
+ kvfree(fpl->edges);
+ unix_free_vertices(fpl);
}
-static void inc_inflight(struct unix_sock *usk)
+static bool unix_vertex_dead(struct unix_vertex *vertex)
{
- usk->inflight++;
+ struct unix_edge *edge;
+ struct unix_sock *u;
+ long total_ref;
+
+ list_for_each_entry(edge, &vertex->edges, vertex_entry) {
+ struct unix_vertex *next_vertex = unix_edge_successor(edge);
+
+ /* The vertex's fd can be received by a non-inflight socket. */
+ if (!next_vertex)
+ return false;
+
+ /* The vertex's fd can be received by an inflight socket in
+ * another SCC.
+ */
+ if (next_vertex->scc_index != vertex->scc_index)
+ return false;
+ }
+
+ /* No receiver exists out of the same SCC. */
+
+ edge = list_first_entry(&vertex->edges, typeof(*edge), vertex_entry);
+ u = edge->predecessor;
+ total_ref = file_count(u->sk.sk_socket->file);
+
+ /* If not close()d, total_ref > out_degree. */
+ if (total_ref != vertex->out_degree)
+ return false;
+
+ return true;
}
-static void inc_inflight_move_tail(struct unix_sock *u)
+static void unix_collect_queue(struct unix_sock *u, struct sk_buff_head *hitlist)
{
- u->inflight++;
+ skb_queue_splice_init(&u->sk.sk_receive_queue, hitlist);
- /* If this still might be part of a cycle, move it to the end
- * of the list, so that it's checked even if it was already
- * passed over
- */
- if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags))
- list_move_tail(&u->link, &gc_candidates);
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (u->oob_skb) {
+ WARN_ON_ONCE(skb_unref(u->oob_skb));
+ u->oob_skb = NULL;
+ }
+#endif
}
-static bool gc_in_progress;
-
-static void __unix_gc(struct work_struct *work)
+static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist)
{
- struct sk_buff_head hitlist;
- struct unix_sock *u, *next;
- LIST_HEAD(not_cycle_list);
- struct list_head cursor;
+ struct unix_vertex *vertex;
- spin_lock(&unix_gc_lock);
+ list_for_each_entry_reverse(vertex, scc, scc_entry) {
+ struct sk_buff_head *queue;
+ struct unix_edge *edge;
+ struct unix_sock *u;
- /* First, select candidates for garbage collection. Only
- * in-flight sockets are considered, and from those only ones
- * which don't have any external reference.
- *
- * Holding unix_gc_lock will protect these candidates from
- * being detached, and hence from gaining an external
- * reference. Since there are no possible receivers, all
- * buffers currently on the candidates' queues stay there
- * during the garbage collection.
- *
- * We also know that no new candidate can be added onto the
- * receive queues. Other, non candidate sockets _can_ be
- * added to queue, so we must make sure only to touch
- * candidates.
- *
- * Embryos, though never candidates themselves, affect which
- * candidates are reachable by the garbage collector. Before
- * being added to a listener's queue, an embryo may already
- * receive data carrying SCM_RIGHTS, potentially making the
- * passed socket a candidate that is not yet reachable by the
- * collector. It becomes reachable once the embryo is
- * enqueued. Therefore, we must ensure that no SCM-laden
- * embryo appears in a (candidate) listener's queue between
- * consecutive scan_children() calls.
- */
- list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
- struct sock *sk = &u->sk;
- long total_refs;
-
- total_refs = file_count(sk->sk_socket->file);
-
- WARN_ON_ONCE(!u->inflight);
- WARN_ON_ONCE(total_refs < u->inflight);
- if (total_refs == u->inflight) {
- list_move_tail(&u->link, &gc_candidates);
- __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
- __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
-
- if (sk->sk_state == TCP_LISTEN) {
- unix_state_lock_nested(sk, U_LOCK_GC_LISTENER);
- unix_state_unlock(sk);
+ edge = list_first_entry(&vertex->edges, typeof(*edge), vertex_entry);
+ u = edge->predecessor;
+ queue = &u->sk.sk_receive_queue;
+
+ spin_lock(&queue->lock);
+
+ if (u->sk.sk_state == TCP_LISTEN) {
+ struct sk_buff *skb;
+
+ skb_queue_walk(queue, skb) {
+ struct sk_buff_head *embryo_queue = &skb->sk->sk_receive_queue;
+
+ spin_lock(&embryo_queue->lock);
+ unix_collect_queue(unix_sk(skb->sk), hitlist);
+ spin_unlock(&embryo_queue->lock);
}
+ } else {
+ unix_collect_queue(u, hitlist);
}
+
+ spin_unlock(&queue->lock);
}
+}
- /* Now remove all internal in-flight reference to children of
- * the candidates.
- */
- list_for_each_entry(u, &gc_candidates, link)
- scan_children(&u->sk, dec_inflight, NULL);
+static bool unix_scc_cyclic(struct list_head *scc)
+{
+ struct unix_vertex *vertex;
+ struct unix_edge *edge;
- /* Restore the references for children of all candidates,
- * which have remaining references. Do this recursively, so
- * only those remain, which form cyclic references.
- *
- * Use a "cursor" link, to make the list traversal safe, even
- * though elements might be moved about.
+ /* SCC containing multiple vertices ? */
+ if (!list_is_singular(scc))
+ return true;
+
+ vertex = list_first_entry(scc, typeof(*vertex), scc_entry);
+
+ /* Self-reference or a embryo-listener circle ? */
+ list_for_each_entry(edge, &vertex->edges, vertex_entry) {
+ if (unix_edge_successor(edge) == vertex)
+ return true;
+ }
+
+ return false;
+}
+
+static LIST_HEAD(unix_visited_vertices);
+static unsigned long unix_vertex_grouped_index = UNIX_VERTEX_INDEX_MARK2;
+
+static void __unix_walk_scc(struct unix_vertex *vertex, unsigned long *last_index,
+ struct sk_buff_head *hitlist)
+{
+ LIST_HEAD(vertex_stack);
+ struct unix_edge *edge;
+ LIST_HEAD(edge_stack);
+
+next_vertex:
+ /* Push vertex to vertex_stack and mark it as on-stack
+ * (index >= UNIX_VERTEX_INDEX_START).
+ * The vertex will be popped when finalising SCC later.
*/
- list_add(&cursor, &gc_candidates);
- while (cursor.next != &gc_candidates) {
- u = list_entry(cursor.next, struct unix_sock, link);
+ list_add(&vertex->scc_entry, &vertex_stack);
+
+ vertex->index = *last_index;
+ vertex->scc_index = *last_index;
+ (*last_index)++;
+
+ /* Explore neighbour vertices (receivers of the current vertex's fd). */
+ list_for_each_entry(edge, &vertex->edges, vertex_entry) {
+ struct unix_vertex *next_vertex = unix_edge_successor(edge);
+
+ if (!next_vertex)
+ continue;
+
+ if (next_vertex->index == unix_vertex_unvisited_index) {
+ /* Iterative deepening depth first search
+ *
+ * 1. Push a forward edge to edge_stack and set
+ * the successor to vertex for the next iteration.
+ */
+ list_add(&edge->stack_entry, &edge_stack);
+
+ vertex = next_vertex;
+ goto next_vertex;
- /* Move cursor to after the current position. */
- list_move(&cursor, &u->link);
+ /* 2. Pop the edge directed to the current vertex
+ * and restore the ancestor for backtracking.
+ */
+prev_vertex:
+ edge = list_first_entry(&edge_stack, typeof(*edge), stack_entry);
+ list_del_init(&edge->stack_entry);
+
+ next_vertex = vertex;
+ vertex = edge->predecessor->vertex;
- if (u->inflight) {
- list_move_tail(&u->link, &not_cycle_list);
- __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
- scan_children(&u->sk, inc_inflight_move_tail, NULL);
+ /* If the successor has a smaller scc_index, two vertices
+ * are in the same SCC, so propagate the smaller scc_index
+ * to skip SCC finalisation.
+ */
+ vertex->scc_index = min(vertex->scc_index, next_vertex->scc_index);
+ } else if (next_vertex->index != unix_vertex_grouped_index) {
+ /* Loop detected by a back/cross edge.
+ *
+ * The successor is on vertex_stack, so two vertices are in
+ * the same SCC. If the successor has a smaller *scc_index*,
+ * propagate it to skip SCC finalisation.
+ */
+ vertex->scc_index = min(vertex->scc_index, next_vertex->scc_index);
+ } else {
+ /* The successor was already grouped as another SCC */
}
}
- list_del(&cursor);
- /* Now gc_candidates contains only garbage. Restore original
- * inflight counters for these as well, and remove the skbuffs
- * which are creating the cycle(s).
- */
- skb_queue_head_init(&hitlist);
- list_for_each_entry(u, &gc_candidates, link) {
- scan_children(&u->sk, inc_inflight, &hitlist);
+ if (vertex->index == vertex->scc_index) {
+ struct unix_vertex *v;
+ struct list_head scc;
+ bool scc_dead = true;
-#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
- if (u->oob_skb) {
- kfree_skb(u->oob_skb);
- u->oob_skb = NULL;
+ /* SCC finalised.
+ *
+ * If the scc_index was not updated, all the vertices above on
+ * vertex_stack are in the same SCC. Group them using scc_entry.
+ */
+ __list_cut_position(&scc, &vertex_stack, &vertex->scc_entry);
+
+ list_for_each_entry_reverse(v, &scc, scc_entry) {
+ /* Don't restart DFS from this vertex in unix_walk_scc(). */
+ list_move_tail(&v->entry, &unix_visited_vertices);
+
+ /* Mark vertex as off-stack. */
+ v->index = unix_vertex_grouped_index;
+
+ if (scc_dead)
+ scc_dead = unix_vertex_dead(v);
}
-#endif
+
+ if (scc_dead)
+ unix_collect_skb(&scc, hitlist);
+ else if (!unix_graph_maybe_cyclic)
+ unix_graph_maybe_cyclic = unix_scc_cyclic(&scc);
+
+ list_del(&scc);
}
- /* not_cycle_list contains those sockets which do not make up a
- * cycle. Restore these to the inflight list.
+ /* Need backtracking ? */
+ if (!list_empty(&edge_stack))
+ goto prev_vertex;
+}
+
+static void unix_walk_scc(struct sk_buff_head *hitlist)
+{
+ unsigned long last_index = UNIX_VERTEX_INDEX_START;
+
+ unix_graph_maybe_cyclic = false;
+
+ /* Visit every vertex exactly once.
+ * __unix_walk_scc() moves visited vertices to unix_visited_vertices.
*/
- while (!list_empty(&not_cycle_list)) {
- u = list_entry(not_cycle_list.next, struct unix_sock, link);
- __clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
- list_move_tail(&u->link, &gc_inflight_list);
+ while (!list_empty(&unix_unvisited_vertices)) {
+ struct unix_vertex *vertex;
+
+ vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry);
+ __unix_walk_scc(vertex, &last_index, hitlist);
}
- spin_unlock(&unix_gc_lock);
+ list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
+ swap(unix_vertex_unvisited_index, unix_vertex_grouped_index);
- /* Here we are. Hitlist is filled. Die. */
- __skb_queue_purge(&hitlist);
+ unix_graph_grouped = true;
+}
+
+static void unix_walk_scc_fast(struct sk_buff_head *hitlist)
+{
+ unix_graph_maybe_cyclic = false;
+
+ while (!list_empty(&unix_unvisited_vertices)) {
+ struct unix_vertex *vertex;
+ struct list_head scc;
+ bool scc_dead = true;
+
+ vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry);
+ list_add(&scc, &vertex->scc_entry);
+
+ list_for_each_entry_reverse(vertex, &scc, scc_entry) {
+ list_move_tail(&vertex->entry, &unix_visited_vertices);
+
+ if (scc_dead)
+ scc_dead = unix_vertex_dead(vertex);
+ }
+
+ if (scc_dead)
+ unix_collect_skb(&scc, hitlist);
+ else if (!unix_graph_maybe_cyclic)
+ unix_graph_maybe_cyclic = unix_scc_cyclic(&scc);
+
+ list_del(&scc);
+ }
+
+ list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
+}
+
+static bool gc_in_progress;
+
+static void __unix_gc(struct work_struct *work)
+{
+ struct sk_buff_head hitlist;
+ struct sk_buff *skb;
spin_lock(&unix_gc_lock);
- /* All candidates should have been detached by now. */
- WARN_ON_ONCE(!list_empty(&gc_candidates));
+ if (!unix_graph_maybe_cyclic) {
+ spin_unlock(&unix_gc_lock);
+ goto skip_gc;
+ }
- /* Paired with READ_ONCE() in wait_for_unix_gc(). */
- WRITE_ONCE(gc_in_progress, false);
+ __skb_queue_head_init(&hitlist);
+
+ if (unix_graph_grouped)
+ unix_walk_scc_fast(&hitlist);
+ else
+ unix_walk_scc(&hitlist);
spin_unlock(&unix_gc_lock);
+
+ skb_queue_walk(&hitlist, skb) {
+ if (UNIXCB(skb).fp)
+ UNIXCB(skb).fp->dead = true;
+ }
+
+ __skb_queue_purge(&hitlist);
+skip_gc:
+ WRITE_ONCE(gc_in_progress, false);
}
static DECLARE_WORK(unix_gc_work, __unix_gc);
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
index 3e84b31c355a..357b3e5f3847 100644
--- a/net/unix/sysctl_net_unix.c
+++ b/net/unix/sysctl_net_unix.c
@@ -19,7 +19,6 @@ static struct ctl_table unix_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
- { }
};
int __net_init unix_sysctl_register(struct net *net)
@@ -52,7 +51,7 @@ err_alloc:
void unix_sysctl_unregister(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
table = net->unx.ctl->ctl_table_arg;
unregister_net_sysctl_table(net->unx.ctl);
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
index bd84785bf8d6..bca2d86ba97d 100644
--- a/net/unix/unix_bpf.c
+++ b/net/unix/unix_bpf.c
@@ -54,6 +54,9 @@ static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
struct sk_psock *psock;
int copied;
+ if (flags & MSG_OOB)
+ return -EOPNOTSUPP;
+
if (!len)
return 0;
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 54ba7316f808..0ff9b2dd86ba 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1270,25 +1270,28 @@ out:
return err;
}
+int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t len, int flags)
+{
+ struct sock *sk = sock->sk;
+ struct vsock_sock *vsk = vsock_sk(sk);
+
+ return vsk->transport->dgram_dequeue(vsk, msg, len, flags);
+}
+
int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
#ifdef CONFIG_BPF_SYSCALL
+ struct sock *sk = sock->sk;
const struct proto *prot;
-#endif
- struct vsock_sock *vsk;
- struct sock *sk;
- sk = sock->sk;
- vsk = vsock_sk(sk);
-
-#ifdef CONFIG_BPF_SYSCALL
prot = READ_ONCE(sk->sk_prot);
if (prot != &vsock_proto)
return prot->recvmsg(sk, msg, len, flags, NULL);
#endif
- return vsk->transport->dgram_dequeue(vsk, msg, len, flags);
+ return __vsock_dgram_recvmsg(sock, msg, len, flags);
}
EXPORT_SYMBOL_GPL(vsock_dgram_recvmsg);
@@ -1500,8 +1503,8 @@ out:
return err;
}
-static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int vsock_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sock *listener;
int err;
@@ -1528,7 +1531,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
/* Wait for children sockets to appear; these are the new sockets
* created upon connection establishment.
*/
- timeout = sock_rcvtimeo(listener, flags & O_NONBLOCK);
+ timeout = sock_rcvtimeo(listener, arg->flags & O_NONBLOCK);
prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
while ((connected = vsock_dequeue_accept(listener)) == NULL &&
@@ -2174,15 +2177,12 @@ out:
}
int
-vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
- int flags)
+__vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags)
{
struct sock *sk;
struct vsock_sock *vsk;
const struct vsock_transport *transport;
-#ifdef CONFIG_BPF_SYSCALL
- const struct proto *prot;
-#endif
int err;
sk = sock->sk;
@@ -2233,14 +2233,6 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
goto out;
}
-#ifdef CONFIG_BPF_SYSCALL
- prot = READ_ONCE(sk->sk_prot);
- if (prot != &vsock_proto) {
- release_sock(sk);
- return prot->recvmsg(sk, msg, len, flags, NULL);
- }
-#endif
-
if (sk->sk_type == SOCK_STREAM)
err = __vsock_stream_recvmsg(sk, msg, len, flags);
else
@@ -2250,6 +2242,22 @@ out:
release_sock(sk);
return err;
}
+
+int
+vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags)
+{
+#ifdef CONFIG_BPF_SYSCALL
+ struct sock *sk = sock->sk;
+ const struct proto *prot;
+
+ prot = READ_ONCE(sk->sk_prot);
+ if (prot != &vsock_proto)
+ return prot->recvmsg(sk, msg, len, flags, NULL);
+#endif
+
+ return __vsock_connectible_recvmsg(sock, msg, len, flags);
+}
EXPORT_SYMBOL_GPL(vsock_connectible_recvmsg);
static int vsock_set_rcvlowat(struct sock *sk, int val)
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index ee5d306a96d0..64a07acfef12 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -617,20 +617,14 @@ out:
static int virtio_vsock_vqs_init(struct virtio_vsock *vsock)
{
struct virtio_device *vdev = vsock->vdev;
- static const char * const names[] = {
- "rx",
- "tx",
- "event",
- };
- vq_callback_t *callbacks[] = {
- virtio_vsock_rx_done,
- virtio_vsock_tx_done,
- virtio_vsock_event_done,
+ struct virtqueue_info vqs_info[] = {
+ { "rx", virtio_vsock_rx_done },
+ { "tx", virtio_vsock_tx_done },
+ { "event", virtio_vsock_event_done },
};
int ret;
- ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, callbacks, names,
- NULL);
+ ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, vqs_info, NULL);
if (ret < 0)
return ret;
@@ -859,7 +853,6 @@ static struct virtio_driver virtio_vsock_driver = {
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
.driver.name = KBUILD_MODNAME,
- .driver.owner = THIS_MODULE,
.id_table = id_table,
.probe = virtio_vsock_probe,
.remove = virtio_vsock_remove,
diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c
index a3c97546ab84..c42c5cc18f32 100644
--- a/net/vmw_vsock/vsock_bpf.c
+++ b/net/vmw_vsock/vsock_bpf.c
@@ -64,9 +64,9 @@ static int __vsock_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int
int err;
if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)
- err = vsock_connectible_recvmsg(sock, msg, len, flags);
+ err = __vsock_connectible_recvmsg(sock, msg, len, flags);
else if (sk->sk_type == SOCK_DGRAM)
- err = vsock_dgram_recvmsg(sock, msg, len, flags);
+ err = __vsock_dgram_recvmsg(sock, msg, len, flags);
else
err = -EPROTOTYPE;
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 72074fd36df4..1d49cc8b6da1 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -25,7 +25,7 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
cfg80211-y += extra-certs.o
endif
-$(obj)/shipped-certs.c: $(sort $(wildcard $(srctree)/$(src)/certs/*.hex))
+$(obj)/shipped-certs.c: $(sort $(wildcard $(src)/certs/*.hex))
@$(kecho) " GEN $@"
$(Q)(echo '#include "reg.h"'; \
echo 'const u8 shipped_regdb_certs[] = {'; \
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 3414b2c3abcc..e579d7e1425f 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -263,6 +263,37 @@ static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c)
return nl80211_chan_width_to_mhz(c->width);
}
+static bool cfg80211_valid_center_freq(u32 center,
+ enum nl80211_chan_width width)
+{
+ int bw;
+ int step;
+
+ /* We only do strict verification on 6 GHz */
+ if (center < 5955 || center > 7115)
+ return true;
+
+ bw = nl80211_chan_width_to_mhz(width);
+ if (bw < 0)
+ return false;
+
+ /* Validate that the channels bw is entirely within the 6 GHz band */
+ if (center - bw / 2 < 5945 || center + bw / 2 > 7125)
+ return false;
+
+ /* With 320 MHz the permitted channels overlap */
+ if (bw == 320)
+ step = 160;
+ else
+ step = bw;
+
+ /*
+ * Valid channels are packed from lowest frequency towards higher ones.
+ * So test that the lower frequency alignes with one of these steps.
+ */
+ return (center - bw / 2 - 5945) % step == 0;
+}
+
bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef)
{
u32 control_freq, oper_freq;
@@ -374,6 +405,13 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef)
return false;
}
+ if (!cfg80211_valid_center_freq(chandef->center_freq1, chandef->width))
+ return false;
+
+ if (chandef->width == NL80211_CHAN_WIDTH_80P80 &&
+ !cfg80211_valid_center_freq(chandef->center_freq2, chandef->width))
+ return false;
+
/* channel 14 is only for IEEE 802.11b */
if (chandef->center_freq1 == 2484 &&
chandef->width != NL80211_CHAN_WIDTH_20_NOHT)
@@ -1145,7 +1183,8 @@ EXPORT_SYMBOL(cfg80211_chandef_dfs_cac_time);
static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy,
u32 center_freq, u32 bandwidth,
- u32 prohibited_flags, bool monitor)
+ u32 prohibited_flags,
+ u32 permitting_flags)
{
struct ieee80211_channel *c;
u32 freq, start_freq, end_freq;
@@ -1157,7 +1196,7 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy,
c = ieee80211_get_channel_khz(wiphy, freq);
if (!c)
return false;
- if (monitor && c->flags & IEEE80211_CHAN_CAN_MONITOR)
+ if (c->flags & permitting_flags)
continue;
if (c->flags & prohibited_flags)
return false;
@@ -1221,7 +1260,8 @@ static bool cfg80211_edmg_usable(struct wiphy *wiphy, u8 edmg_channels,
bool _cfg80211_chandef_usable(struct wiphy *wiphy,
const struct cfg80211_chan_def *chandef,
- u32 prohibited_flags, bool monitor)
+ u32 prohibited_flags,
+ u32 permitting_flags)
{
struct ieee80211_sta_ht_cap *ht_cap;
struct ieee80211_sta_vht_cap *vht_cap;
@@ -1383,22 +1423,23 @@ bool _cfg80211_chandef_usable(struct wiphy *wiphy,
if (!cfg80211_secondary_chans_ok(wiphy,
ieee80211_chandef_to_khz(chandef),
- width, prohibited_flags, monitor))
+ width, prohibited_flags,
+ permitting_flags))
return false;
if (!chandef->center_freq2)
return true;
return cfg80211_secondary_chans_ok(wiphy,
MHZ_TO_KHZ(chandef->center_freq2),
- width, prohibited_flags, monitor);
+ width, prohibited_flags,
+ permitting_flags);
}
bool cfg80211_chandef_usable(struct wiphy *wiphy,
const struct cfg80211_chan_def *chandef,
u32 prohibited_flags)
{
- return _cfg80211_chandef_usable(wiphy, chandef, prohibited_flags,
- false);
+ return _cfg80211_chandef_usable(wiphy, chandef, prohibited_flags, 0);
}
EXPORT_SYMBOL(cfg80211_chandef_usable);
@@ -1520,49 +1561,50 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy,
static bool _cfg80211_reg_can_beacon(struct wiphy *wiphy,
struct cfg80211_chan_def *chandef,
enum nl80211_iftype iftype,
- bool check_no_ir)
+ u32 prohibited_flags,
+ u32 permitting_flags)
{
- bool res;
- u32 prohibited_flags = IEEE80211_CHAN_DISABLED;
+ bool res, check_radar;
int dfs_required;
- trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype, check_no_ir);
+ trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype,
+ prohibited_flags,
+ permitting_flags);
- if (check_no_ir)
- prohibited_flags |= IEEE80211_CHAN_NO_IR;
+ if (!_cfg80211_chandef_usable(wiphy, chandef,
+ IEEE80211_CHAN_DISABLED, 0))
+ return false;
dfs_required = cfg80211_chandef_dfs_required(wiphy, chandef, iftype);
- if (dfs_required != 0)
- prohibited_flags |= IEEE80211_CHAN_RADAR;
+ check_radar = dfs_required != 0;
if (dfs_required > 0 &&
cfg80211_chandef_dfs_available(wiphy, chandef)) {
/* We can skip IEEE80211_CHAN_NO_IR if chandef dfs available */
- prohibited_flags = IEEE80211_CHAN_DISABLED;
+ prohibited_flags &= ~IEEE80211_CHAN_NO_IR;
+ check_radar = false;
}
- res = cfg80211_chandef_usable(wiphy, chandef, prohibited_flags);
+ if (check_radar &&
+ !_cfg80211_chandef_usable(wiphy, chandef,
+ IEEE80211_CHAN_RADAR, 0))
+ return false;
+
+ res = _cfg80211_chandef_usable(wiphy, chandef,
+ prohibited_flags,
+ permitting_flags);
trace_cfg80211_return_bool(res);
return res;
}
-bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
- struct cfg80211_chan_def *chandef,
- enum nl80211_iftype iftype)
-{
- return _cfg80211_reg_can_beacon(wiphy, chandef, iftype, true);
-}
-EXPORT_SYMBOL(cfg80211_reg_can_beacon);
-
-bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy,
- struct cfg80211_chan_def *chandef,
- enum nl80211_iftype iftype)
+bool cfg80211_reg_check_beaconing(struct wiphy *wiphy,
+ struct cfg80211_chan_def *chandef,
+ struct cfg80211_beaconing_check_config *cfg)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
- bool check_no_ir;
-
- lockdep_assert_held(&rdev->wiphy.mtx);
+ u32 permitting_flags = 0;
+ bool check_no_ir = true;
/*
* Under certain conditions suggested by some regulatory bodies a
@@ -1570,12 +1612,20 @@ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy,
* only if such relaxations are not enabled and the conditions are not
* met.
*/
- check_no_ir = !cfg80211_ir_permissive_chan(wiphy, iftype,
- chandef->chan);
+ if (cfg->relax) {
+ lockdep_assert_held(&rdev->wiphy.mtx);
+ check_no_ir = !cfg80211_ir_permissive_chan(wiphy, cfg->iftype,
+ chandef->chan);
+ }
+
+ if (cfg->reg_power == IEEE80211_REG_VLP_AP)
+ permitting_flags |= IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP;
- return _cfg80211_reg_can_beacon(wiphy, chandef, iftype, check_no_ir);
+ return _cfg80211_reg_can_beacon(wiphy, chandef, cfg->iftype,
+ check_no_ir ? IEEE80211_CHAN_NO_IR : 0,
+ permitting_flags);
}
-EXPORT_SYMBOL(cfg80211_reg_can_beacon_relax);
+EXPORT_SYMBOL(cfg80211_reg_check_beaconing);
int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev,
struct cfg80211_chan_def *chandef)
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 3fb1b637352a..4d5d351bd0b5 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -421,6 +421,8 @@ static void cfg80211_wiphy_work(struct work_struct *work)
rdev = container_of(work, struct cfg80211_registered_device, wiphy_work);
+ trace_wiphy_work_worker_start(&rdev->wiphy);
+
wiphy_lock(&rdev->wiphy);
if (rdev->suspended)
goto out;
@@ -431,9 +433,10 @@ static void cfg80211_wiphy_work(struct work_struct *work)
if (wk) {
list_del_init(&wk->entry);
if (!list_empty(&rdev->wiphy_work_list))
- schedule_work(work);
+ queue_work(system_unbound_wq, work);
spin_unlock_irq(&rdev->wiphy_work_lock);
+ trace_wiphy_work_run(&rdev->wiphy, wk);
wk->func(&rdev->wiphy, wk);
} else {
spin_unlock_irq(&rdev->wiphy_work_lock);
@@ -1066,6 +1069,7 @@ void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev,
list_del_init(&wk->entry);
spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags);
+ trace_wiphy_work_run(&rdev->wiphy, wk);
wk->func(&rdev->wiphy, wk);
spin_lock_irqsave(&rdev->wiphy_work_lock, flags);
@@ -1141,7 +1145,8 @@ void wiphy_unregister(struct wiphy *wiphy)
flush_work(&rdev->background_cac_abort_wk);
cfg80211_rdev_free_wowlan(rdev);
- cfg80211_rdev_free_coalesce(rdev);
+ cfg80211_free_coalesce(rdev->coalesce);
+ rdev->coalesce = NULL;
}
EXPORT_SYMBOL(wiphy_unregister);
@@ -1610,6 +1615,8 @@ void wiphy_work_queue(struct wiphy *wiphy, struct wiphy_work *work)
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
unsigned long flags;
+ trace_wiphy_work_queue(wiphy, work);
+
spin_lock_irqsave(&rdev->wiphy_work_lock, flags);
if (list_empty(&work->entry))
list_add_tail(&work->entry, &rdev->wiphy_work_list);
@@ -1626,6 +1633,8 @@ void wiphy_work_cancel(struct wiphy *wiphy, struct wiphy_work *work)
lockdep_assert_held(&wiphy->mtx);
+ trace_wiphy_work_cancel(wiphy, work);
+
spin_lock_irqsave(&rdev->wiphy_work_lock, flags);
if (!list_empty(&work->entry))
list_del_init(&work->entry);
@@ -1639,6 +1648,8 @@ void wiphy_work_flush(struct wiphy *wiphy, struct wiphy_work *work)
unsigned long flags;
bool run;
+ trace_wiphy_work_flush(wiphy, work);
+
spin_lock_irqsave(&rdev->wiphy_work_lock, flags);
run = !work || !list_empty(&work->entry);
spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags);
@@ -1660,6 +1671,8 @@ void wiphy_delayed_work_queue(struct wiphy *wiphy,
struct wiphy_delayed_work *dwork,
unsigned long delay)
{
+ trace_wiphy_delayed_work_queue(wiphy, &dwork->work, delay);
+
if (!delay) {
del_timer(&dwork->timer);
wiphy_work_queue(wiphy, &dwork->work);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 118f2f619828..41c8c0e3ba2e 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -494,7 +494,8 @@ bool cfg80211_wdev_on_sub_chan(struct wireless_dev *wdev,
bool primary_only);
bool _cfg80211_chandef_usable(struct wiphy *wiphy,
const struct cfg80211_chan_def *chandef,
- u32 prohibited_flags, bool monitor);
+ u32 prohibited_flags,
+ u32 permitting_flags);
static inline unsigned int elapsed_jiffies_msecs(unsigned long start)
{
@@ -532,6 +533,10 @@ struct cfg80211_internal_bss *
cfg80211_bss_update(struct cfg80211_registered_device *rdev,
struct cfg80211_internal_bss *tmp,
bool signal_valid, unsigned long ts);
+
+enum ieee80211_ap_reg_power
+cfg80211_get_6ghz_power_type(const u8 *elems, size_t elems_len);
+
#ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS
#define CFG80211_DEV_WARN_ON(cond) WARN_ON(cond)
#else
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 9f02ee5f08be..34e5acff3935 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -3,7 +3,7 @@
* Some IBSS support code for cfg80211.
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include <linux/etherdevice.h>
@@ -94,6 +94,9 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
lockdep_assert_held(&rdev->wiphy.mtx);
+ if (wdev->cac_started)
+ return -EBUSY;
+
if (wdev->u.ibss.ssid_len)
return -EALREADY;
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 83306979fbe2..aaca65b66af4 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Portions
- * Copyright (C) 2022-2023 Intel Corporation
+ * Copyright (C) 2022-2024 Intel Corporation
*/
#include <linux/ieee80211.h>
#include <linux/export.h>
@@ -127,6 +127,9 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
if (!rdev->ops->join_mesh)
return -EOPNOTSUPP;
+ if (wdev->cac_started)
+ return -EBUSY;
+
if (!setup->chandef.chan) {
/* if no channel explicitly given, use preset channel */
setup->chandef = wdev->u.mesh.preset_chandef;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 30ff9a470813..7397a372c78e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -315,8 +315,7 @@ nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = {
[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD] = { .type = NLA_U16 },
[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION] =
NLA_POLICY_MAX(NLA_U8, 15),
- [NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST] =
- NLA_POLICY_MAX(NLA_U8, 31),
+ [NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST] = { .type = NLA_U8 },
[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES] = { .type = NLA_U8 },
[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI] = { .type = NLA_FLAG },
[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC] = { .type = NLA_FLAG },
@@ -468,6 +467,10 @@ static const struct netlink_range_validation nl80211_punct_bitmap_range = {
.max = 0xffff,
};
+static const struct netlink_range_validation q_range = {
+ .max = INT_MAX,
+};
+
static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD },
[NL80211_ATTR_WIPHY] = { .type = NLA_U32 },
@@ -754,7 +757,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_TXQ_LIMIT] = { .type = NLA_U32 },
[NL80211_ATTR_TXQ_MEMORY_LIMIT] = { .type = NLA_U32 },
- [NL80211_ATTR_TXQ_QUANTUM] = { .type = NLA_U32 },
+ [NL80211_ATTR_TXQ_QUANTUM] = NLA_POLICY_FULL_RANGE(NLA_U32, &q_range),
[NL80211_ATTR_HE_CAPABILITY] =
NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_he_capa,
NL80211_HE_MAX_CAPABILITY_LEN),
@@ -1204,6 +1207,12 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy,
if ((chan->flags & IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT) &&
nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT))
goto nla_put_failure;
+ if ((chan->flags & IEEE80211_CHAN_CAN_MONITOR) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_CAN_MONITOR))
+ goto nla_put_failure;
+ if ((chan->flags & IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP))
+ goto nla_put_failure;
}
if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
@@ -1622,71 +1631,87 @@ nla_put_failure:
return -ENOBUFS;
}
-static int nl80211_put_iface_combinations(struct wiphy *wiphy,
- struct sk_buff *msg,
- bool large)
+static int nl80211_put_ifcomb_data(struct sk_buff *msg, bool large, int idx,
+ const struct ieee80211_iface_combination *c,
+ u16 nested)
{
- struct nlattr *nl_combis;
- int i, j;
+ struct nlattr *nl_combi, *nl_limits;
+ int i;
- nl_combis = nla_nest_start_noflag(msg,
- NL80211_ATTR_INTERFACE_COMBINATIONS);
- if (!nl_combis)
+ nl_combi = nla_nest_start_noflag(msg, idx | nested);
+ if (!nl_combi)
goto nla_put_failure;
- for (i = 0; i < wiphy->n_iface_combinations; i++) {
- const struct ieee80211_iface_combination *c;
- struct nlattr *nl_combi, *nl_limits;
+ nl_limits = nla_nest_start_noflag(msg, NL80211_IFACE_COMB_LIMITS |
+ nested);
+ if (!nl_limits)
+ goto nla_put_failure;
- c = &wiphy->iface_combinations[i];
+ for (i = 0; i < c->n_limits; i++) {
+ struct nlattr *nl_limit;
- nl_combi = nla_nest_start_noflag(msg, i + 1);
- if (!nl_combi)
+ nl_limit = nla_nest_start_noflag(msg, i + 1);
+ if (!nl_limit)
goto nla_put_failure;
-
- nl_limits = nla_nest_start_noflag(msg,
- NL80211_IFACE_COMB_LIMITS);
- if (!nl_limits)
+ if (nla_put_u32(msg, NL80211_IFACE_LIMIT_MAX, c->limits[i].max))
goto nla_put_failure;
+ if (nl80211_put_iftypes(msg, NL80211_IFACE_LIMIT_TYPES,
+ c->limits[i].types))
+ goto nla_put_failure;
+ nla_nest_end(msg, nl_limit);
+ }
- for (j = 0; j < c->n_limits; j++) {
- struct nlattr *nl_limit;
+ nla_nest_end(msg, nl_limits);
- nl_limit = nla_nest_start_noflag(msg, j + 1);
- if (!nl_limit)
- goto nla_put_failure;
- if (nla_put_u32(msg, NL80211_IFACE_LIMIT_MAX,
- c->limits[j].max))
- goto nla_put_failure;
- if (nl80211_put_iftypes(msg, NL80211_IFACE_LIMIT_TYPES,
- c->limits[j].types))
- goto nla_put_failure;
- nla_nest_end(msg, nl_limit);
- }
+ if (c->beacon_int_infra_match &&
+ nla_put_flag(msg, NL80211_IFACE_COMB_STA_AP_BI_MATCH))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, NL80211_IFACE_COMB_NUM_CHANNELS,
+ c->num_different_channels) ||
+ nla_put_u32(msg, NL80211_IFACE_COMB_MAXNUM,
+ c->max_interfaces))
+ goto nla_put_failure;
+ if (large &&
+ (nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS,
+ c->radar_detect_widths) ||
+ nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS,
+ c->radar_detect_regions)))
+ goto nla_put_failure;
+ if (c->beacon_int_min_gcd &&
+ nla_put_u32(msg, NL80211_IFACE_COMB_BI_MIN_GCD,
+ c->beacon_int_min_gcd))
+ goto nla_put_failure;
- nla_nest_end(msg, nl_limits);
+ nla_nest_end(msg, nl_combi);
- if (c->beacon_int_infra_match &&
- nla_put_flag(msg, NL80211_IFACE_COMB_STA_AP_BI_MATCH))
- goto nla_put_failure;
- if (nla_put_u32(msg, NL80211_IFACE_COMB_NUM_CHANNELS,
- c->num_different_channels) ||
- nla_put_u32(msg, NL80211_IFACE_COMB_MAXNUM,
- c->max_interfaces))
- goto nla_put_failure;
- if (large &&
- (nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS,
- c->radar_detect_widths) ||
- nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS,
- c->radar_detect_regions)))
- goto nla_put_failure;
- if (c->beacon_int_min_gcd &&
- nla_put_u32(msg, NL80211_IFACE_COMB_BI_MIN_GCD,
- c->beacon_int_min_gcd))
- goto nla_put_failure;
+ return 0;
+nla_put_failure:
+ return -ENOBUFS;
+}
+
+static int nl80211_put_iface_combinations(struct wiphy *wiphy,
+ struct sk_buff *msg,
+ int attr, int radio,
+ bool large, u16 nested)
+{
+ const struct ieee80211_iface_combination *c;
+ struct nlattr *nl_combis;
+ int i, n;
- nla_nest_end(msg, nl_combi);
+ nl_combis = nla_nest_start_noflag(msg, attr | nested);
+ if (!nl_combis)
+ goto nla_put_failure;
+
+ if (radio >= 0) {
+ c = wiphy->radio[0].iface_combinations;
+ n = wiphy->radio[0].n_iface_combinations;
+ } else {
+ c = wiphy->iface_combinations;
+ n = wiphy->n_iface_combinations;
}
+ for (i = 0; i < n; i++)
+ if (nl80211_put_ifcomb_data(msg, large, i + 1, &c[i], nested))
+ goto nla_put_failure;
nla_nest_end(msg, nl_combis);
@@ -2392,6 +2417,80 @@ fail:
return -ENOBUFS;
}
+static int nl80211_put_radio(struct wiphy *wiphy, struct sk_buff *msg, int idx)
+{
+ const struct wiphy_radio *r = &wiphy->radio[idx];
+ struct nlattr *radio, *freq;
+ int i;
+
+ radio = nla_nest_start(msg, idx);
+ if (!radio)
+ return -ENOBUFS;
+
+ if (nla_put_u32(msg, NL80211_WIPHY_RADIO_ATTR_INDEX, idx))
+ goto nla_put_failure;
+
+ for (i = 0; i < r->n_freq_range; i++) {
+ const struct wiphy_radio_freq_range *range = &r->freq_range[i];
+
+ freq = nla_nest_start(msg, NL80211_WIPHY_RADIO_ATTR_FREQ_RANGE);
+ if (!freq)
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NL80211_WIPHY_RADIO_FREQ_ATTR_START,
+ range->start_freq) ||
+ nla_put_u32(msg, NL80211_WIPHY_RADIO_FREQ_ATTR_END,
+ range->end_freq))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, freq);
+ }
+
+ for (i = 0; i < r->n_iface_combinations; i++)
+ if (nl80211_put_ifcomb_data(msg, true,
+ NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION,
+ &r->iface_combinations[i],
+ NLA_F_NESTED))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, radio);
+
+ return 0;
+
+nla_put_failure:
+ return -ENOBUFS;
+}
+
+static int nl80211_put_radios(struct wiphy *wiphy, struct sk_buff *msg)
+{
+ struct nlattr *radios;
+ int i;
+
+ if (!wiphy->n_radio)
+ return 0;
+
+ radios = nla_nest_start(msg, NL80211_ATTR_WIPHY_RADIOS);
+ if (!radios)
+ return -ENOBUFS;
+
+ for (i = 0; i < wiphy->n_radio; i++)
+ if (nl80211_put_radio(wiphy, msg, i))
+ goto fail;
+
+ nla_nest_end(msg, radios);
+
+ if (nl80211_put_iface_combinations(wiphy, msg,
+ NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS,
+ -1, true, NLA_F_NESTED))
+ return -ENOBUFS;
+
+ return 0;
+
+fail:
+ nla_nest_cancel(msg, radios);
+ return -ENOBUFS;
+}
+
struct nl80211_dump_wiphy_state {
s64 filter_wiphy;
long start;
@@ -2687,7 +2786,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
goto nla_put_failure;
if (nl80211_put_iface_combinations(&rdev->wiphy, msg,
- state->split))
+ NL80211_ATTR_INTERFACE_COMBINATIONS,
+ rdev->wiphy.n_radio ? 0 : -1,
+ state->split, 0))
goto nla_put_failure;
state->split_start++;
@@ -3001,6 +3102,12 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
rdev->wiphy.hw_timestamp_max_peers))
goto nla_put_failure;
+ state->split_start++;
+ break;
+ case 17:
+ if (nl80211_put_radios(&rdev->wiphy, msg))
+ goto nla_put_failure;
+
/* done */
state->split_start = 0;
break;
@@ -3344,7 +3451,7 @@ static int _nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
if (!_cfg80211_chandef_usable(&rdev->wiphy, chandef,
IEEE80211_CHAN_DISABLED,
- monitor)) {
+ monitor ? IEEE80211_CHAN_CAN_MONITOR : 0)) {
NL_SET_ERR_MSG(extack, "(extension) channel is disabled");
return -EINVAL;
}
@@ -3415,6 +3522,33 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
if (chandef.chan != cur_chan)
return -EBUSY;
+ /* only allow this for regular channel widths */
+ switch (wdev->links[link_id].ap.chandef.width) {
+ case NL80211_CHAN_WIDTH_20_NOHT:
+ case NL80211_CHAN_WIDTH_20:
+ case NL80211_CHAN_WIDTH_40:
+ case NL80211_CHAN_WIDTH_80:
+ case NL80211_CHAN_WIDTH_80P80:
+ case NL80211_CHAN_WIDTH_160:
+ case NL80211_CHAN_WIDTH_320:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ switch (chandef.width) {
+ case NL80211_CHAN_WIDTH_20_NOHT:
+ case NL80211_CHAN_WIDTH_20:
+ case NL80211_CHAN_WIDTH_40:
+ case NL80211_CHAN_WIDTH_80:
+ case NL80211_CHAN_WIDTH_80P80:
+ case NL80211_CHAN_WIDTH_160:
+ case NL80211_CHAN_WIDTH_320:
+ break;
+ default:
+ return -EINVAL;
+ }
+
result = rdev_set_ap_chanwidth(rdev, dev, link_id,
&chandef);
if (result)
@@ -4451,10 +4585,7 @@ static void get_key_callback(void *c, struct key_params *params)
struct nlattr *key;
struct get_key_cookie *cookie = c;
- if ((params->key &&
- nla_put(cookie->msg, NL80211_ATTR_KEY_DATA,
- params->key_len, params->key)) ||
- (params->seq &&
+ if ((params->seq &&
nla_put(cookie->msg, NL80211_ATTR_KEY_SEQ,
params->seq_len, params->seq)) ||
(params->cipher &&
@@ -4466,10 +4597,7 @@ static void get_key_callback(void *c, struct key_params *params)
if (!key)
goto nla_put_failure;
- if ((params->key &&
- nla_put(cookie->msg, NL80211_KEY_DATA,
- params->key_len, params->key)) ||
- (params->seq &&
+ if ((params->seq &&
nla_put(cookie->msg, NL80211_KEY_SEQ,
params->seq_len, params->seq)) ||
(params->cipher &&
@@ -5924,6 +6052,7 @@ static int nl80211_validate_ap_phy_operation(struct cfg80211_ap_settings *params
static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct cfg80211_beaconing_check_config beacon_check = {};
unsigned int link_id = nl80211_link_id(info->attrs);
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
@@ -5937,6 +6066,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
if (!rdev->ops->start_ap)
return -EOPNOTSUPP;
+ if (wdev->cac_started)
+ return -EBUSY;
+
if (wdev->links[link_id].ap.beacon_interval)
return -EALREADY;
@@ -6070,8 +6202,13 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
goto out;
}
- if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &params->chandef,
- wdev->iftype)) {
+ beacon_check.iftype = wdev->iftype;
+ beacon_check.relax = true;
+ beacon_check.reg_power =
+ cfg80211_get_6ghz_power_type(params->beacon.tail,
+ params->beacon.tail_len);
+ if (!cfg80211_reg_check_beaconing(&rdev->wiphy, &params->chandef,
+ &beacon_check)) {
err = -EINVAL;
goto out;
}
@@ -6228,6 +6365,7 @@ out:
static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct cfg80211_beaconing_check_config beacon_check = {};
unsigned int link_id = nl80211_link_id(info->attrs);
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
@@ -6254,6 +6392,19 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info)
if (err)
goto out;
+ /* recheck beaconing is permitted with possibly changed power type */
+ beacon_check.iftype = wdev->iftype;
+ beacon_check.relax = true;
+ beacon_check.reg_power =
+ cfg80211_get_6ghz_power_type(params->beacon.tail,
+ params->beacon.tail_len);
+ if (!cfg80211_reg_check_beaconing(&rdev->wiphy,
+ &wdev->links[link_id].ap.chandef,
+ &beacon_check)) {
+ err = -EINVAL;
+ goto out;
+ }
+
attr = info->attrs[NL80211_ATTR_FILS_DISCOVERY];
if (attr) {
err = nl80211_parse_fils_discovery(rdev, attr,
@@ -8116,7 +8267,8 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
is_indoor = true;
}
- return regulatory_hint_indoor(is_indoor, owner_nlportid);
+ regulatory_hint_indoor(is_indoor, owner_nlportid);
+ return 0;
default:
return -EINVAL;
}
@@ -9162,6 +9314,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
struct wiphy *wiphy;
int err, tmp, n_ssids = 0, n_channels, i;
size_t ie_len, size;
+ size_t ssids_offset, ie_offset;
wiphy = &rdev->wiphy;
@@ -9207,21 +9360,20 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
size = struct_size(request, channels, n_channels);
+ ssids_offset = size;
size = size_add(size, array_size(sizeof(*request->ssids), n_ssids));
+ ie_offset = size;
size = size_add(size, ie_len);
request = kzalloc(size, GFP_KERNEL);
if (!request)
return -ENOMEM;
+ request->n_channels = n_channels;
if (n_ssids)
- request->ssids = (void *)&request->channels[n_channels];
+ request->ssids = (void *)request + ssids_offset;
request->n_ssids = n_ssids;
- if (ie_len) {
- if (n_ssids)
- request->ie = (void *)(request->ssids + n_ssids);
- else
- request->ie = (void *)(request->channels + n_channels);
- }
+ if (ie_len)
+ request->ie = (void *)request + ie_offset;
i = 0;
if (scan_freqs) {
@@ -9928,6 +10080,17 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
flush_delayed_work(&rdev->dfs_update_channels_wk);
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
+ case NL80211_IFTYPE_MESH_POINT:
+ case NL80211_IFTYPE_ADHOC:
+ break;
+ default:
+ /* caution - see cfg80211_beaconing_iface_active() below */
+ return -EINVAL;
+ }
+
wiphy_lock(wiphy);
dfs_region = reg_get_dfs_region(wiphy);
@@ -9958,12 +10121,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
goto unlock;
}
- if (netif_carrier_ok(dev)) {
- err = -EBUSY;
- goto unlock;
- }
-
- if (wdev->cac_started) {
+ if (cfg80211_beaconing_iface_active(wdev) || wdev->cac_started) {
err = -EBUSY;
goto unlock;
}
@@ -13860,9 +14018,8 @@ nla_put_failure:
return -ENOBUFS;
}
-void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev)
+void cfg80211_free_coalesce(struct cfg80211_coalesce *coalesce)
{
- struct cfg80211_coalesce *coalesce = rdev->coalesce;
int i, j;
struct cfg80211_coalesce_rules *rule;
@@ -13871,13 +14028,13 @@ void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev)
for (i = 0; i < coalesce->n_rules; i++) {
rule = &coalesce->rules[i];
+ if (!rule)
+ continue;
for (j = 0; j < rule->n_patterns; j++)
kfree(rule->patterns[j].mask);
kfree(rule->patterns);
}
- kfree(coalesce->rules);
kfree(coalesce);
- rdev->coalesce = NULL;
}
static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
@@ -13975,17 +14132,16 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
const struct wiphy_coalesce_support *coalesce = rdev->wiphy.coalesce;
- struct cfg80211_coalesce new_coalesce = {};
- struct cfg80211_coalesce *n_coalesce;
- int err, rem_rule, n_rules = 0, i, j;
+ struct cfg80211_coalesce *new_coalesce;
+ int err, rem_rule, n_rules = 0, i;
struct nlattr *rule;
- struct cfg80211_coalesce_rules *tmp_rule;
if (!rdev->wiphy.coalesce || !rdev->ops->set_coalesce)
return -EOPNOTSUPP;
if (!info->attrs[NL80211_ATTR_COALESCE_RULE]) {
- cfg80211_rdev_free_coalesce(rdev);
+ cfg80211_free_coalesce(rdev->coalesce);
+ rdev->coalesce = NULL;
rdev_set_coalesce(rdev, NULL);
return 0;
}
@@ -13996,47 +14152,34 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info)
if (n_rules > coalesce->n_rules)
return -EINVAL;
- new_coalesce.rules = kcalloc(n_rules, sizeof(new_coalesce.rules[0]),
- GFP_KERNEL);
- if (!new_coalesce.rules)
+ new_coalesce = kzalloc(struct_size(new_coalesce, rules, n_rules),
+ GFP_KERNEL);
+ if (!new_coalesce)
return -ENOMEM;
- new_coalesce.n_rules = n_rules;
+ new_coalesce->n_rules = n_rules;
i = 0;
nla_for_each_nested(rule, info->attrs[NL80211_ATTR_COALESCE_RULE],
rem_rule) {
err = nl80211_parse_coalesce_rule(rdev, rule,
- &new_coalesce.rules[i]);
+ &new_coalesce->rules[i]);
if (err)
goto error;
i++;
}
- err = rdev_set_coalesce(rdev, &new_coalesce);
+ err = rdev_set_coalesce(rdev, new_coalesce);
if (err)
goto error;
- n_coalesce = kmemdup(&new_coalesce, sizeof(new_coalesce), GFP_KERNEL);
- if (!n_coalesce) {
- err = -ENOMEM;
- goto error;
- }
- cfg80211_rdev_free_coalesce(rdev);
- rdev->coalesce = n_coalesce;
+ cfg80211_free_coalesce(rdev->coalesce);
+ rdev->coalesce = new_coalesce;
return 0;
error:
- for (i = 0; i < new_coalesce.n_rules; i++) {
- tmp_rule = &new_coalesce.rules[i];
- if (!tmp_rule)
- continue;
- for (j = 0; j < tmp_rule->n_patterns; j++)
- kfree(tmp_rule->patterns[j].mask);
- kfree(tmp_rule->patterns);
- }
- kfree(new_coalesce.rules);
+ cfg80211_free_coalesce(new_coalesce);
return err;
}
@@ -16031,6 +16174,7 @@ static int nl80211_color_change(struct sk_buff *skb, struct genl_info *info)
params.counter_offset_presp = offset;
}
+ params.link_id = nl80211_link_id(info->attrs);
err = rdev_color_change(rdev, dev, &params);
out:
@@ -17433,7 +17577,8 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_color_change,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_MLO_VALID_LINK_ID),
},
{
.cmd = NL80211_CMD_SET_FILS_AAD,
@@ -19455,7 +19600,7 @@ EXPORT_SYMBOL(cfg80211_ch_switch_started_notify);
int cfg80211_bss_color_notify(struct net_device *dev,
enum nl80211_commands cmd, u8 count,
- u64 color_bitmap)
+ u64 color_bitmap, u8 link_id)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
@@ -19478,6 +19623,10 @@ int cfg80211_bss_color_notify(struct net_device *dev,
if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
+ if (wdev->valid_links &&
+ nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id))
+ goto nla_put_failure;
+
if (cmd == NL80211_CMD_COLOR_CHANGE_STARTED &&
nla_put_u32(msg, NL80211_ATTR_COLOR_CHANGE_COUNT, count))
goto nla_put_failure;
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 6376f3a87f8a..ffaab9a92e5b 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Portions of this file
- * Copyright (C) 2018, 2020-2022 Intel Corporation
+ * Copyright (C) 2018, 2020-2024 Intel Corporation
*/
#ifndef __NET_WIRELESS_NL80211_H
#define __NET_WIRELESS_NL80211_H
@@ -119,7 +119,7 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
void nl80211_send_ap_stopped(struct wireless_dev *wdev, unsigned int link_id);
-void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev);
+void cfg80211_free_coalesce(struct cfg80211_coalesce *coalesce);
/* peer measurement */
int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
index e106dcea3977..0396fa19bdf1 100644
--- a/net/wireless/pmsr.c
+++ b/net/wireless/pmsr.c
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * Copyright (C) 2018 - 2021, 2023 Intel Corporation
+ * Copyright (C) 2018 - 2021, 2023 - 2024 Intel Corporation
*/
#include <net/cfg80211.h>
#include "core.h"
@@ -56,7 +56,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
out->ftm.burst_period = 0;
if (tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD])
out->ftm.burst_period =
- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]);
+ nla_get_u16(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]);
out->ftm.asap = !!tb[NL80211_PMSR_FTM_REQ_ATTR_ASAP];
if (out->ftm.asap && !capa->ftm.asap) {
@@ -75,7 +75,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
out->ftm.num_bursts_exp = 0;
if (tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP])
out->ftm.num_bursts_exp =
- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]);
+ nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]);
if (capa->ftm.max_bursts_exponent >= 0 &&
out->ftm.num_bursts_exp > capa->ftm.max_bursts_exponent) {
@@ -88,7 +88,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
out->ftm.burst_duration = 15;
if (tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION])
out->ftm.burst_duration =
- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]);
+ nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]);
out->ftm.ftms_per_burst = 0;
if (tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST])
@@ -107,7 +107,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
out->ftm.ftmr_retries = 3;
if (tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES])
out->ftm.ftmr_retries =
- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]);
+ nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]);
out->ftm.request_lci = !!tb[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI];
if (out->ftm.request_lci && !capa->ftm.request_lci) {
@@ -148,6 +148,14 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
return -EINVAL;
}
+ if (out->ftm.ftms_per_burst > 31 && !out->ftm.non_trigger_based &&
+ !out->ftm.trigger_based) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST],
+ "FTM: FTMs per burst must be set lower than 31");
+ return -ERANGE;
+ }
+
if ((out->ftm.trigger_based || out->ftm.non_trigger_based) &&
out->ftm.preamble != NL80211_PREAMBLE_HE) {
NL_SET_ERR_MSG_ATTR(info->extack,
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 43897a5269b6..ec3f4aa1c807 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -2,7 +2,7 @@
/*
* Portions of this file
* Copyright(c) 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2018, 2021-2023 Intel Corporation
+ * Copyright (C) 2018, 2021-2024 Intel Corporation
*/
#ifndef __CFG80211_RDEV_OPS
#define __CFG80211_RDEV_OPS
@@ -458,6 +458,10 @@ static inline int rdev_scan(struct cfg80211_registered_device *rdev,
struct cfg80211_scan_request *request)
{
int ret;
+
+ if (WARN_ON_ONCE(!request->n_ssids && request->ssids))
+ return -EINVAL;
+
trace_rdev_scan(&rdev->wiphy, request);
ret = rdev->ops->scan(&rdev->wiphy, request);
trace_rdev_return_int(&rdev->wiphy, ret);
@@ -574,13 +578,11 @@ static inline int rdev_leave_ibss(struct cfg80211_registered_device *rdev,
static inline int
rdev_set_wiphy_params(struct cfg80211_registered_device *rdev, u32 changed)
{
- int ret;
-
- if (!rdev->ops->set_wiphy_params)
- return -EOPNOTSUPP;
+ int ret = -EOPNOTSUPP;
trace_rdev_set_wiphy_params(&rdev->wiphy, changed);
- ret = rdev->ops->set_wiphy_params(&rdev->wiphy, changed);
+ if (rdev->ops->set_wiphy_params)
+ ret = rdev->ops->set_wiphy_params(&rdev->wiphy, changed);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
@@ -1421,13 +1423,11 @@ rdev_set_radar_background(struct cfg80211_registered_device *rdev,
struct cfg80211_chan_def *chandef)
{
struct wiphy *wiphy = &rdev->wiphy;
- int ret;
-
- if (!rdev->ops->set_radar_background)
- return -EOPNOTSUPP;
+ int ret = -EOPNOTSUPP;
trace_rdev_set_radar_background(wiphy, chandef);
- ret = rdev->ops->set_radar_background(wiphy, chandef);
+ if (rdev->ops->set_radar_background)
+ ret = rdev->ops->set_radar_background(wiphy, chandef);
trace_rdev_return_int(wiphy, ret);
return ret;
@@ -1464,13 +1464,11 @@ rdev_add_link_station(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct link_station_parameters *params)
{
- int ret;
-
- if (!rdev->ops->add_link_station)
- return -EOPNOTSUPP;
+ int ret = -EOPNOTSUPP;
trace_rdev_add_link_station(&rdev->wiphy, dev, params);
- ret = rdev->ops->add_link_station(&rdev->wiphy, dev, params);
+ if (rdev->ops->add_link_station)
+ ret = rdev->ops->add_link_station(&rdev->wiphy, dev, params);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
@@ -1480,13 +1478,11 @@ rdev_mod_link_station(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct link_station_parameters *params)
{
- int ret;
-
- if (!rdev->ops->mod_link_station)
- return -EOPNOTSUPP;
+ int ret = -EOPNOTSUPP;
trace_rdev_mod_link_station(&rdev->wiphy, dev, params);
- ret = rdev->ops->mod_link_station(&rdev->wiphy, dev, params);
+ if (rdev->ops->mod_link_station)
+ ret = rdev->ops->mod_link_station(&rdev->wiphy, dev, params);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
@@ -1496,13 +1492,11 @@ rdev_del_link_station(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct link_station_del_parameters *params)
{
- int ret;
-
- if (!rdev->ops->del_link_station)
- return -EOPNOTSUPP;
+ int ret = -EOPNOTSUPP;
trace_rdev_del_link_station(&rdev->wiphy, dev, params);
- ret = rdev->ops->del_link_station(&rdev->wiphy, dev, params);
+ if (rdev->ops->del_link_station)
+ ret = rdev->ops->del_link_station(&rdev->wiphy, dev, params);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
@@ -1513,13 +1507,11 @@ rdev_set_hw_timestamp(struct cfg80211_registered_device *rdev,
struct cfg80211_set_hw_timestamp *hwts)
{
struct wiphy *wiphy = &rdev->wiphy;
- int ret;
-
- if (!rdev->ops->set_hw_timestamp)
- return -EOPNOTSUPP;
+ int ret = -EOPNOTSUPP;
trace_rdev_set_hw_timestamp(wiphy, dev, hwts);
- ret = rdev->ops->set_hw_timestamp(wiphy, dev, hwts);
+ if (rdev->ops->set_hw_timestamp)
+ ret = rdev->ops->set_hw_timestamp(wiphy, dev, hwts);
trace_rdev_return_int(wiphy, ret);
return ret;
@@ -1531,15 +1523,25 @@ rdev_set_ttlm(struct cfg80211_registered_device *rdev,
struct cfg80211_ttlm_params *params)
{
struct wiphy *wiphy = &rdev->wiphy;
- int ret;
-
- if (!rdev->ops->set_ttlm)
- return -EOPNOTSUPP;
+ int ret = -EOPNOTSUPP;
trace_rdev_set_ttlm(wiphy, dev, params);
- ret = rdev->ops->set_ttlm(wiphy, dev, params);
+ if (rdev->ops->set_ttlm)
+ ret = rdev->ops->set_ttlm(wiphy, dev, params);
trace_rdev_return_int(wiphy, ret);
return ret;
}
+
+static inline u32
+rdev_get_radio_mask(struct cfg80211_registered_device *rdev,
+ struct net_device *dev)
+{
+ struct wiphy *wiphy = &rdev->wiphy;
+
+ if (!rdev->ops->get_radio_mask)
+ return 0;
+
+ return rdev->ops->get_radio_mask(wiphy, dev);
+}
#endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 753f8e9aa4b1..4a27f3823e25 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1600,6 +1600,8 @@ static u32 map_regdom_flags(u32 rd_flags)
channel_flags |= IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT;
if (rd_flags & NL80211_RRF_PSD)
channel_flags |= IEEE80211_CHAN_PSD;
+ if (rd_flags & NL80211_RRF_ALLOW_6GHZ_VLP_AP)
+ channel_flags |= IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP;
return channel_flags;
}
@@ -3284,7 +3286,7 @@ int regulatory_hint_user(const char *alpha2,
return 0;
}
-int regulatory_hint_indoor(bool is_indoor, u32 portid)
+void regulatory_hint_indoor(bool is_indoor, u32 portid)
{
spin_lock(&reg_indoor_lock);
@@ -3307,8 +3309,6 @@ int regulatory_hint_indoor(bool is_indoor, u32 portid)
if (!is_indoor)
reg_check_channels();
-
- return 0;
}
void regulatory_netlink_notify(u32 portid)
@@ -3666,9 +3666,9 @@ static bool pending_reg_beacon(struct ieee80211_channel *beacon_chan)
return false;
}
-int regulatory_hint_found_beacon(struct wiphy *wiphy,
- struct ieee80211_channel *beacon_chan,
- gfp_t gfp)
+void regulatory_hint_found_beacon(struct wiphy *wiphy,
+ struct ieee80211_channel *beacon_chan,
+ gfp_t gfp)
{
struct reg_beacon *reg_beacon;
bool processing;
@@ -3677,18 +3677,18 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy,
beacon_chan->flags & IEEE80211_CHAN_RADAR ||
(beacon_chan->band == NL80211_BAND_2GHZ &&
!freq_is_chan_12_13_14(beacon_chan->center_freq)))
- return 0;
+ return;
spin_lock_bh(&reg_pending_beacons_lock);
processing = pending_reg_beacon(beacon_chan);
spin_unlock_bh(&reg_pending_beacons_lock);
if (processing)
- return 0;
+ return;
reg_beacon = kzalloc(sizeof(struct reg_beacon), gfp);
if (!reg_beacon)
- return -ENOMEM;
+ return;
pr_debug("Found new beacon on frequency: %d.%03d MHz (Ch %d) on %s\n",
beacon_chan->center_freq, beacon_chan->freq_offset,
@@ -3708,8 +3708,6 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy,
spin_unlock_bh(&reg_pending_beacons_lock);
schedule_work(&reg_work);
-
- return 0;
}
static void print_rd_rules(const struct ieee80211_regdomain *rd)
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index a02ef5609f52..e1b211c4f75c 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -42,7 +42,7 @@ int regulatory_hint_user(const char *alpha2,
* device is operating in an indoor environment.
* @portid: the netlink port ID on which the hint was given.
*/
-int regulatory_hint_indoor(bool is_indoor, u32 portid);
+void regulatory_hint_indoor(bool is_indoor, u32 portid);
/**
* regulatory_netlink_notify - notify on released netlink socket
@@ -82,9 +82,9 @@ bool reg_last_request_cell_base(void);
* on a newly found BSS. If you cannot make use of this feature you can
* set the wiphy->disable_beacon_hints to true.
*/
-int regulatory_hint_found_beacon(struct wiphy *wiphy,
- struct ieee80211_channel *beacon_chan,
- gfp_t gfp);
+void regulatory_hint_found_beacon(struct wiphy *wiphy,
+ struct ieee80211_channel *beacon_chan,
+ gfp_t gfp);
/**
* regulatory_hint_country_ie - hints a country IE as a regulatory domain
@@ -137,13 +137,14 @@ void regulatory_hint_disconnect(void);
* Get a value specifying the U-NII band frequency belongs to.
* U-NII bands are defined by the FCC in C.F.R 47 part 15.
*
- * Returns -EINVAL if freq is invalid, 0 for UNII-1, 1 for UNII-2A,
+ * Return: -EINVAL if freq is invalid, 0 for UNII-1, 1 for UNII-2A,
* 2 for UNII-2B, 3 for UNII-2C and 4 for UNII-3.
*/
int cfg80211_get_unii(int freq);
/**
* regulatory_indoor_allowed - is indoor operation allowed
+ * Return: %true if indoor operation is allowed, %false otherwise
*/
bool regulatory_indoor_allowed(void);
@@ -173,11 +174,13 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy,
* reg_dfs_domain_same - Checks if both wiphy have same DFS domain configured
* @wiphy1: wiphy it's dfs_region to be checked against that of wiphy2
* @wiphy2: wiphy it's dfs_region to be checked against that of wiphy1
+ * Return: %true if both wiphys have the same DFS domain, %false otherwise
*/
bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2);
/**
* reg_reload_regdb - reload the regulatory.db firmware file
+ * Return: 0 for success, an error code otherwise
*/
int reg_reload_regdb(void);
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 5a5dd3ce497f..64eeed82d43d 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -812,6 +812,7 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
LIST_HEAD(coloc_ap_list);
bool need_scan_psc = true;
const struct ieee80211_sband_iftype_data *iftd;
+ size_t size, offs_ssids, offs_6ghz_params, offs_ies;
rdev_req->scan_6ghz = true;
@@ -877,10 +878,15 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
spin_unlock_bh(&rdev->bss_lock);
}
- request = kzalloc(struct_size(request, channels, n_channels) +
- sizeof(*request->scan_6ghz_params) * count +
- sizeof(*request->ssids) * rdev_req->n_ssids,
- GFP_KERNEL);
+ size = struct_size(request, channels, n_channels);
+ offs_ssids = size;
+ size += sizeof(*request->ssids) * rdev_req->n_ssids;
+ offs_6ghz_params = size;
+ size += sizeof(*request->scan_6ghz_params) * count;
+ offs_ies = size;
+ size += rdev_req->ie_len;
+
+ request = kzalloc(size, GFP_KERNEL);
if (!request) {
cfg80211_free_coloc_ap_list(&coloc_ap_list);
return -ENOMEM;
@@ -888,8 +894,26 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
*request = *rdev_req;
request->n_channels = 0;
- request->scan_6ghz_params =
- (void *)&request->channels[n_channels];
+ request->n_6ghz_params = 0;
+ if (rdev_req->n_ssids) {
+ /*
+ * Add the ssids from the parent scan request to the new
+ * scan request, so the driver would be able to use them
+ * in its probe requests to discover hidden APs on PSC
+ * channels.
+ */
+ request->ssids = (void *)request + offs_ssids;
+ memcpy(request->ssids, rdev_req->ssids,
+ sizeof(*request->ssids) * request->n_ssids);
+ }
+ request->scan_6ghz_params = (void *)request + offs_6ghz_params;
+
+ if (rdev_req->ie_len) {
+ void *ie = (void *)request + offs_ies;
+
+ memcpy(ie, rdev_req->ie, rdev_req->ie_len);
+ request->ie = ie;
+ }
/*
* PSC channels should not be scanned in case of direct scan with 1 SSID
@@ -978,17 +1002,8 @@ skip:
if (request->n_channels) {
struct cfg80211_scan_request *old = rdev->int_scan_req;
- rdev->int_scan_req = request;
- /*
- * Add the ssids from the parent scan request to the new scan
- * request, so the driver would be able to use them in its
- * probe requests to discover hidden APs on PSC channels.
- */
- request->ssids = (void *)&request->channels[request->n_channels];
- request->n_ssids = rdev_req->n_ssids;
- memcpy(request->ssids, rdev_req->ssids, sizeof(*request->ssids) *
- request->n_ssids);
+ rdev->int_scan_req = request;
/*
* If this scan follows a previous scan, save the scan start
@@ -1589,7 +1604,7 @@ struct cfg80211_bss *__cfg80211_get_bss(struct wiphy *wiphy,
}
EXPORT_SYMBOL(__cfg80211_get_bss);
-static void rb_insert_bss(struct cfg80211_registered_device *rdev,
+static bool rb_insert_bss(struct cfg80211_registered_device *rdev,
struct cfg80211_internal_bss *bss)
{
struct rb_node **p = &rdev->bss_tree.rb_node;
@@ -1605,7 +1620,7 @@ static void rb_insert_bss(struct cfg80211_registered_device *rdev,
if (WARN_ON(!cmp)) {
/* will sort of leak this BSS */
- return;
+ return false;
}
if (cmp < 0)
@@ -1616,6 +1631,7 @@ static void rb_insert_bss(struct cfg80211_registered_device *rdev,
rb_link_node(&bss->rbn, parent, p);
rb_insert_color(&bss->rbn, &rdev->bss_tree);
+ return true;
}
static struct cfg80211_internal_bss *
@@ -1642,6 +1658,34 @@ rb_find_bss(struct cfg80211_registered_device *rdev,
return NULL;
}
+static void cfg80211_insert_bss(struct cfg80211_registered_device *rdev,
+ struct cfg80211_internal_bss *bss)
+{
+ lockdep_assert_held(&rdev->bss_lock);
+
+ if (!rb_insert_bss(rdev, bss))
+ return;
+ list_add_tail(&bss->list, &rdev->bss_list);
+ rdev->bss_entries++;
+}
+
+static void cfg80211_rehash_bss(struct cfg80211_registered_device *rdev,
+ struct cfg80211_internal_bss *bss)
+{
+ lockdep_assert_held(&rdev->bss_lock);
+
+ rb_erase(&bss->rbn, &rdev->bss_tree);
+ if (!rb_insert_bss(rdev, bss)) {
+ list_del(&bss->list);
+ if (!list_empty(&bss->hidden_list))
+ list_del_init(&bss->hidden_list);
+ if (!list_empty(&bss->pub.nontrans_list))
+ list_del_init(&bss->pub.nontrans_list);
+ rdev->bss_entries--;
+ }
+ rdev->bss_generation++;
+}
+
static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev,
struct cfg80211_internal_bss *new)
{
@@ -1954,9 +1998,7 @@ __cfg80211_bss_update(struct cfg80211_registered_device *rdev,
bss_ref_get(rdev, bss_from_pub(tmp->pub.transmitted_bss));
}
- list_add_tail(&new->list, &rdev->bss_list);
- rdev->bss_entries++;
- rb_insert_bss(rdev, new);
+ cfg80211_insert_bss(rdev, new);
found = new;
}
@@ -2121,33 +2163,53 @@ struct cfg80211_inform_single_bss_data {
u64 cannot_use_reasons;
};
-static bool cfg80211_6ghz_power_type_valid(const u8 *ie, size_t ielen,
- const u32 flags)
+enum ieee80211_ap_reg_power
+cfg80211_get_6ghz_power_type(const u8 *elems, size_t elems_len)
{
- const struct element *tmp;
+ const struct ieee80211_he_6ghz_oper *he_6ghz_oper;
struct ieee80211_he_operation *he_oper;
+ const struct element *tmp;
- tmp = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ielen);
- if (tmp && tmp->datalen >= sizeof(*he_oper) + 1) {
- const struct ieee80211_he_6ghz_oper *he_6ghz_oper;
-
- he_oper = (void *)&tmp->data[1];
- he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper);
-
- if (!he_6ghz_oper)
- return false;
+ tmp = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION,
+ elems, elems_len);
+ if (!tmp || tmp->datalen < sizeof(*he_oper) + 1 ||
+ tmp->datalen < ieee80211_he_oper_size(tmp->data + 1))
+ return IEEE80211_REG_UNSET_AP;
+
+ he_oper = (void *)&tmp->data[1];
+ he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper);
+
+ if (!he_6ghz_oper)
+ return IEEE80211_REG_UNSET_AP;
+
+ switch (u8_get_bits(he_6ghz_oper->control,
+ IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) {
+ case IEEE80211_6GHZ_CTRL_REG_LPI_AP:
+ case IEEE80211_6GHZ_CTRL_REG_INDOOR_LPI_AP:
+ return IEEE80211_REG_LPI_AP;
+ case IEEE80211_6GHZ_CTRL_REG_SP_AP:
+ case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP:
+ return IEEE80211_REG_SP_AP;
+ case IEEE80211_6GHZ_CTRL_REG_VLP_AP:
+ return IEEE80211_REG_VLP_AP;
+ default:
+ return IEEE80211_REG_UNSET_AP;
+ }
+}
- switch (u8_get_bits(he_6ghz_oper->control,
- IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) {
- case IEEE80211_6GHZ_CTRL_REG_LPI_AP:
- return true;
- case IEEE80211_6GHZ_CTRL_REG_SP_AP:
- return !(flags & IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT);
- case IEEE80211_6GHZ_CTRL_REG_VLP_AP:
- return !(flags & IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT);
- }
+static bool cfg80211_6ghz_power_type_valid(const u8 *elems, size_t elems_len,
+ const u32 flags)
+{
+ switch (cfg80211_get_6ghz_power_type(elems, elems_len)) {
+ case IEEE80211_REG_LPI_AP:
+ return true;
+ case IEEE80211_REG_SP_AP:
+ return !(flags & IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT);
+ case IEEE80211_REG_VLP_AP:
+ return !(flags & IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT);
+ default:
+ return false;
}
- return false;
}
/* Returned bss is reference counted and must be cleaned up appropriately. */
@@ -2207,12 +2269,16 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
tmp.pub.use_for = data->use_for;
tmp.pub.cannot_use_reasons = data->cannot_use_reasons;
- if (data->bss_source != BSS_SOURCE_DIRECT) {
+ switch (data->bss_source) {
+ case BSS_SOURCE_MBSSID:
tmp.pub.transmitted_bss = data->source_bss;
+ fallthrough;
+ case BSS_SOURCE_STA_PROFILE:
ts = bss_from_pub(data->source_bss)->ts;
tmp.pub.bssid_index = data->bssid_index;
tmp.pub.max_bssid_indicator = data->max_bssid_indicator;
- } else {
+ break;
+ case BSS_SOURCE_DIRECT:
ts = jiffies;
if (channel->band == NL80211_BAND_60GHZ) {
@@ -2227,6 +2293,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
regulatory_hint_found_beacon(wiphy, channel,
gfp);
}
+ break;
}
/*
@@ -2443,7 +2510,8 @@ cfg80211_parse_mbssid_data(struct wiphy *wiphy,
profile, profile_len);
if (!mbssid_index_ie || mbssid_index_ie[1] < 1 ||
mbssid_index_ie[2] == 0 ||
- mbssid_index_ie[2] > 46) {
+ mbssid_index_ie[2] > 46 ||
+ mbssid_index_ie[2] >= (1 << elem->data[0])) {
/* No valid Multiple BSSID-Index element */
continue;
}
@@ -2655,6 +2723,7 @@ struct tbtt_info_iter_data {
u8 param_ch_count;
u32 use_for;
u8 mld_id, link_id;
+ bool non_tx;
};
static enum cfg80211_rnr_iter_ret
@@ -2665,14 +2734,20 @@ cfg802121_mld_ap_rnr_iter(void *_data, u8 type,
const struct ieee80211_rnr_mld_params *mld_params;
struct tbtt_info_iter_data *data = _data;
u8 link_id;
+ bool non_tx = false;
if (type == IEEE80211_TBTT_INFO_TYPE_TBTT &&
tbtt_info_len >= offsetofend(struct ieee80211_tbtt_info_ge_11,
- mld_params))
- mld_params = (void *)(tbtt_info +
- offsetof(struct ieee80211_tbtt_info_ge_11,
- mld_params));
- else if (type == IEEE80211_TBTT_INFO_TYPE_MLD &&
+ mld_params)) {
+ const struct ieee80211_tbtt_info_ge_11 *tbtt_info_ge_11 =
+ (void *)tbtt_info;
+
+ non_tx = (tbtt_info_ge_11->bss_params &
+ (IEEE80211_RNR_TBTT_PARAMS_MULTI_BSSID |
+ IEEE80211_RNR_TBTT_PARAMS_TRANSMITTED_BSSID)) ==
+ IEEE80211_RNR_TBTT_PARAMS_MULTI_BSSID;
+ mld_params = &tbtt_info_ge_11->mld_params;
+ } else if (type == IEEE80211_TBTT_INFO_TYPE_MLD &&
tbtt_info_len >= sizeof(struct ieee80211_rnr_mld_params))
mld_params = (void *)tbtt_info;
else
@@ -2691,6 +2766,7 @@ cfg802121_mld_ap_rnr_iter(void *_data, u8 type,
data->param_ch_count =
le16_get_bits(mld_params->params,
IEEE80211_RNR_MLD_PARAMS_BSS_CHANGE_COUNT);
+ data->non_tx = non_tx;
if (type == IEEE80211_TBTT_INFO_TYPE_TBTT)
data->use_for = NL80211_BSS_USE_FOR_ALL;
@@ -2702,7 +2778,7 @@ cfg802121_mld_ap_rnr_iter(void *_data, u8 type,
static u8
cfg80211_rnr_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id,
const struct ieee80211_neighbor_ap_info **ap_info,
- u8 *param_ch_count)
+ u8 *param_ch_count, bool *non_tx)
{
struct tbtt_info_iter_data data = {
.mld_id = mld_id,
@@ -2713,6 +2789,7 @@ cfg80211_rnr_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id,
*ap_info = data.ap_info;
*param_ch_count = data.param_ch_count;
+ *non_tx = data.non_tx;
return data.use_for;
}
@@ -2892,6 +2969,7 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
ssize_t profile_len;
u8 param_ch_count;
u8 link_id, use_for;
+ bool non_tx;
if (!ieee80211_mle_basic_sta_prof_size_ok((u8 *)mle->sta_prof[i],
mle->sta_prof_len[i]))
@@ -2937,10 +3015,24 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
tx_data->ielen,
mld_id, link_id,
&ap_info,
- &param_ch_count);
+ &param_ch_count,
+ &non_tx);
if (!use_for)
continue;
+ /*
+ * As of 802.11be_D5.0, the specification does not give us any
+ * way of discovering both the MaxBSSID and the Multiple-BSSID
+ * Index. It does seem like the Multiple-BSSID Index element
+ * may be provided, but section 9.4.2.45 explicitly forbids
+ * including a Multiple-BSSID Element (in this case without any
+ * subelements).
+ * Without both pieces of information we cannot calculate the
+ * reference BSSID, so simply ignore the BSS.
+ */
+ if (non_tx)
+ continue;
+
/* We could sanity check the BSSID is included */
if (!ieee80211_operating_class_to_band(ap_info->op_class,
@@ -3086,8 +3178,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
struct ieee80211_mgmt *mgmt, size_t len,
gfp_t gfp)
{
- size_t min_hdr_len = offsetof(struct ieee80211_mgmt,
- u.probe_resp.variable);
+ size_t min_hdr_len;
struct ieee80211_ext *ext = NULL;
enum cfg80211_bss_frame_type ftype;
u16 beacon_interval;
@@ -3110,10 +3201,16 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
if (ieee80211_is_s1g_beacon(mgmt->frame_control)) {
ext = (void *) mgmt;
- min_hdr_len = offsetof(struct ieee80211_ext, u.s1g_beacon);
if (ieee80211_is_s1g_short_beacon(mgmt->frame_control))
min_hdr_len = offsetof(struct ieee80211_ext,
u.s1g_short_beacon.variable);
+ else
+ min_hdr_len = offsetof(struct ieee80211_ext,
+ u.s1g_beacon.variable);
+ } else {
+ /* same for beacons */
+ min_hdr_len = offsetof(struct ieee80211_mgmt,
+ u.probe_resp.variable);
}
if (WARN_ON(len < min_hdr_len))
@@ -3299,19 +3396,14 @@ void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev,
if (!WARN_ON(!__cfg80211_unlink_bss(rdev, new)))
rdev->bss_generation++;
}
-
- rb_erase(&cbss->rbn, &rdev->bss_tree);
- rb_insert_bss(rdev, cbss);
- rdev->bss_generation++;
+ cfg80211_rehash_bss(rdev, cbss);
list_for_each_entry_safe(nontrans_bss, tmp,
&cbss->pub.nontrans_list,
nontrans_list) {
bss = bss_from_pub(nontrans_bss);
bss->pub.channel = chan;
- rb_erase(&bss->rbn, &rdev->bss_tree);
- rb_insert_bss(rdev, bss);
- rdev->bss_generation++;
+ cfg80211_rehash_bss(rdev, bss);
}
done:
@@ -3366,10 +3458,14 @@ int cfg80211_wext_siwscan(struct net_device *dev,
wiphy = &rdev->wiphy;
/* Determine number of channels, needed to allocate creq */
- if (wreq && wreq->num_channels)
+ if (wreq && wreq->num_channels) {
+ /* Passed from userspace so should be checked */
+ if (unlikely(wreq->num_channels > IW_MAX_FREQUENCIES))
+ return -EINVAL;
n_channels = wreq->num_channels;
- else
+ } else {
n_channels = ieee80211_get_num_supported_channels(wiphy);
+ }
creq = kzalloc(sizeof(*creq) + sizeof(struct cfg80211_ssid) +
n_channels * sizeof(void *),
@@ -3443,8 +3539,10 @@ int cfg80211_wext_siwscan(struct net_device *dev,
memcpy(creq->ssids[0].ssid, wreq->essid, wreq->essid_len);
creq->ssids[0].ssid_len = wreq->essid_len;
}
- if (wreq->scan_type == IW_SCAN_TYPE_PASSIVE)
+ if (wreq->scan_type == IW_SCAN_TYPE_PASSIVE) {
+ creq->ssids = NULL;
creq->n_ssids = 0;
+ }
}
for (i = 0; i < NUM_NL80211_BANDS; i++)
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 82e3ce42206c..d9d7bf8bb5c1 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -5,7 +5,7 @@
* (for nl80211's connect() and wext)
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2009, 2020, 2022-2023 Intel Corporation. All rights reserved.
+ * Copyright (C) 2009, 2020, 2022-2024 Intel Corporation. All rights reserved.
* Copyright 2017 Intel Deutschland GmbH
*/
@@ -130,7 +130,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev)
rdev->scan_req = request;
- err = rdev_scan(rdev, request);
+ err = cfg80211_scan(rdev);
if (!err) {
wdev->conn->state = CFG80211_CONN_SCANNING;
nl80211_send_scan_start(rdev, wdev);
@@ -1045,6 +1045,7 @@ void cfg80211_connect_done(struct net_device *dev,
cfg80211_hold_bss(
bss_from_pub(params->links[link].bss));
ev->cr.links[link].bss = params->links[link].bss;
+ ev->cr.links[link].status = params->links[link].status;
if (params->links[link].addr) {
ev->cr.links[link].addr = next;
@@ -1353,6 +1354,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
return;
cfg80211_wdev_release_bsses(wdev);
+ wdev->valid_links = 0;
wdev->connected = false;
wdev->u.client.ssid_len = 0;
wdev->conn_owner_nlportid = 0;
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 565511a3f461..62f26618f674 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -5,7 +5,7 @@
*
* Copyright 2005-2006 Jiri Benc <jbenc@suse.cz>
* Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2020-2021, 2023 Intel Corporation
+ * Copyright (C) 2020-2021, 2023-2024 Intel Corporation
*/
#include <linux/device.h>
@@ -137,7 +137,7 @@ static int wiphy_resume(struct device *dev)
if (rdev->wiphy.registered && rdev->ops->resume)
ret = rdev_resume(rdev);
rdev->suspended = false;
- schedule_work(&rdev->wiphy_work);
+ queue_work(system_unbound_wq, &rdev->wiphy_work);
wiphy_unlock(&rdev->wiphy);
if (ret)
diff --git a/net/wireless/tests/chan.c b/net/wireless/tests/chan.c
index d02258ac2dab..74bbee25085f 100644
--- a/net/wireless/tests/chan.c
+++ b/net/wireless/tests/chan.c
@@ -113,16 +113,16 @@ static const struct chandef_compat_case {
},
},
{
- .desc = "different primary 160 MHz",
+ .desc = "different primary 320 MHz",
.c1 = {
.width = NL80211_CHAN_WIDTH_320,
.chan = &chan_6ghz_105,
- .center_freq1 = 6475 + 150,
+ .center_freq1 = 6475 + 110,
},
.c2 = {
.width = NL80211_CHAN_WIDTH_320,
.chan = &chan_6ghz_105,
- .center_freq1 = 6475 - 10,
+ .center_freq1 = 6475 - 50,
},
},
{
@@ -131,12 +131,12 @@ static const struct chandef_compat_case {
.c1 = {
.width = NL80211_CHAN_WIDTH_160,
.chan = &chan_6ghz_105,
- .center_freq1 = 6475 + 70,
+ .center_freq1 = 6475 + 30,
},
.c2 = {
.width = NL80211_CHAN_WIDTH_320,
.chan = &chan_6ghz_105,
- .center_freq1 = 6475 - 10,
+ .center_freq1 = 6475 - 50,
},
.compat = true,
},
@@ -145,12 +145,12 @@ static const struct chandef_compat_case {
.c1 = {
.width = NL80211_CHAN_WIDTH_160,
.chan = &chan_6ghz_105,
- .center_freq1 = 6475 + 70,
+ .center_freq1 = 6475 + 30,
},
.c2 = {
.width = NL80211_CHAN_WIDTH_320,
.chan = &chan_6ghz_105,
- .center_freq1 = 6475 - 10,
+ .center_freq1 = 6475 - 50,
.punctured = 0xf,
},
.compat = true,
@@ -160,13 +160,13 @@ static const struct chandef_compat_case {
.c1 = {
.width = NL80211_CHAN_WIDTH_160,
.chan = &chan_6ghz_105,
- .center_freq1 = 6475 + 70,
+ .center_freq1 = 6475 + 30,
.punctured = 0xc0,
},
.c2 = {
.width = NL80211_CHAN_WIDTH_320,
.chan = &chan_6ghz_105,
- .center_freq1 = 6475 - 10,
+ .center_freq1 = 6475 - 50,
.punctured = 0xc000,
},
.compat = true,
@@ -176,13 +176,13 @@ static const struct chandef_compat_case {
.c1 = {
.width = NL80211_CHAN_WIDTH_160,
.chan = &chan_6ghz_105,
- .center_freq1 = 6475 + 70,
+ .center_freq1 = 6475 + 30,
.punctured = 0x80,
},
.c2 = {
.width = NL80211_CHAN_WIDTH_320,
.chan = &chan_6ghz_105,
- .center_freq1 = 6475 - 10,
+ .center_freq1 = 6475 - 50,
.punctured = 0xc000,
},
},
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index df013c98b80d..5c26f065bd68 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -243,6 +243,80 @@
} while (0)
/*************************************************************
+ * wiphy work traces *
+ *************************************************************/
+
+DECLARE_EVENT_CLASS(wiphy_work_event,
+ TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work),
+ TP_ARGS(wiphy, work),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ __field(void *, instance)
+ __field(void *, func)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ __entry->instance = work;
+ __entry->func = work ? work->func : NULL;
+ ),
+ TP_printk(WIPHY_PR_FMT " instance=%p func=%pS",
+ WIPHY_PR_ARG, __entry->instance, __entry->func)
+);
+
+DEFINE_EVENT(wiphy_work_event, wiphy_work_queue,
+ TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work),
+ TP_ARGS(wiphy, work)
+);
+
+DEFINE_EVENT(wiphy_work_event, wiphy_work_run,
+ TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work),
+ TP_ARGS(wiphy, work)
+);
+
+DEFINE_EVENT(wiphy_work_event, wiphy_work_cancel,
+ TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work),
+ TP_ARGS(wiphy, work)
+);
+
+DEFINE_EVENT(wiphy_work_event, wiphy_work_flush,
+ TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work),
+ TP_ARGS(wiphy, work)
+);
+
+TRACE_EVENT(wiphy_delayed_work_queue,
+ TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work,
+ unsigned long delay),
+ TP_ARGS(wiphy, work, delay),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ __field(void *, instance)
+ __field(void *, func)
+ __field(unsigned long, delay)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ __entry->instance = work;
+ __entry->func = work->func;
+ __entry->delay = delay;
+ ),
+ TP_printk(WIPHY_PR_FMT " instance=%p func=%pS delay=%ld",
+ WIPHY_PR_ARG, __entry->instance, __entry->func,
+ __entry->delay)
+);
+
+TRACE_EVENT(wiphy_work_worker_start,
+ TP_PROTO(struct wiphy *wiphy),
+ TP_ARGS(wiphy),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ ),
+ TP_printk(WIPHY_PR_FMT, WIPHY_PR_ARG)
+);
+
+/*************************************************************
* rdev->ops traces *
*************************************************************/
@@ -372,7 +446,7 @@ TRACE_EVENT(rdev_add_virtual_intf,
),
TP_fast_assign(
WIPHY_ASSIGN;
- __assign_str(vir_intf_name, name ? name : "<noname>");
+ __assign_str(vir_intf_name);
__entry->type = type;
),
TP_printk(WIPHY_PR_FMT ", virtual intf name: %s, type: %d",
@@ -2842,6 +2916,7 @@ TRACE_EVENT(rdev_color_change,
__field(u8, count)
__field(u16, bcn_ofs)
__field(u16, pres_ofs)
+ __field(u8, link_id)
),
TP_fast_assign(
WIPHY_ASSIGN;
@@ -2849,11 +2924,12 @@ TRACE_EVENT(rdev_color_change,
__entry->count = params->count;
__entry->bcn_ofs = params->counter_offset_beacon;
__entry->pres_ofs = params->counter_offset_presp;
+ __entry->link_id = params->link_id;
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT
- ", count: %u",
+ ", count: %u, link_id: %d",
WIPHY_PR_ARG, NETDEV_PR_ARG,
- __entry->count)
+ __entry->count, __entry->link_id)
);
TRACE_EVENT(rdev_set_radar_background,
@@ -2887,6 +2963,75 @@ DEFINE_EVENT(wiphy_wdev_link_evt, rdev_del_intf_link,
TP_ARGS(wiphy, wdev, link_id)
);
+TRACE_EVENT(rdev_del_link_station,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct link_station_del_parameters *params),
+ TP_ARGS(wiphy, netdev, params),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __array(u8, mld_mac, 6)
+ __field(u32, link_id)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ memset(__entry->mld_mac, 0, 6);
+ if (params->mld_mac)
+ memcpy(__entry->mld_mac, params->mld_mac, 6);
+ __entry->link_id = params->link_id;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM"
+ ", link id: %u",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->mld_mac,
+ __entry->link_id)
+);
+
+TRACE_EVENT(rdev_set_hw_timestamp,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_set_hw_timestamp *hwts),
+
+ TP_ARGS(wiphy, netdev, hwts),
+
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ MAC_ENTRY(macaddr)
+ __field(bool, enable)
+ ),
+
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ MAC_ASSIGN(macaddr, hwts->macaddr);
+ __entry->enable = hwts->enable;
+ ),
+
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", mac %pM, enable: %u",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->macaddr,
+ __entry->enable)
+);
+
+TRACE_EVENT(rdev_set_ttlm,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_ttlm_params *params),
+ TP_ARGS(wiphy, netdev, params),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __array(u8, dlink, sizeof(u16) * 8)
+ __array(u8, ulink, sizeof(u16) * 8)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ memcpy(__entry->dlink, params->dlink, sizeof(params->dlink));
+ memcpy(__entry->ulink, params->ulink, sizeof(params->ulink));
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT,
+ WIPHY_PR_ARG, NETDEV_PR_ARG)
+);
+
/*************************************************************
* cfg80211 exported functions traces *
*************************************************************/
@@ -3244,23 +3389,26 @@ TRACE_EVENT(cfg80211_cqm_rssi_notify,
TRACE_EVENT(cfg80211_reg_can_beacon,
TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef,
- enum nl80211_iftype iftype, bool check_no_ir),
- TP_ARGS(wiphy, chandef, iftype, check_no_ir),
+ enum nl80211_iftype iftype, u32 prohibited_flags,
+ u32 permitting_flags),
+ TP_ARGS(wiphy, chandef, iftype, prohibited_flags, permitting_flags),
TP_STRUCT__entry(
WIPHY_ENTRY
CHAN_DEF_ENTRY
__field(enum nl80211_iftype, iftype)
- __field(bool, check_no_ir)
+ __field(u32, prohibited_flags)
+ __field(u32, permitting_flags)
),
TP_fast_assign(
WIPHY_ASSIGN;
CHAN_DEF_ASSIGN(chandef);
__entry->iftype = iftype;
- __entry->check_no_ir = check_no_ir;
+ __entry->prohibited_flags = prohibited_flags;
+ __entry->permitting_flags = permitting_flags;
),
- TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d check_no_ir=%s",
+ TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d prohibited_flags=0x%x permitting_flags=0x%x",
WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype,
- BOOL_TO_STR(__entry->check_no_ir))
+ __entry->prohibited_flags, __entry->permitting_flags)
);
TRACE_EVENT(cfg80211_chandef_dfs_required,
@@ -3921,55 +4069,6 @@ DEFINE_EVENT(link_station_add_mod, rdev_mod_link_station,
TP_ARGS(wiphy, netdev, params)
);
-TRACE_EVENT(rdev_del_link_station,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
- struct link_station_del_parameters *params),
- TP_ARGS(wiphy, netdev, params),
- TP_STRUCT__entry(
- WIPHY_ENTRY
- NETDEV_ENTRY
- __array(u8, mld_mac, 6)
- __field(u32, link_id)
- ),
- TP_fast_assign(
- WIPHY_ASSIGN;
- NETDEV_ASSIGN;
- memset(__entry->mld_mac, 0, 6);
- if (params->mld_mac)
- memcpy(__entry->mld_mac, params->mld_mac, 6);
- __entry->link_id = params->link_id;
- ),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM"
- ", link id: %u",
- WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->mld_mac,
- __entry->link_id)
-);
-
-TRACE_EVENT(rdev_set_hw_timestamp,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
- struct cfg80211_set_hw_timestamp *hwts),
-
- TP_ARGS(wiphy, netdev, hwts),
-
- TP_STRUCT__entry(
- WIPHY_ENTRY
- NETDEV_ENTRY
- MAC_ENTRY(macaddr)
- __field(bool, enable)
- ),
-
- TP_fast_assign(
- WIPHY_ASSIGN;
- NETDEV_ASSIGN;
- MAC_ASSIGN(macaddr, hwts->macaddr);
- __entry->enable = hwts->enable;
- ),
-
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", mac %pM, enable: %u",
- WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->macaddr,
- __entry->enable)
-);
-
TRACE_EVENT(cfg80211_links_removed,
TP_PROTO(struct net_device *netdev, u16 link_mask),
TP_ARGS(netdev, link_mask),
@@ -3985,26 +4084,6 @@ TRACE_EVENT(cfg80211_links_removed,
__entry->link_mask)
);
-TRACE_EVENT(rdev_set_ttlm,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
- struct cfg80211_ttlm_params *params),
- TP_ARGS(wiphy, netdev, params),
- TP_STRUCT__entry(
- WIPHY_ENTRY
- NETDEV_ENTRY
- __array(u8, dlink, sizeof(u16) * 8)
- __array(u8, ulink, sizeof(u16) * 8)
- ),
- TP_fast_assign(
- WIPHY_ASSIGN;
- NETDEV_ASSIGN;
- memcpy(__entry->dlink, params->dlink, sizeof(params->dlink));
- memcpy(__entry->ulink, params->ulink, sizeof(params->ulink));
- ),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG)
-);
-
#endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 2bde8a354631..9a7c3adc8a3b 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1504,7 +1504,7 @@ static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate)
5120, /* 0.833333... */
};
u32 rates_160M[3] = { 960777777, 907400000, 816666666 };
- u32 rates_969[3] = { 480388888, 453700000, 408333333 };
+ u32 rates_996[3] = { 480388888, 453700000, 408333333 };
u32 rates_484[3] = { 229411111, 216666666, 195000000 };
u32 rates_242[3] = { 114711111, 108333333, 97500000 };
u32 rates_106[3] = { 40000000, 37777777, 34000000 };
@@ -1524,12 +1524,14 @@ static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate)
if (WARN_ON_ONCE(rate->nss < 1 || rate->nss > 8))
return 0;
- if (rate->bw == RATE_INFO_BW_160)
+ if (rate->bw == RATE_INFO_BW_160 ||
+ (rate->bw == RATE_INFO_BW_HE_RU &&
+ rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_2x996))
result = rates_160M[rate->he_gi];
else if (rate->bw == RATE_INFO_BW_80 ||
(rate->bw == RATE_INFO_BW_HE_RU &&
rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_996))
- result = rates_969[rate->he_gi];
+ result = rates_996[rate->he_gi];
else if (rate->bw == RATE_INFO_BW_40 ||
(rate->bw == RATE_INFO_BW_HE_RU &&
rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_484))
@@ -2305,13 +2307,16 @@ static int cfg80211_wdev_bi(struct wireless_dev *wdev)
static void cfg80211_calculate_bi_data(struct wiphy *wiphy, u32 new_beacon_int,
u32 *beacon_int_gcd,
- bool *beacon_int_different)
+ bool *beacon_int_different,
+ int radio_idx)
{
+ struct cfg80211_registered_device *rdev;
struct wireless_dev *wdev;
*beacon_int_gcd = 0;
*beacon_int_different = false;
+ rdev = wiphy_to_rdev(wiphy);
list_for_each_entry(wdev, &wiphy->wdev_list, list) {
int wdev_bi;
@@ -2319,6 +2324,11 @@ static void cfg80211_calculate_bi_data(struct wiphy *wiphy, u32 new_beacon_int,
if (wdev->valid_links)
continue;
+ /* skip wdevs not active on the given wiphy radio */
+ if (radio_idx >= 0 &&
+ !(rdev_get_radio_mask(rdev, wdev->netdev) & BIT(radio_idx)))
+ continue;
+
wdev_bi = cfg80211_wdev_bi(wdev);
if (!wdev_bi)
@@ -2366,14 +2376,19 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
void *data),
void *data)
{
+ const struct wiphy_radio *radio = NULL;
+ const struct ieee80211_iface_combination *c, *cs;
const struct ieee80211_regdomain *regdom;
enum nl80211_dfs_regions region = 0;
- int i, j, iftype;
+ int i, j, n, iftype;
int num_interfaces = 0;
u32 used_iftypes = 0;
u32 beacon_int_gcd;
bool beacon_int_different;
+ if (params->radio_idx >= 0)
+ radio = &wiphy->radio[params->radio_idx];
+
/*
* This is a bit strange, since the iteration used to rely only on
* the data given by the driver, but here it now relies on context,
@@ -2385,7 +2400,8 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
* interfaces (while being brought up) and channel/radar data.
*/
cfg80211_calculate_bi_data(wiphy, params->new_beacon_int,
- &beacon_int_gcd, &beacon_int_different);
+ &beacon_int_gcd, &beacon_int_different,
+ params->radio_idx);
if (params->radar_detect) {
rcu_read_lock();
@@ -2402,13 +2418,18 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
used_iftypes |= BIT(iftype);
}
- for (i = 0; i < wiphy->n_iface_combinations; i++) {
- const struct ieee80211_iface_combination *c;
+ if (radio) {
+ cs = radio->iface_combinations;
+ n = radio->n_iface_combinations;
+ } else {
+ cs = wiphy->iface_combinations;
+ n = wiphy->n_iface_combinations;
+ }
+ for (i = 0; i < n; i++) {
struct ieee80211_iface_limit *limits;
u32 all_iftypes = 0;
- c = &wiphy->iface_combinations[i];
-
+ c = &cs[i];
if (num_interfaces > c->max_interfaces)
continue;
if (params->num_different_channels > c->num_different_channels)
@@ -2549,6 +2570,7 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
{
struct cfg80211_registered_device *rdev;
struct wireless_dev *wdev;
+ int ret;
wdev = dev->ieee80211_ptr;
if (!wdev)
@@ -2560,7 +2582,11 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
memset(sinfo, 0, sizeof(*sinfo));
- return rdev_get_station(rdev, dev, mac_addr, sinfo);
+ wiphy_lock(&rdev->wiphy);
+ ret = rdev_get_station(rdev, dev, mac_addr, sinfo);
+ wiphy_unlock(&rdev->wiphy);
+
+ return ret;
}
EXPORT_SYMBOL(cfg80211_get_station);
@@ -2860,3 +2886,38 @@ cfg80211_get_iftype_ext_capa(struct wiphy *wiphy, enum nl80211_iftype type)
return NULL;
}
EXPORT_SYMBOL(cfg80211_get_iftype_ext_capa);
+
+static bool
+ieee80211_radio_freq_range_valid(const struct wiphy_radio *radio,
+ u32 freq, u32 width)
+{
+ const struct wiphy_radio_freq_range *r;
+ int i;
+
+ for (i = 0; i < radio->n_freq_range; i++) {
+ r = &radio->freq_range[i];
+ if (freq - width / 2 >= r->start_freq &&
+ freq + width / 2 <= r->end_freq)
+ return true;
+ }
+
+ return false;
+}
+
+bool cfg80211_radio_chandef_valid(const struct wiphy_radio *radio,
+ const struct cfg80211_chan_def *chandef)
+{
+ u32 freq, width;
+
+ freq = ieee80211_chandef_to_khz(chandef);
+ width = nl80211_chan_width_to_mhz(chandef->width);
+ if (!ieee80211_radio_freq_range_valid(radio, freq, width))
+ return false;
+
+ freq = MHZ_TO_KHZ(chandef->center_freq2);
+ if (freq && !ieee80211_radio_freq_range_valid(radio, freq, width))
+ return false;
+
+ return true;
+}
+EXPORT_SYMBOL(cfg80211_radio_chandef_valid);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index d18d51412cc0..8dda4178497c 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -871,8 +871,8 @@ static int x25_wait_for_data(struct sock *sk, long timeout)
return rc;
}
-static int x25_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int x25_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sock *sk = sock->sk;
struct sock *newsk;
diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c
index e9802afa43d0..643f50874dfe 100644
--- a/net/x25/sysctl_net_x25.c
+++ b/net/x25/sysctl_net_x25.c
@@ -71,7 +71,6 @@ static struct ctl_table x25_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { },
};
int __init x25_register_sysctl(void)
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index caa340134b0e..9f76ca591d54 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -151,6 +151,7 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
#define XDP_UMEM_FLAGS_VALID ( \
XDP_UMEM_UNALIGNED_CHUNK_FLAG | \
XDP_UMEM_TX_SW_CSUM | \
+ XDP_UMEM_TX_METADATA_LEN | \
0)
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
@@ -204,8 +205,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
return -EINVAL;
- if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8)
- return -EINVAL;
+ if (mr->flags & XDP_UMEM_TX_METADATA_LEN) {
+ if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8)
+ return -EINVAL;
+ umem->tx_metadata_len = mr->tx_metadata_len;
+ }
umem->size = size;
umem->headroom = headroom;
@@ -215,7 +219,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->pgs = NULL;
umem->user = NULL;
umem->flags = mr->flags;
- umem->tx_metadata_len = mr->tx_metadata_len;
INIT_LIST_HEAD(&umem->xsk_dma_list);
refcount_set(&umem->users, 1);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 727aa20be4bd..7e16336044b2 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -35,8 +35,6 @@
#define TX_BATCH_SIZE 32
#define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE)
-static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);
-
void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
{
if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
@@ -313,13 +311,10 @@ static bool xsk_is_bound(struct xdp_sock *xs)
static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
- struct net_device *dev = xdp->rxq->dev;
- u32 qid = xdp->rxq->queue_index;
-
if (!xsk_is_bound(xs))
return -ENXIO;
- if (!dev->_rx[qid].pool || xs->umem != dev->_rx[qid].pool->umem)
+ if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
return -EINVAL;
if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) {
@@ -375,22 +370,23 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
{
- struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
int err;
err = xsk_rcv(xs, xdp);
if (err)
return err;
- if (!xs->flush_node.prev)
+ if (!xs->flush_node.prev) {
+ struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list();
+
list_add(&xs->flush_node, flush_list);
+ }
return 0;
}
-void __xsk_map_flush(void)
+void __xsk_map_flush(struct list_head *flush_list)
{
- struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
struct xdp_sock *xs, *tmp;
list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
@@ -399,16 +395,6 @@ void __xsk_map_flush(void)
}
}
-#ifdef CONFIG_DEBUG_NET
-bool xsk_map_check_flush(void)
-{
- if (list_empty(this_cpu_ptr(&xskmap_flush_list)))
- return false;
- __xsk_map_flush();
- return true;
-}
-#endif
-
void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries)
{
xskq_prod_submit_n(pool->cq, nb_entries);
@@ -1775,7 +1761,7 @@ static struct pernet_operations xsk_net_ops = {
static int __init xsk_init(void)
{
- int err, cpu;
+ int err;
err = proto_register(&xsk_proto, 0 /* no slab */);
if (err)
@@ -1793,8 +1779,6 @@ static int __init xsk_init(void)
if (err)
goto out_pernet;
- for_each_possible_cpu(cpu)
- INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu));
return 0;
out_pernet:
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index ce60ecd48a4d..c0e0204b9630 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -338,7 +338,6 @@ static struct xsk_dma_map *xp_create_dma_map(struct device *dev, struct net_devi
dma_map->netdev = netdev;
dma_map->dev = dev;
- dma_map->dma_need_sync = false;
dma_map->dma_pages_cnt = nr_pages;
refcount_set(&dma_map->users, 1);
list_add(&dma_map->list, &umem->xsk_dma_list);
@@ -424,7 +423,6 @@ static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_
pool->dev = dma_map->dev;
pool->dma_pages_cnt = dma_map->dma_pages_cnt;
- pool->dma_need_sync = dma_map->dma_need_sync;
memcpy(pool->dma_pages, dma_map->dma_pages,
pool->dma_pages_cnt * sizeof(*pool->dma_pages));
@@ -460,8 +458,6 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
__xp_dma_unmap(dma_map, attrs);
return -ENOMEM;
}
- if (dma_need_sync(dev, dma))
- dma_map->dma_need_sync = true;
dma_map->dma_pages[i] = dma;
}
@@ -557,11 +553,9 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
xskb->xdp.data_meta = xskb->xdp.data;
xskb->xdp.flags = 0;
- if (pool->dma_need_sync) {
- dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
- pool->frame_len,
- DMA_BIDIRECTIONAL);
- }
+ if (pool->dev)
+ xp_dma_sync_for_device(pool, xskb->dma, pool->frame_len);
+
return &xskb->xdp;
}
EXPORT_SYMBOL(xp_alloc);
@@ -633,7 +627,7 @@ u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
u32 nb_entries1 = 0, nb_entries2;
- if (unlikely(pool->dma_need_sync)) {
+ if (unlikely(pool->dev && dma_dev_need_sync(pool->dev))) {
struct xdp_buff *buff;
/* Slow path */
@@ -693,18 +687,3 @@ dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr)
(addr & ~PAGE_MASK);
}
EXPORT_SYMBOL(xp_raw_get_dma);
-
-void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb)
-{
- dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
- xskb->pool->frame_len, DMA_BIDIRECTIONAL);
-}
-EXPORT_SYMBOL(xp_dma_sync_for_cpu_slow);
-
-void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
- size_t size)
-{
- dma_sync_single_range_for_device(pool->dev, dma, 0,
- size, DMA_BIDIRECTIONAL);
-}
-EXPORT_SYMBOL(xp_dma_sync_for_device_slow);
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 547cec77ba03..512e0b2f8514 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -13,7 +13,8 @@ endif
obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
xfrm_input.o xfrm_output.o \
- xfrm_sysctl.o xfrm_replay.o xfrm_device.o
+ xfrm_sysctl.o xfrm_replay.o xfrm_device.o \
+ xfrm_nat_keepalive.o
obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
obj-$(CONFIG_XFRM_USER) += xfrm_user.o
diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
index 655fe4ff8621..91357ccaf4af 100644
--- a/net/xfrm/xfrm_compat.c
+++ b/net/xfrm/xfrm_compat.c
@@ -98,6 +98,7 @@ static const int compat_msg_min[XFRM_NR_MSGTYPES] = {
};
static const struct nla_policy compat_policy[XFRMA_MAX+1] = {
+ [XFRMA_UNSPEC] = { .strict_start_type = XFRMA_SA_DIR },
[XFRMA_SA] = { .len = XMSGSIZE(compat_xfrm_usersa_info)},
[XFRMA_POLICY] = { .len = XMSGSIZE(compat_xfrm_userpolicy_info)},
[XFRMA_LASTUSED] = { .type = NLA_U64},
@@ -129,6 +130,8 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = {
[XFRMA_SET_MARK_MASK] = { .type = NLA_U32 },
[XFRMA_IF_ID] = { .type = NLA_U32 },
[XFRMA_MTIMER_THRESH] = { .type = NLA_U32 },
+ [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT),
+ [XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 },
};
static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb,
@@ -277,9 +280,11 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src)
case XFRMA_SET_MARK_MASK:
case XFRMA_IF_ID:
case XFRMA_MTIMER_THRESH:
+ case XFRMA_SA_DIR:
+ case XFRMA_NAT_KEEPALIVE_INTERVAL:
return xfrm_nla_cpy(dst, src, nla_len(src));
default:
- BUILD_BUG_ON(XFRMA_MAX != XFRMA_MTIMER_THRESH);
+ BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL);
pr_warn_once("unsupported nla_type %d\n", src->nla_type);
return -EOPNOTSUPP;
}
@@ -434,7 +439,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
int err;
if (type > XFRMA_MAX) {
- BUILD_BUG_ON(XFRMA_MAX != XFRMA_MTIMER_THRESH);
+ BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL);
NL_SET_ERR_MSG(extack, "Bad attribute");
return -EOPNOTSUPP;
}
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 6346690d5c69..9a44d363ba62 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -253,11 +253,17 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
return -EINVAL;
}
+ if ((xuo->flags & XFRM_OFFLOAD_INBOUND && x->dir == XFRM_SA_DIR_OUT) ||
+ (!(xuo->flags & XFRM_OFFLOAD_INBOUND) && x->dir == XFRM_SA_DIR_IN)) {
+ NL_SET_ERR_MSG(extack, "Mismatched SA and offload direction");
+ return -EINVAL;
+ }
+
is_packet_offload = xuo->flags & XFRM_OFFLOAD_PACKET;
- /* We don't yet support UDP encapsulation and TFC padding. */
- if ((!is_packet_offload && x->encap) || x->tfcpad) {
- NL_SET_ERR_MSG(extack, "Encapsulation and TFC padding can't be offloaded");
+ /* We don't yet support TFC padding. */
+ if (x->tfcpad) {
+ NL_SET_ERR_MSG(extack, "TFC padding can't be offloaded");
return -EINVAL;
}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 161f535c8b94..749e7eea99e4 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -389,11 +389,15 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
*/
static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
{
+ struct xfrm_offload *xo = xfrm_offload(skb);
int ihl = skb->data - skb_transport_header(skb);
if (skb->transport_header != skb->network_header) {
memmove(skb_transport_header(skb),
skb_network_header(skb), ihl);
+ if (xo)
+ xo->orig_mac_len =
+ skb_mac_header_was_set(skb) ? skb_mac_header_len(skb) : 0;
skb->network_header = skb->transport_header;
}
ip_hdr(skb)->tot_len = htons(skb->len + ihl);
@@ -404,11 +408,15 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
+ struct xfrm_offload *xo = xfrm_offload(skb);
int ihl = skb->data - skb_transport_header(skb);
if (skb->transport_header != skb->network_header) {
memmove(skb_transport_header(skb),
skb_network_header(skb), ihl);
+ if (xo)
+ xo->orig_mac_len =
+ skb_mac_header_was_set(skb) ? skb_mac_header_len(skb) : 0;
skb->network_header = skb->transport_header;
}
ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -
@@ -463,7 +471,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
struct xfrm_offload *xo = xfrm_offload(skb);
struct sec_path *sp;
- if (encap_type < 0 || (xo && xo->flags & XFRM_GRO)) {
+ if (encap_type < 0 || (xo && (xo->flags & XFRM_GRO || encap_type == 0 ||
+ encap_type == UDP_ENCAP_ESPINUDP))) {
x = xfrm_input_state(skb);
if (unlikely(x->km.state != XFRM_STATE_VALID)) {
@@ -571,6 +580,15 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto drop;
}
+ if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) {
+ secpath_reset(skb);
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR);
+ xfrm_audit_state_notfound(skb, family, spi, seq);
+ xfrm_state_put(x);
+ x = NULL;
+ goto drop;
+ }
+
skb->mark = xfrm_smark_get(skb->mark, x);
sp->xvec[sp->len++] = x;
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index 4df5c06e3ece..e50e4bf993fa 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -926,7 +926,7 @@ static struct net *xfrmi_get_link_net(const struct net_device *dev)
{
struct xfrm_if *xi = netdev_priv(dev);
- return xi->net;
+ return READ_ONCE(xi->net);
}
static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {
diff --git a/net/xfrm/xfrm_nat_keepalive.c b/net/xfrm/xfrm_nat_keepalive.c
new file mode 100644
index 000000000000..82f0a301683f
--- /dev/null
+++ b/net/xfrm/xfrm_nat_keepalive.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xfrm_nat_keepalive.c
+ *
+ * (c) 2024 Eyal Birger <eyal.birger@gmail.com>
+ */
+
+#include <net/inet_common.h>
+#include <net/ip6_checksum.h>
+#include <net/xfrm.h>
+
+static DEFINE_PER_CPU(struct sock *, nat_keepalive_sk_ipv4);
+#if IS_ENABLED(CONFIG_IPV6)
+static DEFINE_PER_CPU(struct sock *, nat_keepalive_sk_ipv6);
+#endif
+
+struct nat_keepalive {
+ struct net *net;
+ u16 family;
+ xfrm_address_t saddr;
+ xfrm_address_t daddr;
+ __be16 encap_sport;
+ __be16 encap_dport;
+ __u32 smark;
+};
+
+static void nat_keepalive_init(struct nat_keepalive *ka, struct xfrm_state *x)
+{
+ ka->net = xs_net(x);
+ ka->family = x->props.family;
+ ka->saddr = x->props.saddr;
+ ka->daddr = x->id.daddr;
+ ka->encap_sport = x->encap->encap_sport;
+ ka->encap_dport = x->encap->encap_dport;
+ ka->smark = xfrm_smark_get(0, x);
+}
+
+static int nat_keepalive_send_ipv4(struct sk_buff *skb,
+ struct nat_keepalive *ka)
+{
+ struct net *net = ka->net;
+ struct flowi4 fl4;
+ struct rtable *rt;
+ struct sock *sk;
+ __u8 tos = 0;
+ int err;
+
+ flowi4_init_output(&fl4, 0 /* oif */, skb->mark, tos,
+ RT_SCOPE_UNIVERSE, IPPROTO_UDP, 0,
+ ka->daddr.a4, ka->saddr.a4, ka->encap_dport,
+ ka->encap_sport, sock_net_uid(net, NULL));
+
+ rt = ip_route_output_key(net, &fl4);
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
+
+ skb_dst_set(skb, &rt->dst);
+
+ sk = *this_cpu_ptr(&nat_keepalive_sk_ipv4);
+ sock_net_set(sk, net);
+ err = ip_build_and_send_pkt(skb, sk, fl4.saddr, fl4.daddr, NULL, tos);
+ sock_net_set(sk, &init_net);
+ return err;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int nat_keepalive_send_ipv6(struct sk_buff *skb,
+ struct nat_keepalive *ka,
+ struct udphdr *uh)
+{
+ struct net *net = ka->net;
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+ struct sock *sk;
+ __wsum csum;
+ int err;
+
+ csum = skb_checksum(skb, 0, skb->len, 0);
+ uh->check = csum_ipv6_magic(&ka->saddr.in6, &ka->daddr.in6,
+ skb->len, IPPROTO_UDP, csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_mark = skb->mark;
+ fl6.saddr = ka->saddr.in6;
+ fl6.daddr = ka->daddr.in6;
+ fl6.flowi6_proto = IPPROTO_UDP;
+ fl6.fl6_sport = ka->encap_sport;
+ fl6.fl6_dport = ka->encap_dport;
+
+ sk = *this_cpu_ptr(&nat_keepalive_sk_ipv6);
+ sock_net_set(sk, net);
+ dst = ipv6_stub->ipv6_dst_lookup_flow(net, sk, &fl6, NULL);
+ if (IS_ERR(dst))
+ return PTR_ERR(dst);
+
+ skb_dst_set(skb, dst);
+ err = ipv6_stub->ip6_xmit(sk, skb, &fl6, skb->mark, NULL, 0, 0);
+ sock_net_set(sk, &init_net);
+ return err;
+}
+#endif
+
+static void nat_keepalive_send(struct nat_keepalive *ka)
+{
+ const int nat_ka_hdrs_len = max(sizeof(struct iphdr),
+ sizeof(struct ipv6hdr)) +
+ sizeof(struct udphdr);
+ const u8 nat_ka_payload = 0xFF;
+ int err = -EAFNOSUPPORT;
+ struct sk_buff *skb;
+ struct udphdr *uh;
+
+ skb = alloc_skb(nat_ka_hdrs_len + sizeof(nat_ka_payload), GFP_ATOMIC);
+ if (unlikely(!skb))
+ return;
+
+ skb_reserve(skb, nat_ka_hdrs_len);
+
+ skb_put_u8(skb, nat_ka_payload);
+
+ uh = skb_push(skb, sizeof(*uh));
+ uh->source = ka->encap_sport;
+ uh->dest = ka->encap_dport;
+ uh->len = htons(skb->len);
+ uh->check = 0;
+
+ skb->mark = ka->smark;
+
+ switch (ka->family) {
+ case AF_INET:
+ err = nat_keepalive_send_ipv4(skb, ka);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ err = nat_keepalive_send_ipv6(skb, ka, uh);
+ break;
+#endif
+ }
+ if (err)
+ kfree_skb(skb);
+}
+
+struct nat_keepalive_work_ctx {
+ time64_t next_run;
+ time64_t now;
+};
+
+static int nat_keepalive_work_single(struct xfrm_state *x, int count, void *ptr)
+{
+ struct nat_keepalive_work_ctx *ctx = ptr;
+ bool send_keepalive = false;
+ struct nat_keepalive ka;
+ time64_t next_run;
+ u32 interval;
+ int delta;
+
+ interval = x->nat_keepalive_interval;
+ if (!interval)
+ return 0;
+
+ spin_lock(&x->lock);
+
+ delta = (int)(ctx->now - x->lastused);
+ if (delta < interval) {
+ x->nat_keepalive_expiration = ctx->now + interval - delta;
+ next_run = x->nat_keepalive_expiration;
+ } else if (x->nat_keepalive_expiration > ctx->now) {
+ next_run = x->nat_keepalive_expiration;
+ } else {
+ next_run = ctx->now + interval;
+ nat_keepalive_init(&ka, x);
+ send_keepalive = true;
+ }
+
+ spin_unlock(&x->lock);
+
+ if (send_keepalive)
+ nat_keepalive_send(&ka);
+
+ if (!ctx->next_run || next_run < ctx->next_run)
+ ctx->next_run = next_run;
+ return 0;
+}
+
+static void nat_keepalive_work(struct work_struct *work)
+{
+ struct nat_keepalive_work_ctx ctx;
+ struct xfrm_state_walk walk;
+ struct net *net;
+
+ ctx.next_run = 0;
+ ctx.now = ktime_get_real_seconds();
+
+ net = container_of(work, struct net, xfrm.nat_keepalive_work.work);
+ xfrm_state_walk_init(&walk, IPPROTO_ESP, NULL);
+ xfrm_state_walk(net, &walk, nat_keepalive_work_single, &ctx);
+ xfrm_state_walk_done(&walk, net);
+ if (ctx.next_run)
+ schedule_delayed_work(&net->xfrm.nat_keepalive_work,
+ (ctx.next_run - ctx.now) * HZ);
+}
+
+static int nat_keepalive_sk_init(struct sock * __percpu *socks,
+ unsigned short family)
+{
+ struct sock *sk;
+ int err, i;
+
+ for_each_possible_cpu(i) {
+ err = inet_ctl_sock_create(&sk, family, SOCK_RAW, IPPROTO_UDP,
+ &init_net);
+ if (err < 0)
+ goto err;
+
+ *per_cpu_ptr(socks, i) = sk;
+ }
+
+ return 0;
+err:
+ for_each_possible_cpu(i)
+ inet_ctl_sock_destroy(*per_cpu_ptr(socks, i));
+ return err;
+}
+
+static void nat_keepalive_sk_fini(struct sock * __percpu *socks)
+{
+ int i;
+
+ for_each_possible_cpu(i)
+ inet_ctl_sock_destroy(*per_cpu_ptr(socks, i));
+}
+
+void xfrm_nat_keepalive_state_updated(struct xfrm_state *x)
+{
+ struct net *net;
+
+ if (!x->nat_keepalive_interval)
+ return;
+
+ net = xs_net(x);
+ schedule_delayed_work(&net->xfrm.nat_keepalive_work, 0);
+}
+
+int __net_init xfrm_nat_keepalive_net_init(struct net *net)
+{
+ INIT_DELAYED_WORK(&net->xfrm.nat_keepalive_work, nat_keepalive_work);
+ return 0;
+}
+
+int xfrm_nat_keepalive_net_fini(struct net *net)
+{
+ cancel_delayed_work_sync(&net->xfrm.nat_keepalive_work);
+ return 0;
+}
+
+int xfrm_nat_keepalive_init(unsigned short family)
+{
+ int err = -EAFNOSUPPORT;
+
+ switch (family) {
+ case AF_INET:
+ err = nat_keepalive_sk_init(&nat_keepalive_sk_ipv4, PF_INET);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ err = nat_keepalive_sk_init(&nat_keepalive_sk_ipv6, PF_INET6);
+ break;
+#endif
+ }
+
+ if (err)
+ pr_err("xfrm nat keepalive init: failed to init err:%d\n", err);
+ return err;
+}
+EXPORT_SYMBOL_GPL(xfrm_nat_keepalive_init);
+
+void xfrm_nat_keepalive_fini(unsigned short family)
+{
+ switch (family) {
+ case AF_INET:
+ nat_keepalive_sk_fini(&nat_keepalive_sk_ipv4);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ nat_keepalive_sk_fini(&nat_keepalive_sk_ipv6);
+ break;
+#endif
+ }
+}
+EXPORT_SYMBOL_GPL(xfrm_nat_keepalive_fini);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 6affe5cd85d8..c56c61b0c12e 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -452,6 +452,8 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
static void xfrm_policy_kill(struct xfrm_policy *policy)
{
+ xfrm_dev_policy_delete(policy);
+
write_lock_bh(&policy->lock);
policy->walk.dead = 1;
write_unlock_bh(&policy->lock);
@@ -1850,7 +1852,6 @@ again:
__xfrm_policy_unlink(pol, dir);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
- xfrm_dev_policy_delete(pol);
cnt++;
xfrm_audit_policy_delete(pol, 1, task_valid);
xfrm_policy_kill(pol);
@@ -1891,7 +1892,6 @@ again:
__xfrm_policy_unlink(pol, dir);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
- xfrm_dev_policy_delete(pol);
cnt++;
xfrm_audit_policy_delete(pol, 1, task_valid);
xfrm_policy_kill(pol);
@@ -2342,7 +2342,6 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
pol = __xfrm_policy_unlink(pol, dir);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
if (pol) {
- xfrm_dev_policy_delete(pol);
xfrm_policy_kill(pol);
return 0;
}
@@ -2489,6 +2488,12 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
x = xfrm_state_find(remote, local, fl, tmpl, policy, &error,
family, policy->if_id);
+ if (x && x->dir && x->dir != XFRM_SA_DIR_OUT) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEDIRERROR);
+ xfrm_state_put(x);
+ error = -EINVAL;
+ goto fail;
+ }
if (x && x->km.state == XFRM_STATE_VALID) {
xfrm[nx++] = x;
@@ -2598,8 +2603,7 @@ static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
int nfheader_len)
{
if (dst->ops->family == AF_INET6) {
- struct rt6_info *rt = (struct rt6_info *)dst;
- path->path_cookie = rt6_get_cookie(rt);
+ path->path_cookie = rt6_get_cookie(dst_rt6_info(dst));
path->u.rt6.rt6i_nfheader_len = nfheader_len;
}
}
@@ -3593,6 +3597,8 @@ xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb,
return pol;
pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id);
+ if (IS_ERR(pol))
+ pol = NULL;
}
return pol;
@@ -3711,12 +3717,15 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id);
if (!pol) {
+ const bool is_crypto_offload = sp &&
+ (xfrm_input_state(skb)->xso.type == XFRM_DEV_OFFLOAD_CRYPTO);
+
if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
return 0;
}
- if (sp && secpath_has_nontransport(sp, 0, &xerr_idx)) {
+ if (sp && secpath_has_nontransport(sp, 0, &xerr_idx) && !is_crypto_offload) {
xfrm_secpath_reject(xerr_idx, skb, &fl);
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
return 0;
@@ -3903,15 +3912,10 @@ static void xfrm_link_failure(struct sk_buff *skb)
/* Impossible. Such dst must be popped before reaches point of failure. */
}
-static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
+static void xfrm_negative_advice(struct sock *sk, struct dst_entry *dst)
{
- if (dst) {
- if (dst->obsolete) {
- dst_release(dst);
- dst = NULL;
- }
- }
- return dst;
+ if (dst->obsolete)
+ sk_dst_reset(sk);
}
static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr)
@@ -4282,8 +4286,14 @@ static int __net_init xfrm_net_init(struct net *net)
if (rv < 0)
goto out_sysctl;
+ rv = xfrm_nat_keepalive_net_init(net);
+ if (rv < 0)
+ goto out_nat_keepalive;
+
return 0;
+out_nat_keepalive:
+ xfrm_sysctl_fini(net);
out_sysctl:
xfrm_policy_fini(net);
out_policy:
@@ -4296,6 +4306,7 @@ out_statistics:
static void __net_exit xfrm_net_exit(struct net *net)
{
+ xfrm_nat_keepalive_net_fini(net);
xfrm_sysctl_fini(net);
xfrm_policy_fini(net);
xfrm_state_fini(net);
@@ -4357,6 +4368,7 @@ void __init xfrm_init(void)
#endif
register_xfrm_state_bpf();
+ xfrm_nat_keepalive_init(AF_INET);
}
#ifdef CONFIG_AUDITSYSCALL
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index 5f9bf8e5c933..eeb984be03a7 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -41,6 +41,8 @@ static const struct snmp_mib xfrm_mib_list[] = {
SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR),
SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID),
SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR),
+ SNMP_MIB_ITEM("XfrmOutStateDirError", LINUX_MIB_XFRMOUTSTATEDIRERROR),
+ SNMP_MIB_ITEM("XfrmInStateDirError", LINUX_MIB_XFRMINSTATEDIRERROR),
SNMP_MIB_SENTINEL
};
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index ce56d659c55a..bc56c6305725 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -778,7 +778,8 @@ int xfrm_init_replay(struct xfrm_state *x, struct netlink_ext_ack *extack)
}
if (x->props.flags & XFRM_STATE_ESN) {
- if (replay_esn->replay_window == 0) {
+ if (replay_esn->replay_window == 0 &&
+ (!x->dir || x->dir == XFRM_SA_DIR_IN)) {
NL_SET_ERR_MSG(extack, "ESN replay window must be > 0");
return -EINVAL;
}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 0c306473a79d..37478d36a8df 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -49,6 +49,7 @@ static struct kmem_cache *xfrm_state_cache __ro_after_init;
static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
static HLIST_HEAD(xfrm_state_gc_list);
+static HLIST_HEAD(xfrm_state_dev_gc_list);
static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
{
@@ -214,6 +215,7 @@ static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO];
static DEFINE_SPINLOCK(xfrm_state_gc_lock);
+static DEFINE_SPINLOCK(xfrm_state_dev_gc_lock);
int __xfrm_state_delete(struct xfrm_state *x);
@@ -683,6 +685,41 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
}
EXPORT_SYMBOL(xfrm_state_alloc);
+#ifdef CONFIG_XFRM_OFFLOAD
+void xfrm_dev_state_delete(struct xfrm_state *x)
+{
+ struct xfrm_dev_offload *xso = &x->xso;
+ struct net_device *dev = READ_ONCE(xso->dev);
+
+ if (dev) {
+ dev->xfrmdev_ops->xdo_dev_state_delete(x);
+ spin_lock_bh(&xfrm_state_dev_gc_lock);
+ hlist_add_head(&x->dev_gclist, &xfrm_state_dev_gc_list);
+ spin_unlock_bh(&xfrm_state_dev_gc_lock);
+ }
+}
+EXPORT_SYMBOL_GPL(xfrm_dev_state_delete);
+
+void xfrm_dev_state_free(struct xfrm_state *x)
+{
+ struct xfrm_dev_offload *xso = &x->xso;
+ struct net_device *dev = READ_ONCE(xso->dev);
+
+ if (dev && dev->xfrmdev_ops) {
+ spin_lock_bh(&xfrm_state_dev_gc_lock);
+ if (!hlist_unhashed(&x->dev_gclist))
+ hlist_del(&x->dev_gclist);
+ spin_unlock_bh(&xfrm_state_dev_gc_lock);
+
+ if (dev->xfrmdev_ops->xdo_dev_state_free)
+ dev->xfrmdev_ops->xdo_dev_state_free(x);
+ WRITE_ONCE(xso->dev, NULL);
+ xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
+ netdev_put(dev, &xso->dev_tracker);
+ }
+}
+#endif
+
void __xfrm_state_destroy(struct xfrm_state *x, bool sync)
{
WARN_ON(x->km.state != XFRM_STATE_DEAD);
@@ -715,6 +752,7 @@ int __xfrm_state_delete(struct xfrm_state *x)
if (x->id.spi)
hlist_del_rcu(&x->byspi);
net->xfrm.state_num--;
+ xfrm_nat_keepalive_state_updated(x);
spin_unlock(&net->xfrm.xfrm_state_lock);
if (x->encap_sk)
@@ -848,6 +886,9 @@ EXPORT_SYMBOL(xfrm_state_flush);
int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid)
{
+ struct xfrm_state *x;
+ struct hlist_node *tmp;
+ struct xfrm_dev_offload *xso;
int i, err = 0, cnt = 0;
spin_lock_bh(&net->xfrm.xfrm_state_lock);
@@ -857,8 +898,6 @@ int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_vali
err = -ESRCH;
for (i = 0; i <= net->xfrm.state_hmask; i++) {
- struct xfrm_state *x;
- struct xfrm_dev_offload *xso;
restart:
hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
xso = &x->xso;
@@ -868,6 +907,8 @@ restart:
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
err = xfrm_state_delete(x);
+ xfrm_dev_state_free(x);
+
xfrm_audit_state_delete(x, err ? 0 : 1,
task_valid);
xfrm_state_put(x);
@@ -884,6 +925,24 @@ restart:
out:
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+
+ spin_lock_bh(&xfrm_state_dev_gc_lock);
+restart_gc:
+ hlist_for_each_entry_safe(x, tmp, &xfrm_state_dev_gc_list, dev_gclist) {
+ xso = &x->xso;
+
+ if (xso->dev == dev) {
+ spin_unlock_bh(&xfrm_state_dev_gc_lock);
+ xfrm_dev_state_free(x);
+ spin_lock_bh(&xfrm_state_dev_gc_lock);
+ goto restart_gc;
+ }
+
+ }
+ spin_unlock_bh(&xfrm_state_dev_gc_lock);
+
+ xfrm_flush_gc();
+
return err;
}
EXPORT_SYMBOL(xfrm_dev_state_flush);
@@ -1273,8 +1332,7 @@ found:
xso->dev = xdo->dev;
xso->real_dev = xdo->real_dev;
xso->flags = XFRM_DEV_OFFLOAD_FLAG_ACQ;
- netdev_tracker_alloc(xso->dev, &xso->dev_tracker,
- GFP_ATOMIC);
+ netdev_hold(xso->dev, &xso->dev_tracker, GFP_ATOMIC);
error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x, NULL);
if (error) {
xso->dir = 0;
@@ -1292,6 +1350,7 @@ found:
if (km_query(x, tmpl, pol) == 0) {
spin_lock_bh(&net->xfrm.xfrm_state_lock);
x->km.state = XFRM_STATE_ACQ;
+ x->dir = XFRM_SA_DIR_OUT;
list_add(&x->km.all, &net->xfrm.state_all);
XFRM_STATE_INSERT(bydst, &x->bydst,
net->xfrm.state_bydst + h,
@@ -1452,6 +1511,7 @@ static void __xfrm_state_insert(struct xfrm_state *x)
net->xfrm.state_num++;
xfrm_hash_grow_check(net, x->bydst.next != NULL);
+ xfrm_nat_keepalive_state_updated(x);
}
/* net->xfrm.xfrm_state_lock is held */
@@ -1744,6 +1804,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
x->lastused = orig->lastused;
x->new_mapping = 0;
x->new_mapping_sport = 0;
+ x->dir = orig->dir;
return x;
@@ -1864,8 +1925,14 @@ int xfrm_state_update(struct xfrm_state *x)
}
if (x1->km.state == XFRM_STATE_ACQ) {
+ if (x->dir && x1->dir != x->dir)
+ goto out;
+
__xfrm_state_insert(x);
x = NULL;
+ } else {
+ if (x1->dir != x->dir)
+ goto out;
}
err = 0;
@@ -2863,6 +2930,21 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload,
goto error;
}
+ if (x->nat_keepalive_interval) {
+ if (x->dir != XFRM_SA_DIR_OUT) {
+ NL_SET_ERR_MSG(extack, "NAT keepalive is only supported for outbound SAs");
+ err = -EINVAL;
+ goto error;
+ }
+
+ if (!x->encap || x->encap->encap_type != UDP_ENCAP_ESPINUDP) {
+ NL_SET_ERR_MSG(extack,
+ "NAT keepalive is only supported for UDP encapsulation");
+ err = -EINVAL;
+ goto error;
+ }
+ }
+
error:
return err;
}
diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c
index 7fdeafc838a7..ca003e8a0376 100644
--- a/net/xfrm/xfrm_sysctl.c
+++ b/net/xfrm/xfrm_sysctl.c
@@ -38,7 +38,6 @@ static struct ctl_table xfrm_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
- {}
};
int __net_init xfrm_sysctl_init(struct net *net)
@@ -57,10 +56,8 @@ int __net_init xfrm_sysctl_init(struct net *net)
table[3].data = &net->xfrm.sysctl_acq_expires;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- table[0].procname = NULL;
+ if (net->user_ns != &init_user_ns)
table_size = 0;
- }
net->xfrm.sysctl_hdr = register_net_sysctl_sz(net, "net/core", table,
table_size);
@@ -76,7 +73,7 @@ out_kmemdup:
void __net_exit xfrm_sysctl_fini(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
table = net->xfrm.sysctl_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->xfrm.sysctl_hdr);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 810b520493f3..55f039ec3d59 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -130,7 +130,7 @@ static inline int verify_sec_ctx_len(struct nlattr **attrs, struct netlink_ext_a
}
static inline int verify_replay(struct xfrm_usersa_info *p,
- struct nlattr **attrs,
+ struct nlattr **attrs, u8 sa_dir,
struct netlink_ext_ack *extack)
{
struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL];
@@ -168,6 +168,30 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
return -EINVAL;
}
+ if (sa_dir == XFRM_SA_DIR_OUT) {
+ if (rs->replay_window) {
+ NL_SET_ERR_MSG(extack, "Replay window should be 0 for output SA");
+ return -EINVAL;
+ }
+ if (rs->seq || rs->seq_hi) {
+ NL_SET_ERR_MSG(extack,
+ "Replay seq and seq_hi should be 0 for output SA");
+ return -EINVAL;
+ }
+ if (rs->bmp_len) {
+ NL_SET_ERR_MSG(extack, "Replay bmp_len should 0 for output SA");
+ return -EINVAL;
+ }
+ }
+
+ if (sa_dir == XFRM_SA_DIR_IN) {
+ if (rs->oseq || rs->oseq_hi) {
+ NL_SET_ERR_MSG(extack,
+ "Replay oseq and oseq_hi should be 0 for input SA");
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -176,6 +200,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
struct netlink_ext_ack *extack)
{
int err;
+ u8 sa_dir = attrs[XFRMA_SA_DIR] ? nla_get_u8(attrs[XFRMA_SA_DIR]) : 0;
err = -EINVAL;
switch (p->family) {
@@ -334,7 +359,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
goto out;
if ((err = verify_sec_ctx_len(attrs, extack)))
goto out;
- if ((err = verify_replay(p, attrs, extack)))
+ if ((err = verify_replay(p, attrs, sa_dir, extack)))
goto out;
err = -EINVAL;
@@ -358,6 +383,77 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
err = -EINVAL;
goto out;
}
+
+ if (sa_dir == XFRM_SA_DIR_OUT) {
+ NL_SET_ERR_MSG(extack,
+ "MTIMER_THRESH attribute should not be set on output SA");
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (sa_dir == XFRM_SA_DIR_OUT) {
+ if (p->flags & XFRM_STATE_DECAP_DSCP) {
+ NL_SET_ERR_MSG(extack, "Flag DECAP_DSCP should not be set for output SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (p->flags & XFRM_STATE_ICMP) {
+ NL_SET_ERR_MSG(extack, "Flag ICMP should not be set for output SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (p->flags & XFRM_STATE_WILDRECV) {
+ NL_SET_ERR_MSG(extack, "Flag WILDRECV should not be set for output SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (p->replay_window) {
+ NL_SET_ERR_MSG(extack, "Replay window should be 0 for output SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (attrs[XFRMA_REPLAY_VAL]) {
+ struct xfrm_replay_state *replay;
+
+ replay = nla_data(attrs[XFRMA_REPLAY_VAL]);
+
+ if (replay->seq || replay->bitmap) {
+ NL_SET_ERR_MSG(extack,
+ "Replay seq and bitmap should be 0 for output SA");
+ err = -EINVAL;
+ goto out;
+ }
+ }
+ }
+
+ if (sa_dir == XFRM_SA_DIR_IN) {
+ if (p->flags & XFRM_STATE_NOPMTUDISC) {
+ NL_SET_ERR_MSG(extack, "Flag NOPMTUDISC should not be set for input SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (attrs[XFRMA_SA_EXTRA_FLAGS]) {
+ u32 xflags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]);
+
+ if (xflags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) {
+ NL_SET_ERR_MSG(extack, "Flag DONT_ENCAP_DSCP should not be set for input SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (xflags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP) {
+ NL_SET_ERR_MSG(extack, "Flag OSEQ_MAY_WRAP should not be set for input SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ }
}
out:
@@ -734,6 +830,13 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
if (attrs[XFRMA_IF_ID])
x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+ if (attrs[XFRMA_SA_DIR])
+ x->dir = nla_get_u8(attrs[XFRMA_SA_DIR]);
+
+ if (attrs[XFRMA_NAT_KEEPALIVE_INTERVAL])
+ x->nat_keepalive_interval =
+ nla_get_u32(attrs[XFRMA_NAT_KEEPALIVE_INTERVAL]);
+
err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV], extack);
if (err)
goto error;
@@ -1182,8 +1285,20 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
if (ret)
goto out;
}
- if (x->mapping_maxage)
+ if (x->mapping_maxage) {
ret = nla_put_u32(skb, XFRMA_MTIMER_THRESH, x->mapping_maxage);
+ if (ret)
+ goto out;
+ }
+ if (x->dir)
+ ret = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
+
+ if (x->nat_keepalive_interval) {
+ ret = nla_put_u32(skb, XFRMA_NAT_KEEPALIVE_INTERVAL,
+ x->nat_keepalive_interval);
+ if (ret)
+ goto out;
+ }
out:
return ret;
}
@@ -1618,6 +1733,9 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err)
goto out;
+ if (attrs[XFRMA_SA_DIR])
+ x->dir = nla_get_u8(attrs[XFRMA_SA_DIR]);
+
resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq);
if (IS_ERR(resp_skb)) {
err = PTR_ERR(resp_skb);
@@ -2348,7 +2466,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
NETLINK_CB(skb).portid);
}
} else {
- xfrm_dev_policy_delete(xp);
xfrm_audit_policy_delete(xp, err ? 0 : 1, true);
if (err != 0)
@@ -2402,7 +2519,8 @@ static inline unsigned int xfrm_aevent_msgsize(struct xfrm_state *x)
+ nla_total_size_64bit(sizeof(struct xfrm_lifetime_cur))
+ nla_total_size(sizeof(struct xfrm_mark))
+ nla_total_size(4) /* XFRM_AE_RTHR */
- + nla_total_size(4); /* XFRM_AE_ETHR */
+ + nla_total_size(4) /* XFRM_AE_ETHR */
+ + nla_total_size(sizeof(x->dir)); /* XFRMA_SA_DIR */
}
static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
@@ -2459,6 +2577,12 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
if (err)
goto out_cancel;
+ if (x->dir) {
+ err = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
+ if (err)
+ goto out_cancel;
+ }
+
nlmsg_end(skb, nlh);
return 0;
@@ -3018,6 +3142,7 @@ EXPORT_SYMBOL_GPL(xfrm_msg_min);
#undef XMSGSIZE
const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
+ [XFRMA_UNSPEC] = { .strict_start_type = XFRMA_SA_DIR },
[XFRMA_SA] = { .len = sizeof(struct xfrm_usersa_info)},
[XFRMA_POLICY] = { .len = sizeof(struct xfrm_userpolicy_info)},
[XFRMA_LASTUSED] = { .type = NLA_U64},
@@ -3049,6 +3174,8 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_SET_MARK_MASK] = { .type = NLA_U32 },
[XFRMA_IF_ID] = { .type = NLA_U32 },
[XFRMA_MTIMER_THRESH] = { .type = NLA_U32 },
+ [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT),
+ [XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 },
};
EXPORT_SYMBOL_GPL(xfrma_policy);
@@ -3097,6 +3224,24 @@ static const struct xfrm_link {
[XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = { .doit = xfrm_get_default },
};
+static int xfrm_reject_unused_attr(int type, struct nlattr **attrs,
+ struct netlink_ext_ack *extack)
+{
+ if (attrs[XFRMA_SA_DIR]) {
+ switch (type) {
+ case XFRM_MSG_NEWSA:
+ case XFRM_MSG_UPDSA:
+ case XFRM_MSG_ALLOCSPI:
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Invalid attribute SA_DIR");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -3156,6 +3301,12 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto err;
+ if (!link->nla_pol || link->nla_pol == xfrma_policy) {
+ err = xfrm_reject_unused_attr((type + XFRM_MSG_BASE), attrs, extack);
+ if (err < 0)
+ goto err;
+ }
+
if (link->doit == NULL) {
err = -EINVAL;
goto err;
@@ -3189,8 +3340,9 @@ static void xfrm_netlink_rcv(struct sk_buff *skb)
static inline unsigned int xfrm_expire_msgsize(void)
{
- return NLMSG_ALIGN(sizeof(struct xfrm_user_expire))
- + nla_total_size(sizeof(struct xfrm_mark));
+ return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) +
+ nla_total_size(sizeof(struct xfrm_mark)) +
+ nla_total_size(sizeof_field(struct xfrm_state, dir));
}
static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
@@ -3217,6 +3369,12 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
if (err)
return err;
+ if (x->dir) {
+ err = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
+ if (err)
+ return err;
+ }
+
nlmsg_end(skb, nlh);
return 0;
}
@@ -3324,6 +3482,12 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
if (x->mapping_maxage)
l += nla_total_size(sizeof(x->mapping_maxage));
+ if (x->dir)
+ l += nla_total_size(sizeof(x->dir));
+
+ if (x->nat_keepalive_interval)
+ l += nla_total_size(sizeof(x->nat_keepalive_interval));
+
return l;
}