summaryrefslogtreecommitdiffstats
path: root/include/net/netns/ipv4.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-04-29 11:57:23 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-04-29 11:57:23 -0700
commit9d31d2338950293ec19d9b095fbaa9030899dcb4 (patch)
treee688040d0557c24a2eeb9f6c9c223d949f6f7ef9 /include/net/netns/ipv4.h
parent635de956a7f5a6ffcb04f29d70630c64c717b56b (diff)
parent4a52dd8fefb45626dace70a63c0738dbd83b7edb (diff)
downloadlinux-stable-9d31d2338950293ec19d9b095fbaa9030899dcb4.tar.gz
linux-stable-9d31d2338950293ec19d9b095fbaa9030899dcb4.tar.bz2
linux-stable-9d31d2338950293ec19d9b095fbaa9030899dcb4.zip
Merge tag 'net-next-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core: - bpf: - allow bpf programs calling kernel functions (initially to reuse TCP congestion control implementations) - enable task local storage for tracing programs - remove the need to store per-task state in hash maps, and allow tracing programs access to task local storage previously added for BPF_LSM - add bpf_for_each_map_elem() helper, allowing programs to walk all map elements in a more robust and easier to verify fashion - sockmap: support UDP and cross-protocol BPF_SK_SKB_VERDICT redirection - lpm: add support for batched ops in LPM trie - add BTF_KIND_FLOAT support - mostly to allow use of BTF on s390 which has floats in its headers files - improve BPF syscall documentation and extend the use of kdoc parsing scripts we already employ for bpf-helpers - libbpf, bpftool: support static linking of BPF ELF files - improve support for encapsulation of L2 packets - xdp: restructure redirect actions to avoid a runtime lookup, improving performance by 4-8% in microbenchmarks - xsk: build skb by page (aka generic zerocopy xmit) - improve performance of software AF_XDP path by 33% for devices which don't need headers in the linear skb part (e.g. virtio) - nexthop: resilient next-hop groups - improve path stability on next-hops group changes (incl. offload for mlxsw) - ipv6: segment routing: add support for IPv4 decapsulation - icmp: add support for RFC 8335 extended PROBE messages - inet: use bigger hash table for IP ID generation - tcp: deal better with delayed TX completions - make sure we don't give up on fast TCP retransmissions only because driver is slow in reporting that it completed transmitting the original - tcp: reorder tcp_congestion_ops for better cache locality - mptcp: - add sockopt support for common TCP options - add support for common TCP msg flags - include multiple address ids in RM_ADDR - add reset option support for resetting one subflow - udp: GRO L4 improvements - improve 'forward' / 'frag_list' co-existence with UDP tunnel GRO, allowing the first to take place correctly even for encapsulated UDP traffic - micro-optimize dev_gro_receive() and flow dissection, avoid retpoline overhead on VLAN and TEB GRO - use less memory for sysctls, add a new sysctl type, to allow using u8 instead of "int" and "long" and shrink networking sysctls - veth: allow GRO without XDP - this allows aggregating UDP packets before handing them off to routing, bridge, OvS, etc. - allow specifing ifindex when device is moved to another namespace - netfilter: - nft_socket: add support for cgroupsv2 - nftables: add catch-all set element - special element used to define a default action in case normal lookup missed - use net_generic infra in many modules to avoid allocating per-ns memory unnecessarily - xps: improve the xps handling to avoid potential out-of-bound accesses and use-after-free when XPS change race with other re-configuration under traffic - add a config knob to turn off per-cpu netdev refcnt to catch underflows in testing Device APIs: - add WWAN subsystem to organize the WWAN interfaces better and hopefully start driving towards more unified and vendor- independent APIs - ethtool: - add interface for reading IEEE MIB stats (incl. mlx5 and bnxt support) - allow network drivers to dump arbitrary SFP EEPROM data, current offset+length API was a poor fit for modern SFP which define EEPROM in terms of pages (incl. mlx5 support) - act_police, flow_offload: add support for packet-per-second policing (incl. offload for nfp) - psample: add additional metadata attributes like transit delay for packets sampled from switch HW (and corresponding egress and policy-based sampling in the mlxsw driver) - dsa: improve support for sandwiched LAGs with bridge and DSA - netfilter: - flowtable: use direct xmit in topologies with IP forwarding, bridging, vlans etc. - nftables: counter hardware offload support - Bluetooth: - improvements for firmware download w/ Intel devices - add support for reading AOSP vendor capabilities - add support for virtio transport driver - mac80211: - allow concurrent monitor iface and ethernet rx decap - set priority and queue mapping for injected frames - phy: add support for Clause-45 PHY Loopback - pci/iov: add sysfs MSI-X vector assignment interface to distribute MSI-X resources to VFs (incl. mlx5 support) New hardware/drivers: - dsa: mv88e6xxx: add support for Marvell mv88e6393x - 11-port Ethernet switch with 8x 1-Gigabit Ethernet and 3x 10-Gigabit interfaces. - dsa: support for legacy Broadcom tags used on BCM5325, BCM5365 and BCM63xx switches - Microchip KSZ8863 and KSZ8873; 3x 10/100Mbps Ethernet switches - ath11k: support for QCN9074 a 802.11ax device - Bluetooth: Broadcom BCM4330 and BMC4334 - phy: Marvell 88X2222 transceiver support - mdio: add BCM6368 MDIO mux bus controller - r8152: support RTL8153 and RTL8156 (USB Ethernet) chips - mana: driver for Microsoft Azure Network Adapter (MANA) - Actions Semi Owl Ethernet MAC - can: driver for ETAS ES58X CAN/USB interfaces Pure driver changes: - add XDP support to: enetc, igc, stmmac - add AF_XDP support to: stmmac - virtio: - page_to_skb() use build_skb when there's sufficient tailroom (21% improvement for 1000B UDP frames) - support XDP even without dedicated Tx queues - share the Tx queues with the stack when necessary - mlx5: - flow rules: add support for mirroring with conntrack, matching on ICMP, GTP, flex filters and more - support packet sampling with flow offloads - persist uplink representor netdev across eswitch mode changes - allow coexistence of CQE compression and HW time-stamping - add ethtool extended link error state reporting - ice, iavf: support flow filters, UDP Segmentation Offload - dpaa2-switch: - move the driver out of staging - add spanning tree (STP) support - add rx copybreak support - add tc flower hardware offload on ingress traffic - ionic: - implement Rx page reuse - support HW PTP time-stamping - octeon: support TC hardware offloads - flower matching on ingress and egress ratelimitting. - stmmac: - add RX frame steering based on VLAN priority in tc flower - support frame preemption (FPE) - intel: add cross time-stamping freq difference adjustment - ocelot: - support forwarding of MRP frames in HW - support multiple bridges - support PTP Sync one-step timestamping - dsa: mv88e6xxx, dpaa2-switch: offload bridge port flags like learning, flooding etc. - ipa: add IPA v4.5, v4.9 and v4.11 support (Qualcomm SDX55, SM8350, SC7280 SoCs) - mt7601u: enable TDLS support - mt76: - add support for 802.3 rx frames (mt7915/mt7615) - mt7915 flash pre-calibration support - mt7921/mt7663 runtime power management fixes" * tag 'net-next-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2451 commits) net: selftest: fix build issue if INET is disabled net: netrom: nr_in: Remove redundant assignment to ns net: tun: Remove redundant assignment to ret net: phy: marvell: add downshift support for M88E1240 net: dsa: ksz: Make reg_mib_cnt a u8 as it never exceeds 255 net/sched: act_ct: Remove redundant ct get and check icmp: standardize naming of RFC 8335 PROBE constants bpf, selftests: Update array map tests for per-cpu batched ops bpf: Add batched ops support for percpu array bpf: Implement formatted output helpers with bstr_printf seq_file: Add a seq_bprintf function sfc: adjust efx->xdp_tx_queue_count with the real number of initialized queues net:nfc:digital: Fix a double free in digital_tg_recv_dep_req net: fix a concurrency bug in l2tp_tunnel_register() net/smc: Remove redundant assignment to rc mpls: Remove redundant assignment to err llc2: Remove redundant assignment to rc net/tls: Remove redundant initialization of record rds: Remove redundant assignment to nr_sig dt-bindings: net: mdio-gpio: add compatible for microchip,mdio-smi0 ...
Diffstat (limited to 'include/net/netns/ipv4.h')
-rw-r--r--include/net/netns/ipv4.h143
1 files changed, 68 insertions, 75 deletions
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 70a2a085dd1a..f6af8d96d3c6 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -11,7 +11,6 @@
#include <linux/rcupdate.h>
#include <linux/siphash.h>
-struct tcpm_hash_bucket;
struct ctl_table_header;
struct ipv4_devconf;
struct fib_rules_ops;
@@ -33,14 +32,18 @@ struct inet_hashinfo;
struct inet_timewait_death_row {
atomic_t tw_count;
+ char tw_pad[L1_CACHE_BYTES - sizeof(atomic_t)];
- struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp;
+ struct inet_hashinfo *hashinfo;
int sysctl_max_tw_buckets;
};
struct tcp_fastopen_context;
struct netns_ipv4 {
+ /* Please keep tcp_death_row at first field in netns_ipv4 */
+ struct inet_timewait_death_row tcp_death_row ____cacheline_aligned_in_smp;
+
#ifdef CONFIG_SYSCTL
struct ctl_table_header *forw_hdr;
struct ctl_table_header *frags_hdr;
@@ -54,17 +57,17 @@ struct netns_ipv4 {
struct mutex ra_mutex;
#ifdef CONFIG_IP_MULTIPLE_TABLES
struct fib_rules_ops *rules_ops;
- bool fib_has_custom_rules;
- unsigned int fib_rules_require_fldissect;
struct fib_table __rcu *fib_main;
struct fib_table __rcu *fib_default;
+ unsigned int fib_rules_require_fldissect;
+ bool fib_has_custom_rules;
#endif
bool fib_has_custom_local_routes;
+ bool fib_offload_disabled;
#ifdef CONFIG_IP_ROUTE_CLASSID
int fib_num_tclassid_users;
#endif
struct hlist_head *fib_table_hash;
- bool fib_offload_disabled;
struct sock *fibnl;
struct sock * __percpu *icmp_sk;
@@ -73,52 +76,43 @@ struct netns_ipv4 {
struct inet_peer_base *peers;
struct sock * __percpu *tcp_sk;
struct fqdir *fqdir;
-#ifdef CONFIG_NETFILTER
- struct xt_table *iptable_filter;
- struct xt_table *iptable_mangle;
- struct xt_table *iptable_raw;
- struct xt_table *arptable_filter;
-#ifdef CONFIG_SECURITY
- struct xt_table *iptable_security;
-#endif
- struct xt_table *nat_table;
-#endif
- int sysctl_icmp_echo_ignore_all;
- int sysctl_icmp_echo_ignore_broadcasts;
- int sysctl_icmp_ignore_bogus_error_responses;
+ u8 sysctl_icmp_echo_ignore_all;
+ u8 sysctl_icmp_echo_enable_probe;
+ u8 sysctl_icmp_echo_ignore_broadcasts;
+ u8 sysctl_icmp_ignore_bogus_error_responses;
+ u8 sysctl_icmp_errors_use_inbound_ifaddr;
int sysctl_icmp_ratelimit;
int sysctl_icmp_ratemask;
- int sysctl_icmp_errors_use_inbound_ifaddr;
struct local_ports ip_local_ports;
- int sysctl_tcp_ecn;
- int sysctl_tcp_ecn_fallback;
+ u8 sysctl_tcp_ecn;
+ u8 sysctl_tcp_ecn_fallback;
- int sysctl_ip_default_ttl;
- int sysctl_ip_no_pmtu_disc;
- int sysctl_ip_fwd_use_pmtu;
- int sysctl_ip_fwd_update_priority;
- int sysctl_ip_nonlocal_bind;
- int sysctl_ip_autobind_reuse;
+ u8 sysctl_ip_default_ttl;
+ u8 sysctl_ip_no_pmtu_disc;
+ u8 sysctl_ip_fwd_use_pmtu;
+ u8 sysctl_ip_fwd_update_priority;
+ u8 sysctl_ip_nonlocal_bind;
+ u8 sysctl_ip_autobind_reuse;
/* Shall we try to damage output packets if routing dev changes? */
- int sysctl_ip_dynaddr;
- int sysctl_ip_early_demux;
+ u8 sysctl_ip_dynaddr;
+ u8 sysctl_ip_early_demux;
#ifdef CONFIG_NET_L3_MASTER_DEV
- int sysctl_raw_l3mdev_accept;
+ u8 sysctl_raw_l3mdev_accept;
#endif
- int sysctl_tcp_early_demux;
- int sysctl_udp_early_demux;
+ u8 sysctl_tcp_early_demux;
+ u8 sysctl_udp_early_demux;
- int sysctl_nexthop_compat_mode;
+ u8 sysctl_nexthop_compat_mode;
- int sysctl_fwmark_reflect;
- int sysctl_tcp_fwmark_accept;
+ u8 sysctl_fwmark_reflect;
+ u8 sysctl_tcp_fwmark_accept;
#ifdef CONFIG_NET_L3_MASTER_DEV
- int sysctl_tcp_l3mdev_accept;
+ u8 sysctl_tcp_l3mdev_accept;
#endif
- int sysctl_tcp_mtu_probing;
+ u8 sysctl_tcp_mtu_probing;
int sysctl_tcp_mtu_probe_floor;
int sysctl_tcp_base_mss;
int sysctl_tcp_min_snd_mss;
@@ -126,55 +120,55 @@ struct netns_ipv4 {
u32 sysctl_tcp_probe_interval;
int sysctl_tcp_keepalive_time;
- int sysctl_tcp_keepalive_probes;
int sysctl_tcp_keepalive_intvl;
+ u8 sysctl_tcp_keepalive_probes;
- int sysctl_tcp_syn_retries;
- int sysctl_tcp_synack_retries;
- int sysctl_tcp_syncookies;
+ u8 sysctl_tcp_syn_retries;
+ u8 sysctl_tcp_synack_retries;
+ u8 sysctl_tcp_syncookies;
int sysctl_tcp_reordering;
- int sysctl_tcp_retries1;
- int sysctl_tcp_retries2;
- int sysctl_tcp_orphan_retries;
+ u8 sysctl_tcp_retries1;
+ u8 sysctl_tcp_retries2;
+ u8 sysctl_tcp_orphan_retries;
+ u8 sysctl_tcp_tw_reuse;
int sysctl_tcp_fin_timeout;
unsigned int sysctl_tcp_notsent_lowat;
- int sysctl_tcp_tw_reuse;
- int sysctl_tcp_sack;
- int sysctl_tcp_window_scaling;
- int sysctl_tcp_timestamps;
- int sysctl_tcp_early_retrans;
- int sysctl_tcp_recovery;
- int sysctl_tcp_thin_linear_timeouts;
- int sysctl_tcp_slow_start_after_idle;
- int sysctl_tcp_retrans_collapse;
- int sysctl_tcp_stdurg;
- int sysctl_tcp_rfc1337;
- int sysctl_tcp_abort_on_overflow;
- int sysctl_tcp_fack;
+ u8 sysctl_tcp_sack;
+ u8 sysctl_tcp_window_scaling;
+ u8 sysctl_tcp_timestamps;
+ u8 sysctl_tcp_early_retrans;
+ u8 sysctl_tcp_recovery;
+ u8 sysctl_tcp_thin_linear_timeouts;
+ u8 sysctl_tcp_slow_start_after_idle;
+ u8 sysctl_tcp_retrans_collapse;
+ u8 sysctl_tcp_stdurg;
+ u8 sysctl_tcp_rfc1337;
+ u8 sysctl_tcp_abort_on_overflow;
+ u8 sysctl_tcp_fack; /* obsolete */
int sysctl_tcp_max_reordering;
- int sysctl_tcp_dsack;
- int sysctl_tcp_app_win;
int sysctl_tcp_adv_win_scale;
- int sysctl_tcp_frto;
- int sysctl_tcp_nometrics_save;
- int sysctl_tcp_no_ssthresh_metrics_save;
- int sysctl_tcp_moderate_rcvbuf;
- int sysctl_tcp_tso_win_divisor;
- int sysctl_tcp_workaround_signed_windows;
+ u8 sysctl_tcp_dsack;
+ u8 sysctl_tcp_app_win;
+ u8 sysctl_tcp_frto;
+ u8 sysctl_tcp_nometrics_save;
+ u8 sysctl_tcp_no_ssthresh_metrics_save;
+ u8 sysctl_tcp_moderate_rcvbuf;
+ u8 sysctl_tcp_tso_win_divisor;
+ u8 sysctl_tcp_workaround_signed_windows;
int sysctl_tcp_limit_output_bytes;
int sysctl_tcp_challenge_ack_limit;
- int sysctl_tcp_min_tso_segs;
int sysctl_tcp_min_rtt_wlen;
- int sysctl_tcp_autocorking;
+ u8 sysctl_tcp_min_tso_segs;
+ u8 sysctl_tcp_autocorking;
+ u8 sysctl_tcp_reflect_tos;
+ u8 sysctl_tcp_comp_sack_nr;
int sysctl_tcp_invalid_ratelimit;
int sysctl_tcp_pacing_ss_ratio;
int sysctl_tcp_pacing_ca_ratio;
int sysctl_tcp_wmem[3];
int sysctl_tcp_rmem[3];
- int sysctl_tcp_comp_sack_nr;
unsigned long sysctl_tcp_comp_sack_delay_ns;
unsigned long sysctl_tcp_comp_sack_slack_ns;
- struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
const struct tcp_congestion_ops __rcu *tcp_congestion_control;
@@ -183,20 +177,19 @@ struct netns_ipv4 {
unsigned int sysctl_tcp_fastopen_blackhole_timeout;
atomic_t tfo_active_disable_times;
unsigned long tfo_active_disable_stamp;
- int sysctl_tcp_reflect_tos;
int sysctl_udp_wmem_min;
int sysctl_udp_rmem_min;
- int sysctl_fib_notify_on_flag_change;
+ u8 sysctl_fib_notify_on_flag_change;
#ifdef CONFIG_NET_L3_MASTER_DEV
- int sysctl_udp_l3mdev_accept;
+ u8 sysctl_udp_l3mdev_accept;
#endif
+ u8 sysctl_igmp_llm_reports;
int sysctl_igmp_max_memberships;
int sysctl_igmp_max_msf;
- int sysctl_igmp_llm_reports;
int sysctl_igmp_qrv;
struct ping_group_range ping_group_range;
@@ -217,8 +210,8 @@ struct netns_ipv4 {
#endif
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- int sysctl_fib_multipath_use_neigh;
- int sysctl_fib_multipath_hash_policy;
+ u8 sysctl_fib_multipath_use_neigh;
+ u8 sysctl_fib_multipath_hash_policy;
#endif
struct fib_notifier_ops *notifier_ops;