From 40cbfa90218bc570a7959b436b9d48a18c361041 Mon Sep 17 00:00:00 2001 From: Srinivas Dasari Date: Thu, 25 Jan 2018 17:13:38 +0200 Subject: cfg80211/nl80211: Optional authentication offload to userspace This interface allows the host driver to offload the authentication to user space. This is exclusively defined for host drivers that do not define separate commands for authentication and association, but rely on userspace SME (e.g., in wpa_supplicant for the ~WPA_DRIVER_FLAGS_SME case) for the authentication to happen. This can be used to implement SAE without full implementation in the kernel/firmware while still being able to use NL80211_CMD_CONNECT with driver-based BSS selection. Host driver sends NL80211_CMD_EXTERNAL_AUTH event to start/abort authentication to the port on which connect is triggered and status of authentication is further indicated by user space to host driver through the same command response interface. User space entities advertise this capability through the NL80211_ATTR_EXTERNAL_AUTH_SUPP flag in the NL80211_CMD_CONNECT request. Host drivers shall look at this capability to offload the authentication. Signed-off-by: Srinivas Dasari Signed-off-by: Jouni Malinen [add socket connection ownership check] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 81174f9b8d14..68def3e5b013 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1905,11 +1905,16 @@ struct cfg80211_auth_request { * @ASSOC_REQ_DISABLE_HT: Disable HT (802.11n) * @ASSOC_REQ_DISABLE_VHT: Disable VHT * @ASSOC_REQ_USE_RRM: Declare RRM capability in this association + * @CONNECT_REQ_EXTERNAL_AUTH_SUPPORT: User space indicates external + * authentication capability. Drivers can offload authentication to + * userspace if this flag is set. 
Only applicable for cfg80211_connect() + * request (connect callback). */ enum cfg80211_assoc_req_flags { - ASSOC_REQ_DISABLE_HT = BIT(0), - ASSOC_REQ_DISABLE_VHT = BIT(1), - ASSOC_REQ_USE_RRM = BIT(2), + ASSOC_REQ_DISABLE_HT = BIT(0), + ASSOC_REQ_DISABLE_VHT = BIT(1), + ASSOC_REQ_USE_RRM = BIT(2), + CONNECT_REQ_EXTERNAL_AUTH_SUPPORT = BIT(3), }; /** @@ -2600,6 +2605,33 @@ struct cfg80211_pmk_conf { const u8 *pmk_r0_name; }; +/** + * struct cfg80211_external_auth_params - Trigger External authentication. + * + * Commonly used across the external auth request and event interfaces. + * + * @action: action type / trigger for external authentication. Only significant + * for the authentication request event interface (driver to user space). + * @bssid: BSSID of the peer with which the authentication has + * to happen. Used by both the authentication request event and + * authentication response command interface. + * @ssid: SSID of the AP. Used by both the authentication request event and + * authentication response command interface. + * @key_mgmt_suite: AKM suite of the respective authentication. Used by the + * authentication request event interface. + * @status: status code, %WLAN_STATUS_SUCCESS for successful authentication, + * use %WLAN_STATUS_UNSPECIFIED_FAILURE if user space cannot give you + * the real status code for failures. Used only for the authentication + * response command interface (user space to driver). + */ +struct cfg80211_external_auth_params { + enum nl80211_external_auth_action action; + u8 bssid[ETH_ALEN] __aligned(2); + struct cfg80211_ssid ssid; + unsigned int key_mgmt_suite; + u16 status; +}; + /** * struct cfg80211_ops - backend description for wireless configuration * @@ -2923,6 +2955,9 @@ struct cfg80211_pmk_conf { * (invoked with the wireless_dev mutex held) * @del_pmk: delete the previously configured PMK for the given authenticator. 
* (invoked with the wireless_dev mutex held) + * + * @external_auth: indicates result of offloaded authentication processing from + * user space */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -3216,6 +3251,8 @@ struct cfg80211_ops { const struct cfg80211_pmk_conf *conf); int (*del_pmk)(struct wiphy *wiphy, struct net_device *dev, const u8 *aa); + int (*external_auth)(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_external_auth_params *params); }; /* @@ -6202,6 +6239,17 @@ void cfg80211_nan_func_terminated(struct wireless_dev *wdev, /* ethtool helper */ void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info); +/** + * cfg80211_external_auth_request - userspace request for authentication + * @netdev: network device + * @params: External authentication parameters + * @gfp: allocation flags + * Returns: 0 on success, < 0 on error + */ +int cfg80211_external_auth_request(struct net_device *netdev, + struct cfg80211_external_auth_params *params, + gfp_t gfp); + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ -- cgit v1.2.3 From 466b9936bf93b7ec3bce1dcd493262ff0a8a4f44 Mon Sep 17 00:00:00 2001 From: "tamizhr@codeaurora.org" Date: Wed, 31 Jan 2018 16:24:49 +0530 Subject: cfg80211: Add support to notify station's opmode change to userspace ht/vht action frames will be sent to the AP from a station to notify a change of its ht/vht opmode (max bandwidth, smps mode or nss) values. Currently these values are used by the driver/firmware for the rate control algorithm. This patch introduces the NL80211_CMD_STA_OPMODE_CHANGED command to notify those modified/current supported values (max bandwidth, smps mode, max nss) to the userspace application. This will be useful for applications like steering, which closely monitor a station's capability changes, since the application has taken these values during station association. 
Signed-off-by: Tamizh chelvam Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 68def3e5b013..7d49cd0cf92d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3553,6 +3553,35 @@ enum wiphy_vendor_command_flags { WIPHY_VENDOR_CMD_NEED_RUNNING = BIT(2), }; +/** + * enum wiphy_opmode_flag - Station's ht/vht operation mode information flags + * + * @STA_OPMODE_MAX_BW_CHANGED: Max Bandwidth changed + * @STA_OPMODE_SMPS_MODE_CHANGED: SMPS mode changed + * @STA_OPMODE_N_SS_CHANGED: max N_SS (number of spatial streams) changed + * + */ +enum wiphy_opmode_flag { + STA_OPMODE_MAX_BW_CHANGED = BIT(0), + STA_OPMODE_SMPS_MODE_CHANGED = BIT(1), + STA_OPMODE_N_SS_CHANGED = BIT(2), +}; + +/** + * struct sta_opmode_info - Station's ht/vht operation mode information + * @changed: contains value from &enum wiphy_opmode_flag + * @smps_mode: New SMPS mode of a station + * @bw: new max bandwidth value of a station + * @rx_nss: new rx_nss value of a station + */ + +struct sta_opmode_info { + u32 changed; + u8 smps_mode; + u8 bw; + u8 rx_nss; +}; + /** * struct wiphy_vendor_command - vendor command definition * @info: vendor command identifying information, as used in nl80211 @@ -5721,6 +5750,20 @@ void cfg80211_cqm_beacon_loss_notify(struct net_device *dev, gfp_t gfp); void cfg80211_radar_event(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, gfp_t gfp); +/** + * cfg80211_sta_opmode_change_notify - STA's ht/vht operation mode change event + * @dev: network device + * @mac: MAC address of a station which opmode got modified + * @sta_opmode: station's current opmode value + * @gfp: context flags + * + * Driver should call this function when station's opmode modified via action + * frame. 
+ */ +void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac, + struct sta_opmode_info *sta_opmode, + gfp_t gfp); + /** * cfg80211_cac_event - Channel availability check (CAC) event * @netdev: network device -- cgit v1.2.3 From 9b2c45d479d0fb8647c9e83359df69162b5fbe5f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 12 Feb 2018 20:00:20 +0100 Subject: net: make getname() functions return length rather than use int* parameter Changes since v1: Added changes in these files: drivers/infiniband/hw/usnic/usnic_transport.c drivers/staging/lustre/lnet/lnet/lib-socket.c drivers/target/iscsi/iscsi_target_login.c drivers/vhost/net.c fs/dlm/lowcomms.c fs/ocfs2/cluster/tcp.c security/tomoyo/network.c Before: All these functions either return a negative error indicator, or store length of sockaddr into "int *socklen" parameter and return zero on success. "int *socklen" parameter is awkward. For example, if caller does not care, it still needs to provide on-stack storage for the value it does not need. None of the many FOO_getname() functions of various protocols ever used old value of *socklen. They always just overwrite it. This change drops this parameter, and makes all these functions, on success, return length of sockaddr. It's always >= 0 and can be differentiated from an error. Tests in callers are changed from "if (err)" to "if (err < 0)", where needed. rpc_sockname() lost "int buflen" parameter, since its only use was to be passed to kernel_getsockname() as &buflen and subsequently not used in any way. Userspace API is not changed. text data bss dec hex filename 30108430 2633624 873672 33615726 200ef6e vmlinux.before.o 30108109 2633612 873672 33615393 200ee21 vmlinux.o Signed-off-by: Denys Vlasenko CC: David S. 
Miller CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org CC: linux-bluetooth@vger.kernel.org CC: linux-decnet-user@lists.sourceforge.net CC: linux-wireless@vger.kernel.org CC: linux-rdma@vger.kernel.org CC: linux-sctp@vger.kernel.org CC: linux-nfs@vger.kernel.org CC: linux-x25@vger.kernel.org Signed-off-by: David S. Miller --- include/net/inet_common.h | 2 +- include/net/ipv6.h | 2 +- include/net/sock.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 5a54c9570977..500f81375200 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -32,7 +32,7 @@ int inet_shutdown(struct socket *sock, int how); int inet_listen(struct socket *sock, int backlog); void inet_sock_destruct(struct sock *sk); int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); -int inet_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, +int inet_getname(struct socket *sock, struct sockaddr *uaddr, int peer); int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int inet_ctl_sock_create(struct sock **sk, unsigned short family, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8606c9113d3f..7a98cd583c73 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1056,7 +1056,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu); int inet6_release(struct socket *sock); int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); -int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, +int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer); int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); diff --git a/include/net/sock.h b/include/net/sock.h index 169c92afcafa..3aa7b7d6e6c7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1584,7 +1584,7 @@ int sock_no_bind(struct socket *, struct sockaddr *, 
int); int sock_no_connect(struct socket *, struct sockaddr *, int, int); int sock_no_socketpair(struct socket *, struct socket *); int sock_no_accept(struct socket *, struct socket *, int, bool); -int sock_no_getname(struct socket *, struct sockaddr *, int *, int); +int sock_no_getname(struct socket *, struct sockaddr *, int); __poll_t sock_no_poll(struct file *, struct socket *, struct poll_table_struct *); int sock_no_ioctl(struct socket *, unsigned int, unsigned long); -- cgit v1.2.3 From 447cd7a0d7d1e5b4486e99cce289654fec9951e3 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:26:44 +0300 Subject: net: Allow pernet_operations to be executed in parallel This adds new pernet_operations::async flag to indicate operations, which ->init(), ->exit() and ->exit_batch() methods are allowed to be executed in parallel with the methods of any other pernet_operations. When there are only asynchronous pernet_operations in the system, net_mutex won't be taken for a net construction and destruction. Also, remove BUG_ON(mutex_is_locked()) from net_assign_generic() without replacing with the equivalent net_sem check, as there is one more lockdep assert below. v3: Add comment near net_mutex. Suggested-by: Eric W. Biederman Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. Miller --- include/net/net_namespace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index f306b2aa15a4..9158ec1ad06f 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -313,6 +313,12 @@ struct pernet_operations { void (*exit_batch)(struct list_head *net_exit_list); unsigned int *id; size_t size; + /* + * Indicates above methods are allowed to be executed in parallel + * with methods of any other pernet_operations, i.e. they are not + * need synchronization via net_mutex. 
+ */ + bool async; }; /* -- cgit v1.2.3 From 19ff13f2a411d99af67d8e51867d54b86e1bf017 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 13 Feb 2018 08:52:01 -0800 Subject: net: Make ax25_ptr depend on CONFIG_AX25 Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ax25.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/ax25.h b/include/net/ax25.h index 76fb39c272a7..c91bc87931c7 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -318,10 +318,12 @@ void ax25_digi_invert(const ax25_digi *, ax25_digi *); extern ax25_dev *ax25_dev_list; extern spinlock_t ax25_dev_lock; +#if IS_ENABLED(CONFIG_AX25) static inline ax25_dev *ax25_dev_ax25dev(struct net_device *dev) { return dev->ax25_ptr; } +#endif ax25_dev *ax25_addr_ax25dev(ax25_address *); void ax25_dev_device_up(struct net_device *); -- cgit v1.2.3 From e0f9759f530bf789e984961dce79f525b151ecf3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Feb 2018 06:14:12 -0800 Subject: tcp: try to keep packet if SYN_RCV race is lost MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 배석진 reported that in some situations, packets for a given 5-tuple end up being processed by different CPUS. This involves RPS, and fragmentation. 배석진 is seeing packet drops when a SYN_RECV request socket is moved into ESTABLISH state. Other states are protected by socket lock. This is caused by a CPU losing the race, and simply not caring enough. Since this seems to occur frequently, we can do better and perform a second lookup. Note that all needed memory barriers are already in the existing code, thanks to the spin_lock()/spin_unlock() pair in inet_ehash_insert() and reqsk_put(). The second lookup must find the new socket, unless it has already been accepted and closed by another cpu. 
Note that the fragmentation could be avoided in the first place by use of a correct TCP MSS option in the SYN{ACK} packet, but this does not mean we can not be more robust. Many thanks to 배석진 for a very detailed analysis. Reported-by: 배석진 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index e3fc667f9ac2..92b06c6e7732 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -374,7 +374,8 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th); struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, bool fastopen); + struct request_sock *req, bool fastopen, + bool *lost_race); int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); void tcp_enter_loss(struct sock *sk); -- cgit v1.2.3 From 0336369d3a4d65c9332476b618ff3bb9b41045e1 Mon Sep 17 00:00:00 2001 From: Brandon Streiff Date: Wed, 14 Feb 2018 01:07:48 +0100 Subject: net: dsa: forward hardware timestamping ioctls to switch driver This patch adds support to the dsa slave network device so that switch drivers can implement the SIOC[GS]HWTSTAMP ioctls and the ethtool timestamp-info interface. Signed-off-by: Brandon Streiff Signed-off-by: Andrew Lunn Signed-off-by: David S. 
Miller --- include/net/dsa.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 6cb602dd970c..4c0df83dddaf 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -367,6 +368,12 @@ struct dsa_switch_ops { int (*set_wol)(struct dsa_switch *ds, int port, struct ethtool_wolinfo *w); + /* + * ethtool timestamp info + */ + int (*get_ts_info)(struct dsa_switch *ds, int port, + struct ethtool_ts_info *ts); + /* * Suspend and resume */ @@ -469,6 +476,14 @@ struct dsa_switch_ops { int port, struct net_device *br); void (*crosschip_bridge_leave)(struct dsa_switch *ds, int sw_index, int port, struct net_device *br); + + /* + * PTP functionality + */ + int (*port_hwtstamp_get)(struct dsa_switch *ds, int port, + struct ifreq *ifr); + int (*port_hwtstamp_set)(struct dsa_switch *ds, int port, + struct ifreq *ifr); }; struct dsa_switch_driver { -- cgit v1.2.3 From 90af1059c52c0031f3bfd8279c9ede153ca83275 Mon Sep 17 00:00:00 2001 From: Brandon Streiff Date: Wed, 14 Feb 2018 01:07:49 +0100 Subject: net: dsa: forward timestamping callbacks to switch drivers Forward the rx/tx timestamp machinery from the dsa infrastructure to the switch driver. On the rx side, defer delivery of skbs until we have an rx timestamp. This mimics the behavior of skb_defer_rx_timestamp. On the tx side, identify PTP packets, clone them, and pass them to the underlying switch driver before we transmit. This mimics the behavior of skb_tx_timestamp. Adjusted txstamp API to keep the allocation and freeing of the clone in the same central function by Richard Cochran Signed-off-by: Brandon Streiff Signed-off-by: Richard Cochran Signed-off-by: Andrew Lunn Signed-off-by: David S. 
Miller --- include/net/dsa.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 4c0df83dddaf..0ad17b63684d 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -102,6 +102,7 @@ struct dsa_platform_data { }; struct packet_type; +struct dsa_switch; struct dsa_device_ops { struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); @@ -484,6 +485,10 @@ struct dsa_switch_ops { struct ifreq *ifr); int (*port_hwtstamp_set)(struct dsa_switch *ds, int port, struct ifreq *ifr); + bool (*port_txtstamp)(struct dsa_switch *ds, int port, + struct sk_buff *clone, unsigned int type); + bool (*port_rxtstamp)(struct dsa_switch *ds, int port, + struct sk_buff *skb, unsigned int type); }; struct dsa_switch_driver { -- cgit v1.2.3 From 9942895b5ee4b0db53f32fbcb4a51360607aac1b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 13 Feb 2018 20:32:04 -0800 Subject: net: Move ipv4 set_lwt_redirect helper to lwtunnel IPv4 uses set_lwt_redirect to set the lwtunnel redirect functions as needed. Move it to lwtunnel.h as lwtunnel_set_redirect and change IPv6 to also use it. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/lwtunnel.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/net') diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index d747ef975cd8..33fd9ba7e0e5 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -127,6 +127,17 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb); int lwtunnel_input(struct sk_buff *skb); int lwtunnel_xmit(struct sk_buff *skb); +static inline void lwtunnel_set_redirect(struct dst_entry *dst) +{ + if (lwtunnel_output_redirect(dst->lwtstate)) { + dst->lwtstate->orig_output = dst->output; + dst->output = lwtunnel_output; + } + if (lwtunnel_input_redirect(dst->lwtstate)) { + dst->lwtstate->orig_input = dst->input; + dst->input = lwtunnel_input; + } +} #else static inline void lwtstate_free(struct lwtunnel_state *lws) @@ -158,6 +169,10 @@ static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate) return false; } +static inline void lwtunnel_set_redirect(struct dst_entry *dst) +{ +} + static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate, unsigned int mtu) { -- cgit v1.2.3 From 68e813aa43071377b698c662bc0214f2a833bcbb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 14 Feb 2018 14:24:28 -0800 Subject: net/ipv4: Remove fib table id from rtable Remove rt_table_id from rtable. It was added for getroute to return the table id that was hit in the lookup. With the changes for fibmatch the table id can be extracted from the fib_info returned in the fib_result so it no longer needs to be in rtable directly. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/route.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 1eb9ce470e25..158833ea7988 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -65,8 +65,6 @@ struct rtable { /* Miscellaneous cached information */ u32 rt_pmtu; - u32 rt_table_id; - struct list_head rt_uncached; struct uncached_list *rt_uncached_list; }; -- cgit v1.2.3 From b7b347fa3cd496ad5b4cbcc8ea2931847c4d0d78 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 15 Feb 2018 10:54:53 -0500 Subject: net: sched: act: fix code style This patch is used by subsequent patches. It fixes code style issues caught by checkpatch. Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- include/net/act_api.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 6ed9692f20bd..32ef544f4ddc 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -87,12 +87,13 @@ struct tc_action_ops { struct tcf_result *); int (*dump)(struct sk_buff *, struct tc_action *, int, int); void (*cleanup)(struct tc_action *); - int (*lookup)(struct net *, struct tc_action **, u32); + int (*lookup)(struct net *net, struct tc_action **a, u32 index); int (*init)(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, int ovr, int bind); int (*walk)(struct net *, struct sk_buff *, - struct netlink_callback *, int, const struct tc_action_ops *); + struct netlink_callback *, int, + const struct tc_action_ops *); void (*stats_update)(struct tc_action *, u64, u32, u64); struct net_device *(*get_dev)(const struct tc_action *a); }; -- cgit v1.2.3 From 10defbd29e6218c1cab5c217a9d808fc05e3938a Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 15 Feb 2018 10:54:54 -0500 Subject: net: sched: act: add extack to init This patch adds extack to tcf_action_init and tcf_action_init_1 functions. 
These are necessary to make individual extack handling in each act implementation. Based on work by David Ahern Cc: David Ahern Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- include/net/act_api.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 32ef544f4ddc..41d95930ffbc 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -163,10 +163,11 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res); int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, - struct list_head *actions); + struct list_head *actions, struct netlink_ext_ack *extack); struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind); + char *name, int ovr, int bind, + struct netlink_ext_ack *extack); int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int); int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); -- cgit v1.2.3 From ee99b2d8bf4ad6d03046a8c2f25bad7cfd9de64a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 16 Feb 2018 16:03:39 -0500 Subject: net: Revert sched action extack support series. It was mis-applied and the changes had rejects. Signed-off-by: David S. 
Miller --- include/net/act_api.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 41d95930ffbc..6ed9692f20bd 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -87,13 +87,12 @@ struct tc_action_ops { struct tcf_result *); int (*dump)(struct sk_buff *, struct tc_action *, int, int); void (*cleanup)(struct tc_action *); - int (*lookup)(struct net *net, struct tc_action **a, u32 index); + int (*lookup)(struct net *, struct tc_action **, u32); int (*init)(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, int ovr, int bind); int (*walk)(struct net *, struct sk_buff *, - struct netlink_callback *, int, - const struct tc_action_ops *); + struct netlink_callback *, int, const struct tc_action_ops *); void (*stats_update)(struct tc_action *, u64, u32, u64); struct net_device *(*get_dev)(const struct tc_action *a); }; @@ -163,11 +162,10 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res); int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, - struct list_head *actions, struct netlink_ext_ack *extack); + struct list_head *actions); struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind, - struct netlink_ext_ack *extack); + char *name, int ovr, int bind); int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int); int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); -- cgit v1.2.3 From 1af85155813622767d223af6d4dff283ebeea7a7 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 15 Feb 2018 10:54:53 -0500 Subject: net: sched: act: fix code style This patch is used by subsequent patches. 
It fixes code style issues caught by checkpatch. Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- include/net/act_api.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 6ed9692f20bd..32ef544f4ddc 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -87,12 +87,13 @@ struct tc_action_ops { struct tcf_result *); int (*dump)(struct sk_buff *, struct tc_action *, int, int); void (*cleanup)(struct tc_action *); - int (*lookup)(struct net *, struct tc_action **, u32); + int (*lookup)(struct net *net, struct tc_action **a, u32 index); int (*init)(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, int ovr, int bind); int (*walk)(struct net *, struct sk_buff *, - struct netlink_callback *, int, const struct tc_action_ops *); + struct netlink_callback *, int, + const struct tc_action_ops *); void (*stats_update)(struct tc_action *, u64, u32, u64); struct net_device *(*get_dev)(const struct tc_action *a); }; -- cgit v1.2.3 From aea0d727899140820a631bac78f36e9d9ef15ef6 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 15 Feb 2018 10:54:54 -0500 Subject: net: sched: act: add extack to init This patch adds extack to tcf_action_init and tcf_action_init_1 functions. These are necessary to make individual extack handling in each act implementation. Based on work by David Ahern Cc: David Ahern Signed-off-by: Alexander Aring Signed-off-by: David S. 
Miller --- include/net/act_api.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 32ef544f4ddc..41d95930ffbc 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -163,10 +163,11 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res); int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, - struct list_head *actions); + struct list_head *actions, struct netlink_ext_ack *extack); struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind); + char *name, int ovr, int bind, + struct netlink_ext_ack *extack); int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int); int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); -- cgit v1.2.3 From 589dad6d71a72dd7912e5070c63f6bf1f561b5cf Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 15 Feb 2018 10:54:56 -0500 Subject: net: sched: act: add extack to init callback This patch adds extack support for act init callback api. This prepares to handle extack support inside each specific act implementation. Based on work by David Ahern Cc: David Ahern Signed-off-by: Alexander Aring Signed-off-by: David S. 
Miller --- include/net/act_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 41d95930ffbc..3717e0f2bb1b 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -90,7 +90,7 @@ struct tc_action_ops { int (*lookup)(struct net *net, struct tc_action **a, u32 index); int (*init)(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, int ovr, - int bind); + int bind, struct netlink_ext_ack *extack); int (*walk)(struct net *, struct sk_buff *, struct netlink_callback *, int, const struct tc_action_ops *); -- cgit v1.2.3 From 331a9295de23a9428adb7f593d0701d393a2079e Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 15 Feb 2018 10:54:57 -0500 Subject: net: sched: act: add extack for lookup callback This patch adds extack support for act lookup callback api. This prepares to handle extack support inside each specific act implementation. Cc: David Ahern Signed-off-by: Alexander Aring Signed-off-by: David S. 
Miller --- include/net/act_api.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 3717e0f2bb1b..0bd65db506ba 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -87,7 +87,8 @@ struct tc_action_ops { struct tcf_result *); int (*dump)(struct sk_buff *, struct tc_action *, int, int); void (*cleanup)(struct tc_action *); - int (*lookup)(struct net *net, struct tc_action **a, u32 index); + int (*lookup)(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack); int (*init)(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, int ovr, int bind, struct netlink_ext_ack *extack); -- cgit v1.2.3 From 417801055b8cb4c052e989289ccf24a673178bbc Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 15 Feb 2018 10:54:58 -0500 Subject: net: sched: act: add extack for walk callback This patch adds extack support for act walker callback api. This prepares to handle extack support inside each specific act implementation. Cc: David Ahern Signed-off-by: Alexander Aring Signed-off-by: David S. 
Miller --- include/net/act_api.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 0bd65db506ba..ab3529255377 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -94,7 +94,8 @@ struct tc_action_ops { int bind, struct netlink_ext_ack *extack); int (*walk)(struct net *, struct sk_buff *, struct netlink_callback *, int, - const struct tc_action_ops *); + const struct tc_action_ops *, + struct netlink_ext_ack *); void (*stats_update)(struct tc_action *, u64, u32, u64); struct net_device *(*get_dev)(const struct tc_action *a); }; -- cgit v1.2.3 From b36201455aa0749e8708ef97ed9c1c9ece29a113 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 15 Feb 2018 10:54:59 -0500 Subject: net: sched: act: handle extack in tcf_generic_walker This patch adds extack handling for a commonly used TC act function "tcf_generic_walker()" to add an extack message on failures. The tcf_generic_walker() function can fail if it gets an invalid command other than DEL or GET. The name "action" here is wrong; the correct name would be "command". Cc: David Ahern Signed-off-by: Alexander Aring Signed-off-by: David S. 
Miller --- include/net/act_api.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index ab3529255377..9c2f22695025 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -140,7 +140,8 @@ static inline void tc_action_net_exit(struct list_head *net_list, int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops); + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack); int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index); bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, int bind); -- cgit v1.2.3 From c4b50cd31d25c3d17886ffc47ca4a9a12c6dc9bf Mon Sep 17 00:00:00 2001 From: Venkateswara Naralasetty Date: Tue, 13 Feb 2018 11:03:06 +0530 Subject: cfg80211: send ack_signal to user in probe client response This patch provides support to get ack signal in probe client response and in station info from user. Signed-off-by: Venkateswara Naralasetty [squash in compilation fixes] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7d49cd0cf92d..56e905cd4b07 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1147,6 +1147,7 @@ struct cfg80211_tid_stats { * @rx_duration: aggregate PPDU duration(usecs) for all the frames from a peer * @pertid: per-TID statistics, see &struct cfg80211_tid_stats, using the last * (IEEE80211_NUM_TIDS) index for MSDUs not encapsulated in QoS-MPDUs. + * @ack_signal: signal strength (in dBm) of the last ACK frame. 
*/ struct station_info { u64 filled; @@ -1191,6 +1192,7 @@ struct station_info { u64 rx_duration; u8 rx_beacon_signal_avg; struct cfg80211_tid_stats pertid[IEEE80211_NUM_TIDS + 1]; + s8 ack_signal; }; #if IS_ENABLED(CONFIG_CFG80211) @@ -5838,10 +5840,13 @@ bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev, * @addr: the address of the peer * @cookie: the cookie filled in @probe_client previously * @acked: indicates whether probe was acked or not + * @ack_signal: signal strength (in dBm) of the ACK frame. + * @is_valid_ack_signal: indicates the ack_signal is valid or not. * @gfp: allocation flags */ void cfg80211_probe_status(struct net_device *dev, const u8 *addr, - u64 cookie, bool acked, gfp_t gfp); + u64 cookie, bool acked, s32 ack_signal, + bool is_valid_ack_signal, gfp_t gfp); /** * cfg80211_report_obss_beacon - report beacon from other APs -- cgit v1.2.3 From a78b26fffd2368fcd079802897f4c97f9baea833 Mon Sep 17 00:00:00 2001 From: Venkateswara Naralasetty Date: Tue, 13 Feb 2018 11:04:46 +0530 Subject: mac80211: Add tx ack signal support in sta info This allows users to get ack signal strength of last transmitted frame. Signed-off-by: Venkateswara Naralasetty Signed-off-by: Johannes Berg --- include/net/mac80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 906e90223066..854037b8163e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -934,6 +934,7 @@ struct ieee80211_tx_info { u8 ampdu_len; u8 antenna; u16 tx_time; + bool is_valid_ack_signal; void *status_driver_data[19 / sizeof(void *)]; } status; struct { -- cgit v1.2.3 From 19efbd93e6fb05eab81856b4fc8d64211dd37088 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 19 Feb 2018 12:58:38 +0300 Subject: net: Kill net_mutex We take net_mutex, when there are !async pernet_operations registered, and read locking of net_sem is not enough. 
But we may get rid of taking the mutex, and just change the logic to write lock net_sem in such cases. This obviously reduces the number of lock operations we do. Signed-off-by: Kirill Tkhai Signed-off-by: David S. Miller --- include/net/net_namespace.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 9158ec1ad06f..115b01b92f4d 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -60,8 +60,11 @@ struct net { struct list_head list; /* list of network namespaces */ struct list_head cleanup_list; /* namespaces on death row */ - struct list_head exit_list; /* Use only net_mutex */ - + struct list_head exit_list; /* To linked to call pernet exit + * methods on dead net (net_sem + * read locked), or to unregister + * pernet ops (net_sem wr locked). + */ struct user_namespace *user_ns; /* Owning user namespace */ struct ucounts *ucounts; spinlock_t nsid_lock; @@ -89,7 +92,7 @@ struct net { /* core fib_rules */ struct list_head rules_ops; - struct list_head fib_notifier_ops; /* protected by net_mutex */ + struct list_head fib_notifier_ops; /* protected by net_sem */ struct net_device *loopback_dev; /* The loopback */ struct netns_core core; @@ -316,7 +319,7 @@ struct pernet_operations { /* * Indicates above methods are allowed to be executed in parallel * with methods of any other pernet_operations, i.e. they are not - * need synchronization via net_mutex. + * need write locked net_sem. */ bool async; }; -- cgit v1.2.3 From 65b7b5b90fcd17b25ef43b0cd02bda47bf286675 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 19 Feb 2018 12:58:45 +0300 Subject: net: Make cleanup_list and net::cleanup_list of llist type This simplifies cleanup queueing and makes cleanup lists use llist primitives. Since llist has its own cmpxchg() ordering, cleanup_list_lock is no longer needed. 
Also, struct llist_node is smaller, than struct list_head, so we save some bytes in struct net with this patch. Signed-off-by: Kirill Tkhai Signed-off-by: David S. Miller --- include/net/net_namespace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 115b01b92f4d..d4417495773a 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -59,12 +59,13 @@ struct net { atomic64_t cookie_gen; struct list_head list; /* list of network namespaces */ - struct list_head cleanup_list; /* namespaces on death row */ struct list_head exit_list; /* To linked to call pernet exit * methods on dead net (net_sem * read locked), or to unregister * pernet ops (net_sem wr locked). */ + struct llist_node cleanup_list; /* namespaces on death row */ + struct user_namespace *user_ns; /* Owning user namespace */ struct ucounts *ucounts; spinlock_t nsid_lock; -- cgit v1.2.3 From 4f4bbf7c4e3d4bd14987a13041c6b5b1ea59e21f Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Tue, 20 Feb 2018 08:44:21 +0100 Subject: devlink: Perform cleanup of resource_set cb After adding size validation logic into core cleanup is required. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/devlink.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 6545b03e97f7..8d1c3f276dea 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -234,13 +234,9 @@ struct devlink_dpipe_headers { /** * struct devlink_resource_ops - resource ops * @occ_get: get the occupied size - * @size_validate: validate the size of the resource before update, reload - * is needed for changes to take place */ struct devlink_resource_ops { u64 (*occ_get)(struct devlink *devlink); - int (*size_validate)(struct devlink *devlink, u64 size, - struct netlink_ext_ack *extack); }; /** -- cgit v1.2.3 From 494a973e22954249d35152cce1dcfba6d10c52e4 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sun, 18 Feb 2018 21:39:17 -0500 Subject: net/mac8390: Convert to nubus_driver This resolves an old bug that constrained this driver to no more than one card. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- include/net/Space.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/Space.h b/include/net/Space.h index 27fb5c937c4f..336da258885a 100644 --- a/include/net/Space.h +++ b/include/net/Space.h @@ -20,7 +20,6 @@ struct net_device *cs89x0_probe(int unit); struct net_device *mvme147lance_probe(int unit); struct net_device *tc515_probe(int unit); struct net_device *lance_probe(int unit); -struct net_device *mac8390_probe(int unit); struct net_device *mac89x0_probe(int unit); struct net_device *cops_probe(int unit); struct net_device *ltpc_probe(void); -- cgit v1.2.3 From 0a6b2a1dc2a2105f178255fe495eb914b09cb37a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Feb 2018 11:56:47 -0800 Subject: tcp: switch to GSO being always on Oleksandr Natalenko reported performance issues with BBR without FQ packet scheduler that were root caused to lack of SG and GSO/TSO on his configuration. 
In this mode, TCP internal pacing has to setup a high resolution timer for each MSS sent. We could implement in TCP a strategy similar to the one adopted in commit fefa569a9d4b ("net_sched: sch_fq: account for schedule/timers drifts") or decide to finally switch TCP stack to a GSO only mode. This has many benefits : 1) Most TCP developments are done with TSO in mind. 2) Less high-resolution timers needs to be armed for TCP-pacing 3) GSO can benefit of xmit_more hint 4) Receiver GRO is more effective (as if TSO was used for real on sender) -> Lower ACK traffic 5) Write queues have less overhead (one skb holds about 64KB of payload) 6) SACK coalescing just works. 7) rtx rb-tree contains less packets, SACK is cheaper. This patch implements the minimum patch, but we can remove some legacy code as follow ups. Tested: On 40Gbit link, one netperf -t TCP_STREAM BBR+fq: sg on: 26 Gbits/sec sg off: 15.7 Gbits/sec (was 2.3 Gbit before patch) BBR+pfifo_fast: sg on: 24.2 Gbits/sec sg off: 14.9 Gbits/sec (was 0.66 Gbit before patch !!! ) BBR+fq_codel: sg on: 24.4 Gbits/sec sg off: 15 Gbits/sec (was 0.66 Gbit before patch !!! ) Signed-off-by: Eric Dumazet Reported-by: Oleksandr Natalenko Signed-off-by: David S. Miller --- include/net/sock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 3aa7b7d6e6c7..f0f576ff5603 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -417,6 +417,7 @@ struct sock { struct page_frag sk_frag; netdev_features_t sk_route_caps; netdev_features_t sk_route_nocaps; + netdev_features_t sk_route_forced_caps; int sk_gso_type; unsigned int sk_gso_max_size; gfp_t sk_allocation; -- cgit v1.2.3 From dead7cdb0daec58490891e59f4fae0c5c76fa5f3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Feb 2018 11:56:49 -0800 Subject: tcp: remove sk_check_csum_caps() Since TCP relies on GSO, we do not need this helper anymore. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/sock.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index f0f576ff5603..b9624581d639 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1863,15 +1863,6 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags) sk->sk_route_caps &= ~flags; } -static inline bool sk_check_csum_caps(struct sock *sk) -{ - return (sk->sk_route_caps & NETIF_F_HW_CSUM) || - (sk->sk_family == PF_INET && - (sk->sk_route_caps & NETIF_F_IP_CSUM)) || - (sk->sk_family == PF_INET6 && - (sk->sk_route_caps & NETIF_F_IPV6_CSUM)); -} - static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, struct iov_iter *from, char *to, int copy, int offset) -- cgit v1.2.3 From a823fed03b5d940e4d57271222a0b959fc2ab201 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Tue, 20 Feb 2018 21:28:31 +0800 Subject: tcp: remove the hardcode in the definition of TCPF Macro TCPF_ macro depends on the definition of TCP_ macro. So it is better to define them with TCP_ macro. Signed-off-by: Yafang Shao Reviewed-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/tcp_states.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h index 50e78a74d0df..2875e169d744 100644 --- a/include/net/tcp_states.h +++ b/include/net/tcp_states.h @@ -32,21 +32,21 @@ enum { #define TCP_STATE_MASK 0xF -#define TCP_ACTION_FIN (1 << 7) +#define TCP_ACTION_FIN (1 << TCP_CLOSE) enum { - TCPF_ESTABLISHED = (1 << 1), - TCPF_SYN_SENT = (1 << 2), - TCPF_SYN_RECV = (1 << 3), - TCPF_FIN_WAIT1 = (1 << 4), - TCPF_FIN_WAIT2 = (1 << 5), - TCPF_TIME_WAIT = (1 << 6), - TCPF_CLOSE = (1 << 7), - TCPF_CLOSE_WAIT = (1 << 8), - TCPF_LAST_ACK = (1 << 9), - TCPF_LISTEN = (1 << 10), - TCPF_CLOSING = (1 << 11), - TCPF_NEW_SYN_RECV = (1 << 12), + TCPF_ESTABLISHED = (1 << TCP_ESTABLISHED), + TCPF_SYN_SENT = (1 << TCP_SYN_SENT), + TCPF_SYN_RECV = (1 << TCP_SYN_RECV), + TCPF_FIN_WAIT1 = (1 << TCP_FIN_WAIT1), + TCPF_FIN_WAIT2 = (1 << TCP_FIN_WAIT2), + TCPF_TIME_WAIT = (1 << TCP_TIME_WAIT), + TCPF_CLOSE = (1 << TCP_CLOSE), + TCPF_CLOSE_WAIT = (1 << TCP_CLOSE_WAIT), + TCPF_LAST_ACK = (1 << TCP_LAST_ACK), + TCPF_LISTEN = (1 << TCP_LISTEN), + TCPF_CLOSING = (1 << TCP_CLOSING), + TCPF_NEW_SYN_RECV = (1 << TCP_NEW_SYN_RECV), }; #endif /* _LINUX_TCP_STATES_H */ -- cgit v1.2.3 From cac56209a66ea3b0be67aa2966b2c628b944da1e Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Tue, 20 Feb 2018 08:55:58 -0500 Subject: net: Allow a rule to track originating protocol Allow a rule that is being added/deleted/modified or dumped to contain the originating protocol's id. The protocol is handled just like a routes originating protocol is. This is especially useful because there is starting to be a plethora of different user space programs adding rules. Allow the vrf device to specify that the kernel is the originator of the rule created for this device. Signed-off-by: Donald Sharp Signed-off-by: David S. 
Miller --- include/net/fib_rules.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 648caf90ec07..b166ef07e6d4 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -26,7 +26,8 @@ struct fib_rule { u32 table; u8 action; u8 l3mdev; - /* 2 bytes hole, try to use */ + u8 proto; + /* 1 byte hole, try to use */ u32 target; __be64 tun_id; struct fib_rule __rcu *ctarget; -- cgit v1.2.3 From 7299d6f7bfd1921c0cfb5e202155f1a5cfdb57d0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 19 Feb 2018 14:48:39 +0200 Subject: mac80211: support reporting A-MPDU EOF bit value/known Support getting the EOF bit value reported from hardware and writing it out to radiotap. Signed-off-by: Johannes Berg --- include/net/ieee80211_radiotap.h | 2 ++ include/net/mac80211.h | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'include/net') diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index d91f9e7f4d71..960236fb1681 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -149,6 +149,8 @@ enum ieee80211_radiotap_ampdu_flags { IEEE80211_RADIOTAP_AMPDU_IS_LAST = 0x0008, IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR = 0x0010, IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN = 0x0020, + IEEE80211_RADIOTAP_AMPDU_EOF = 0x0040, + IEEE80211_RADIOTAP_AMPDU_EOF_KNOWN = 0x0080, }; /* for IEEE80211_RADIOTAP_VHT */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 854037b8163e..649f073eb6df 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1099,6 +1099,9 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * the first subframe. * @RX_FLAG_ICV_STRIPPED: The ICV is stripped from this frame. CRC checking must * be done in the hardware. 
+ * @RX_FLAG_AMPDU_EOF_BIT: Value of the EOF bit in the A-MPDU delimiter for this + * frame + * @RX_FLAG_AMPDU_EOF_BIT_KNOWN: The EOF value is known */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = BIT(0), @@ -1125,6 +1128,8 @@ enum mac80211_rx_flags { RX_FLAG_MIC_STRIPPED = BIT(21), RX_FLAG_ALLOW_SAME_PN = BIT(22), RX_FLAG_ICV_STRIPPED = BIT(23), + RX_FLAG_AMPDU_EOF_BIT = BIT(24), + RX_FLAG_AMPDU_EOF_BIT_KNOWN = BIT(25), }; /** -- cgit v1.2.3 From 94ba92713f8329c96e0a8e2880b3c1a785d1c95c Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 19 Feb 2018 14:48:41 +0200 Subject: mac80211: Call mgd_prep_tx before transmitting deauthentication In multi channel scenarios, when disassociating from the AP before a beacon was heard from the AP, it is not guaranteed that the virtual interface is granted air time for the transmission of the deauthentication frame. This in turn can lead to various issues as the AP might never get the deauthentication frame. To mitigate such possible issues, add a HW flag indicating that the driver requires mac80211 to call the mgd_prep_tx() driver callback to make sure that the virtual interface is granted immediate airtime to be able to transmit the frame, in case that no beacon was heard from the AP. 
Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 649f073eb6df..dc3e9d9c3527 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6,6 +6,7 @@ * Copyright 2007-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH + * Copyright (C) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -2069,6 +2070,14 @@ struct ieee80211_txq { * @IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA: Hardware supports buffer STA on * TDLS links. * + * @IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP: The driver requires the + * mgd_prepare_tx() callback to be called before transmission of a + * deauthentication frame in case the association was completed but no + * beacon was heard. This is required in multi-channel scenarios, where the + * virtual interface might not be given air time for the transmission of + * the frame, as it is not synced with the AP/P2P GO yet, and thus the + * deauthentication frame might not be transmitted. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2112,6 +2121,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_REPORTS_LOW_ACK, IEEE80211_HW_SUPPORTS_TX_FRAG, IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA, + IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -3356,6 +3366,9 @@ enum ieee80211_reconfig_type { * management frame prior to having successfully associated to allow the * driver to give it channel time for the transmission, to get a response * and to be able to synchronize with the GO. 
+ * For drivers that set %IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP, mac80211 + * would also call this function before transmitting a deauthentication + * frame in case that no beacon was heard from the AP/P2P GO. * The callback will be called before each transmission and upon return * mac80211 will transmit the frame right away. * The callback is optional and can (should!) sleep. -- cgit v1.2.3 From 1b71af6053af1bd2f849e9fda4f71c1e3f145dcf Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Fri, 23 Feb 2018 14:01:52 -0500 Subject: net: fib_rules: Add new attribute to set protocol For ages iproute2 has used `struct rtmsg` as the ancillary header for FIB rules and in the process set the protocol value to RTPROT_BOOT. Until cac56209a66 ("net: Allow a rule to track originating protocol") the kernel rules code ignored the protocol value sent from userspace and always returned 0 in notifications. To avoid incompatibility with existing iproute2, send the protocol as a new attribute. Fixes: cac56209a66 ("net: Allow a rule to track originating protocol") Signed-off-by: Donald Sharp Signed-off-by: David S. 
Miller --- include/net/fib_rules.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index b166ef07e6d4..b3d216249240 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -109,7 +109,8 @@ struct fib_rule_notifier_info { [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ [FRA_GOTO] = { .type = NLA_U32 }, \ [FRA_L3MDEV] = { .type = NLA_U8 }, \ - [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) } + [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, \ + [FRA_PROTOCOL] = { .type = NLA_U8 } static inline void fib_rule_get(struct fib_rule *rule) { -- cgit v1.2.3 From 24bba078eca099b5bd25e17e97b485f013589f8c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 27 Feb 2018 13:03:07 +0100 Subject: mac80211: support A-MSDU in fast-rx Only works if the IV was stripped from packets. Create a smaller variant of ieee80211_rx_h_amsdu, which bypasses checks already done within the fast-rx context. In order to do so, update cfg80211's ieee80211_data_to_8023_exthdr() to take the offset between header and snap. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 56e905cd4b07..fc40843baed3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4410,10 +4410,12 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); * of it being pushed into the SKB * @addr: the device MAC address * @iftype: the virtual interface type + * @data_offset: offset of payload after the 802.11 header * Return: 0 on success. Non-zero on error. 
*/ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, - const u8 *addr, enum nl80211_iftype iftype); + const u8 *addr, enum nl80211_iftype iftype, + u8 data_offset); /** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 @@ -4425,7 +4427,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, enum nl80211_iftype iftype) { - return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype); + return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0); } /** -- cgit v1.2.3 From d1b2a6c4bed99fc7e8a11e7abcff19293d1974f5 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 27 Feb 2018 14:53:37 +0100 Subject: net: GRE: Add is_gretap_dev, is_ip6gretap_dev Determining whether a device is a GRE device is easily done by inspecting struct net_device.type. However, for the tap variants, the type is just ARPHRD_ETHER. Therefore introduce two predicate functions that use netdev_ops to tell the tap devices. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/gre.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/gre.h b/include/net/gre.h index f90585decbce..797142eee9cd 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -37,6 +37,9 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err, __be16 proto, int nhs); +bool is_gretap_dev(const struct net_device *dev); +bool is_ip6gretap_dev(const struct net_device *dev); + static inline int gre_calc_hlen(__be16 o_flags) { int addend = 4; -- cgit v1.2.3 From b0066da52ea53bae2b4ceed3f47d488df27dab66 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 27 Feb 2018 14:53:38 +0100 Subject: ip_tunnel: Rename & publish init_tunnel_flow Initializing struct flowi4 is useful for drivers that need to emulate routing decisions made by a tunnel interface. Publish the function (appropriately renamed) so that the drivers in question don't need to cut'n'paste it around. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/ip_tunnels.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/net') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 1f16773cfd76..cbe5addb9293 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -254,6 +254,22 @@ static inline __be32 tunnel_id_to_key32(__be64 tun_id) #ifdef CONFIG_INET +static inline void ip_tunnel_init_flow(struct flowi4 *fl4, + int proto, + __be32 daddr, __be32 saddr, + __be32 key, __u8 tos, int oif, + __u32 mark) +{ + memset(fl4, 0, sizeof(*fl4)); + fl4->flowi4_oif = oif; + fl4->daddr = daddr; + fl4->saddr = saddr; + fl4->flowi4_tos = tos; + fl4->flowi4_proto = proto; + fl4->fl4_gre_key = key; + fl4->flowi4_mark = mark; +} + int ip_tunnel_init(struct net_device *dev); void ip_tunnel_uninit(struct net_device *dev); void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); -- cgit v1.2.3 From 82695b30ffeeab665f41416c6f5015dea3147bd5 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 27 Feb 2018 15:48:21 -0800 Subject: inet: whitespace cleanup Ran simple script to find/remove trailing whitespace and blank lines at EOF because that kind of stuff git whines about and editors leave behind. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/net/ethoc.h | 1 - include/net/flow.h | 2 +- include/net/inet_connection_sock.h | 10 +++++----- include/net/ip.h | 12 ++++++------ include/net/ip_fib.h | 2 +- include/net/ipv6.h | 10 +++++----- include/net/xfrm.h | 14 +++++++------- 7 files changed, 25 insertions(+), 26 deletions(-) (limited to 'include/net') diff --git a/include/net/ethoc.h b/include/net/ethoc.h index bb7f467da7fc..29ba069a1d93 100644 --- a/include/net/ethoc.h +++ b/include/net/ethoc.h @@ -21,4 +21,3 @@ struct ethoc_platform_data { }; #endif /* !LINUX_NET_ETHOC_H */ - diff --git a/include/net/flow.h b/include/net/flow.h index f1624fd5b1d0..64e7ee9cb980 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -125,7 +125,7 @@ static inline void flowi4_update_output(struct flowi4 *fl4, int oif, __u8 tos, fl4->daddr = daddr; fl4->saddr = saddr; } - + struct flowi6 { struct flowi_common __fl_common; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index c1a93ce35e62..b68fea022a82 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -49,9 +49,9 @@ struct inet_connection_sock_af_ops { u16 net_header_len; u16 net_frag_header_len; u16 sockaddr_len; - int (*setsockopt)(struct sock *sk, int level, int optname, + int (*setsockopt)(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); - int (*getsockopt)(struct sock *sk, int level, int optname, + int (*getsockopt)(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); #ifdef CONFIG_COMPAT int (*compat_setsockopt)(struct sock *sk, @@ -67,7 +67,7 @@ struct inet_connection_sock_af_ops { /** inet_connection_sock - INET connection oriented sock * - * @icsk_accept_queue: FIFO of established children + * @icsk_accept_queue: FIFO of established children * @icsk_bind_hash: Bind node * @icsk_timeout: Timeout * @icsk_retransmit_timer: Resend (no ack) @@ -122,7 +122,7 @@ struct inet_connection_sock { unsigned long 
timeout; /* Currently scheduled timeout */ __u32 lrcvtime; /* timestamp of last received data packet */ __u16 last_seg_size; /* Size of last incoming segment */ - __u16 rcv_mss; /* MSS used for delayed ACK decisions */ + __u16 rcv_mss; /* MSS used for delayed ACK decisions */ } icsk_ack; struct { int enabled; @@ -201,7 +201,7 @@ extern const char inet_csk_timer_bug_msg[]; static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) { struct inet_connection_sock *icsk = inet_csk(sk); - + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { icsk->icsk_pending = 0; #ifdef INET_CSK_CLEAR_TIMERS diff --git a/include/net/ip.h b/include/net/ip.h index 746abff9ce51..fe63ba95d12b 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -186,15 +186,15 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); void ip4_datagram_release_cb(struct sock *sk); struct ip_reply_arg { - struct kvec iov[1]; + struct kvec iov[1]; int flags; __wsum csum; int csumoffset; /* u16 offset of csum in iov[0].iov_base */ - /* -1 if not needed */ + /* -1 if not needed */ int bound_dev_if; u8 tos; kuid_t uid; -}; +}; #define IP_REPLY_ARG_NOSRCCHECK 1 @@ -577,13 +577,13 @@ int ip_frag_mem(struct net *net); /* * Functions provided by ip_forward.c */ - + int ip_forward(struct sk_buff *skb); - + /* * Functions provided by ip_options.c */ - + void ip_options_build(struct sk_buff *skb, struct ip_options *opt, __be32 daddr, struct rtable *rt, int is_frag); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index f80524396c06..15e19c5c6f26 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -157,7 +157,7 @@ struct fib_result_nl { unsigned char nh_sel; unsigned char type; unsigned char scope; - int err; + int err; }; #ifdef CONFIG_IP_ROUTE_MULTIPATH diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 7a98cd583c73..cabd3cdd4015 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -105,8 +105,8 @@ #define IPV6_ADDR_ANY 
0x0000U -#define IPV6_ADDR_UNICAST 0x0001U -#define IPV6_ADDR_MULTICAST 0x0002U +#define IPV6_ADDR_UNICAST 0x0001U +#define IPV6_ADDR_MULTICAST 0x0002U #define IPV6_ADDR_LOOPBACK 0x0010U #define IPV6_ADDR_LINKLOCAL 0x0020U @@ -447,7 +447,7 @@ ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m, #endif } -static inline void ipv6_addr_prefix(struct in6_addr *pfx, +static inline void ipv6_addr_prefix(struct in6_addr *pfx, const struct in6_addr *addr, int plen) { @@ -496,7 +496,7 @@ static inline void __ipv6_addr_set_half(__be32 *addr, addr[1] = wl; } -static inline void ipv6_addr_set(struct in6_addr *addr, +static inline void ipv6_addr_set(struct in6_addr *addr, __be32 w1, __be32 w2, __be32 w3, __be32 w4) { @@ -732,7 +732,7 @@ static inline int __ipv6_addr_diff32(const void *token1, const void *token2, int } /* - * we should *never* get to this point since that + * we should *never* get to this point since that * would mean the addrs are equal * * However, we do get to it 8) And exacly, when diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 7d2077665c0b..aa027ba1d032 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1267,12 +1267,12 @@ static inline void xfrm_sk_free_policy(struct sock *sk) static inline void xfrm_sk_free_policy(struct sock *sk) {} static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { return 0; } -static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; } -static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; } +static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; } +static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; } static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb) -{ - return 1; -} +{ + return 1; +} static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb) { return 1; @@ -1356,7 +1356,7 @@ __xfrm6_state_addr_check(const struct xfrm_state *x, { if 
(ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)&x->id.daddr) && (ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)&x->props.saddr) || - ipv6_addr_any((struct in6_addr *)saddr) || + ipv6_addr_any((struct in6_addr *)saddr) || ipv6_addr_any((struct in6_addr *)&x->props.saddr))) return 1; return 0; @@ -1666,7 +1666,7 @@ int xfrm_user_policy(struct sock *sk, int optname, static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) { return -ENOPROTOOPT; -} +} static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) { -- cgit v1.2.3 From b9c7a7acc749f3d0667a2ab44ea38110d5a1f286 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 28 Feb 2018 10:45:06 +0100 Subject: net: sch: prio: Add offload ability for grafting a child Offload sch_prio graft command for capable drivers. Warn in case of a failure, unless the graft was done as part of a destroy operation (the new qdisc is a noop) or if all the qdiscs (the parent, the old child, and the new one) are not offloaded. Signed-off-by: Nogah Frankel Reviewed-by: Yuval Mintz Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/pkt_cls.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 87406252f0a3..e828d31be5da 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -806,6 +806,7 @@ enum tc_prio_command { TC_PRIO_REPLACE, TC_PRIO_DESTROY, TC_PRIO_STATS, + TC_PRIO_GRAFT, }; struct tc_prio_qopt_offload_params { @@ -818,6 +819,11 @@ struct tc_prio_qopt_offload_params { struct gnet_stats_queue *qstats; }; +struct tc_prio_qopt_offload_graft_params { + u8 band; + u32 child_handle; +}; + struct tc_prio_qopt_offload { enum tc_prio_command command; u32 handle; @@ -825,6 +831,8 @@ struct tc_prio_qopt_offload { union { struct tc_prio_qopt_offload_params replace_params; struct tc_qopt_offload_stats stats; + struct tc_prio_qopt_offload_graft_params graft_params; }; }; + #endif -- cgit v1.2.3 From bfff4862653bb96001ab57c1edd6d03f48e5f035 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 28 Feb 2018 22:40:16 -0500 Subject: net: fib_rules: support for match on ip_proto, sport and dport uapi for ip_proto, sport and dport range match in fib rules. Signed-off-by: Roopa Prabhu Acked-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/net/fib_rules.h | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index b3d216249240..6dd0a00653ae 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -27,7 +27,7 @@ struct fib_rule { u8 action; u8 l3mdev; u8 proto; - /* 1 byte hole, try to use */ + u8 ip_proto; u32 target; __be64 tun_id; struct fib_rule __rcu *ctarget; @@ -40,6 +40,8 @@ struct fib_rule { char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; struct fib_kuid_range uid_range; + struct fib_rule_port_range sport_range; + struct fib_rule_port_range dport_range; struct rcu_head rcu; }; @@ -144,6 +146,38 @@ static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) return frh->table; } +static inline bool fib_rule_port_range_set(const struct fib_rule_port_range *range) +{ + return range->start != 0 && range->end != 0; +} + +static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a, + __be16 port) +{ + return ntohs(port) >= a->start && + ntohs(port) <= a->end; +} + +static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a) +{ + return a->start != 0 && a->end != 0 && a->end < 0xffff && + a->start <= a->end; +} + +static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a, + struct fib_rule_port_range *b) +{ + return a->start == b->start && + a->end == b->end; +} + +static inline bool fib_rule_requires_fldissect(struct fib_rule *rule) +{ + return rule->ip_proto || + fib_rule_port_range_set(&rule->sport_range) || + fib_rule_port_range_set(&rule->dport_range); +} + struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *, struct net *); void fib_rules_unregister(struct fib_rules_ops *); -- cgit v1.2.3 From e37b1e978bec5334dc379d8c2423d063af207430 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 28 Feb 2018 22:42:41 -0500 Subject: ipv6: route: dissect 
flow in input path if fib rules need it Dissect flow in fwd path if fib rules require it. Controlled by a flag to avoid penalty for the common case. Flag is set when fib rules with sport, dport and proto match that require flow dissect are installed. Also passes the dissected hash keys to the multipath hash function when applicable to avoid dissecting the flow again. icmp packets will continue to use inner header for hash calculations (Thanks to Nikolay Aleksandrov for some review here). Signed-off-by: Roopa Prabhu Acked-by: Paolo Abeni Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/net/ip_fib.h | 27 ++++++++++++++++++++++++++- include/net/netns/ipv4.h | 1 + 2 files changed, 27 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 15e19c5c6f26..8812582a94d5 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -293,6 +293,13 @@ static inline unsigned int fib4_rules_seq_read(struct net *net) return 0; } +static inline bool fib4_rules_early_flow_dissect(struct net *net, + struct sk_buff *skb, + struct flowi4 *fl4, + struct flow_keys *flkeys) +{ + return false; +} #else /* CONFIG_IP_MULTIPLE_TABLES */ int __net_init fib4_rules_init(struct net *net); void __net_exit fib4_rules_exit(struct net *net); @@ -341,6 +348,24 @@ bool fib4_rule_default(const struct fib_rule *rule); int fib4_rules_dump(struct net *net, struct notifier_block *nb); unsigned int fib4_rules_seq_read(struct net *net); +static inline bool fib4_rules_early_flow_dissect(struct net *net, + struct sk_buff *skb, + struct flowi4 *fl4, + struct flow_keys *flkeys) +{ + unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; + + if (!net->ipv4.fib_rules_require_fldissect) + return false; + + skb_flow_dissect_flow_keys(skb, flkeys, flag); + fl4->fl4_sport = flkeys->ports.src; + fl4->fl4_dport = flkeys->ports.dst; + fl4->flowi4_proto = flkeys->basic.ip_proto; + + return true; +} + #endif /* CONFIG_IP_MULTIPLE_TABLES 
*/ /* Exported by fib_frontend.c */ @@ -371,7 +396,7 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags); #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, - const struct sk_buff *skb); + const struct sk_buff *skb, struct flow_keys *flkeys); #endif void fib_select_multipath(struct fib_result *res, int hash); void fib_select_path(struct net *net, struct fib_result *res, diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 44668c29701a..3a970e429ab6 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -52,6 +52,7 @@ struct netns_ipv4 { #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rules_ops *rules_ops; bool fib_has_custom_rules; + unsigned int fib_rules_require_fldissect; struct fib_table __rcu *fib_main; struct fib_table __rcu *fib_default; #endif -- cgit v1.2.3 From 5e5d6fed374155ba1a7a5ca5f12fbec2285d06a2 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 28 Feb 2018 22:43:22 -0500 Subject: ipv6: route: dissect flow in input path if fib rules need it Dissect flow in fwd path if fib rules require it. Controlled by a flag to avoid penalty for the common case. Flag is set when fib rules with sport, dport and proto match that require flow dissect are installed. Also passes the dissected hash keys to the multipath hash function when applicable to avoid dissecting the flow again. icmp packets will continue to use inner header for hash calculations. Signed-off-by: Roopa Prabhu Acked-by: Paolo Abeni Acked-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/net/ip6_fib.h | 25 +++++++++++++++++++++++++ include/net/ip6_route.h | 4 +++- include/net/netns/ipv6.h | 3 ++- 3 files changed, 30 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 34ec321d6a03..8d906a35b534 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -415,6 +415,24 @@ void fib6_rules_cleanup(void); bool fib6_rule_default(const struct fib_rule *rule); int fib6_rules_dump(struct net *net, struct notifier_block *nb); unsigned int fib6_rules_seq_read(struct net *net); + +static inline bool fib6_rules_early_flow_dissect(struct net *net, + struct sk_buff *skb, + struct flowi6 *fl6, + struct flow_keys *flkeys) +{ + unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; + + if (!net->ipv6.fib6_rules_require_fldissect) + return false; + + skb_flow_dissect_flow_keys(skb, flkeys, flag); + fl6->fl6_sport = flkeys->ports.src; + fl6->fl6_dport = flkeys->ports.dst; + fl6->flowi6_proto = flkeys->basic.ip_proto; + + return true; +} #else static inline int fib6_rules_init(void) { @@ -436,5 +454,12 @@ static inline unsigned int fib6_rules_seq_read(struct net *net) { return 0; } +static inline bool fib6_rules_early_flow_dissect(struct net *net, + struct sk_buff *skb, + struct flowi6 *fl6, + struct flow_keys *flkeys) +{ + return false; +} #endif #endif diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 27d23a65f3cd..da2bde5fda8f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -127,7 +127,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, int flags); -u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb); +u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb, + struct flow_keys *hkeys); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct 
flowi6 *fl6); @@ -266,4 +267,5 @@ static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b) ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) && !lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate); } + #endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 987cc4569cb8..2b9194229a56 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -71,7 +71,8 @@ struct netns_ipv6 { unsigned int ip6_rt_gc_expire; unsigned long ip6_rt_last_gc; #ifdef CONFIG_IPV6_MULTIPLE_TABLES - bool fib6_has_custom_rules; + unsigned int fib6_rules_require_fldissect; + bool fib6_has_custom_rules; struct rt6_info *ip6_prohibit_entry; struct rt6_info *ip6_blk_hole_entry; struct fib6_table *fib6_local_tbl; -- cgit v1.2.3 From b70432f7319eb75b24ca57dde8146c5e27244780 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 28 Feb 2018 23:29:32 +0200 Subject: mroute*: Make mr_table a common struct Following previous changes to ip6mr, mr_table and mr6_table are basically the same [up to mr6_table having additional '6' suffixes to its variable names]. Move the common structure definition into a common header; This requires renaming all references in ip6mr to variables that had the distinct suffix. Signed-off-by: Yuval Mintz Acked-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/net/netns/ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 2b9194229a56..e286fda09fcf 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -85,7 +85,7 @@ struct netns_ipv6 { struct sock *mc_autojoin_sk; #ifdef CONFIG_IPV6_MROUTE #ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES - struct mr6_table *mrt6; + struct mr_table *mrt6; #else struct list_head mr6_tables; struct fib_rules_ops *mr6_rules_ops; -- cgit v1.2.3 From 3a053b1a30dcb4e39569bcce2f4357509260db75 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 28 Feb 2018 15:59:15 +0200 Subject: net: Fix spelling mistake "greater then" -> "greater than" Fix trivial spelling mistake "greater then" -> "greater than". Signed-off-by: Gal Pressman Signed-off-by: David S. Miller --- include/net/sch_generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e2ab13687fb9..d4907b584b38 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -540,7 +540,7 @@ static inline bool skb_skip_tc_classify(struct sk_buff *skb) return false; } -/* Reset all TX qdiscs greater then index of a device. */ +/* Reset all TX qdiscs greater than index of a device. */ static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) { struct Qdisc *qdisc; -- cgit v1.2.3 From 5f6f845b608a3fa13e5da0584eea5803710cf708 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 1 Mar 2018 17:55:37 -0800 Subject: fib_rules: FRA_GENERIC_POLICY updates for ip proto, sport and dport attrs Fixes: bfff4862653b ("net: fib_rules: support for match on ip_proto, sport and dport") Reported-by: Eric Dumazet Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- include/net/fib_rules.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 6dd0a00653ae..1c9e17c11953 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -112,7 +112,11 @@ struct fib_rule_notifier_info { [FRA_GOTO] = { .type = NLA_U32 }, \ [FRA_L3MDEV] = { .type = NLA_U8 }, \ [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, \ - [FRA_PROTOCOL] = { .type = NLA_U8 } + [FRA_PROTOCOL] = { .type = NLA_U8 }, \ + [FRA_IP_PROTO] = { .type = NLA_U8 }, \ + [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, \ + [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) } + static inline void fib_rule_get(struct fib_rule *rule) { -- cgit v1.2.3 From 43bf2e6d69dd6c2cea7a28763893a3dff34b7873 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 1 Mar 2018 18:29:28 -0500 Subject: net/mac89x0: Convert to platform_driver Apparently these Dayna cards don't have a pseudoslot declaration ROM which means they can't be probed like NuBus cards. Cc: Geert Uytterhoeven Signed-off-by: Finn Thain Acked-by: Geert Uytterhoeven Signed-off-by: David S. 
Miller --- include/net/Space.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/Space.h b/include/net/Space.h index 336da258885a..9cce0d80d37a 100644 --- a/include/net/Space.h +++ b/include/net/Space.h @@ -20,7 +20,6 @@ struct net_device *cs89x0_probe(int unit); struct net_device *mvme147lance_probe(int unit); struct net_device *tc515_probe(int unit); struct net_device *lance_probe(int unit); -struct net_device *mac89x0_probe(int unit); struct net_device *cops_probe(int unit); struct net_device *ltpc_probe(void); -- cgit v1.2.3 From dcb8c9b4373a583451b1b8a3e916d33de273633d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2018 14:40:46 -0800 Subject: tcp_bbr: better deal with suboptimal GSO (II) This is second part of dealing with suboptimal device gso parameters. In first patch (350c9f484bde "tcp_bbr: better deal with suboptimal GSO") we dealt with devices having low gso_max_segs Some devices lower gso_max_size from 64KB to 16 KB (r8152 is an example) In order to probe an optimal cwnd, we want BBR being not sensitive to whatever GSO constraint a device can have. This patch removes tso_segs_goal() CC callback in favor of min_tso_segs() for CC wanting to override sysctl_tcp_min_tso_segs Next patch will remove bbr->tso_segs_goal since it does not have to be persistent. Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. 
Miller --- include/net/tcp.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 92b06c6e7732..9c9b3768b350 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -511,8 +511,6 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss); #endif /* tcp_output.c */ -u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, - int min_tso_segs); void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle); int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); @@ -981,8 +979,8 @@ struct tcp_congestion_ops { u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); - /* suggest number of segments for each skb to transmit (optional) */ - u32 (*tso_segs_goal)(struct sock *sk); + /* override sysctl_tcp_min_tso_segs */ + u32 (*min_tso_segs)(struct sock *sk); /* returns the multiplier used in tcp_sndbuf_expand (optional) */ u32 (*sndbuf_expand)(struct sock *sk); /* call when packets are delivered to update cwnd and pacing rate, -- cgit v1.2.3 From 7efc0b6b666d757e07417f59397e7f5f340e74e0 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 2 Mar 2018 08:32:12 -0800 Subject: net/ipv4: Pass net to fib_multipath_hash instead of fib_info fib_multipath_hash only needs net struct to check a sysctl. Make it clear by passing net instead of fib_info. In the end this allows alignment between the ipv4 and ipv6 versions. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/net/ip_fib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 8812582a94d5..7c7522e8585b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -395,7 +395,7 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local); int fib_sync_up(struct net_device *dev, unsigned int nh_flags); #ifdef CONFIG_IP_ROUTE_MULTIPATH -int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, +int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys); #endif void fib_select_multipath(struct fib_result *res, int hash); -- cgit v1.2.3 From 3192dac64c73d8c0eb4274a3da23d829fb5177af Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 2 Mar 2018 08:32:16 -0800 Subject: net: Rename NETEVENT_MULTIPATH_HASH_UPDATE Rename NETEVENT_MULTIPATH_HASH_UPDATE to NETEVENT_IPV4_MPATH_HASH_UPDATE to denote it relates to a change in the IPv4 hash policy. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/net/netevent.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netevent.h b/include/net/netevent.h index 40e7bab68490..baee605a94ab 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -26,7 +26,7 @@ enum netevent_notif_type { NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */ - NETEVENT_MULTIPATH_HASH_UPDATE, /* arg is struct net ptr */ + NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */ }; int register_netevent_notifier(struct notifier_block *nb); -- cgit v1.2.3 From b75cc8f90f07342467b3bd51dbc0054f185032c9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 2 Mar 2018 08:32:17 -0800 Subject: net/ipv6: Pass skb to route lookup IPv6 does path selection for multipath routes deep in the lookup functions. The next patch adds L4 hash option and needs the skb for the forward path. To get the skb to the relevant FIB lookup functions it needs to go through the fib rules layer, so add a lookup_data argument to the fib_lookup_arg struct. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/net/fib_rules.h | 1 + include/net/ip6_fib.h | 4 +++- include/net/ip6_route.h | 11 +++++++---- 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 1c9e17c11953..e5cfcfc7dd93 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -47,6 +47,7 @@ struct fib_rule { struct fib_lookup_arg { void *lookup_ptr; + const void *lookup_data; void *result; struct fib_rule *rule; u32 table; diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 8d906a35b534..5e86fd9dc857 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -350,7 +350,8 @@ struct fib6_table { typedef struct rt6_info *(*pol_lookup_t)(struct net *, struct fib6_table *, - struct flowi6 *, int); + struct flowi6 *, + const struct sk_buff *, int); struct fib6_entry_notifier_info { struct fib_notifier_info info; /* must be first */ @@ -364,6 +365,7 @@ struct fib6_entry_notifier_info { struct fib6_table *fib6_get_table(struct net *net, u32 id); struct fib6_table *fib6_new_table(struct net *net, u32 id); struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, + const struct sk_buff *skb, int flags, pol_lookup_t lookup); struct fib6_node *fib6_lookup(struct fib6_node *root, diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index da2bde5fda8f..9594f9317952 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -75,7 +75,8 @@ static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt) void ip6_route_input(struct sk_buff *skb); struct dst_entry *ip6_route_input_lookup(struct net *net, struct net_device *dev, - struct flowi6 *fl6, int flags); + struct flowi6 *fl6, + const struct sk_buff *skb, int flags); struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags); @@ -88,9 +89,10 @@ static inline struct dst_entry *ip6_route_output(struct net *net, } struct dst_entry 
*ip6_route_lookup(struct net *net, struct flowi6 *fl6, - int flags); + const struct sk_buff *skb, int flags); struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, - int ifindex, struct flowi6 *fl6, int flags); + int ifindex, struct flowi6 *fl6, + const struct sk_buff *skb, int flags); void ip6_route_init_special_entries(void); int ip6_route_init(void); @@ -126,7 +128,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, } struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, - const struct in6_addr *saddr, int oif, int flags); + const struct in6_addr *saddr, int oif, + const struct sk_buff *skb, int flags); u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb, struct flow_keys *hkeys); -- cgit v1.2.3 From b4bac172e90ce4a93df8adf44eb70d91b9d611eb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 2 Mar 2018 08:32:18 -0800 Subject: net/ipv6: Add support for path selection using hash of 5-tuple Some operators prefer IPv6 path selection to use a standard 5-tuple hash rather than just an L3 hash with the flow label. To that end add support to IPv6 for multipath hash policy similar to bf4e0a3db97eb ("net: ipv4: add support for ECMP hash policy choice"). The default is still L3 which covers source and destination addresses along with flow label and IPv6 protocol. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/net/ip6_route.h | 4 ++-- include/net/netevent.h | 1 + include/net/netns/ipv6.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 9594f9317952..ce2abc0ff102 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -130,8 +130,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, const struct sk_buff *skb, int flags); -u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb, - struct flow_keys *hkeys); +u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, + const struct sk_buff *skb, struct flow_keys *hkeys); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); diff --git a/include/net/netevent.h b/include/net/netevent.h index baee605a94ab..d9918261701c 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -27,6 +27,7 @@ enum netevent_notif_type { NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */ NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */ + NETEVENT_IPV6_MPATH_HASH_UPDATE, /* arg is struct net ptr */ }; int register_netevent_notifier(struct notifier_block *nb); diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index e286fda09fcf..5b51110435fc 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 { int ip6_rt_gc_elasticity; int ip6_rt_mtu_expires; int ip6_rt_min_advmss; + int multipath_hash_policy; int flowlabel_consistency; int auto_flowlabels; int icmpv6_time; -- cgit v1.2.3 From de7a0f871fabe74fff7481caf7d3efe03b58fe58 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 2 Mar 2018 08:32:20 -0800 Subject: net: Remove unused get_hash_from_flow 
functions __get_hash_from_flowi6 is still used for flowlabels, but the IPv4 variant and the wrappers to both are not used. Remove them. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/net/flow.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index 64e7ee9cb980..8ce21793094e 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -222,20 +222,4 @@ static inline unsigned int flow_key_size(u16 family) __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys); -static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6) -{ - struct flow_keys keys; - - return __get_hash_from_flowi6(fl6, &keys); -} - -__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys); - -static inline __u32 get_hash_from_flowi4(const struct flowi4 *fl4) -{ - struct flow_keys keys; - - return __get_hash_from_flowi4(fl4, &keys); -} - #endif -- cgit v1.2.3 From 88c060549a4c555d59965801d1e811b71614c2b7 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 1 Mar 2018 02:02:27 +0100 Subject: dsa: Pass the port to get_sset_count() By passing the port, we allow different ports to have different statistics. This is useful since some ports have SERDES interfaces with their own statistic counters. Signed-off-by: Andrew Lunn Tested-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. 
Miller --- include/net/dsa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 0ad17b63684d..60fb4ec8ba61 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -359,7 +359,7 @@ struct dsa_switch_ops { void (*get_strings)(struct dsa_switch *ds, int port, uint8_t *data); void (*get_ethtool_stats)(struct dsa_switch *ds, int port, uint64_t *data); - int (*get_sset_count)(struct dsa_switch *ds); + int (*get_sset_count)(struct dsa_switch *ds, int port); /* * ethtool Wake-on-LAN -- cgit v1.2.3 From 76b12974a3981db2a1ae60d62f55dd839d07ac85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Sun, 4 Mar 2018 03:29:51 +0100 Subject: net: core: dst_cache: Fix a typo in a comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Neuschäfer Signed-off-by: David S. Miller --- include/net/dst_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h index 72fd5067c353..844906fbf8c9 100644 --- a/include/net/dst_cache.h +++ b/include/net/dst_cache.h @@ -71,7 +71,7 @@ struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache, * dst_cache_reset - invalidate the cache contents * @dst_cache: the cache * - * This do not free the cached dst to avoid races and contentions. + * This does not free the cached dst to avoid races and contentions. * the dst will be freed on later cache lookup. 
*/ static inline void dst_cache_reset(struct dst_cache *dst_cache) -- cgit v1.2.3 From 4c1342d967cb556ea1c0f34271b125deeb25f0f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Sun, 4 Mar 2018 03:29:52 +0100 Subject: net: core: dst_cache_set_ip6: Rename 'addr' parameter to 'saddr' for consistency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The other dst_cache_{get,set}_ip{4,6} functions, and the doc comment for dst_cache_set_ip6 use 'saddr' for their source address parameter. Rename the parameter to increase consistency. This fixes the following kernel-doc warnings: ./include/net/dst_cache.h:58: warning: Function parameter or member 'addr' not described in 'dst_cache_set_ip6' ./include/net/dst_cache.h:58: warning: Excess function parameter 'saddr' description in 'dst_cache_set_ip6' Fixes: 911362c70df5 ("net: add dst_cache support") Signed-off-by: Jonathan Neuschäfer Signed-off-by: David S. Miller --- include/net/dst_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h index 844906fbf8c9..67634675e919 100644 --- a/include/net/dst_cache.h +++ b/include/net/dst_cache.h @@ -54,7 +54,7 @@ void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst, * local BH must be disabled. 
*/ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst, - const struct in6_addr *addr); + const struct in6_addr *saddr); /** * dst_cache_get_ip6 - perform cache lookup and fetch ipv6 source address -- cgit v1.2.3 From 8eb1a8590f5ca114fabf16ebb26a4bce0255ace9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Sun, 4 Mar 2018 03:29:53 +0100 Subject: net: core: dst: Add kernel-doc for 'net' parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the following kernel-doc warning: ./include/net/dst.h:366: warning: Function parameter or member 'net' not described in 'skb_tunnel_rx' Fixes: ea23192e8e57 ("tunnels: harmonize cleanup done on skb on rx path") Signed-off-by: Jonathan Neuschäfer Signed-off-by: David S. Miller --- include/net/dst.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index c63d2c37f6e9..b3219cd8a5a1 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -356,6 +356,7 @@ static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, * skb_tunnel_rx - prepare skb for rx reinsert * @skb: buffer * @dev: tunnel device + * @net: netns for packet i/o * * After decapsulation, packet is going to re-enter (netif_rx()) our stack, * so make some cleanups, and perform accounting. -- cgit v1.2.3 From 433029ecc62788296cacca50ceb24db90c17a4a2 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 11 Feb 2018 23:28:18 +0900 Subject: netfilter: nf_conntrack_broadcast: remove useless parameter parameter protoff in nf_conntrack_broadcast_help is not used anywhere. 
Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_helper.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index fc39bbaf107c..32c2a94a219d 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -132,8 +132,7 @@ void nf_conntrack_helper_pernet_fini(struct net *net); int nf_conntrack_helper_init(void); void nf_conntrack_helper_fini(void); -int nf_conntrack_broadcast_help(struct sk_buff *skb, unsigned int protoff, - struct nf_conn *ct, +int nf_conntrack_broadcast_help(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int timeout); -- cgit v1.2.3 From 3427b2ab63faccafe774ea997fc2da7faf690c5a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 1 Mar 2018 18:58:38 -0800 Subject: netfilter: make xt_rateest hash table per net As suggested by Eric, we need to make the xt_rateest hash table and its lock per netns to reduce lock contentions. 
Cc: Florian Westphal Cc: Eric Dumazet Cc: Pablo Neira Ayuso Signed-off-by: Cong Wang Reviewed-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/xt_rateest.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h index b1db13772554..832ab69efda5 100644 --- a/include/net/netfilter/xt_rateest.h +++ b/include/net/netfilter/xt_rateest.h @@ -21,7 +21,7 @@ struct xt_rateest { struct net_rate_estimator __rcu *rate_est; }; -struct xt_rateest *xt_rateest_lookup(const char *name); -void xt_rateest_put(struct xt_rateest *est); +struct xt_rateest *xt_rateest_lookup(struct net *net, const char *name); +void xt_rateest_put(struct net *net, struct xt_rateest *est); #endif /* _XT_RATEEST_H */ -- cgit v1.2.3 From ed63afb8a318f6b3558d76afba7809daee4f28e5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 5 Mar 2018 20:44:18 +0800 Subject: sctp: add support for PR-SCTP Information for sendmsg This patch is to add support for PR-SCTP Information for sendmsg, as described in section 5.3.7 of RFC6458. With this option, you can specify pr_policy and pr_value for user data in sendmsg. It's also a necessary send info for sctp_sendv. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/net/sctp/structs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 03e92dda1813..d40a2a329888 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -2112,6 +2112,7 @@ struct sctp_cmsgs { struct sctp_initmsg *init; struct sctp_sndrcvinfo *srinfo; struct sctp_sndinfo *sinfo; + struct sctp_prinfo *prinfo; }; /* Structure for tracking memory objects */ -- cgit v1.2.3 From 2c0dbaa0c43d04d8d6daf52adb724c5789676b15 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 5 Mar 2018 20:44:19 +0800 Subject: sctp: add support for SCTP_DSTADDRV4/6 Information for sendmsg This patch is to add support for Destination IPv4/6 Address options for sendmsg, as described in section 5.3.9/10 of RFC6458. With this option, you can provide more than one destination addrs to sendmsg when creating asoc, like sctp_connectx. It's also a necessary send info for sctp_sendv. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index d40a2a329888..ec6e46b7e119 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -2113,6 +2113,7 @@ struct sctp_cmsgs { struct sctp_sndrcvinfo *srinfo; struct sctp_sndinfo *sinfo; struct sctp_prinfo *prinfo; + struct msghdr *addrs_msg; }; /* Structure for tracking memory objects */ -- cgit v1.2.3 From 79134e6ce2c9d1a00eab4d98cb48f975dd2474cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Mar 2018 12:51:41 -0800 Subject: net: do not create fallback tunnels for non-default namespaces fallback tunnels (like tunl0, gre0, gretap0, erspan0, sit0, ip6tnl0, ip6gre0) are automatically created when the corresponding module is loaded. 
These tunnels are also automatically created when a new network namespace is created, at a great cost. In many cases, netns are used for isolation purposes, and these extra network devices are a waste of resources. We are using thousands of netns per host, and hit the netns creation/delete bottleneck a lot. (Many thanks to Kirill for recent work on this) Add a new sysctl so that we can opt-out from this automatic creation. Note that these tunnels are still created for the initial namespace, to be the least intrusive for typical setups. Tested: lpk43:~# cat add_del_unshare.sh for i in `seq 1 40` do (for j in `seq 1 100` ; do unshare -n /bin/true >/dev/null ; done) & done wait lpk43:~# echo 0 >/proc/sys/net/core/fb_tunnels_only_for_init_net lpk43:~# time ./add_del_unshare.sh real 0m37.521s user 0m0.886s sys 7m7.084s lpk43:~# echo 1 >/proc/sys/net/core/fb_tunnels_only_for_init_net lpk43:~# time ./add_del_unshare.sh real 0m4.761s user 0m0.851s sys 1m8.343s lpk43:~# Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index cbe5addb9293..540a4b4417bf 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -180,8 +180,10 @@ struct tnl_ptk_info { struct ip_tunnel_net { struct net_device *fb_tunnel_dev; + struct rtnl_link_ops *rtnl_link_ops; struct hlist_head tunnels[IP_TNL_HASH_SIZE]; struct ip_tunnel __rcu *collect_md_tun; + int type; }; static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, -- cgit v1.2.3 From d04e6990c948a3315ea8eca5979ebea48cda56f4 Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Thu, 8 Mar 2018 16:59:17 -0500 Subject: net sched actions: update Add/Delete action API with new argument Introduce a new function argument to carry total attributes size for correct allocation of skb in event messages. Signed-off-by: Roman Mashak Signed-off-by: David S. 
Miller --- include/net/act_api.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 9c2f22695025..88c1f99bae46 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -166,7 +166,8 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res); int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, - struct list_head *actions, struct netlink_ext_ack *extack); + struct list_head *actions, size_t *attr_size, + struct netlink_ext_ack *extack); struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, -- cgit v1.2.3 From a03b91b17684023c45d39b836c85579d5e535983 Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Thu, 8 Mar 2018 16:59:18 -0500 Subject: net sched actions: add new tc_action_ops callback Add a new callback in tc_action_ops, it will be needed by the tc actions to compute its size when a ADD/DELETE notification message is constructed. This routine has to take into account optional/variable size TLVs specific per action. Signed-off-by: Roman Mashak Signed-off-by: David S. 
Miller --- include/net/act_api.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 88c1f99bae46..e0a9c2003b24 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -97,6 +97,7 @@ struct tc_action_ops { const struct tc_action_ops *, struct netlink_ext_ack *); void (*stats_update)(struct tc_action *, u64, u32, u64); + size_t (*get_fill_size)(const struct tc_action *act); struct net_device *(*get_dev)(const struct tc_action *a); }; -- cgit v1.2.3 From 918ee5073b0e253649083d731a88588b5c1723a3 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 11 Mar 2018 09:45:47 +0200 Subject: net: ipv6: Introduce ip6_multipath_hash_policy() In order to abstract away access to the ipv6.sysctl.multipath_hash_policy variable, which is not available on systems compiled without IPv6 support, introduce a wrapper function ip6_multipath_hash_policy() that falls back to 0 on non-IPv6 systems. Use this wrapper from mlxsw/spectrum_router instead of a direct reference. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Acked-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/ipv6.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index cabd3cdd4015..50a6f0ddb878 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -888,6 +888,17 @@ static inline int ip6_default_np_autolabel(struct net *net) } #endif +#if IS_ENABLED(CONFIG_IPV6) +static inline int ip6_multipath_hash_policy(const struct net *net) +{ + return net->ipv6.sysctl.multipath_hash_policy; +} +#else +static inline int ip6_multipath_hash_policy(const struct net *net) +{ + return 0; +} +#endif /* * Header manipulation -- cgit v1.2.3 From 6056415d3a513846f774e7bbee0de0460b1c15df Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 13 Mar 2018 13:55:55 +0300 Subject: net: Add comment about pernet_operations methods and synchronization Make the locking scheme visible to users, and provide a comment explaining why we need exit_batch() methods and when they should be used. Signed-off-by: Kirill Tkhai Signed-off-by: David S. Miller --- include/net/net_namespace.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index d4417495773a..71abc8d79178 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -312,6 +312,20 @@ struct net *get_net_ns_by_id(struct net *net, int id); struct pernet_operations { struct list_head list; + /* + * Below methods are called without any exclusive locks. + * More than one net may be constructed and destructed + * in parallel on several cpus. Every pernet_operations + * have to keep in mind all other pernet_operations and + * to introduce a locking, if they share common resources. + * + * Exit methods using blocking RCU primitives, such as + * synchronize_rcu(), should be implemented via exit_batch.
+ * Then, destruction of a group of net requires single + * synchronize_rcu() related to these pernet_operations, + * instead of separate synchronize_rcu() for every net. + * Please, avoid synchronize_rcu() at all, where it's possible. + */ int (*init)(struct net *net); void (*exit)(struct net *net); void (*exit_batch)(struct list_head *net_exit_list); -- cgit v1.2.3 From 4c10d56a76bb1d40ea6bede579d1522cbcdc438e Mon Sep 17 00:00:00 2001 From: Prameela Rani Garnepudi Date: Tue, 27 Feb 2018 19:56:13 +0530 Subject: rsi: add header file rsi_91x The common parameters used by wlan and bt modules are added to a new header file "rsi_91x.h" defined in 'include/net' Signed-off-by: Prameela Rani Garnepudi Signed-off-by: Siva Rebbagondla Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- include/net/rsi_91x.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 include/net/rsi_91x.h (limited to 'include/net') diff --git a/include/net/rsi_91x.h b/include/net/rsi_91x.h new file mode 100644 index 000000000000..16a447b46119 --- /dev/null +++ b/include/net/rsi_91x.h @@ -0,0 +1,34 @@ +/** + * Copyright (c) 2017 Redpine Signals Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */ + +#ifndef __RSI_HEADER_H__ +#define __RSI_HEADER_H__ + +/* HAL queue information */ +#define RSI_COEX_Q 0x0 +#define RSI_BT_Q 0x2 +#define RSI_WLAN_Q 0x3 +#define RSI_WIFI_MGMT_Q 0x4 +#define RSI_WIFI_DATA_Q 0x5 +#define RSI_BT_MGMT_Q 0x6 +#define RSI_BT_DATA_Q 0x7 + +enum rsi_host_intf { + RSI_HOST_INTF_SDIO = 0, + RSI_HOST_INTF_USB +}; + +#endif -- cgit v1.2.3 From 2108df3c4b1856588ca2e7f641900c2bbf38467e Mon Sep 17 00:00:00 2001 From: Prameela Rani Garnepudi Date: Tue, 27 Feb 2018 19:56:14 +0530 Subject: rsi: add coex support With BT support, driver has to handle two streams of data (i.e. wlan and BT). Actual coex implementation is in firmware. Coex module just schedules the packets to firmware by taking them from the corresponding paths. Structures for module and protocol operations are introduced for this purpose. Protocol operations structure is global structure which can be shared among different modules. Move initialization of coex and operating mode values to rsi_91x_init().
Signed-off-by: Prameela Rani Garnepudi Signed-off-by: Siva Rebbagondla Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- include/net/rsi_91x.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/net') diff --git a/include/net/rsi_91x.h b/include/net/rsi_91x.h index 16a447b46119..737ab4e01e3b 100644 --- a/include/net/rsi_91x.h +++ b/include/net/rsi_91x.h @@ -17,6 +17,8 @@ #ifndef __RSI_HEADER_H__ #define __RSI_HEADER_H__ +#include + /* HAL queue information */ #define RSI_COEX_Q 0x0 #define RSI_BT_Q 0x2 @@ -26,9 +28,27 @@ #define RSI_BT_MGMT_Q 0x6 #define RSI_BT_DATA_Q 0x7 +enum rsi_coex_queues { + RSI_COEX_Q_INVALID = -1, + RSI_COEX_Q_COMMON = 0, + RSI_COEX_Q_BT, + RSI_COEX_Q_WLAN +}; + enum rsi_host_intf { RSI_HOST_INTF_SDIO = 0, RSI_HOST_INTF_USB }; +struct rsi_proto_ops { + int (*coex_send_pkt)(void *priv, struct sk_buff *skb, u8 hal_queue); + enum rsi_host_intf (*get_host_intf)(void *priv); + void (*set_bt_context)(void *priv, void *context); +}; + +struct rsi_mod_ops { + int (*attach)(void *priv, struct rsi_proto_ops *ops); + void (*detach)(void *priv); + int (*recv_pkt)(void *priv, u8 *msg); +}; #endif -- cgit v1.2.3 From 38aa4da504837ba8b9c04941e843642f129661eb Mon Sep 17 00:00:00 2001 From: Prameela Rani Garnepudi Date: Tue, 27 Feb 2018 19:56:15 +0530 Subject: Bluetooth: btrsi: add new rsi bluetooth driver Redpine bluetooth driver is a thin driver which depends on 'rsi_91x' driver for transmitting and receiving packets to/from device. It creates hci interface when attach() is called from 'rsi_91x' module. 
Signed-off-by: Prameela Rani Garnepudi Signed-off-by: Siva Rebbagondla Acked-by: Marcel Holtmann Reviewed-by: Marcel Holtmann Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- include/net/rsi_91x.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/rsi_91x.h b/include/net/rsi_91x.h index 737ab4e01e3b..040f07b47f1f 100644 --- a/include/net/rsi_91x.h +++ b/include/net/rsi_91x.h @@ -49,6 +49,8 @@ struct rsi_proto_ops { struct rsi_mod_ops { int (*attach)(void *priv, struct rsi_proto_ops *ops); void (*detach)(void *priv); - int (*recv_pkt)(void *priv, u8 *msg); + int (*recv_pkt)(void *priv, const u8 *msg); }; + +extern const struct rsi_mod_ops rsi_bt_ops; #endif -- cgit v1.2.3 From 1b1e0bc9947427ae58bbe7de0ce9cfd591b589b9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Mar 2018 19:05:30 +0800 Subject: sctp: add refcnt support for sh_key With refcnt support for sh_key, chunks auth sh_keys can be decided before enqueuing it. Changing the active key later will not affect the chunks already enqueued. Furthermore, this is necessary when adding the support for authinfo for sendmsg in next patch. Note that struct sctp_chunk can't be grown due to that performance drop issue on slow cpu, so it just reuses head_skb memory for shkey in sctp_chunk. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/net/sctp/auth.h | 9 ++++++--- include/net/sctp/sm.h | 3 ++- include/net/sctp/structs.h | 9 +++++++-- 3 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h index e5c57d0a082d..017c1aa3a2c9 100644 --- a/include/net/sctp/auth.h +++ b/include/net/sctp/auth.h @@ -62,8 +62,9 @@ struct sctp_auth_bytes { /* Definition for a shared key, weather endpoint or association */ struct sctp_shared_key { struct list_head key_list; - __u16 key_id; struct sctp_auth_bytes *key; + refcount_t refcnt; + __u16 key_id; }; #define key_for_each(__key, __list_head) \ @@ -103,8 +104,10 @@ int sctp_auth_send_cid(enum sctp_cid chunk, int sctp_auth_recv_cid(enum sctp_cid chunk, const struct sctp_association *asoc); void sctp_auth_calculate_hmac(const struct sctp_association *asoc, - struct sk_buff *skb, - struct sctp_auth_chunk *auth, gfp_t gfp); + struct sk_buff *skb, struct sctp_auth_chunk *auth, + struct sctp_shared_key *ep_key, gfp_t gfp); +void sctp_auth_shkey_release(struct sctp_shared_key *sh_key); +void sctp_auth_shkey_hold(struct sctp_shared_key *sh_key); /* API Helpers */ int sctp_auth_ep_add_chunkid(struct sctp_endpoint *ep, __u8 chunk_id); diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 2883c43c5258..2d0e782c9055 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -263,7 +263,8 @@ int sctp_process_asconf_ack(struct sctp_association *asoc, struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, __u32 new_cum_tsn, size_t nstreams, struct sctp_fwdtsn_skip *skiplist); -struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc); +struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc, + __u16 key_id); struct sctp_chunk *sctp_make_strreset_req(const struct sctp_association *asoc, __u16 stream_num, __be16 *stream_list, bool out, bool in); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 
ec6e46b7e119..49ad67bbdbb5 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -577,8 +577,12 @@ struct sctp_chunk { /* This points to the sk_buff containing the actual data. */ struct sk_buff *skb; - /* In case of GSO packets, this will store the head one */ - struct sk_buff *head_skb; + union { + /* In case of GSO packets, this will store the head one */ + struct sk_buff *head_skb; + /* In case of auth enabled, this will point to the shkey */ + struct sctp_shared_key *shkey; + }; /* These are the SCTP headers by reverse order in a packet. * Note that some of these may happen more than once. In that @@ -1995,6 +1999,7 @@ struct sctp_association { * The current generated assocaition shared key (secret) */ struct sctp_auth_bytes *asoc_shared_key; + struct sctp_shared_key *shkey; /* SCTP AUTH: hmac id of the first peer requested algorithm * that we support. -- cgit v1.2.3 From 3ff547c06a7d75d72d37dae2c064fcf0672e56c0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Mar 2018 19:05:31 +0800 Subject: sctp: add support for SCTP AUTH Information for sendmsg This patch is to add support for SCTP AUTH Information for sendmsg, as described in section 5.3.8 of RFC6458. With this option, you can provide shared key identifier used for sending the user message. It's also a necessary send info for sctp_sendv. Note that it reuses sinfo->sinfo_tsn to indicate if this option is set and sinfo->sinfo_ssn to save the shkey ID which can be 0. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/net/sctp/structs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 49ad67bbdbb5..012fb3e2f4cf 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -2118,6 +2118,7 @@ struct sctp_cmsgs { struct sctp_sndrcvinfo *srinfo; struct sctp_sndinfo *sinfo; struct sctp_prinfo *prinfo; + struct sctp_authinfo *authinfo; struct msghdr *addrs_msg; }; -- cgit v1.2.3 From 601590ec155aadf5daa17a6f63a06d1bba5b5ce9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Mar 2018 19:05:32 +0800 Subject: sctp: add sockopt SCTP_AUTH_DEACTIVATE_KEY This patch is to add sockopt SCTP_AUTH_DEACTIVATE_KEY, as described in section 8.3.4 of RFC6458. This set option indicates that the application will no longer send user messages using the indicated key identifier. Note that RFC requires that only deactivated keys that are no longer used by an association can be deleted, but for the backward compatibility, it is not to check deactivated when deleting or replacing one sh_key. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/net/sctp/auth.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h index 017c1aa3a2c9..687e7f80037d 100644 --- a/include/net/sctp/auth.h +++ b/include/net/sctp/auth.h @@ -65,6 +65,7 @@ struct sctp_shared_key { struct sctp_auth_bytes *key; refcount_t refcnt; __u16 key_id; + __u8 deactivated; }; #define key_for_each(__key, __list_head) \ @@ -113,14 +114,13 @@ void sctp_auth_shkey_hold(struct sctp_shared_key *sh_key); int sctp_auth_ep_add_chunkid(struct sctp_endpoint *ep, __u8 chunk_id); int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep, struct sctp_hmacalgo *hmacs); -int sctp_auth_set_key(struct sctp_endpoint *ep, - struct sctp_association *asoc, +int sctp_auth_set_key(struct sctp_endpoint *ep, struct sctp_association *asoc, struct sctp_authkey *auth_key); int sctp_auth_set_active_key(struct sctp_endpoint *ep, - struct sctp_association *asoc, - __u16 key_id); + struct sctp_association *asoc, __u16 key_id); int sctp_auth_del_key_id(struct sctp_endpoint *ep, - struct sctp_association *asoc, - __u16 key_id); + struct sctp_association *asoc, __u16 key_id); +int sctp_auth_deact_key_id(struct sctp_endpoint *ep, + struct sctp_association *asoc, __u16 key_id); #endif -- cgit v1.2.3 From 30f6ebf65bc46161c5aaff1db2e6e7c76aa4a06b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Mar 2018 19:05:34 +0800 Subject: sctp: add SCTP_AUTH_NO_AUTH type for AUTHENTICATION_EVENT This patch is to add SCTP_AUTH_NO_AUTH type for AUTHENTICATION_EVENT, as described in section 6.1.8 of RFC6458. SCTP_AUTH_NO_AUTH: This report indicates that the peer does not support SCTP authentication as defined in [RFC4895]. Note that the implementation is quite similar as that of SCTP_ADAPTATION_INDICATION. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/net/sctp/command.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index b55c6a48a206..6640f84fe536 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -100,6 +100,7 @@ enum sctp_verb { SCTP_CMD_SET_SK_ERR, /* Set sk_err */ SCTP_CMD_ASSOC_CHANGE, /* generate and send assoc_change event */ SCTP_CMD_ADAPTATION_IND, /* generate and send adaptation event */ + SCTP_CMD_PEER_NO_AUTH, /* generate and send authentication event */ SCTP_CMD_ASSOC_SHKEY, /* generate the association shared keys */ SCTP_CMD_T1_RETRAN, /* Mark for retransmission after T1 timeout */ SCTP_CMD_UPDATE_INITTAG, /* Update peer inittag */ -- cgit v1.2.3 From 232378e8db4780bc7145d7a0ee47f5f80a41ad6b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 13 Mar 2018 08:29:37 -0700 Subject: net/ipv6: Change address check to always take a device argument ipv6_chk_addr_and_flags determines if an address is a local address and optionally if it is an address on a specific device. For example, it is called by ip6_route_info_create to determine if a given gateway address is a local address. The address check currently does not consider L3 domains and as a result does not allow a route to be added in one VRF if the nexthop points to an address in a second VRF. e.g., $ ip route add 2001:db8:1::/64 vrf r2 via 2001:db8:102::23 Error: Invalid gateway address. where 2001:db8:102::23 is an address on an interface in vrf r1. ipv6_chk_addr_and_flags needs to allow callers to always pass in a device with a separate argument to not limit the address to the specific device. The device is used to determine the L3 domain of interest. To that end add an argument to skip the device check and update callers to always pass a device where possible and use the new argument to mean any address in the domain. Update a handful of users of ipv6_chk_addr with a NULL dev argument.
This patch handles the change to these callers without adding the domain check. ip6_validate_gw needs to handle 2 cases - one where the device is given as part of the nexthop spec and the other where the device is resolved. There is at least 1 VRF case where deferring the check to only after the route lookup has resolved the device fails with an unintuitive error "RTNETLINK answers: No route to host" as opposed to the preferred "Error: Gateway can not be a local address." The 'no route to host' error is because of the fallback to a full lookup. The check is done twice to avoid this error. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/addrconf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index c4185a7b0e90..132e5b95167a 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -69,8 +69,8 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg); int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, - const struct net_device *dev, int strict, - u32 banned_flags); + const struct net_device *dev, bool skip_dev_check, + int strict, u32 banned_flags); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr); -- cgit v1.2.3 From 1e8029515816f771b9b3751f24f19fe6df4c72ae Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Tue, 13 Mar 2018 21:57:16 -0700 Subject: udp: Move the udp sysctl to namespace. This patch moves the udp_rmem_min, udp_wmem_min to namespace and init the udp_l3mdev_accept explicitly. The udp_rmem_min/udp_wmem_min affect udp rx/tx queue, with this patch namespaces can set them differently. Signed-off-by: Tonghao Zhang Signed-off-by: David S. 
Miller --- include/net/netns/ipv4.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 3a970e429ab6..382bfd7583cf 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -168,6 +168,9 @@ struct netns_ipv4 { atomic_t tfo_active_disable_times; unsigned long tfo_active_disable_stamp; + int sysctl_udp_wmem_min; + int sysctl_udp_rmem_min; + #ifdef CONFIG_NET_L3_MASTER_DEV int sysctl_udp_l3mdev_accept; #endif -- cgit v1.2.3 From d47d08c8ca052df3d9fde7cfff518660335b16e7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 16 Mar 2018 23:32:51 +0000 Subject: sctp: use proc_remove_subtree() use proc_remove_subtree() for subtree removal, both on setup failure halfway through and on teardown. No need to make simple things complex... Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index f7ae6b0a21d0..72c5b8fc3232 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -180,14 +180,7 @@ struct sctp_transport *sctp_epaddr_lookup_transport( /* * sctp/proc.c */ -int sctp_snmp_proc_init(struct net *net); -void sctp_snmp_proc_exit(struct net *net); -int sctp_eps_proc_init(struct net *net); -void sctp_eps_proc_exit(struct net *net); -int sctp_assocs_proc_init(struct net *net); -void sctp_assocs_proc_exit(struct net *net); -int sctp_remaddr_proc_init(struct net *net); -void sctp_remaddr_proc_exit(struct net *net); +int __net_init sctp_proc_init(struct net *net); /* * sctp/offload.c @@ -318,7 +311,6 @@ atomic_t sctp_dbg_objcnt_## name = ATOMIC_INIT(0) {.label= #name, .counter= &sctp_dbg_objcnt_## name} void sctp_dbg_objcnt_init(struct net *); -void sctp_dbg_objcnt_exit(struct net *); #else @@ -326,7 +318,6 @@ void sctp_dbg_objcnt_exit(struct net *); #define SCTP_DBG_OBJCNT_DEC(name) 
static inline void sctp_dbg_objcnt_init(struct net *net) { return; } -static inline void sctp_dbg_objcnt_exit(struct net *net) { return; } #endif /* CONFIG_SCTP_DBG_OBJCOUNT */ -- cgit v1.2.3 From 2c3682f0be97a5f57c6c8b40fa154dfc77efb461 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 18 Mar 2018 12:56:49 -0700 Subject: sock: make static tls function alloc_sg generic sock helper The TLS ULP module builds scatterlists from a sock using page_frag_refill(). This is going to be useful for other ULPs so move it into sock file for more general use. In the process remove useless goto at end of while loop. Signed-off-by: John Fastabend Acked-by: David S. Miller Signed-off-by: Daniel Borkmann --- include/net/sock.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index b9624581d639..447150c51feb 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2141,6 +2141,10 @@ static inline struct page_frag *sk_page_frag(struct sock *sk) bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); +int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg, + int *sg_num_elem, unsigned int *sg_size, + int first_coalesce); + /* * Default write policy as shown to user space via poll/select/SIGIO */ -- cgit v1.2.3 From 8c05dbf04b2882c3c0bc43fe7668c720210877f3 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 18 Mar 2018 12:57:05 -0700 Subject: net: generalize sk_alloc_sg to work with scatterlist rings The current implementation of sk_alloc_sg expects scatterlist to always start at entry 0 and complete at entry MAX_SKB_FRAGS. Future patches will want to support starting at arbitrary offset into scatterlist so add an additional sg_start parameters and then default to the current values in TLS code paths. Signed-off-by: John Fastabend Acked-by: David S. 
Miller Signed-off-by: Daniel Borkmann --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 447150c51feb..b7c75e024e37 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2142,7 +2142,7 @@ static inline struct page_frag *sk_page_frag(struct sock *sk) bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg, - int *sg_num_elem, unsigned int *sg_size, + int sg_start, int *sg_curr, unsigned int *sg_size, int first_coalesce); /* -- cgit v1.2.3 From 6aec208786c2a54cbf6135a0242b224e845bef98 Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Sun, 4 Mar 2018 15:29:51 -0800 Subject: netfilter: Refactor nf_conncount Remove parameter 'family' in nf_conncount_count() and count_tree(). It is because the parameter is not useful after commit 625c556118f3 ("netfilter: connlimit: split xt_connlimit into front and backend"). Signed-off-by: Yi-Hung Wei Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_count.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h index adf8db44cf86..e61184fbfb71 100644 --- a/include/net/netfilter/nf_conntrack_count.h +++ b/include/net/netfilter/nf_conntrack_count.h @@ -11,7 +11,6 @@ void nf_conncount_destroy(struct net *net, unsigned int family, unsigned int nf_conncount_count(struct net *net, struct nf_conncount_data *data, const u32 *key, - unsigned int family, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone); #endif -- cgit v1.2.3 From 94e5e3087a67c765be98592b36d8d187566478d5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 19 Mar 2018 13:17:30 +0100 Subject: net: add uevent socket member This commit adds struct uevent_sock to struct net. 
Since struct uevent_sock records the position of the uevent socket in the uevent socket list we can trivially remove it from the uevent socket list during cleanup. This speeds up the old removal codepath. Note, list_del() will hit __list_del_entry_valid() in its call chain which will validate that the element is a member of the list. If it isn't it will take care that the list is not modified. Signed-off-by: Christian Brauner Signed-off-by: David S. Miller --- include/net/net_namespace.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 71abc8d79178..09e30bdc7876 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -40,7 +40,7 @@ struct net_device; struct sock; struct ctl_table_header; struct net_generic; -struct sock; +struct uevent_sock; struct netns_ipvs; @@ -83,6 +83,8 @@ struct net { struct sock *rtnl; /* rtnetlink socket */ struct sock *genl_sock; + struct uevent_sock *uevent_sock; /* uevent socket */ + struct list_head dev_base_head; struct hlist_head *dev_name_head; struct hlist_head *dev_index_head; -- cgit v1.2.3 From 145307460ba9c11489807de7acd3f4c7395f60b7 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 20 Mar 2018 19:31:14 -0700 Subject: devlink: Remove top_hierarchy arg to devlink_resource_register top_hierarchy arg can be determined by comparing parent_resource_id to DEVLINK_RESOURCE_ID_PARENT_TOP so it does not need to be a separate argument. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/devlink.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index c83125ad20ff..d5b707375e48 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -406,7 +406,6 @@ extern struct devlink_dpipe_header devlink_dpipe_header_ipv6; int devlink_resource_register(struct devlink *devlink, const char *resource_name, - bool top_hierarchy, u64 resource_size, u64 resource_id, u64 parent_resource_id, -- cgit v1.2.3 From 5796ef75ec7b6019eac88f66751d663d537a5cd3 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 22 Mar 2018 12:45:32 +0300 Subject: net: Make ip_ra_chain per struct net This is optimization, which makes ip_call_ra_chain() iterate less sockets to find the sockets it's looking for. Signed-off-by: Kirill Tkhai Signed-off-by: David S. Miller --- include/net/ip.h | 13 +++++++++++-- include/net/netns/ipv4.h | 1 + 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index fe63ba95d12b..d53b5a9eae34 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -91,6 +91,17 @@ static inline int inet_sdif(struct sk_buff *skb) return 0; } +/* Special input handler for packets caught by router alert option. + They are selected only by protocol field, and then processed likely + local ones; but only if someone wants them! Otherwise, router + not running rsvpd will kill RSVP. + + It is user level problem, what it will make with them. + I have no idea, how it will masquearde or NAT them (it is joke, joke :-)), + but receiver should be enough clever f.e. to forward mtrace requests, + sent to multicast group to reach destination designated router. + */ + struct ip_ra_chain { struct ip_ra_chain __rcu *next; struct sock *sk; @@ -101,8 +112,6 @@ struct ip_ra_chain { struct rcu_head rcu; }; -extern struct ip_ra_chain __rcu *ip_ra_chain; - /* IP flags. 
*/ #define IP_CE 0x8000 /* Flag: "Congestion" */ #define IP_DF 0x4000 /* Flag: "Don't Fragment" */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 382bfd7583cf..97d7ee6667c7 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -49,6 +49,7 @@ struct netns_ipv4 { #endif struct ipv4_devconf *devconf_all; struct ipv4_devconf *devconf_dflt; + struct ip_ra_chain __rcu *ra_chain; #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rules_ops *rules_ops; bool fib_has_custom_rules; -- cgit v1.2.3 From d9ff3049739e349b5380b96226f9ad766741773d Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 22 Mar 2018 12:45:40 +0300 Subject: net: Replace ip_ra_lock with per-net mutex Since ra_chain is per-net, we may use per-net mutexes to protect them in ip_ra_control(). This improves scalability. Signed-off-by: Kirill Tkhai Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 97d7ee6667c7..8491bc9c86b1 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -50,6 +50,7 @@ struct netns_ipv4 { struct ipv4_devconf *devconf_all; struct ipv4_devconf *devconf_dflt; struct ip_ra_chain __rcu *ra_chain; + struct mutex ra_mutex; #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rules_ops *rules_ops; bool fib_has_custom_rules; -- cgit v1.2.3 From dcbe73ca55a42712bfd0e9966cd2d5a48355ace3 Mon Sep 17 00:00:00 2001 From: Pradeep Kumar Chitrapu Date: Thu, 22 Mar 2018 12:18:03 -0700 Subject: mac80211: notify driver for change in multicast rates With drivers implementing rate control in driver or firmware rate_control_send_low() may not get called, and thus the driver needs to know about changes in the multicast rate. Add and use a new BSS change flag for this. 
Signed-off-by: Pradeep Kumar Chitrapu [rewrite commit message] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2fd59ed3be00..d39fd6838f41 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -302,6 +302,8 @@ struct ieee80211_vif_chanctx_switch { * @BSS_CHANGED_MU_GROUPS: VHT MU-MIMO group id or user position changed * @BSS_CHANGED_KEEP_ALIVE: keep alive options (idle period or protected * keep alive) changed. + * @BSS_CHANGED_MCAST_RATE: Multicast Rate setting changed for this interface + * */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -329,6 +331,7 @@ enum ieee80211_bss_change { BSS_CHANGED_OCB = 1<<22, BSS_CHANGED_MU_GROUPS = 1<<23, BSS_CHANGED_KEEP_ALIVE = 1<<24, + BSS_CHANGED_MCAST_RATE = 1<<25, /* when adding here, make sure to change ieee80211_reconfig */ }; -- cgit v1.2.3 From e9de0018d1fa97f8db9a39fcb69b55266c52835b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 23 Mar 2018 08:09:48 -0700 Subject: devlink: Remove top_hierarchy arg for DEVLINK disabled path Earlier change missed the path where CONFIG_NET_DEVLINK is disabled. Thanks to Jiri for spotting. Fixes: 145307460ba9 ("devlink: Remove top_hierarchy arg to devlink_resource_register") Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/devlink.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index d5b707375e48..e21d8cadd480 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -559,7 +559,6 @@ devlink_dpipe_match_put(struct sk_buff *skb, static inline int devlink_resource_register(struct devlink *devlink, const char *resource_name, - bool top_hierarchy, u64 resource_size, u64 resource_id, u64 parent_resource_id, -- cgit v1.2.3 From dbe425599ba05c7415f632e6f5f018453098eb69 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Thu, 22 Mar 2018 10:10:06 -0700 Subject: tls: Move cipher info to a separate struct Separate tx crypto parameters to a separate cipher_context struct. The same parameters will be used for rx using the same struct. tls_advance_record_sn is modified to only take the cipher info. Signed-off-by: Dave Watson Signed-off-by: David S. Miller --- include/net/tls.h | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/tls.h b/include/net/tls.h index 4913430ab807..019e52db1817 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -81,6 +81,16 @@ enum { TLS_PENDING_CLOSED_RECORD }; +struct cipher_context { + u16 prepend_size; + u16 tag_size; + u16 overhead_size; + u16 iv_size; + char *iv; + u16 rec_seq_size; + char *rec_seq; +}; + struct tls_context { union { struct tls_crypto_info crypto_send; @@ -91,13 +101,7 @@ struct tls_context { u8 tx_conf:2; - u16 prepend_size; - u16 tag_size; - u16 overhead_size; - u16 iv_size; - char *iv; - u16 rec_seq_size; - char *rec_seq; + struct cipher_context tx; struct scatterlist *partially_sent_record; u16 partially_sent_offset; @@ -190,7 +194,7 @@ static inline bool tls_bigint_increment(unsigned char *seq, int len) } static inline void tls_advance_record_sn(struct sock *sk, - struct tls_context *ctx) + struct cipher_context *ctx) { if 
(tls_bigint_increment(ctx->rec_seq, ctx->rec_seq_size)) tls_err_abort(sk); @@ -203,9 +207,9 @@ static inline void tls_fill_prepend(struct tls_context *ctx, size_t plaintext_len, unsigned char record_type) { - size_t pkt_len, iv_size = ctx->iv_size; + size_t pkt_len, iv_size = ctx->tx.iv_size; - pkt_len = plaintext_len + iv_size + ctx->tag_size; + pkt_len = plaintext_len + iv_size + ctx->tx.tag_size; /* we cover nonce explicit here as well, so buf should be of * size KTLS_DTLS_HEADER_SIZE + KTLS_DTLS_NONCE_EXPLICIT_SIZE @@ -217,7 +221,7 @@ static inline void tls_fill_prepend(struct tls_context *ctx, buf[3] = pkt_len >> 8; buf[4] = pkt_len & 0xFF; memcpy(buf + TLS_NONCE_OFFSET, - ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size); + ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size); } static inline void tls_make_aad(char *buf, -- cgit v1.2.3 From f4a8e43f1f0abc0e93ed5ee132288ee4142afde1 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Thu, 22 Mar 2018 10:10:15 -0700 Subject: tls: Pass error code explicitly to tls_err_abort Pass EBADMSG explicitly to tls_err_abort. Receive path will pass additional codes - EMSGSIZE if framing is larger than max TLS record size, EINVAL if TLS version mismatch. Signed-off-by: Dave Watson Signed-off-by: David S. 
Miller --- include/net/tls.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/tls.h b/include/net/tls.h index 019e52db1817..6b44875a78e5 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -174,9 +174,9 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx) return tls_ctx->pending_open_record_frags; } -static inline void tls_err_abort(struct sock *sk) +static inline void tls_err_abort(struct sock *sk, int err) { - sk->sk_err = EBADMSG; + sk->sk_err = err; sk->sk_error_report(sk); } @@ -197,7 +197,7 @@ static inline void tls_advance_record_sn(struct sock *sk, struct cipher_context *ctx) { if (tls_bigint_increment(ctx->rec_seq, ctx->rec_seq_size)) - tls_err_abort(sk); + tls_err_abort(sk, EBADMSG); tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, ctx->iv_size); } -- cgit v1.2.3 From 583715853a25b4f2720b847e4fb8e37727299152 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Thu, 22 Mar 2018 10:10:26 -0700 Subject: tls: Refactor variable names Several config variables are prefixed with tx, drop the prefix since these will be used for both tx and rx. Signed-off-by: Dave Watson Signed-off-by: David S. Miller --- include/net/tls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tls.h b/include/net/tls.h index 6b44875a78e5..095b72283861 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -99,7 +99,7 @@ struct tls_context { void *priv_ctx; - u8 tx_conf:2; + u8 conf:2; struct cipher_context tx; -- cgit v1.2.3 From c46234ebb4d1eee5e09819f49169e51cfc6eb909 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Thu, 22 Mar 2018 10:10:35 -0700 Subject: tls: RX path for ktls Add rx path for tls software implementation. recvmsg, splice_read, and poll implemented. An additional sockopt TLS_RX is added, with the same interface as TLS_TX. 
Either TLS_RX or TLS_TX may be provided separately, or together (with two different setsockopt calls with appropriate keys). Control messages are passed via CMSG in a similar way to transmit. If no cmsg buffer is passed, then only application data records will be passed to userspace, and EIO is returned for other types of alerts. EBADMSG is passed for decryption errors, and EMSGSIZE is passed for framing too big, and EBADMSG for framing too small (matching openssl semantics). EINVAL is returned for TLS versions that do not match the original setsockopt call. All are unrecoverable. strparser is used to parse TLS framing. Decryption is done directly into userspace buffers if they are large enough to support it, otherwise skb_cow_data is called (similar to ipsec), and buffers are decrypted in place and copied. splice_read always decrypts in place, since no buffers are provided to decrypt into. sk_poll is overridden, and only returns POLLIN if a full TLS message is received. Otherwise we wait for strparser to finish reading a full frame. Actual decryption is only done during recvmsg or splice_read calls. Signed-off-by: Dave Watson Signed-off-by: David S.
Miller --- include/net/tls.h | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/tls.h b/include/net/tls.h index 095b72283861..437a746300bf 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -40,6 +40,7 @@ #include #include #include +#include #include @@ -58,8 +59,18 @@ struct tls_sw_context { struct crypto_aead *aead_send; + struct crypto_aead *aead_recv; struct crypto_wait async_wait; + /* Receive context */ + struct strparser strp; + void (*saved_data_ready)(struct sock *sk); + unsigned int (*sk_poll)(struct file *file, struct socket *sock, + struct poll_table_struct *wait); + struct sk_buff *recv_pkt; + u8 control; + bool decrypted; + /* Sending context */ char aad_space[TLS_AAD_SPACE_SIZE]; @@ -96,12 +107,17 @@ struct tls_context { struct tls_crypto_info crypto_send; struct tls12_crypto_info_aes_gcm_128 crypto_send_aes_gcm_128; }; + union { + struct tls_crypto_info crypto_recv; + struct tls12_crypto_info_aes_gcm_128 crypto_recv_aes_gcm_128; + }; void *priv_ctx; u8 conf:2; struct cipher_context tx; + struct cipher_context rx; struct scatterlist *partially_sent_record; u16 partially_sent_offset; @@ -128,12 +144,19 @@ int tls_sk_attach(struct sock *sk, int optname, char __user *optval, unsigned int optlen); -int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx); +int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tls_sw_close(struct sock *sk, long timeout); -void tls_sw_free_tx_resources(struct sock *sk); +void tls_sw_free_resources(struct sock *sk); +int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int nonblock, int flags, int *addr_len); +unsigned int tls_sw_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait); +ssize_t 
tls_sw_splice_read(struct socket *sock, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, unsigned int flags); void tls_sk_destruct(struct sock *sk, struct tls_context *ctx); void tls_icsk_clean_acked(struct sock *sk); -- cgit v1.2.3 From affaa0c724c14c914625647efe7b95dfbe8d08f2 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 23 Mar 2018 19:09:39 +0100 Subject: net/sched: remove tcf_idr_cleanup() tcf_idr_cleanup() is no more used, so remove it. Suggested-by: Cong Wang Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- include/net/act_api.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index e0a9c2003b24..9e59ebfded62 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -149,7 +149,6 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int bind, bool cpustats); -void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est); void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a); int __tcf_idr_release(struct tc_action *a, bool bind, bool strict); -- cgit v1.2.3 From 088aa3eec2ce340b5d0f0f54430f5706223d5e45 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 26 Mar 2018 15:01:34 +0300 Subject: ip6mr: Support fib notifications In similar fashion to ipmr, support fib notifications for ip6mr mfc and vif related events. This would later allow drivers to react to said notifications and offload the IPv6 mroutes. Signed-off-by: Yuval Mintz Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- include/net/netns/ipv6.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 5b51110435fc..c29f09cfc9d7 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -96,6 +96,8 @@ struct netns_ipv6 { atomic_t fib6_sernum; struct seg6_pernet_data *seg6_data; struct fib_notifier_ops *notifier_ops; + struct fib_notifier_ops *ip6mr_notifier_ops; + unsigned int ipmr_seq; /* protected by rtnl_mutex */ struct { struct hlist_head head; spinlock_t lock; -- cgit v1.2.3 From 5306653850b444452937834adc5a5ac63bae275e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 26 Mar 2018 16:55:00 +0800 Subject: sctp: remove unnecessary asoc in sctp_has_association After Commit dae399d7fdee ("sctp: hold transport instead of assoc when lookup assoc in rx path"), it put transport instead of asoc in sctp_has_association. Variable 'asoc' is not used any more. So this patch is to remove it, while at it, it also changes the return type of sctp_has_association to bool, and does the same for its caller sctp_endpoint_is_peeled_off. Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S.
Miller --- include/net/sctp/structs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 012fb3e2f4cf..c63249ea34c3 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1341,12 +1341,12 @@ struct sctp_association *sctp_endpoint_lookup_assoc( const struct sctp_endpoint *ep, const union sctp_addr *paddr, struct sctp_transport **); -int sctp_endpoint_is_peeled_off(struct sctp_endpoint *, - const union sctp_addr *); +bool sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, + const union sctp_addr *paddr); struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *, struct net *, const union sctp_addr *); -int sctp_has_association(struct net *net, const union sctp_addr *laddr, - const union sctp_addr *paddr); +bool sctp_has_association(struct net *net, const union sctp_addr *laddr, + const union sctp_addr *paddr); int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, -- cgit v1.2.3 From 2f635ceeb22ba13c307236d69795fbb29cfa3e7c Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 27 Mar 2018 18:02:13 +0300 Subject: net: Drop pernet_operations::async Synchronous pernet_operations are not allowed anymore. All are asynchronous. So, drop the structure member. Signed-off-by: Kirill Tkhai Signed-off-by: David S. Miller --- include/net/net_namespace.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 09e30bdc7876..37bcf8382b61 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -333,12 +333,6 @@ struct pernet_operations { void (*exit_batch)(struct list_head *net_exit_list); unsigned int *id; size_t size; - /* - * Indicates above methods are allowed to be executed in parallel - * with methods of any other pernet_operations, i.e. 
they are not - * need write locked net_sem. - */ - bool async; }; /* -- cgit v1.2.3 From 4420bf21fb6c0306e36ad58ade1e741fba57ce65 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 27 Mar 2018 18:02:23 +0300 Subject: net: Rename net_sem to pernet_ops_rwsem net_sem is some undefined area name, so it will be better to make the area more defined. Rename it to pernet_ops_rwsem for better readability and better intelligibility. Signed-off-by: Kirill Tkhai Signed-off-by: David S. Miller --- include/net/net_namespace.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 37bcf8382b61..922e8b6fb422 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -60,9 +60,10 @@ struct net { struct list_head list; /* list of network namespaces */ struct list_head exit_list; /* To linked to call pernet exit - * methods on dead net (net_sem - * read locked), or to unregister - * pernet ops (net_sem wr locked). + * methods on dead net ( + * pernet_ops_rwsem read locked), + * or to unregister pernet ops + * (pernet_ops_rwsem write locked). */ struct llist_node cleanup_list; /* namespaces on death row */ @@ -95,8 +96,9 @@ struct net { /* core fib_rules */ struct list_head rules_ops; - struct list_head fib_notifier_ops; /* protected by net_sem */ - + struct list_head fib_notifier_ops; /* Populated by + * register_pernet_subsys() + */ struct net_device *loopback_dev; /* The loopback */ struct netns_core core; struct netns_mib mib; -- cgit v1.2.3 From 8518e9bb98b602eca0717d5aaad63ccbe56539d2 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 27 Mar 2018 18:02:32 +0300 Subject: net: Add more comments This adds comments to different places to improve readability. Signed-off-by: Kirill Tkhai Signed-off-by: David S. 
Miller --- include/net/net_namespace.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 922e8b6fb422..1ab4f920f109 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -323,6 +323,10 @@ struct pernet_operations { * have to keep in mind all other pernet_operations and * to introduce a locking, if they share common resources. * + * The only time they are called with exclusive lock is + * from register_pernet_subsys(), unregister_pernet_subsys() + * register_pernet_device() and unregister_pernet_device(). + * * Exit methods using blocking RCU primitives, such as * synchronize_rcu(), should be implemented via exit_batch. * Then, destruction of a group of net requires single -- cgit v1.2.3 From a25e21f0bcd25673b91b97b9805db33350feec0f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 27 Mar 2018 23:03:00 +0100 Subject: rxrpc, afs: Use debug_ids rather than pointers in traces In rxrpc and afs, use the debug_ids that are monotonically allocated to various objects as they're allocated rather than pointers as kernel pointers are now hashed making them less useful. Further, the debug ids aren't reused anywhere nearly as quickly. In addition, allow kernel services that use rxrpc, such as afs, to take numbers from the rxrpc counter, assign them to their own call struct and pass them in to rxrpc for both client and service calls so that the trace lines for each will have the same ID tag. Signed-off-by: David Howells --- include/net/af_rxrpc.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 2b3a6eec4570..8ae8ee004258 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -31,6 +31,11 @@ enum rxrpc_call_completion { NR__RXRPC_CALL_COMPLETIONS }; +/* + * Debug ID counter for tracing. 
+ */ +extern atomic_t rxrpc_debug_id; + typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *, unsigned long); typedef void (*rxrpc_notify_end_tx_t)(struct sock *, struct rxrpc_call *, @@ -50,7 +55,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, s64, gfp_t, rxrpc_notify_rx_t, - bool); + bool, + unsigned int); int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, struct msghdr *, size_t, rxrpc_notify_end_tx_t); @@ -63,7 +69,8 @@ void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); u64 rxrpc_kernel_get_rtt(struct socket *, struct rxrpc_call *); int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, - rxrpc_user_attach_call_t, unsigned long, gfp_t); + rxrpc_user_attach_call_t, unsigned long, gfp_t, + unsigned int); void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *, struct key *); -- cgit v1.2.3 From 5e78abd075e562fd5748ac3bfb067941e8baf6c7 Mon Sep 17 00:00:00 2001 From: "tamizhr@codeaurora.org" Date: Tue, 27 Mar 2018 19:16:15 +0530 Subject: cfg80211: fix data type of sta_opmode_info parameter Currently bw and smps_mode are u8 type value in sta_opmode_info structure. This values filled in mac80211 from ieee80211_sta_rx_bandwidth and ieee80211_smps_mode. These enum values are specific to mac80211 and userspace/cfg80211 doesn't know about that. This will lead to incorrect result/assumption by the user space application. Change bw and smps_mode parameters to their respective enums in nl80211. 
Signed-off-by: Tamizh chelvam Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fc40843baed3..4341508bc6a4 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3572,15 +3572,15 @@ enum wiphy_opmode_flag { /** * struct sta_opmode_info - Station's ht/vht operation mode information * @changed: contains value from &enum wiphy_opmode_flag - * @smps_mode: New SMPS mode of a station - * @bw: new max bandwidth value of a station + * @smps_mode: New SMPS mode value from &enum nl80211_smps_mode of a station + * @bw: new max bandwidth value from &enum nl80211_chan_width of a station * @rx_nss: new rx_nss value of a station */ struct sta_opmode_info { u32 changed; - u8 smps_mode; - u8 bw; + enum nl80211_smps_mode smps_mode; + enum nl80211_chan_width bw; u8 rx_nss; }; -- cgit v1.2.3 From 230ebaa189af44d50dccb4a1846e39ca594e347b Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Wed, 28 Mar 2018 13:24:09 +0300 Subject: cfg80211: read wmm rules from regulatory database ETSI EN 301 893 v2.1.1 (2017-05) standard defines a new channel access mechanism that all devices (WLAN and LAA) need to comply with. The regulatory database can now be loaded into the kernel and also has the option to load optional data. In order to be able to comply with ETSI standard, we add wmm_rule into regulatory rule and add the option to read its value from the regulatory database. 
Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho [johannes: fix memory leak in error path] Signed-off-by: Johannes Berg --- include/net/regulatory.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/net') diff --git a/include/net/regulatory.h b/include/net/regulatory.h index f83cacce3308..60f8cc86a447 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -4,6 +4,7 @@ * regulatory support structures * * Copyright 2008-2009 Luis R. Rodriguez + * Copyright (C) 2018 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -188,9 +189,35 @@ struct ieee80211_power_rule { u32 max_eirp; }; +/** + * struct ieee80211_wmm_ac - used to store per ac wmm regulatory limitation + * + * The information provided in this structure is required for QoS + * transmit queue configuration. Cf. IEEE 802.11 7.3.2.29. + * + * @cw_min: minimum contention window [a value of the form + * 2^n-1 in the range 1..32767] + * @cw_max: maximum contention window [like @cw_min] + * @cot: maximum burst time in units of 32 usecs, 0 meaning disabled + * @aifsn: arbitration interframe space [0..255] + * + */ +struct ieee80211_wmm_ac { + u16 cw_min; + u16 cw_max; + u16 cot; + u8 aifsn; +}; + +struct ieee80211_wmm_rule { + struct ieee80211_wmm_ac client[IEEE80211_NUM_ACS]; + struct ieee80211_wmm_ac ap[IEEE80211_NUM_ACS]; +}; + struct ieee80211_reg_rule { struct ieee80211_freq_range freq_range; struct ieee80211_power_rule power_rule; + struct ieee80211_wmm_rule *wmm_rule; u32 flags; u32 dfs_cac_ms; }; @@ -198,6 +225,7 @@ struct ieee80211_reg_rule { struct ieee80211_regdomain { struct rcu_head rcu_head; u32 n_reg_rules; + u32 n_wmm_rules; char alpha2[3]; enum nl80211_dfs_regions dfs_region; struct ieee80211_reg_rule reg_rules[]; -- cgit v1.2.3 From 19d3577e35e0cbb42694811b096e749a0f89a824 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: 
Wed, 28 Mar 2018 13:24:11 +0300 Subject: cfg80211: Add API to allow querying regdb for wmm_rule In general, regulatory self-managed devices maintain their own regulatory profiles and thus don't have to query the regulatory database on country change. ETSI has recently introduced a new channel access mechanism for 5GHz that all wlan devices need to comply with. These values are stored in the regulatory database. There are self-managed devices which can't maintain these values on their own. Add an API to allow self-managed regulatory devices to query the regulatory database for the high band wmm rule. Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho [johannes: fix documentation] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4341508bc6a4..bfe174896fcf 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6,6 +6,7 @@ * Copyright 2006-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH + * Copyright (C) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -4657,6 +4658,33 @@ const struct ieee80211_reg_rule *freq_reg_info(struct wiphy *wiphy, */ const char *reg_initiator_name(enum nl80211_reg_initiator initiator); +/** + * DOC: Internal regulatory db functions + * + */ + +/** + * reg_query_regdb_wmm - Query internal regulatory db for wmm rule + * Regulatory self-managed driver can use it to proactively + * + * @alpha2: the ISO/IEC 3166 alpha2 wmm rule to be queried. + * @freq: the frequency (in MHz) to be queried. + * @ptr: pointer where the regdb wmm data is to be stored (or %NULL if + * irrelevant). This can be used later for deduplication. + * @rule: pointer to store the wmm rule from the regulatory db.
+ * + * Self-managed wireless drivers can use this function to query + * the internal regulatory database to check whether the given + * ISO/IEC 3166 alpha2 country and freq have wmm rule limitations. + * + * Drivers should check the return value, its possible you can get + * an -ENODATA. + * + * Return: 0 on success. -ENODATA. + */ +int reg_query_regdb_wmm(char *alpha2, int freq, u32 *ptr, + struct ieee80211_wmm_rule *rule); + /* * callbacks for asynchronous cfg80211 methods, notification * functions and BSS handling helpers -- cgit v1.2.3 From 6a671a50f8199b3e1fe49fa8afff0fc8335da79c Mon Sep 17 00:00:00 2001 From: Denis Kenzior Date: Mon, 26 Mar 2018 12:52:41 -0500 Subject: nl80211: Add CMD_CONTROL_PORT_FRAME API This commit also adds cfg80211_rx_control_port function. This is used to generate a CMD_CONTROL_PORT_FRAME event out to userspace. The conn_owner_nlportid is used as the unicast destination. This means that userspace must specify NL80211_ATTR_SOCKET_OWNER flag if control port over nl80211 routing is requested in NL80211_CMD_CONNECT, NL80211_CMD_ASSOCIATE, NL80211_CMD_START_AP or IBSS/mesh join. 
Signed-off-by: Denis Kenzior [johannes: fix return value of cfg80211_rx_control_port()] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index bfe174896fcf..df145f76adad 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5721,6 +5721,28 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, const u8 *buf, size_t len, bool ack, gfp_t gfp); +/** + * cfg80211_rx_control_port - notification about a received control port frame + * @dev: The device the frame matched to + * @buf: control port frame + * @len: length of the frame data + * @addr: The peer from which the frame was received + * @proto: frame protocol, typically PAE or Pre-authentication + * @unencrypted: Whether the frame was received unencrypted + * + * This function is used to inform userspace about a received control port + * frame. It should only be used if userspace indicated it wants to receive + * control port frames over nl80211. + * + * The frame is the data portion of the 802.3 or 802.11 data frame with all + * network layer headers removed (e.g. the raw EAPoL frame). + * + * Return: %true if the frame was passed to userspace + */ +bool cfg80211_rx_control_port(struct net_device *dev, + const u8 *buf, size_t len, + const u8 *addr, u16 proto, bool unencrypted); + /** * cfg80211_cqm_rssi_notify - connection quality monitoring rssi event * @dev: network device -- cgit v1.2.3 From 2576a9ace47eba28a682d249d1d6402f891808c9 Mon Sep 17 00:00:00 2001 From: Denis Kenzior Date: Mon, 26 Mar 2018 12:52:42 -0500 Subject: nl80211: Implement TX of control port frames This commit implements the TX side of NL80211_CMD_CONTROL_PORT_FRAME. Userspace provides the raw EAPoL frame using NL80211_ATTR_FRAME. Userspace should also provide the destination address and the protocol type to use when sending the frame. 
This is used to implement TX of Pre-authentication frames. If CONTROL_PORT_ETHERTYPE_NO_ENCRYPT is specified, then the driver will be asked not to encrypt the outgoing frame. A new EXT_FEATURE flag is introduced so that nl80211 code can check whether a given wiphy has capability to pass EAPoL frames over nl80211. Signed-off-by: Denis Kenzior Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index df145f76adad..de2894a4ad10 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2961,6 +2961,9 @@ struct cfg80211_external_auth_params { * * @external_auth: indicates result of offloaded authentication processing from * user space + * + * @tx_control_port: TX a control port frame (EAPoL). The noencrypt parameter + * tells the driver that the frame should not be encrypted. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -3256,6 +3259,12 @@ struct cfg80211_ops { const u8 *aa); int (*external_auth)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_external_auth_params *params); + + int (*tx_control_port)(struct wiphy *wiphy, + struct net_device *dev, + const u8 *buf, size_t len, + const u8 *dest, const __be16 proto, + const bool noencrypt); }; /* -- cgit v1.2.3 From 64bf3d4bc2b0725b3c5ffadd982a9746bfc738b7 Mon Sep 17 00:00:00 2001 From: Denis Kenzior Date: Mon, 26 Mar 2018 12:52:43 -0500 Subject: nl80211: Add CONTROL_PORT_OVER_NL80211 attribute Signed-off-by: Denis Kenzior Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index de2894a4ad10..0bd957b37208 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -647,6 +647,8 @@ struct survey_info { * allowed through even on unauthorized ports * @control_port_no_encrypt: TRUE 
to prevent encryption of control port * protocol frames. + * @control_port_over_nl80211: TRUE if userspace expects to exchange control + * port frames over NL80211 instead of the network interface. * @wep_keys: static WEP keys, if not NULL points to an array of * CFG80211_MAX_WEP_KEYS WEP keys * @wep_tx_key: key index (0..3) of the default TX static WEP key @@ -662,6 +664,7 @@ struct cfg80211_crypto_settings { bool control_port; __be16 control_port_ethertype; bool control_port_no_encrypt; + bool control_port_over_nl80211; struct key_params *wep_keys; int wep_tx_key; const u8 *psk; -- cgit v1.2.3 From c3bfe1f6fc98e7185ff5ee9279ba259fe484597c Mon Sep 17 00:00:00 2001 From: Denis Kenzior Date: Mon, 26 Mar 2018 12:52:48 -0500 Subject: nl80211: Add control_port_over_nl80211 for ibss Signed-off-by: Denis Kenzior Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0bd957b37208..ed2773f8558e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2034,6 +2034,8 @@ struct cfg80211_disassoc_request { * sets/clears %NL80211_STA_FLAG_AUTHORIZED. If true, the driver is * required to assume that the port is unauthorized until authorized by * user space. Otherwise, port is marked authorized by default. + * @control_port_over_nl80211: TRUE if userspace expects to exchange control + * port frames over NL80211 instead of the network interface. * @userspace_handles_dfs: whether user space controls DFS operation, i.e. * changes the channel when a radar is detected. This is required * to operate on DFS channels. 
@@ -2057,6 +2059,7 @@ struct cfg80211_ibss_params { bool channel_fixed; bool privacy; bool control_port; + bool control_port_over_nl80211; bool userspace_handles_dfs; int mcast_rate[NUM_NL80211_BANDS]; struct ieee80211_ht_cap ht_capa; -- cgit v1.2.3 From 1224f5831a22977f30c1842874be12c58608cee7 Mon Sep 17 00:00:00 2001 From: Denis Kenzior Date: Mon, 26 Mar 2018 12:52:49 -0500 Subject: nl80211: Add control_port_over_nl80211 to mesh_setup Signed-off-by: Denis Kenzior Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ed2773f8558e..250dac390806 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1454,6 +1454,8 @@ struct mesh_config { * @userspace_handles_dfs: whether user space controls DFS operation, i.e. * changes the channel when a radar is detected. This is required * to operate on DFS channels. + * @control_port_over_nl80211: TRUE if userspace expects to exchange control + * port frames over NL80211 instead of the network interface. * * These parameters are fixed when the mesh is created. */ @@ -1476,6 +1478,7 @@ struct mesh_setup { u32 basic_rates; struct cfg80211_bitrate_mask beacon_rate; bool userspace_handles_dfs; + bool control_port_over_nl80211; }; /** -- cgit v1.2.3 From f0b07bb151b098d291fd1fd71ef7a2df56fb124a Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 29 Mar 2018 19:20:32 +0300 Subject: net: Introduce net_rwsem to protect net_namespace_list rtnl_lock() is used everywhere, and contention is very high. When someone wants to iterate over alive net namespaces, he/she has no a possibility to do that without exclusive lock. But the exclusive rtnl_lock() in such places is overkill, and it just increases the contention. Yes, there is already for_each_net_rcu() in kernel, but it requires rcu_read_lock(), and this can't be sleepable. 
Also, sometimes it may really be necessary to prevent net_namespace_list growth, so for_each_net_rcu() does not fit there. This patch introduces a new rw_semaphore, which will be used instead of rtnl_mutex to protect net_namespace_list. It is sleepable and allows non-exclusive iteration over the net namespace list. It allows us to stop using rtnl_lock() in several places (which is done in the next patches) and reduces the time we hold rtnl_mutex. Here we just add the new lock, while the explanation of why we can remove rtnl_lock() in those places is in the next patches. Fine-grained locks are generally better than one big lock, so let's do that with net_namespace_list while the situation allows it. Signed-off-by: Kirill Tkhai Signed-off-by: David S. Miller --- include/net/net_namespace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 1ab4f920f109..47e35cce3b64 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -291,6 +291,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #endif } +/* Protected by net_rwsem */ #define for_each_net(VAR) \ list_for_each_entry(VAR, &net_namespace_list, list) -- cgit v1.2.3 From 8934ce2fd08171e8605f7fada91ee7619fe17ab8 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 28 Mar 2018 12:49:15 -0700 Subject: bpf: sockmap redirect ingress support Add support for the BPF_F_INGRESS flag in sk_msg redirect helper. To do this add a scatterlist ring for receiving socks to check before calling into regular recvmsg call path. Additionally, because the poll wakeup logic only checked the skb recv queue we need to add a hook in TCP stack (similar to write side) so that we have a way to wake up polling socks when a scatterlist is redirected to that sock. After this all that is needed is for the redirect helper to push the scatterlist into the psock receive queue.
Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/net/sock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 709311132d4c..b8ff435fa96e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1085,6 +1085,7 @@ struct proto { #endif bool (*stream_memory_free)(const struct sock *sk); + bool (*stream_memory_read)(const struct sock *sk); /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); void (*leave_memory_pressure)(struct sock *sk); -- cgit v1.2.3 From 32537e91847a5686d57d3811c075a46b2d9b6434 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 27 Mar 2018 11:53:05 +0200 Subject: netfilter: nf_tables: rename struct nf_chain_type Use nft_ prefix. By when I added chain types, I forgot to use the nftables prefix. Rename enum nft_chain_type to enum nft_chain_types too, otherwise there is an overlap. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 663b015dace5..4a304997c304 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -868,7 +868,7 @@ struct nft_chain { char *name; }; -enum nft_chain_type { +enum nft_chain_types { NFT_CHAIN_T_DEFAULT = 0, NFT_CHAIN_T_ROUTE, NFT_CHAIN_T_NAT, @@ -876,7 +876,7 @@ enum nft_chain_type { }; /** - * struct nf_chain_type - nf_tables chain type info + * struct nft_chain_type - nf_tables chain type info * * @name: name of the type * @type: numeric identifier @@ -885,9 +885,9 @@ enum nft_chain_type { * @hook_mask: mask of valid hooks * @hooks: array of hook functions */ -struct nf_chain_type { +struct nft_chain_type { const char *name; - enum nft_chain_type type; + enum nft_chain_types type; int family; struct module *owner; unsigned int hook_mask; @@ -895,7 +895,7 @@ 
struct nf_chain_type { }; int nft_chain_validate_dependency(const struct nft_chain *chain, - enum nft_chain_type type); + enum nft_chain_types type); int nft_chain_validate_hooks(const struct nft_chain *chain, unsigned int hook_flags); @@ -917,7 +917,7 @@ struct nft_stats { */ struct nft_base_chain { struct nf_hook_ops ops; - const struct nf_chain_type *type; + const struct nft_chain_type *type; u8 policy; u8 flags; struct nft_stats __percpu *stats; @@ -970,8 +970,8 @@ struct nft_table { char *name; }; -int nft_register_chain_type(const struct nf_chain_type *); -void nft_unregister_chain_type(const struct nf_chain_type *); +int nft_register_chain_type(const struct nft_chain_type *); +void nft_unregister_chain_type(const struct nft_chain_type *); int nft_register_expr(struct nft_expr_type *); void nft_unregister_expr(struct nft_expr_type *); -- cgit v1.2.3 From cc07eeb0e5ee18895241460bdccf91a4952731f9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 27 Mar 2018 11:53:06 +0200 Subject: netfilter: nf_tables: nft_register_chain_type() returns void Use WARN_ON() instead since it should not happen that neither family goes over NFPROTO_NUMPROTO nor there is already a chain of this type already registered. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 4a304997c304..1f7148fe0504 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -970,7 +970,7 @@ struct nft_table { char *name; }; -int nft_register_chain_type(const struct nft_chain_type *); +void nft_register_chain_type(const struct nft_chain_type *); void nft_unregister_chain_type(const struct nft_chain_type *); int nft_register_expr(struct nft_expr_type *); -- cgit v1.2.3 From 02c7b25e5f54321b9063e18d4f52cce07f8e081d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 27 Mar 2018 11:53:07 +0200 Subject: netfilter: nf_tables: build-in filter chain type One module per supported filter chain family type takes too much memory for very little code - too much modularization - place all chain filter definitions in one single file. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 1f7148fe0504..77c3c04c27ac 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1345,4 +1345,7 @@ struct nft_trans_flowtable { #define nft_trans_flowtable(trans) \ (((struct nft_trans_flowtable *)trans->data)->flowtable) +int __init nft_chain_filter_init(void); +void __exit nft_chain_filter_fini(void); + #endif /* _NET_NF_TABLES_H */ -- cgit v1.2.3 From 43a605f2f722b6e08addedae8545b490fca252c4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 27 Mar 2018 11:53:08 +0200 Subject: netfilter: nf_tables: enable conntrack if NAT chain is registered Register conntrack hooks if the user adds NAT chains. 
Users get confused with the existing behaviour since they will see no packets hitting this chain until they add the first rule that refers to conntrack. This patch adds new ->init() and ->free() indirections to chain types that can be used by NAT chains to invoke the conntrack dependency. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 77c3c04c27ac..e26b94a61a99 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -884,6 +884,8 @@ enum nft_chain_types { * @owner: module owner * @hook_mask: mask of valid hooks * @hooks: array of hook functions + * @init: chain initialization function + * @free: chain release function */ struct nft_chain_type { const char *name; @@ -892,6 +894,8 @@ struct nft_chain_type { struct module *owner; unsigned int hook_mask; nf_hookfn *hooks[NF_MAX_HOOKS]; + int (*init)(struct nft_ctx *ctx); + void (*free)(struct nft_ctx *ctx); }; int nft_chain_validate_dependency(const struct nft_chain *chain, -- cgit v1.2.3 From 10659cbab72b7bfee1a886018d1915a9549b6378 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 28 Mar 2018 12:06:49 +0200 Subject: netfilter: nf_tables: rename to nft_set_lookup_global() To prepare shorter introduction of shorter function prefix. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e26b94a61a99..bd2a18d66189 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -434,11 +434,11 @@ static inline struct nft_set *nft_set_container_of(const void *priv) return (void *)priv - offsetof(struct nft_set, data); } -struct nft_set *nft_set_lookup(const struct net *net, - const struct nft_table *table, - const struct nlattr *nla_set_name, - const struct nlattr *nla_set_id, - u8 genmask); +struct nft_set *nft_set_lookup_global(const struct net *net, + const struct nft_table *table, + const struct nlattr *nla_set_name, + const struct nlattr *nla_set_id, + u8 genmask); static inline unsigned long nft_set_gc_interval(const struct nft_set *set) { -- cgit v1.2.3 From e9a441b6e729e16092fcc18e3962b952a01d1e3c Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 29 Mar 2018 17:03:25 +0300 Subject: xfrm: Register xfrm_dev_notifier in appropriate place Currently, driver registers it from pernet_operations::init method, and this breaks modularity, because initialization of net namespace and netdevice notifiers are orthogonal actions. We don't have per-namespace netdevice notifiers; all of them are global for all devices in all namespaces. Signed-off-by: Kirill Tkhai Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index aa027ba1d032..a872379b69da 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1894,7 +1894,7 @@ static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb) #endif } -void __net_init xfrm_dev_init(void); +void __init xfrm_dev_init(void); #ifdef CONFIG_XFRM_OFFLOAD void xfrm_dev_resume(struct sk_buff *skb); -- cgit v1.2.3 From 3679d585bbc07a1ac4448d5b478b492cad3587ce Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 30 Mar 2018 15:08:04 -0700 Subject: net: Introduce __inet_bind() and __inet6_bind Refactor `bind()` code to make it ready to be called from BPF helper function `bpf_bind()` (will be added soon). Implementation of `inet_bind()` and `inet6_bind()` is separated into `__inet_bind()` and `__inet6_bind()` correspondingly. These function can be used from both `sk_prot->bind` and `bpf_bind()` contexts. New functions have two additional arguments. `force_bind_address_no_port` forces binding to IP only w/o checking `inet_sock.bind_address_no_port` field. It'll allow to bind local end of a connection to desired IP in `bpf_bind()` w/o changing `bind_address_no_port` field of a socket. It's useful since `bpf_bind()` can return an error and we'd need to restore original value of `bind_address_no_port` in that case if we changed this before calling to the helper. `with_lock` specifies whether to lock socket when working with `struct sk` or not. The argument is set to `true` for `sk_prot->bind`, i.e. old behavior is preserved. But it will be set to `false` for `bpf_bind()` use-case. The reason is all call-sites, where `bpf_bind()` will be called, already hold that socket lock. 
Signed-off-by: Andrey Ignatov Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/net/inet_common.h | 2 ++ include/net/ipv6.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 500f81375200..384b90c62c0b 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -32,6 +32,8 @@ int inet_shutdown(struct socket *sock, int how); int inet_listen(struct socket *sock, int backlog); void inet_sock_destruct(struct sock *sk); int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); +int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, + bool force_bind_address_no_port, bool with_lock); int inet_getname(struct socket *sock, struct sockaddr *uaddr, int peer); int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 50a6f0ddb878..2e5fedc56e59 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1066,6 +1066,8 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info); void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu); int inet6_release(struct socket *sock); +int __inet6_bind(struct sock *sock, struct sockaddr *uaddr, int addr_len, + bool force_bind_address_no_port, bool with_lock); int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer); -- cgit v1.2.3 From d74bad4e74ee373787a9ae24197c17b7cdc428d5 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 30 Mar 2018 15:08:05 -0700 Subject: bpf: Hooks for sys_connect == The problem == See description of the problem in the initial patch of this patch set. == The solution == The patch provides a much more reliable in-kernel solution for the 2nd part of the problem: making an outgoing connection from the desired IP.
It adds new attach types `BPF_CGROUP_INET4_CONNECT` and `BPF_CGROUP_INET6_CONNECT` for program type `BPF_PROG_TYPE_CGROUP_SOCK_ADDR` that can be used to override both source and destination of a connection at connect(2) time. Local end of connection can be bound to desired IP using newly introduced BPF-helper `bpf_bind()`. It allows to bind to only IP though, and doesn't support binding to port, i.e. leverages `IP_BIND_ADDRESS_NO_PORT` socket option. There are two reasons for this: * looking for a free port is expensive and can affect performance significantly; * there is no use-case for port. As for remote end (`struct sockaddr *` passed by user), both parts of it can be overridden, remote IP and remote port. It's useful if an application inside cgroup wants to connect to another application inside same cgroup or to itself, but knows nothing about IP assigned to the cgroup. Support is added for IPv4 and IPv6, for TCP and UDP. IPv4 and IPv6 have separate attach types for same reason as sys_bind hooks, i.e. to prevent reading from / writing to e.g. user_ip6 fields when user passes sockaddr_in since it'd be out-of-bound. == Implementation notes == The patch introduces new field in `struct proto`: `pre_connect` that is a pointer to a function with same signature as `connect` but is called before it. The reason is in some cases BPF hooks should be called way before control is passed to `sk->sk_prot->connect`. Specifically `inet_dgram_connect` autobinds socket before calling `sk->sk_prot->connect` and there is no way to call `bpf_bind()` from hooks from e.g. `ip4_datagram_connect` or `ip6_datagram_connect` since it'd cause double-bind. On the other hand `proto.pre_connect` provides a flexible way to add BPF hooks for connect only for necessary `proto` and call them at desired time before `connect`. Since `bpf_bind()` is allowed to bind only to IP and autobind in `inet_dgram_connect` binds only port there is no chance of double-bind. 
bpf_bind() sets `force_bind_address_no_port` to bind to only IP despite of value of `bind_address_no_port` socket field. bpf_bind() sets `with_lock` to `false` when calling to __inet_bind() and __inet6_bind() since all call-sites, where bpf_bind() is called, already hold socket lock. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/net/addrconf.h | 7 +++++++ include/net/sock.h | 3 +++ include/net/udp.h | 1 + 3 files changed, 11 insertions(+) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 132e5b95167a..378d601258be 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -231,6 +231,13 @@ struct ipv6_stub { }; extern const struct ipv6_stub *ipv6_stub __read_mostly; +/* A stub used by bpf helpers. Similarly ugly as ipv6_stub */ +struct ipv6_bpf_stub { + int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len, + bool force_bind_address_no_port, bool with_lock); +}; +extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; + /* * identify MLD packets for MLD filter exceptions */ diff --git a/include/net/sock.h b/include/net/sock.h index b8ff435fa96e..49bd2c1796b0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1026,6 +1026,9 @@ static inline void sk_prot_clear_nulls(struct sock *sk, int size) struct proto { void (*close)(struct sock *sk, long timeout); + int (*pre_connect)(struct sock *sk, + struct sockaddr *uaddr, + int addr_len); int (*connect)(struct sock *sk, struct sockaddr *uaddr, int addr_len); diff --git a/include/net/udp.h b/include/net/udp.h index 850a8e581cce..0676b272f6ac 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -273,6 +273,7 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); int udp_rcv(struct sk_buff *skb); int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); int udp_init_sock(struct sock *sk); +int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int 
addr_len); int __udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags); __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait); -- cgit v1.2.3 From c22af22cbdc206a0273d0e6d773bd3dfc99d2b02 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 31 Mar 2018 12:58:42 -0700 Subject: ipv6: frag: remove unused field csum field in struct frag_queue is not used, remove it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ipv6.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 50a6f0ddb878..5c18836672e9 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -603,7 +603,6 @@ struct frag_queue { struct in6_addr daddr; int iif; - unsigned int csum; __u16 nhoffset; u8 ecn; }; -- cgit v1.2.3 From 787bea7748a76130566f881c2342a0be4127d182 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 31 Mar 2018 12:58:43 -0700 Subject: inet: frags: change inet_frags_init_net() return value We will soon initialize one rhashtable per struct netns_frags in inet_frags_init_net(). This patch changes the return value to eventually propagate an error. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/inet_frag.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 351f0c3cdcd9..b1d62176f3b4 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -104,9 +104,10 @@ struct inet_frags { int inet_frags_init(struct inet_frags *); void inet_frags_fini(struct inet_frags *); -static inline void inet_frags_init_net(struct netns_frags *nf) +static inline int inet_frags_init_net(struct netns_frags *nf) { atomic_set(&nf->mem, 0); + return 0; } void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); -- cgit v1.2.3 From 093ba72914b696521e4885756a68a3332782c8de Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 31 Mar 2018 12:58:44 -0700 Subject: inet: frags: add a pointer to struct netns_frags In order to simplify the API, add a pointer to struct inet_frags. This will allow us to make things less complex. These functions no longer have a struct inet_frags parameter : inet_frag_destroy(struct inet_frag_queue *q /*, struct inet_frags *f */) inet_frag_put(struct inet_frag_queue *q /*, struct inet_frags *f */) inet_frag_kill(struct inet_frag_queue *q /*, struct inet_frags *f */) inet_frags_exit_net(struct netns_frags *nf /*, struct inet_frags *f */) ip6_expire_frag_queue(struct net *net, struct frag_queue *fq) Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/inet_frag.h | 11 ++++++----- include/net/ipv6.h | 3 +-- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index b1d62176f3b4..69e531ed8189 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -10,6 +10,7 @@ struct netns_frags { int high_thresh; int low_thresh; int max_dist; + struct inet_frags *f; }; /** @@ -109,20 +110,20 @@ static inline int inet_frags_init_net(struct netns_frags *nf) atomic_set(&nf->mem, 0); return 0; } -void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); +void inet_frags_exit_net(struct netns_frags *nf); -void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); -void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f); +void inet_frag_kill(struct inet_frag_queue *q); +void inet_frag_destroy(struct inet_frag_queue *q); struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frags *f, void *key, unsigned int hash); void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, const char *prefix); -static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) +static inline void inet_frag_put(struct inet_frag_queue *q) { if (refcount_dec_and_test(&q->refcnt)) - inet_frag_destroy(q, f); + inet_frag_destroy(q); } static inline bool inet_frag_evicting(struct inet_frag_queue *q) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 5c18836672e9..57b7fe43d2ab 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -607,8 +607,7 @@ struct frag_queue { u8 ecn; }; -void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, - struct inet_frags *frags); +void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq); static inline bool ipv6_addr_any(const struct in6_addr *a) { -- cgit v1.2.3 From 648700f76b03b7e8149d13cc2bdb3355035258a9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 31 Mar 2018 12:58:49 -0700 
Subject: inet: frags: use rhashtables for reassembly units Some applications still rely on IP fragmentation, and to be fair, the Linux reassembly unit is not working under any serious load. It uses static hash tables of 1024 buckets, and up to 128 items per bucket (!!!) A work queue is supposed to garbage collect items when host is under memory pressure, and doing a hash rebuild, changing seed used in hash computations. This work queue blocks softirqs for up to 25 ms when doing a hash rebuild, occurring every 5 seconds if host is under fire. Then there is the problem of sharing this hash table for all netns. It is time to switch to rhashtables, and allocate one of them per netns to speed up netns dismantle, since this is a critical metric these days. Lookup is now using RCU. A followup patch will even remove the refcount hold/release left from prior implementation and save a couple of atomic operations. Before this patch, 16 cpus (16 RX queue NIC) could not handle more than 1 Mpps frags DDOS. After the patch, I reach 9 Mpps without any tuning, and can use up to 2GB of storage for the fragments (exact number depends on frags being evicted after timeout) $ grep FRAG /proc/net/sockstat FRAG: inuse 1966916 memory 2140004608 A followup patch will change the limits for 64bit arches. Signed-off-by: Eric Dumazet Cc: Kirill Tkhai Cc: Herbert Xu Cc: Florian Westphal Cc: Jesper Dangaard Brouer Cc: Alexander Aring Cc: Stefan Schmidt Signed-off-by: David S. 
Miller --- include/net/inet_frag.h | 81 ++++++++++++++++++++++--------------------------- include/net/ipv6.h | 16 +--------- 2 files changed, 38 insertions(+), 59 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 69e531ed8189..3fec0d3a0d01 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -2,7 +2,11 @@ #ifndef __NET_FRAG_H__ #define __NET_FRAG_H__ +#include + struct netns_frags { + struct rhashtable rhashtable ____cacheline_aligned_in_smp; + /* Keep atomic mem on separate cachelines in structs that include it */ atomic_t mem ____cacheline_aligned_in_smp; /* sysctls */ @@ -26,12 +30,30 @@ enum { INET_FRAG_COMPLETE = BIT(2), }; +struct frag_v4_compare_key { + __be32 saddr; + __be32 daddr; + u32 user; + u32 vif; + __be16 id; + u16 protocol; +}; + +struct frag_v6_compare_key { + struct in6_addr saddr; + struct in6_addr daddr; + u32 user; + __be32 id; + u32 iif; +}; + /** * struct inet_frag_queue - fragment queue * - * @lock: spinlock protecting the queue + * @node: rhash node + * @key: keys identifying this frag. * @timer: queue expiration timer - * @list: hash bucket list + * @lock: spinlock protecting this frag * @refcnt: reference count of the queue * @fragments: received fragments head * @fragments_tail: received fragments tail @@ -41,12 +63,16 @@ enum { * @flags: fragment queue flags * @max_size: maximum received fragment size * @net: namespace that this frag belongs to - * @list_evictor: list of queues to forcefully evict (e.g. 
due to low memory) + * @rcu: rcu head for freeing deferall */ struct inet_frag_queue { - spinlock_t lock; + struct rhash_head node; + union { + struct frag_v4_compare_key v4; + struct frag_v6_compare_key v6; + } key; struct timer_list timer; - struct hlist_node list; + spinlock_t lock; refcount_t refcnt; struct sk_buff *fragments; struct sk_buff *fragments_tail; @@ -55,51 +81,20 @@ struct inet_frag_queue { int meat; __u8 flags; u16 max_size; - struct netns_frags *net; - struct hlist_node list_evictor; -}; - -#define INETFRAGS_HASHSZ 1024 - -/* averaged: - * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ / - * rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or - * struct frag_queue)) - */ -#define INETFRAGS_MAXDEPTH 128 - -struct inet_frag_bucket { - struct hlist_head chain; - spinlock_t chain_lock; + struct netns_frags *net; + struct rcu_head rcu; }; struct inet_frags { - struct inet_frag_bucket hash[INETFRAGS_HASHSZ]; - - struct work_struct frags_work; - unsigned int next_bucket; - unsigned long last_rebuild_jiffies; - bool rebuild; - - /* The first call to hashfn is responsible to initialize - * rnd. This is best done with net_get_random_once. - * - * rnd_seqlock is used to let hash insertion detect - * when it needs to re-lookup the hash chain to use. 
- */ - u32 rnd; - seqlock_t rnd_seqlock; unsigned int qsize; - unsigned int (*hashfn)(const struct inet_frag_queue *); - bool (*match)(const struct inet_frag_queue *q, - const void *arg); void (*constructor)(struct inet_frag_queue *q, const void *arg); void (*destructor)(struct inet_frag_queue *); void (*frag_expire)(struct timer_list *t); struct kmem_cache *frags_cachep; const char *frags_cache_name; + struct rhashtable_params rhash_params; }; int inet_frags_init(struct inet_frags *); @@ -108,15 +103,13 @@ void inet_frags_fini(struct inet_frags *); static inline int inet_frags_init_net(struct netns_frags *nf) { atomic_set(&nf->mem, 0); - return 0; + return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params); } void inet_frags_exit_net(struct netns_frags *nf); void inet_frag_kill(struct inet_frag_queue *q); void inet_frag_destroy(struct inet_frag_queue *q); -struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, - struct inet_frags *f, void *key, unsigned int hash); - +struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key); void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, const char *prefix); @@ -128,7 +121,7 @@ static inline void inet_frag_put(struct inet_frag_queue *q) static inline bool inet_frag_evicting(struct inet_frag_queue *q) { - return !hlist_unhashed(&q->list_evictor); + return false; } /* Memory Tracking Functions. 
*/ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 57b7fe43d2ab..6fa9a2bc5896 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -579,17 +579,8 @@ enum ip6_defrag_users { __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX, }; -struct ip6_create_arg { - __be32 id; - u32 user; - const struct in6_addr *src; - const struct in6_addr *dst; - int iif; - u8 ecn; -}; - void ip6_frag_init(struct inet_frag_queue *q, const void *a); -bool ip6_frag_match(const struct inet_frag_queue *q, const void *a); +extern const struct rhashtable_params ip6_rhash_params; /* * Equivalent of ipv4 struct ip @@ -597,11 +588,6 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a); struct frag_queue { struct inet_frag_queue q; - __be32 id; /* fragment id */ - u32 user; - struct in6_addr saddr; - struct in6_addr daddr; - int iif; __u16 nhoffset; u8 ecn; -- cgit v1.2.3 From 6befe4a78b1553edb6eed3a78b4bcd9748526672 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 31 Mar 2018 12:58:50 -0700 Subject: inet: frags: remove some helpers Remove sum_frag_mem_limit(), ip_frag_mem() & ip6_frag_mem() Also since we use rhashtable we can bring back the number of fragments in "grep FRAG /proc/net/sockstat /proc/net/sockstat6" that was removed in commit 434d305405ab ("inet: frag: don't account number of fragment queues") Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/inet_frag.h | 5 ----- include/net/ip.h | 1 - include/net/ipv6.h | 7 ------- 3 files changed, 13 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 3fec0d3a0d01..4b5449df0aad 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -141,11 +141,6 @@ static inline void add_frag_mem_limit(struct netns_frags *nf, int i) atomic_add(i, &nf->mem); } -static inline int sum_frag_mem_limit(struct netns_frags *nf) -{ - return atomic_read(&nf->mem); -} - /* RFC 3168 support : * We want to check ECN values of all fragments, do detect invalid combinations. * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value. diff --git a/include/net/ip.h b/include/net/ip.h index 36f8f7811093..ecffd843e7b8 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -588,7 +588,6 @@ static inline struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *s return skb; } #endif -int ip_frag_mem(struct net *net); /* * Functions provided by ip_forward.c diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 6fa9a2bc5896..37455e840347 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -379,13 +379,6 @@ static inline bool ipv6_accept_ra(struct inet6_dev *idev) idev->cnf.accept_ra; } -#if IS_ENABLED(CONFIG_IPV6) -static inline int ip6_frag_mem(struct net *net) -{ - return sum_frag_mem_limit(&net->ipv6.frags); -} -#endif - #define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */ #define IPV6_FRAG_LOW_THRESH (3 * 1024*1024) /* 3145728 */ #define IPV6_FRAG_TIMEOUT (60 * HZ) /* 60 seconds */ -- cgit v1.2.3 From 399d1404be660d355192ff4df5ccc3f4159ec1e4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 31 Mar 2018 12:58:51 -0700 Subject: inet: frags: get rif of inet_frag_evicting() This refactors ip_expire() since one indentation level is removed. Note: in the future, we should try hard to avoid the skb_clone() since this is a serious performance cost. 
Under DDOS, the ICMP message won't be sent because of rate limits. The fact that ip6_expire_frag_queue() does not use skb_clone() is disturbing too. Presumably IPv6 should have the same issue as the one we fixed in commit ec4fbd64751d ("inet: frag: release spinlock before calling icmp_send()") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_frag.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 4b5449df0aad..0e8e159d88f7 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -119,11 +119,6 @@ static inline void inet_frag_put(struct inet_frag_queue *q) inet_frag_destroy(q); } -static inline bool inet_frag_evicting(struct inet_frag_queue *q) -{ - return false; -} - /* Memory Tracking Functions. */ static inline int frag_mem_limit(struct netns_frags *nf) -- cgit v1.2.3 From 2d44ed22e607f9a285b049de2263e3840673a260 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 31 Mar 2018 12:58:52 -0700 Subject: inet: frags: remove inet_frag_maybe_warn_overflow() This function is obsolete, after rhashtable addition to inet defrag. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/inet_frag.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 0e8e159d88f7..95e353e3305b 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -110,8 +110,6 @@ void inet_frags_exit_net(struct netns_frags *nf); void inet_frag_kill(struct inet_frag_queue *q); void inet_frag_destroy(struct inet_frag_queue *q); struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key); -void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, - const char *prefix); static inline void inet_frag_put(struct inet_frag_queue *q) { -- cgit v1.2.3 From 3e67f106f619dcfaf6f4e2039599bdb69848c714 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 31 Mar 2018 12:58:53 -0700 Subject: inet: frags: break the 2GB limit for frags storage Some users are willing to provision huge amounts of memory to be able to perform reassembly reasonably well under pressure. Current memory tracking is using one atomic_t and integers. Switch to atomic_long_t so that 64bit arches can use more than 2GB, without any cost for 32bit arches. Note that this patch avoids an overflow error, if high_thresh was set to ~2GB, since this test in inet_frag_alloc() was never true : if (... || frag_mem_limit(nf) > nf->high_thresh) Tested: $ echo 16000000000 >/proc/sys/net/ipv4/ipfrag_high_thresh $ grep FRAG /proc/net/sockstat FRAG: inuse 14705885 memory 16000002880 $ nstat -n ; sleep 1 ; nstat | grep Reas IpReasmReqds 3317150 0.0 IpReasmFails 3317112 0.0 Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/inet_frag.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 95e353e3305b..a52e7273e7a5 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -8,11 +8,11 @@ struct netns_frags { struct rhashtable rhashtable ____cacheline_aligned_in_smp; /* Keep atomic mem on separate cachelines in structs that include it */ - atomic_t mem ____cacheline_aligned_in_smp; + atomic_long_t mem ____cacheline_aligned_in_smp; /* sysctls */ + long high_thresh; + long low_thresh; int timeout; - int high_thresh; - int low_thresh; int max_dist; struct inet_frags *f; }; @@ -102,7 +102,7 @@ void inet_frags_fini(struct inet_frags *); static inline int inet_frags_init_net(struct netns_frags *nf) { - atomic_set(&nf->mem, 0); + atomic_long_set(&nf->mem, 0); return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params); } void inet_frags_exit_net(struct netns_frags *nf); @@ -119,19 +119,19 @@ static inline void inet_frag_put(struct inet_frag_queue *q) /* Memory Tracking Functions. 
*/ -static inline int frag_mem_limit(struct netns_frags *nf) +static inline long frag_mem_limit(const struct netns_frags *nf) { - return atomic_read(&nf->mem); + return atomic_long_read(&nf->mem); } -static inline void sub_frag_mem_limit(struct netns_frags *nf, int i) +static inline void sub_frag_mem_limit(struct netns_frags *nf, long val) { - atomic_sub(i, &nf->mem); + atomic_long_sub(val, &nf->mem); } -static inline void add_frag_mem_limit(struct netns_frags *nf, int i) +static inline void add_frag_mem_limit(struct netns_frags *nf, long val) { - atomic_add(i, &nf->mem); + atomic_long_add(val, &nf->mem); } /* RFC 3168 support : -- cgit v1.2.3 From c2615cf5a761b32bf74e85bddc223dfff3d9b9f0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 31 Mar 2018 12:58:57 -0700 Subject: inet: frags: reorganize struct netns_frags Put the read-mostly fields in a separate cache line at the beginning of struct netns_frags, to reduce false sharing noticed in inet_frag_kill() Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/inet_frag.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index a52e7273e7a5..ed07e3786d98 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -5,16 +5,17 @@ #include struct netns_frags { - struct rhashtable rhashtable ____cacheline_aligned_in_smp; - - /* Keep atomic mem on separate cachelines in structs that include it */ - atomic_long_t mem ____cacheline_aligned_in_smp; /* sysctls */ long high_thresh; long low_thresh; int timeout; int max_dist; struct inet_frags *f; + + struct rhashtable rhashtable ____cacheline_aligned_in_smp; + + /* Keep atomic mem on separate cachelines in structs that include it */ + atomic_long_t mem ____cacheline_aligned_in_smp; }; /** -- cgit v1.2.3 From dd0bed1665d6ca17efd747a90a0bb804b4bf2005 Mon Sep 17 00:00:00 2001 From: Atul Gupta Date: Sat, 31 Mar 2018 21:41:52 +0530 Subject: tls: support for Inline tls record Facility to register Inline TLS drivers to net/tls. Setup TLS_HW_RECORD prot to listen on offload device. Cases handled - Inline TLS device exists, setup prot for TLS_HW_RECORD - At least one Inline TLS exists, sets TLS_HW_RECORD. - If a non-inline device establishes a connection, move to TLS_SW_TX Signed-off-by: Atul Gupta Reviewed-by: Steve Wise Signed-off-by: David S. Miller --- include/net/tls.h | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tls.h b/include/net/tls.h index 437a746300bf..3da8e13a6d96 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -56,6 +56,32 @@ #define TLS_RECORD_TYPE_DATA 0x17 #define TLS_AAD_SPACE_SIZE 13 +#define TLS_DEVICE_NAME_MAX 32 + +/* + * This structure defines the routines for Inline TLS driver. + * The following routines are optional and filled with a + * null pointer if not defined. 
+ * + * @name: Its the name of registered Inline tls device + * @dev_list: Inline tls device list + * int (*feature)(struct tls_device *device); + * Called to return Inline TLS driver capability + * + * int (*hash)(struct tls_device *device, struct sock *sk); + * This function sets Inline driver for listen and program + * device specific functioanlity as required + * + * void (*unhash)(struct tls_device *device, struct sock *sk); + * This function cleans listen state set by Inline TLS driver + */ +struct tls_device { + char name[TLS_DEVICE_NAME_MAX]; + struct list_head dev_list; + int (*feature)(struct tls_device *device); + int (*hash)(struct tls_device *device, struct sock *sk); + void (*unhash)(struct tls_device *device, struct sock *sk); +}; struct tls_sw_context { struct crypto_aead *aead_send; @@ -114,7 +140,7 @@ struct tls_context { void *priv_ctx; - u8 conf:2; + u8 conf:3; struct cipher_context tx; struct cipher_context rx; @@ -135,6 +161,8 @@ struct tls_context { int (*getsockopt)(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); + int (*hash)(struct sock *sk); + void (*unhash)(struct sock *sk); }; int wait_on_pending_writer(struct sock *sk, long *timeo); @@ -283,5 +311,7 @@ static inline struct tls_offload_context *tls_offload_ctx( int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg, unsigned char *record_type); +void tls_register_device(struct tls_device *device); +void tls_unregister_device(struct tls_device *device); #endif /* _TLS_OFFLOAD_H */ -- cgit v1.2.3 From 8c59c264e5e17670c0ad2063fa40e3091b549151 Mon Sep 17 00:00:00 2001 From: Jaganath Kanakkassery Date: Mon, 26 Feb 2018 12:11:07 +0530 Subject: Bluetooth: Fix data type of appearence It should be __le16 instead of __u16 since it's part of the mgmt API. 
Signed-off-by: Jaganath Kanakkassery Signed-off-by: Marcel Holtmann --- include/net/bluetooth/mgmt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 72a456bbbcd5..e7303eee65cd 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -600,7 +600,7 @@ struct mgmt_rp_read_ext_info { #define MGMT_OP_SET_APPEARANCE 0x0043 struct mgmt_cp_set_appearance { - __u16 appearance; + __le16 appearance; } __packed; #define MGMT_SET_APPEARANCE_SIZE 2 -- cgit v1.2.3