Diffstat (limited to 'include')
132 files changed, 3203 insertions, 1334 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index a4c644c1c091..cb3c6b3b89c8 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -17,6 +17,8 @@ struct bpf_map; struct bpf_prog; struct bpf_sock_ops_kern; struct bpf_cgroup_storage; +struct ctl_table; +struct ctl_table_header; #ifdef CONFIG_CGROUP_BPF @@ -109,6 +111,12 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, short access, enum bpf_attach_type type); +int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, + struct ctl_table *table, int write, + void __user *buf, size_t *pcount, + loff_t *ppos, void **new_buf, + enum bpf_attach_type type); + static inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) { @@ -253,6 +261,18 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, \ __ret; \ }) + + +#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos, nbuf) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \ + buf, count, pos, nbuf, \ + BPF_CGROUP_SYSCTL); \ + __ret; \ +}) + int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, struct bpf_prog *prog); int cgroup_bpf_prog_detach(const union bpf_attr *attr, @@ -321,6 +341,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos,nbuf) ({ 0; }) #define for_each_cgroup_storage_type(stype) for (; false; ) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 944ccc310201..59631dd0777c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -57,6 +57,12 @@ struct bpf_map_ops { const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type); + + /* Direct value access helpers. */ + int (*map_direct_value_addr)(const struct bpf_map *map, + u64 *imm, u32 off); + int (*map_direct_value_meta)(const struct bpf_map *map, + u64 imm, u32 *off); }; struct bpf_map { @@ -81,7 +87,8 @@ struct bpf_map { struct btf *btf; u32 pages; bool unpriv_array; - /* 51 bytes hole */ + bool frozen; /* write-once */ + /* 48 bytes hole */ /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. 
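The bpf-cgroup.h hunk above introduces the BPF_CGROUP_SYSCTL attach point: __cgroup_bpf_run_filter_sysctl() plus the BPF_CGROUP_RUN_PROG_SYSCTL() wrapper, with a no-op stub when CONFIG_CGROUP_BPF is off so call sites can stay unconditional. A minimal call-site sketch follows; the real wiring lives in the sysctl proc handler outside this include/ diff, so the function name and surrounding plumbing here are assumptions:

    /* Hypothetical call site: run attached BPF_CGROUP_SYSCTL programs before
     * the ctl_table's proc_handler sees the request, for both reads
     * (write == 0) and writes (write == 1).
     */
    static int example_run_sysctl_progs(struct ctl_table_header *head,
                                        struct ctl_table *table, int write,
                                        void __user *buf, size_t *count,
                                        loff_t *ppos)
    {
        void *new_buf = NULL;
        int error;

        error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf,
                                           count, ppos, &new_buf);
        if (error)
            return error;

        /* On a write, a program may have provided a rewritten value in
         * new_buf (a kernel buffer); passing it on to the proc_handler and
         * freeing it are left out of this sketch. */
        return 0;
    }
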
@@ -177,6 +184,7 @@ enum bpf_arg_type { ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ + ARG_PTR_TO_MAP_VALUE_OR_NULL, /* pointer to stack used as map value or NULL */ /* the following constraints used to prototype bpf_memcmp() and other * functions that access data on eBPF program stack @@ -195,6 +203,9 @@ enum bpf_arg_type { ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ + ARG_PTR_TO_INT, /* pointer to int */ + ARG_PTR_TO_LONG, /* pointer to long */ + ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ }; /* type of values returned from helper functions */ @@ -205,6 +216,7 @@ enum bpf_return_type { RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ + RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@ -262,6 +274,7 @@ enum bpf_reg_type { PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ + PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ }; /* The information passed from prog-specific *_is_valid_access @@ -351,6 +364,7 @@ struct bpf_prog_aux { u32 used_map_cnt; u32 max_ctx_offset; u32 max_pkt_offset; + u32 max_tp_access; u32 stack_depth; u32 id; u32 func_cnt; /* used by non-func prog as the number of func progs */ @@ -420,8 +434,38 @@ struct bpf_array { }; }; +#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ #define MAX_TAIL_CALL_CNT 32 +#define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ + BPF_F_RDONLY_PROG | \ + BPF_F_WRONLY | \ + BPF_F_WRONLY_PROG) + +#define BPF_MAP_CAN_READ BIT(0) +#define BPF_MAP_CAN_WRITE BIT(1) + +static inline u32 bpf_map_flags_to_cap(struct bpf_map *map) +{ + u32 access_flags = map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); + + /* Combination of BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG is + * not possible. + */ + if (access_flags & BPF_F_RDONLY_PROG) + return BPF_MAP_CAN_READ; + else if (access_flags & BPF_F_WRONLY_PROG) + return BPF_MAP_CAN_WRITE; + else + return BPF_MAP_CAN_READ | BPF_MAP_CAN_WRITE; +} + +static inline bool bpf_map_flags_access_ok(u32 access_flags) +{ + return (access_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) != + (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); +} + struct bpf_event_entry { struct perf_event *event; struct file *perf_file; @@ -445,14 +489,6 @@ typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type, u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); -int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, - union bpf_attr __user *uattr); -int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, - union bpf_attr __user *uattr); -int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, - const union bpf_attr *kattr, - union bpf_attr __user *uattr); - /* an array of programs to be executed under rcu_lock. 
* * Typical usage: @@ -643,6 +679,13 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type); int array_map_alloc_check(union bpf_attr *attr); +int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); +int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); +int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -754,6 +797,27 @@ static inline struct bpf_prog *bpf_prog_get_type_path(const char *name, { return ERR_PTR(-EOPNOTSUPP); } + +static inline int bpf_prog_test_run_xdp(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + return -ENOTSUPP; +} + +static inline int bpf_prog_test_run_skb(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + return -ENOTSUPP; +} + +static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + return -ENOTSUPP; +} #endif /* CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, @@ -929,6 +993,8 @@ extern const struct bpf_func_proto bpf_sk_redirect_map_proto; extern const struct bpf_func_proto bpf_spin_lock_proto; extern const struct bpf_func_proto bpf_spin_unlock_proto; extern const struct bpf_func_proto bpf_get_local_storage_proto; +extern const struct bpf_func_proto bpf_strtol_proto; +extern const struct bpf_func_proto bpf_strtoul_proto; /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 08bf2f1fe553..5a9975678d6f 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -25,9 +25,11 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) +BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable) #endif #ifdef CONFIG_CGROUP_BPF BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl) #endif #ifdef CONFIG_BPF_LIRC_MODE2 BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2) @@ -59,6 +61,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) #ifdef CONFIG_NET BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) #if defined(CONFIG_BPF_STREAM_PARSER) BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7d8228d1c898..1305ccbd8fe6 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -207,6 +207,7 @@ struct bpf_verifier_state { struct bpf_verifier_state_list { struct bpf_verifier_state state; struct bpf_verifier_state_list *next; + int miss_cnt, hit_cnt; }; /* Possible states for alu_state member. 
*/ @@ -223,6 +224,10 @@ struct bpf_insn_aux_data { unsigned long map_state; /* pointer/poison value for maps */ s32 call_imm; /* saved imm field of call insn */ u32 alu_limit; /* limit for add/sub register with pointer */ + struct { + u32 map_index; /* index into used_maps[] */ + u32 map_off; /* offset from value base address */ + }; }; int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ int sanitize_stack_off; /* stack slot to be cleared */ @@ -248,6 +253,12 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log) return log->len_used >= log->len_total - 1; } +#define BPF_LOG_LEVEL1 1 +#define BPF_LOG_LEVEL2 2 +#define BPF_LOG_STATS 4 +#define BPF_LOG_LEVEL (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2) +#define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS) + static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) { return log->level && log->ubuf && !bpf_verifier_log_full(log); @@ -274,6 +285,7 @@ struct bpf_verifier_env { bool strict_alignment; /* perform strict pointer alignment checks */ struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ + struct bpf_verifier_state_list *free_list; struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ u32 used_map_cnt; /* number of used maps */ u32 id_gen; /* used to generate unique reg IDs */ @@ -283,7 +295,27 @@ struct bpf_verifier_env { const struct bpf_line_info *prev_linfo; struct bpf_verifier_log log; struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1]; + struct { + int *insn_state; + int *insn_stack; + int cur_stack; + } cfg; u32 subprog_cnt; + /* number of instructions analyzed by the verifier */ + u32 insn_processed; + /* total verification time */ + u64 verification_time; + /* maximum number of verifier states kept in 'branching' instructions */ + u32 max_states_per_insn; + /* total number of allocated verifier states */ + u32 total_states; + /* some states are freed during program analysis. + * this is peak number of states. 
this number dominates kernel + * memory consumption during verification + */ + u32 peak_states; + /* longest register parentage chain walked for liveness marking */ + u32 longest_mark_read_walk; }; __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, diff --git a/include/linux/btf.h b/include/linux/btf.h index 455d31b55828..64cdf2a23d42 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -51,6 +51,7 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); +bool btf_type_is_void(const struct btf_type *t); #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h new file mode 100644 index 000000000000..3911e0586478 --- /dev/null +++ b/include/linux/dsa/8021q.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com> + */ + +#ifndef _NET_DSA_8021Q_H +#define _NET_DSA_8021Q_H + +#include <linux/types.h> + +struct dsa_switch; +struct sk_buff; +struct net_device; +struct packet_type; + +#if IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q) + +int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index, + bool enabled); + +struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, + u16 tpid, u16 tci); + +struct sk_buff *dsa_8021q_rcv(struct sk_buff *skb, struct net_device *netdev, + struct packet_type *pt, u16 *tpid, u16 *tci); + +u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port); + +u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port); + +int dsa_8021q_rx_switch_id(u16 vid); + +int dsa_8021q_rx_source_port(u16 vid); + +#else + +int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index, + bool enabled) +{ + return 0; +} + +struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, + u16 tpid, u16 tci) +{ + return NULL; +} + +struct sk_buff *dsa_8021q_rcv(struct sk_buff *skb, struct net_device *netdev, + struct packet_type *pt, u16 *tpid, u16 *tci) +{ + return NULL; +} + +u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port) +{ + return 0; +} + +u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port) +{ + return 0; +} + +int dsa_8021q_rx_switch_id(u16 vid) +{ + return 0; +} + +int dsa_8021q_rx_source_port(u16 vid) +{ + return 0; +} + +#endif /* IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q) */ + +#endif /* _NET_DSA_8021Q_H */ diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h new file mode 100644 index 000000000000..603a02e5a8cb --- /dev/null +++ b/include/linux/dsa/sja1105.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com> + */ + +/* Included by drivers/net/dsa/sja1105/sja1105.h and net/dsa/tag_sja1105.c */ + +#ifndef _NET_DSA_SJA1105_H +#define _NET_DSA_SJA1105_H + +#include <linux/skbuff.h> +#include <linux/etherdevice.h> +#include <net/dsa.h> + +#define ETH_P_SJA1105 ETH_P_DSA_8021Q + +/* IEEE 802.3 Annex 57A: Slow Protocols PDUs (01:80:C2:xx:xx:xx) */ +#define SJA1105_LINKLOCAL_FILTER_A 0x0180C2000000ull +#define SJA1105_LINKLOCAL_FILTER_A_MASK 0xFFFFFF000000ull +/* IEEE 1588 Annex F: Transport of PTP over Ethernet (01:1B:19:xx:xx:xx) */ +#define SJA1105_LINKLOCAL_FILTER_B 0x011B19000000ull +#define SJA1105_LINKLOCAL_FILTER_B_MASK 0xFFFFFF000000ull + +enum sja1105_frame_type { + SJA1105_FRAME_TYPE_NORMAL = 0, + 
SJA1105_FRAME_TYPE_LINK_LOCAL, +}; + +struct sja1105_skb_cb { + enum sja1105_frame_type type; +}; + +#define SJA1105_SKB_CB(skb) \ + ((struct sja1105_skb_cb *)DSA_SKB_CB_PRIV(skb)) + +struct sja1105_port { + struct dsa_port *dp; + int mgmt_slot; +}; + +#endif /* _NET_DSA_SJA1105_H */ diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index aa8bfd6f738c..25c0d049336f 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -33,7 +33,7 @@ struct device; int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr); unsigned char *arch_get_platform_mac_address(void); int nvmem_get_mac_address(struct device *dev, void *addrbuf); -u32 eth_get_headlen(void *data, unsigned int max_len); +u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len); __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); extern const struct header_ops eth_header_ops; diff --git a/include/linux/filter.h b/include/linux/filter.h index 7d3abde3f183..7148bab96943 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -34,6 +34,8 @@ struct bpf_prog_aux; struct xdp_rxq_info; struct xdp_buff; struct sock_reuseport; +struct ctl_table; +struct ctl_table_header; /* ArgX, context and stack frame pointer register positions. Note, * Arg1, Arg2, Arg3, etc are used as argument mappings of function @@ -1167,4 +1169,18 @@ struct bpf_sock_ops_kern { */ }; +struct bpf_sysctl_kern { + struct ctl_table_header *head; + struct ctl_table *table; + void *cur_val; + size_t cur_len; + void *new_val; + size_t new_len; + int new_updated; + int write; + loff_t *ppos; + /* Temporary "register" for indirect stores to ppos. */ + u64 tmp_reg; +}; + #endif /* __LINUX_FILTER_H__ */ diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 83f81ac53282..6cb82301d8e9 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -233,7 +233,6 @@ const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd) { \ handler \ .cmd = op_name, \ - .policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy), \ }, #define ZZZ_genl_ops CONCAT_(GENL_MAGIC_FAMILY, _genl_ops) @@ -290,7 +289,8 @@ static struct genl_family ZZZ_genl_family __ro_after_init = { #ifdef GENL_MAGIC_FAMILY_HDRSZ .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), #endif - .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1, + .maxattr = ARRAY_SIZE(CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy))-1, + .policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy), .ops = ZZZ_genl_ops, .n_ops = ARRAY_SIZE(ZZZ_genl_ops), .mcgrps = ZZZ_genl_mcgrps, diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 48703ec60d06..61f0a316c6ac 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1557,7 +1557,7 @@ struct ieee80211_vht_operation { * struct ieee80211_he_cap_elem - HE capabilities element * * This structure is the "HE capabilities element" fixed fields as - * described in P802.11ax_D3.0 section 9.4.2.237.2 and 9.4.2.237.3 + * described in P802.11ax_D4.0 section 9.4.2.242.2 and 9.4.2.242.3 */ struct ieee80211_he_cap_elem { u8 mac_cap_info[6]; @@ -1619,12 +1619,12 @@ struct ieee80211_he_mcs_nss_supp { * struct ieee80211_he_operation - HE capabilities element * * This structure is the "HE operation element" fields as - * described in P802.11ax_D3.0 section 9.4.2.238 + * described in P802.11ax_D4.0 section 9.4.2.243 */ struct ieee80211_he_operation { __le32 he_oper_params; __le16 he_mcs_nss_set; - /* Optional 0,1,3 or 4 bytes: depends on @he_oper_params 
*/ + /* Optional 0,1,3,4,5,7 or 8 bytes: depends on @he_oper_params */ u8 optional[0]; } __packed; @@ -1632,7 +1632,7 @@ struct ieee80211_he_operation { * struct ieee80211_he_mu_edca_param_ac_rec - MU AC Parameter Record field * * This structure is the "MU AC Parameter Record" fields as - * described in P802.11ax_D2.0 section 9.4.2.240 + * described in P802.11ax_D4.0 section 9.4.2.245 */ struct ieee80211_he_mu_edca_param_ac_rec { u8 aifsn; @@ -1644,7 +1644,7 @@ struct ieee80211_he_mu_edca_param_ac_rec { * struct ieee80211_mu_edca_param_set - MU EDCA Parameter Set element * * This structure is the "MU EDCA Parameter Set element" fields as - * described in P802.11ax_D2.0 section 9.4.2.240 + * described in P802.11ax_D4.0 section 9.4.2.245 */ struct ieee80211_mu_edca_param_set { u8 mu_qos_info; @@ -2026,6 +2026,7 @@ ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info) #define IEEE80211_HE_OPERATION_VHT_OPER_INFO 0x00004000 #define IEEE80211_HE_OPERATION_CO_HOSTED_BSS 0x00008000 #define IEEE80211_HE_OPERATION_ER_SU_DISABLE 0x00010000 +#define IEEE80211_HE_OPERATION_6GHZ_OP_INFO 0x00020000 #define IEEE80211_HE_OPERATION_BSS_COLOR_MASK 0x3f000000 #define IEEE80211_HE_OPERATION_BSS_COLOR_OFFSET 24 #define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR 0x40000000 @@ -2056,6 +2057,8 @@ ieee80211_he_oper_size(const u8 *he_oper_ie) oper_len += 3; if (he_oper_params & IEEE80211_HE_OPERATION_CO_HOSTED_BSS) oper_len++; + if (he_oper_params & IEEE80211_HE_OPERATION_6GHZ_OP_INFO) + oper_len += 4; /* Add the first byte (extension ID) to the total length */ oper_len++; @@ -2487,6 +2490,7 @@ enum ieee80211_eid_ext { WLAN_EID_EXT_HE_MU_EDCA = 38, WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME = 52, WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION = 55, + WLAN_EID_EXT_NON_INHERITANCE = 56, }; /* Action category code */ diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 627b788ba0ff..ef0819ced0fc 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -56,9 +56,6 @@ struct br_ip_list { extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); -typedef int br_should_route_hook_t(struct sk_buff *skb); -extern br_should_route_hook_t __rcu *br_should_route_hook; - #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING) int br_multicast_list_adjacent(struct net_device *dev, struct list_head *br_ip_list); diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index a64f21a97369..367dc2a0f84a 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -237,6 +237,20 @@ static inline struct in_device *__in_dev_get_rtnl(const struct net_device *dev) return rtnl_dereference(dev->ip_ptr); } +/* called with rcu_read_lock or rtnl held */ +static inline bool ip_ignore_linkdown(const struct net_device *dev) +{ + struct in_device *in_dev; + bool rc = false; + + in_dev = rcu_dereference_rtnl(dev->ip_ptr); + if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) + rc = true; + + return rc; +} + static inline struct neigh_parms *__in_dev_arp_parms_get_rcu(const struct net_device *dev) { struct in_device *in_dev = __in_dev_get_rcu(dev); diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index fa928242567d..1b6d31da7cbc 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -297,6 +297,7 @@ static inline u64 jiffies_to_nsecs(const unsigned long j) } extern u64 jiffies64_to_nsecs(u64 j); +extern u64 jiffies64_to_msecs(u64 j); extern unsigned long __msecs_to_jiffies(const unsigned int m); #if HZ <= 
MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index 73d04743a2bb..af6b11d4d673 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -34,5 +34,6 @@ /* struct phy_device dev_flags definitions */ #define MARVELL_PHY_M1145_FLAGS_RESISTANCE 0x00000001 #define MARVELL_PHY_M1118_DNS323_LEDS 0x00000002 +#define MARVELL_PHY_LED0_LINK_LED1_ACTIVE 0x00000004 #endif /* _MARVELL_PHY_H */ diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 3e99ae3ed87f..9dc16d5705a1 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -39,7 +39,8 @@ struct mdio_device { /* Bus address of the MDIO device (0-31) */ int addr; int flags; - struct gpio_desc *reset; + struct gpio_desc *reset_gpio; + struct reset_control *reset_ctrl; unsigned int reset_assert_delay; unsigned int reset_deassert_delay; }; diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 612c8c2f2466..769326ea1d9b 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -170,7 +170,7 @@ static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd, doorbell[0] = cpu_to_be32(sn << 28 | cmd | ci); doorbell[1] = cpu_to_be32(cq->cqn); - mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, NULL); + mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL); } static inline void mlx5_cq_hold(struct mlx5_core_cq *cq) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index f93a5598b942..fc2b6e807f06 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -361,6 +361,7 @@ enum { enum { MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT = 0x1, + MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT = 0x5, }; enum { @@ -1001,7 +1002,8 @@ enum { MLX5_MATCH_OUTER_HEADERS = 1 << 0, MLX5_MATCH_MISC_PARAMETERS = 1 << 1, MLX5_MATCH_INNER_HEADERS = 1 << 2, - + MLX5_MATCH_MISC_PARAMETERS_2 = 1 << 3, + MLX5_MATCH_MISC_PARAMETERS_3 = 1 << 4, }; enum { @@ -1045,6 +1047,7 @@ enum mlx5_mpls_supported_fields { }; enum mlx5_flex_parser_protos { + MLX5_FLEX_PROTO_GENEVE = 1 << 3, MLX5_FLEX_PROTO_CW_MPLS_GRE = 1 << 4, MLX5_FLEX_PROTO_CW_MPLS_UDP = 1 << 5, }; @@ -1166,6 +1169,12 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_FLOWTABLE_SNIFFER_TX_MAX(mdev, cap) \ MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit_sniffer.cap) +#define MLX5_CAP_FLOWTABLE_RDMA_RX(mdev, cap) \ + MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive_rdma.cap) + +#define MLX5_CAP_FLOWTABLE_RDMA_RX_MAX(mdev, cap) \ + MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive_rdma.cap) + #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ mdev->caps.hca_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) diff --git a/include/linux/mlx5/doorbell.h b/include/linux/mlx5/doorbell.h index 0787de28f2fc..5c267707e1df 100644 --- a/include/linux/mlx5/doorbell.h +++ b/include/linux/mlx5/doorbell.h @@ -36,46 +36,25 @@ #define MLX5_BF_OFFSET 0x800 #define MLX5_CQ_DOORBELL 0x20 -#if BITS_PER_LONG == 64 /* Assume that we can just write a 64-bit doorbell atomically. s390 * actually doesn't have writeq() but S/390 systems don't even have * PCI so we won't worry about it. + * + * Note that the write is not atomic on 32-bit systems! In contrast to 64-bit + * ones, it requires proper locking. mlx5_write64 doesn't do any locking, so use + * it at your own discretion, protected by some kind of lock on 32 bits. 
+ * + * TODO: use write{q,l}_relaxed() */ -#define MLX5_DECLARE_DOORBELL_LOCK(name) -#define MLX5_INIT_DOORBELL_LOCK(ptr) do { } while (0) -#define MLX5_GET_DOORBELL_LOCK(ptr) (NULL) - -static inline void mlx5_write64(__be32 val[2], void __iomem *dest, - spinlock_t *doorbell_lock) +static inline void mlx5_write64(__be32 val[2], void __iomem *dest) { +#if BITS_PER_LONG == 64 __raw_writeq(*(u64 *)val, dest); -} - #else - -/* Just fall back to a spinlock to protect the doorbell if - * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit - * MMIO writes. - */ - -#define MLX5_DECLARE_DOORBELL_LOCK(name) spinlock_t name; -#define MLX5_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr) -#define MLX5_GET_DOORBELL_LOCK(ptr) (ptr) - -static inline void mlx5_write64(__be32 val[2], void __iomem *dest, - spinlock_t *doorbell_lock) -{ - unsigned long flags; - - if (doorbell_lock) - spin_lock_irqsave(doorbell_lock, flags); __raw_writel((__force u32) val[0], dest); __raw_writel((__force u32) val[1], dest + 4); - if (doorbell_lock) - spin_unlock_irqrestore(doorbell_lock, flags); -} - #endif +} #endif /* MLX5_DOORBELL_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0d0729648844..5a39b323c52e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -56,7 +56,6 @@ enum { MLX5_BOARD_ID_LEN = 64, - MLX5_MAX_NAME_LEN = 16, }; enum { @@ -133,6 +132,7 @@ enum { MLX5_REG_MTRC_CONF = 0x9041, MLX5_REG_MTRC_STDB = 0x9042, MLX5_REG_MTRC_CTRL = 0x9043, + MLX5_REG_MPEIN = 0x9050, MLX5_REG_MPCNT = 0x9051, MLX5_REG_MTPPS = 0x9053, MLX5_REG_MTPPSE = 0x9054, @@ -512,8 +512,13 @@ struct mlx5_rl_table { struct mlx5_rl_entry *rl_entry; }; +struct mlx5_core_roce { + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct mlx5_flow_handle *allow_rule; +}; + struct mlx5_priv { - char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table *eq_table; /* pages stuff */ @@ -566,6 +571,7 @@ struct mlx5_priv { struct mlx5_lag *lag; struct mlx5_devcom *devcom; unsigned long pci_dev_data; + struct mlx5_core_roce roce; struct mlx5_fc_stats fc_stats; struct mlx5_rl_table rl_table; @@ -642,6 +648,7 @@ struct mlx5_fw_tracer; struct mlx5_vxlan; struct mlx5_core_dev { + struct device *device; struct pci_dev *pdev; /* sync pci state */ struct mutex pci_status_mutex; @@ -662,6 +669,7 @@ struct mlx5_core_dev { u64 sys_image_guid; phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; + phys_addr_t bar_addr; enum mlx5_device_state state; /* sync interface state */ struct mutex intf_state_mutex; @@ -887,6 +895,7 @@ void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn); int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn); +void mlx5_health_flush(struct mlx5_core_dev *dev); void mlx5_health_cleanup(struct mlx5_core_dev *dev); int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 96d8435421de..0ca77dd1429c 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -35,7 +35,7 @@ struct mlx5_eswitch_rep_if { void (*unload)(struct mlx5_eswitch_rep *rep); void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep); void *priv; - u8 state; + atomic_t state; }; struct mlx5_eswitch_rep { diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 9df51da04621..e690ba0f965c 100644 --- 
a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -73,6 +73,13 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_SNIFFER_RX, MLX5_FLOW_NAMESPACE_SNIFFER_TX, MLX5_FLOW_NAMESPACE_EGRESS, + MLX5_FLOW_NAMESPACE_RDMA_RX, +}; + +enum { + FDB_BYPASS_PATH, + FDB_FAST_PATH, + FDB_SLOW_PATH, }; struct mlx5_flow_table; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3b83288749c6..82612741b29e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -81,6 +81,19 @@ enum { }; enum { + MLX5_OBJ_TYPE_SW_ICM = 0x0008, +}; + +enum { + MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM = (1ULL << MLX5_OBJ_TYPE_SW_ICM), + MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT = (1ULL << 11), +}; + +enum { + MLX5_OBJ_TYPE_GENEVE_TLV_OPT = 0x000b, +}; + +enum { MLX5_CMD_OP_QUERY_HCA_CAP = 0x100, MLX5_CMD_OP_QUERY_ADAPTER = 0x101, MLX5_CMD_OP_INIT_HCA = 0x102, @@ -299,7 +312,11 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 outer_gre_protocol[0x1]; u8 outer_gre_key[0x1]; u8 outer_vxlan_vni[0x1]; - u8 reserved_at_1a[0x5]; + u8 outer_geneve_vni[0x1]; + u8 outer_geneve_oam[0x1]; + u8 outer_geneve_protocol_type[0x1]; + u8 outer_geneve_opt_len[0x1]; + u8 reserved_at_1e[0x1]; u8 source_eswitch_port[0x1]; u8 inner_dmac[0x1]; @@ -327,7 +344,8 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 inner_tcp_flags[0x1]; u8 reserved_at_37[0x9]; - u8 reserved_at_40[0x5]; + u8 geneve_tlv_option_0_data[0x1]; + u8 reserved_at_41[0x4]; u8 outer_first_mpls_over_udp[0x4]; u8 outer_first_mpls_over_gre[0x4]; u8 inner_first_mpls[0x4]; @@ -357,11 +375,14 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 pop_vlan_2[0x1]; u8 push_vlan_2[0x1]; u8 reformat_and_vlan_action[0x1]; - u8 reserved_at_10[0x2]; + u8 reserved_at_10[0x1]; + u8 sw_owner[0x1]; u8 reformat_l3_tunnel_to_l2[0x1]; u8 reformat_l2_to_l3_tunnel[0x1]; u8 reformat_and_modify_action[0x1]; - u8 reserved_at_15[0xb]; + u8 reserved_at_15[0x2]; + u8 table_miss_action_domain[0x1]; + u8 reserved_at_18[0x8]; u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; u8 log_max_modify_header_context[0x8]; @@ -469,7 +490,9 @@ struct mlx5_ifc_fte_match_set_misc_bits { u8 vxlan_vni[0x18]; u8 reserved_at_b8[0x8]; - u8 reserved_at_c0[0x20]; + u8 geneve_vni[0x18]; + u8 reserved_at_d8[0x7]; + u8 geneve_oam[0x1]; u8 reserved_at_e0[0xc]; u8 outer_ipv6_flow_label[0x14]; @@ -477,7 +500,11 @@ struct mlx5_ifc_fte_match_set_misc_bits { u8 reserved_at_100[0xc]; u8 inner_ipv6_flow_label[0x14]; - u8 reserved_at_120[0x28]; + u8 reserved_at_120[0xa]; + u8 geneve_opt_len[0x6]; + u8 geneve_protocol_type[0x10]; + + u8 reserved_at_140[0x8]; u8 bth_dst_qp[0x18]; u8 reserved_at_160[0x20]; u8 outer_esp_spi[0x20]; @@ -507,6 +534,12 @@ struct mlx5_ifc_fte_match_set_misc2_bits { u8 reserved_at_1a0[0x60]; }; +struct mlx5_ifc_fte_match_set_misc3_bits { + u8 reserved_at_0[0x120]; + u8 geneve_tlv_option_0_data[0x20]; + u8 reserved_at_140[0xc0]; +}; + struct mlx5_ifc_cmd_pas_bits { u8 pa_h[0x20]; @@ -589,7 +622,7 @@ struct mlx5_ifc_flow_table_nic_cap_bits { struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive; - u8 reserved_at_400[0x200]; + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive_rdma; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive_sniffer; @@ -770,7 +803,19 @@ struct mlx5_ifc_device_mem_cap_bits { u8 max_memic_size[0x20]; - u8 reserved_at_c0[0x740]; + u8 steering_sw_icm_start_address[0x40]; + + u8 reserved_at_100[0x8]; + u8 log_header_modify_sw_icm_size[0x8]; + u8 
reserved_at_110[0x2]; + u8 log_sw_icm_alloc_granularity[0x6]; + u8 log_steering_sw_icm_size[0x8]; + + u8 reserved_at_120[0x20]; + + u8 header_modify_sw_icm_start_address[0x40]; + + u8 reserved_at_180[0x680]; }; enum { @@ -919,6 +964,7 @@ enum { enum { MLX5_UCTX_CAP_RAW_TX = 1UL << 0, + MLX5_UCTX_CAP_INTERNAL_DEV_RES = 1UL << 1, }; struct mlx5_ifc_cmd_hca_cap_bits { @@ -929,7 +975,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_srq_sz[0x8]; u8 log_max_qp_sz[0x8]; - u8 reserved_at_90[0xb]; + u8 reserved_at_90[0x8]; + u8 prio_tag_required[0x1]; + u8 reserved_at_99[0x2]; u8 log_max_qp[0x5]; u8 reserved_at_a0[0xb]; @@ -1211,7 +1259,11 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 num_of_uars_per_page[0x20]; u8 flex_parser_protocols[0x20]; - u8 reserved_at_560[0x20]; + + u8 max_geneve_tlv_options[0x8]; + u8 reserved_at_568[0x3]; + u8 max_geneve_tlv_option_data_len[0x5]; + u8 reserved_at_570[0x10]; u8 reserved_at_580[0x3c]; u8 mini_cqe_resp_stride_index[0x1]; @@ -1247,7 +1299,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 uctx_cap[0x20]; - u8 reserved_at_6c0[0x140]; + u8 reserved_at_6c0[0x4]; + u8 flex_parser_id_geneve_tlv_option_0[0x4]; + u8 reserved_at_6c8[0x138]; }; enum mlx5_flow_destination_type { @@ -1260,6 +1314,12 @@ enum mlx5_flow_destination_type { MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM = 0x101, }; +enum mlx5_flow_table_miss_action { + MLX5_FLOW_TABLE_MISS_ACTION_DEF, + MLX5_FLOW_TABLE_MISS_ACTION_FWD, + MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN, +}; + struct mlx5_ifc_dest_format_struct_bits { u8 destination_type[0x8]; u8 destination_id[0x18]; @@ -1299,7 +1359,9 @@ struct mlx5_ifc_fte_match_param_bits { struct mlx5_ifc_fte_match_set_misc2_bits misc_parameters_2; - u8 reserved_at_800[0x800]; + struct mlx5_ifc_fte_match_set_misc3_bits misc_parameters_3; + + u8 reserved_at_a00[0x600]; }; enum { @@ -2920,6 +2982,7 @@ enum { MLX5_MKC_ACCESS_MODE_MTT = 0x1, MLX5_MKC_ACCESS_MODE_KLMS = 0x2, MLX5_MKC_ACCESS_MODE_KSM = 0x3, + MLX5_MKC_ACCESS_MODE_SW_ICM = 0x4, MLX5_MKC_ACCESS_MODE_MEMIC = 0x5, }; @@ -4807,6 +4870,7 @@ enum { MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1, MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_INNER_HEADERS = 0x2, MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2 = 0x3, + MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_3 = 0x4, }; struct mlx5_ifc_query_flow_group_out_bits { @@ -5110,6 +5174,7 @@ enum { MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0 = 0x14, MLX5_ACTION_IN_FIELD_OUT_SIPV4 = 0x15, MLX5_ACTION_IN_FIELD_OUT_DIPV4 = 0x16, + MLX5_ACTION_IN_FIELD_OUT_FIRST_VID = 0x17, MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT = 0x47, }; @@ -6874,14 +6939,14 @@ struct mlx5_ifc_create_tis_in_bits { struct mlx5_ifc_create_tir_out_bits { u8 status[0x8]; - u8 reserved_at_8[0x18]; + u8 icm_address_63_40[0x18]; u8 syndrome[0x20]; - u8 reserved_at_40[0x8]; + u8 icm_address_39_32[0x8]; u8 tirn[0x18]; - u8 reserved_at_60[0x20]; + u8 icm_address_31_0[0x20]; }; struct mlx5_ifc_create_tir_in_bits { @@ -8026,6 +8091,52 @@ struct mlx5_ifc_ppcnt_reg_bits { union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set; }; +struct mlx5_ifc_mpein_reg_bits { + u8 reserved_at_0[0x2]; + u8 depth[0x6]; + u8 pcie_index[0x8]; + u8 node[0x8]; + u8 reserved_at_18[0x8]; + + u8 capability_mask[0x20]; + + u8 reserved_at_40[0x8]; + u8 link_width_enabled[0x8]; + u8 link_speed_enabled[0x10]; + + u8 lane0_physical_position[0x8]; + u8 link_width_active[0x8]; + u8 link_speed_active[0x10]; + + u8 num_of_pfs[0x10]; + u8 num_of_vfs[0x10]; + + u8 bdf0[0x10]; + u8 
reserved_at_b0[0x10]; + + u8 max_read_request_size[0x4]; + u8 max_payload_size[0x4]; + u8 reserved_at_c8[0x5]; + u8 pwr_status[0x3]; + u8 port_type[0x4]; + u8 reserved_at_d4[0xb]; + u8 lane_reversal[0x1]; + + u8 reserved_at_e0[0x14]; + u8 pci_power[0xc]; + + u8 reserved_at_100[0x20]; + + u8 device_status[0x10]; + u8 port_state[0x8]; + u8 reserved_at_138[0x8]; + + u8 reserved_at_140[0x10]; + u8 receiver_detect_result[0x10]; + + u8 reserved_at_160[0x20]; +}; + struct mlx5_ifc_mpcnt_reg_bits { u8 reserved_at_0[0x8]; u8 pcie_index[0x8]; @@ -8345,7 +8456,9 @@ struct mlx5_ifc_pcam_reg_bits { }; struct mlx5_ifc_mcam_enhanced_features_bits { - u8 reserved_at_0[0x74]; + u8 reserved_at_0[0x6e]; + u8 pci_status_and_power[0x1]; + u8 reserved_at_6f[0x5]; u8 mark_tx_action_cnp[0x1]; u8 mark_tx_action_cqe[0x1]; u8 dynamic_tx_overflow[0x1]; @@ -8953,6 +9066,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_pmtu_reg_bits pmtu_reg; struct mlx5_ifc_ppad_reg_bits ppad_reg; struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg; + struct mlx5_ifc_mpein_reg_bits mpein_reg; struct mlx5_ifc_mpcnt_reg_bits mpcnt_reg; struct mlx5_ifc_pplm_reg_bits pplm_reg; struct mlx5_ifc_pplr_reg_bits pplr_reg; @@ -9442,6 +9556,33 @@ struct mlx5_ifc_uctx_bits { u8 reserved_at_20[0x160]; }; +struct mlx5_ifc_sw_icm_bits { + u8 modify_field_select[0x40]; + + u8 reserved_at_40[0x18]; + u8 log_sw_icm_size[0x8]; + + u8 reserved_at_60[0x20]; + + u8 sw_icm_start_addr[0x40]; + + u8 reserved_at_c0[0x140]; +}; + +struct mlx5_ifc_geneve_tlv_option_bits { + u8 modify_field_select[0x40]; + + u8 reserved_at_40[0x18]; + u8 geneve_option_fte_index[0x8]; + + u8 option_class[0x10]; + u8 option_type[0x8]; + u8 reserved_at_78[0x3]; + u8 option_data_length[0x5]; + + u8 reserved_at_80[0x180]; +}; + struct mlx5_ifc_create_umem_in_bits { u8 opcode[0x10]; u8 uid[0x10]; @@ -9479,6 +9620,16 @@ struct mlx5_ifc_destroy_uctx_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_create_sw_icm_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_sw_icm_bits sw_icm; +}; + +struct mlx5_ifc_create_geneve_tlv_option_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_geneve_tlv_option_bits geneve_tlv_opt; +}; + struct mlx5_ifc_mtrc_string_db_param_bits { u8 string_db_base_address[0x20]; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 64e78394fc9c..de9a272c9f3d 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -60,6 +60,7 @@ enum mlx5_an_status { #define MLX5_I2C_ADDR_LOW 0x50 #define MLX5_I2C_ADDR_HIGH 0x51 #define MLX5_EEPROM_PAGE_LENGTH 256 +#define MLX5_EEPROM_HIGH_PAGE_LENGTH 128 enum mlx5e_link_mode { MLX5E_1000BASE_CX_SGMII = 0, diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 0343c81d4c5f..3ba4edbd17a6 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -395,6 +395,7 @@ struct mlx5_wqe_signature_seg { struct mlx5_wqe_inline_seg { __be32 byte_count; + __be32 data[0]; }; enum mlx5_sig_type { diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h index a261d5528ff7..dc6b1e7cb8c4 100644 --- a/include/linux/mlx5/transobj.h +++ b/include/linux/mlx5/transobj.h @@ -50,6 +50,9 @@ int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out); int mlx5_core_query_sq_state(struct mlx5_core_dev *dev, u32 sqn, u8 *state); int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tirn); +int mlx5_core_create_tir_out(struct mlx5_core_dev *dev, + u32 *in, int inlen, + u32 *out, 
int outlen); int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, int inlen); void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn); diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 0eef548b9946..3d1c6cdbbba7 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -118,10 +118,6 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, int promisc_uc, int promisc_mc, int promisc_all); -int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, - u16 vport, - u16 vlans[], - int *size); int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, u16 vlans[], int list_size); diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h index 97ca105347a6..5685805533b5 100644 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -111,6 +111,18 @@ struct sdio_driver { extern int sdio_register_driver(struct sdio_driver *); extern void sdio_unregister_driver(struct sdio_driver *); +/** + * module_sdio_driver() - Helper macro for registering a SDIO driver + * @__sdio_driver: sdio_driver struct + * + * Helper macro for SDIO drivers which do not do anything special in module + * init/exit. This eliminates a lot of boilerplate. Each module may only + * use this macro once, and calling it replaces module_init() and module_exit() + */ +#define module_sdio_driver(__sdio_driver) \ + module_driver(__sdio_driver, sdio_register_driver, \ + sdio_unregister_driver) + /* * SDIO I/O operations */ diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 4332199c71c2..d1a5d5df02f5 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -59,6 +59,8 @@ #define SDIO_DEVICE_ID_MARVELL_8797_F0 0x9128 #define SDIO_DEVICE_ID_MARVELL_8887WLAN 0x9134 +#define SDIO_VENDOR_ID_MEDIATEK 0x037a + #define SDIO_VENDOR_ID_SIANO 0x039a #define SDIO_DEVICE_ID_SIANO_NOVA_B0 0x0201 #define SDIO_DEVICE_ID_SIANO_NICE 0x0202 diff --git a/include/linux/net.h b/include/linux/net.h index c606c72311d0..50bf5206ead6 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -161,6 +161,8 @@ struct proto_ops { int (*compat_ioctl) (struct socket *sock, unsigned int cmd, unsigned long arg); #endif + int (*gettstamp) (struct socket *sock, void __user *userstamp, + bool timeval, bool time32); int (*listen) (struct socket *sock, int len); int (*shutdown) (struct socket *sock, int flags); int (*setsockopt)(struct socket *sock, int level, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 324e872c91d1..44b47e9df94a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -194,8 +194,8 @@ struct net_device_stats { #ifdef CONFIG_RPS #include <linux/static_key.h> -extern struct static_key rps_needed; -extern struct static_key rfs_needed; +extern struct static_key_false rps_needed; +extern struct static_key_false rfs_needed; #endif struct neighbour; @@ -914,34 +914,13 @@ struct xfrmdev_ops { }; #endif -#if IS_ENABLED(CONFIG_TLS_DEVICE) -enum tls_offload_ctx_dir { - TLS_OFFLOAD_CTX_DIR_RX, - TLS_OFFLOAD_CTX_DIR_TX, -}; - -struct tls_crypto_info; -struct tls_context; - -struct tlsdev_ops { - int (*tls_dev_add)(struct net_device *netdev, struct sock *sk, - enum tls_offload_ctx_dir direction, - struct tls_crypto_info *crypto_info, - u32 start_offload_tcp_sn); - void (*tls_dev_del)(struct net_device *netdev, - struct tls_context *ctx, - enum tls_offload_ctx_dir direction); - void (*tls_dev_resync_rx)(struct net_device *netdev, - struct sock *sk, u32 seq, u64 
rcd_sn); -}; -#endif - struct dev_ifalias { struct rcu_head rcuhead; char ifalias[]; }; struct devlink; +struct tlsdev_ops; /* * This structure defines the management hooks for network devices. @@ -986,8 +965,7 @@ struct devlink; * those the driver believes to be appropriate. * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, - * struct net_device *sb_dev, - * select_queue_fallback_t fallback); + * struct net_device *sb_dev); * Called to decide which queue to use when device supports multiple * transmit queues. * @@ -1251,8 +1229,8 @@ struct devlink; * that got dropped are freed/returned via xdp_return_frame(). * Returns negative number, means general error invoking ndo, meaning * no frames were xmit'ed and core-caller will free all frames. - * struct devlink *(*ndo_get_devlink)(struct net_device *dev); - * Get devlink instance associated with a given netdev. + * struct devlink_port *(*ndo_get_devlink_port)(struct net_device *dev); + * Get devlink port instance associated with a given netdev. * Called with a reference on the netdevice and devlink locks only, * rtnl_lock is not held. */ @@ -1268,8 +1246,7 @@ struct net_device_ops { netdev_features_t features); u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback); + struct net_device *sb_dev); void (*ndo_change_rx_flags)(struct net_device *dev, int flags); void (*ndo_set_rx_mode)(struct net_device *dev); @@ -1453,7 +1430,7 @@ struct net_device_ops { u32 flags); int (*ndo_xsk_async_xmit)(struct net_device *dev, u32 queue_id); - struct devlink * (*ndo_get_devlink)(struct net_device *dev); + struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev); }; /** @@ -2155,9 +2132,11 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, &qdisc_xmit_lock_key); \ } -struct netdev_queue *netdev_pick_tx(struct net_device *dev, - struct sk_buff *skb, - struct net_device *sb_dev); +u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev); +struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, + struct sk_buff *skb, + struct net_device *sb_dev); /* returns the headroom that the master device needs to take in account * when forwarding to this dev @@ -2642,11 +2621,9 @@ void dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback); + struct net_device *sb_dev); u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback); + struct net_device *sb_dev); int dev_queue_xmit(struct sk_buff *skb); int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev); int dev_direct_xmit(struct sk_buff *skb, u16 queue_id); @@ -2664,14 +2641,6 @@ void netdev_freemem(struct net_device *dev); void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); -DECLARE_PER_CPU(int, xmit_recursion); -#define XMIT_RECURSION_LIMIT 10 - -static inline int dev_recursion_level(void) -{ - return this_cpu_read(xmit_recursion); -} - struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); @@ -3020,6 +2989,11 @@ struct 
softnet_data { #ifdef CONFIG_XFRM_OFFLOAD struct sk_buff_head xfrm_backlog; #endif + /* written and read only by owning cpu: */ + struct { + u16 recursion; + u8 more; + } xmit; #ifdef CONFIG_RPS /* input_queue_head should be written by cpu owning this struct, * and only read by other cpus. Worth using a cache line. @@ -3055,6 +3029,28 @@ static inline void input_queue_tail_incr_save(struct softnet_data *sd, DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); +static inline int dev_recursion_level(void) +{ + return this_cpu_read(softnet_data.xmit.recursion); +} + +#define XMIT_RECURSION_LIMIT 10 +static inline bool dev_xmit_recursion(void) +{ + return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > + XMIT_RECURSION_LIMIT); +} + +static inline void dev_xmit_recursion_inc(void) +{ + __this_cpu_inc(softnet_data.xmit.recursion); +} + +static inline void dev_xmit_recursion_dec(void) +{ + __this_cpu_dec(softnet_data.xmit.recursion); +} + void __netif_schedule(struct Qdisc *q); void netif_schedule_queue(struct netdev_queue *txq); @@ -4410,10 +4406,15 @@ static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, struct sk_buff *skb, struct net_device *dev, bool more) { - skb->xmit_more = more ? 1 : 0; + __this_cpu_write(softnet_data.xmit.more, more); return ops->ndo_start_xmit(skb, dev); } +static inline bool netdev_xmit_more(void) +{ + return __this_cpu_read(softnet_data.xmit.more); +} + static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, bool more) { diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 72cb19c3db6a..996bc247ef6e 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -24,20 +24,36 @@ static inline int NF_DROP_GETERR(int verdict) static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *a2) { +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + const unsigned long *ul1 = (const unsigned long *)a1; + const unsigned long *ul2 = (const unsigned long *)a2; + + return ((ul1[0] ^ ul2[0]) | (ul1[1] ^ ul2[1])) == 0UL; +#else return a1->all[0] == a2->all[0] && a1->all[1] == a2->all[1] && a1->all[2] == a2->all[2] && a1->all[3] == a2->all[3]; +#endif } static inline void nf_inet_addr_mask(const union nf_inet_addr *a1, union nf_inet_addr *result, const union nf_inet_addr *mask) { +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + const unsigned long *ua = (const unsigned long *)a1; + unsigned long *ur = (unsigned long *)result; + const unsigned long *um = (const unsigned long *)mask; + + ur[0] = ua[0] & um[0]; + ur[1] = ua[1] & um[1]; +#else result->all[0] = a1->all[0] & mask->all[0]; result->all[1] = a1->all[1] & mask->all[1]; result->all[2] = a1->all[2] & mask->all[2]; result->all[3] = a1->all[3] & mask->all[3]; +#endif } int netfilter_init(void); @@ -360,7 +376,7 @@ extern struct nf_nat_hook __rcu *nf_nat_hook; static inline void nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) { -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) struct nf_nat_hook *nat_hook; rcu_read_lock(); diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index f2e1e6b13ca4..e499d170f12d 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -401,33 +401,30 @@ ip_set_get_h16(const struct nlattr *attr) return ntohs(nla_get_be16(attr)); } -#define ipset_nest_start(skb, attr) 
nla_nest_start(skb, attr | NLA_F_NESTED) -#define ipset_nest_end(skb, start) nla_nest_end(skb, start) - static inline int nla_put_ipaddr4(struct sk_buff *skb, int type, __be32 ipaddr) { - struct nlattr *__nested = ipset_nest_start(skb, type); + struct nlattr *__nested = nla_nest_start(skb, type); int ret; if (!__nested) return -EMSGSIZE; ret = nla_put_in_addr(skb, IPSET_ATTR_IPADDR_IPV4, ipaddr); if (!ret) - ipset_nest_end(skb, __nested); + nla_nest_end(skb, __nested); return ret; } static inline int nla_put_ipaddr6(struct sk_buff *skb, int type, const struct in6_addr *ipaddrptr) { - struct nlattr *__nested = ipset_nest_start(skb, type); + struct nlattr *__nested = nla_nest_start(skb, type); int ret; if (!__nested) return -EMSGSIZE; ret = nla_put_in6_addr(skb, IPSET_ATTR_IPADDR_IPV6, ipaddrptr); if (!ret) - ipset_nest_end(skb, __nested); + nla_nest_end(skb, __nested); return ret; } diff --git a/include/linux/netfilter/nfnetlink_osf.h b/include/linux/netfilter/nfnetlink_osf.h index c6000046c966..788613f36935 100644 --- a/include/linux/netfilter/nfnetlink_osf.h +++ b/include/linux/netfilter/nfnetlink_osf.h @@ -21,13 +21,18 @@ struct nf_osf_finger { struct nf_osf_user_finger finger; }; +struct nf_osf_data { + const char *genre; + const char *version; +}; + bool nf_osf_match(const struct sk_buff *skb, u_int8_t family, int hooknum, struct net_device *in, struct net_device *out, const struct nf_osf_info *info, struct net *net, const struct list_head *nf_osf_fingers); -const char *nf_osf_find(const struct sk_buff *skb, - const struct list_head *nf_osf_fingers, - const int ttl_check); +bool nf_osf_find(const struct sk_buff *skb, + const struct list_head *nf_osf_fingers, + const int ttl_check, struct nf_osf_data *data); #endif /* _NFOSF_H */ diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index bf384b3eedb8..1f852ef7b098 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -317,7 +317,6 @@ struct xt_table_info *xt_replace_table(struct xt_table *table, int *error); struct xt_match *xt_find_match(u8 af, const char *name, u8 revision); -struct xt_target *xt_find_target(u8 af, const char *name, u8 revision); struct xt_match *xt_request_find_match(u8 af, const char *name, u8 revision); struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision); int xt_find_revision(u8 af, const char *name, u8 revision, int target, diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 471e9467105b..12113e502656 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -87,6 +87,21 @@ static inline int nf_ip6_route(struct net *net, struct dst_entry **dst, } int ip6_route_me_harder(struct net *net, struct sk_buff *skb); + +static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb) +{ +#if IS_MODULE(CONFIG_IPV6) + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (!v6_ops) + return -EHOSTUNREACH; + + return v6_ops->route_me_harder(net, skb); +#else + return ip6_route_me_harder(net, skb); +#endif +} + __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); diff --git a/include/linux/packing.h b/include/linux/packing.h new file mode 100644 index 000000000000..54667735cc67 --- /dev/null +++ b/include/linux/packing.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2016-2018, NXP Semiconductors + * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com> + */ +#ifndef 
_LINUX_PACKING_H +#define _LINUX_PACKING_H + +#include <linux/types.h> +#include <linux/bitops.h> + +#define QUIRK_MSB_ON_THE_RIGHT BIT(0) +#define QUIRK_LITTLE_ENDIAN BIT(1) +#define QUIRK_LSW32_IS_FIRST BIT(2) + +enum packing_op { + PACK, + UNPACK, +}; + +/** + * packing - Convert numbers (currently u64) between a packed and an unpacked + * format. Unpacked means laid out in memory in the CPU's native + * understanding of integers, while packed means anything else that + * requires translation. + * + * @pbuf: Pointer to a buffer holding the packed value. + * @uval: Pointer to an u64 holding the unpacked value. + * @startbit: The index (in logical notation, compensated for quirks) where + * the packed value starts within pbuf. Must be larger than, or + * equal to, endbit. + * @endbit: The index (in logical notation, compensated for quirks) where + * the packed value ends within pbuf. Must be smaller than, or equal + * to, startbit. + * @op: If PACK, then uval will be treated as const pointer and copied (packed) + * into pbuf, between startbit and endbit. + * If UNPACK, then pbuf will be treated as const pointer and the logical + * value between startbit and endbit will be copied (unpacked) to uval. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming + * correct usage, return code may be discarded. + * If op is PACK, pbuf is modified. + * If op is UNPACK, uval is modified. + */ +int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, + enum packing_op op, u8 quirks); + +#endif diff --git a/include/linux/phy.h b/include/linux/phy.h index 34084892a466..073fb151b5a9 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -308,13 +308,7 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); * * HALTED: PHY is up, but no polling or interrupts are done. Or * PHY is in an error state. - * - * - phy_start moves to RESUMING - * - * RESUMING: PHY was halted, but now wants to run again. - * - If we are forcing, or aneg is done, timer moves to RUNNING - * - If aneg is not done, timer moves to AN - * - phy_stop moves to HALTED + * - phy_start moves to UP */ enum phy_state { PHY_DOWN = 0, @@ -324,7 +318,6 @@ enum phy_state { PHY_RUNNING, PHY_NOLINK, PHY_FORCING, - PHY_RESUMING }; /** @@ -345,6 +338,7 @@ struct phy_c45_device_ids { * is_c45: Set to true if this phy uses clause 45 addressing. * is_internal: Set to true if this phy is internal to a MAC. * is_pseudo_fixed_link: Set to true if this phy is an Ethernet switch, etc. + * is_gigabit_capable: Set to true if PHY supports 1000Mbps * has_fixups: Set to true if this phy has fixups/quirks. * suspended: Set to true if this phy has been suspended successfully. * sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal. 
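The new include/linux/packing.h above documents the generic packing() helper and its quirk flags. A small usage sketch, with the buffer size and bit positions chosen purely for illustration:

    #include <linux/packing.h>

    /* Pack a 16-bit value into logical bits 47:32 of an 8-byte buffer and
     * read it back; quirks == 0 requests the default (no quirk) layout.
     */
    static int packing_example(void)
    {
        u8 buf[8] = { 0 };
        u64 val = 0x1234;
        u64 readback = 0;
        int err;

        err = packing(buf, &val, 47, 32, sizeof(buf), PACK, 0);
        if (err)
            return err;

        err = packing(buf, &readback, 47, 32, sizeof(buf), UNPACK, 0);
        /* On success, readback == 0x1234. */
        return err;
    }
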
@@ -382,6 +376,7 @@ struct phy_device { unsigned is_c45:1; unsigned is_internal:1; unsigned is_pseudo_fixed_link:1; + unsigned is_gigabit_capable:1; unsigned has_fixups:1; unsigned suspended:1; unsigned sysfs_links:1; @@ -390,6 +385,7 @@ struct phy_device { unsigned autoneg:1; /* The most recently read link state */ unsigned link:1; + unsigned autoneg_complete:1; /* Interrupts are enabled */ unsigned interrupts:1; @@ -1075,6 +1071,7 @@ void phy_attached_info(struct phy_device *phydev); /* Clause 22 PHY */ int genphy_config_init(struct phy_device *phydev); +int genphy_read_abilities(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); int genphy_config_eee_advert(struct phy_device *phydev); @@ -1150,6 +1147,7 @@ void phy_request_interrupt(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); +void phy_advertise_supported(struct phy_device *phydev); void phy_support_sym_pause(struct phy_device *phydev); void phy_support_asym_pause(struct phy_device *phydev); void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx, diff --git a/include/linux/platform_data/macb.h b/include/linux/platform_data/macb.h index 7815d50c26ff..2bc51b822956 100644 --- a/include/linux/platform_data/macb.h +++ b/include/linux/platform_data/macb.h @@ -12,19 +12,10 @@ /** * struct macb_platform_data - platform data for MACB Ethernet - * @phy_mask: phy mask passed when register the MDIO bus - * within the driver - * @phy_irq_pin: PHY IRQ - * @is_rmii: using RMII interface? - * @rev_eth_addr: reverse Ethernet address byte order * @pclk: platform clock * @hclk: AHB clock */ struct macb_platform_data { - u32 phy_mask; - int phy_irq_pin; - u8 is_rmii; - u8 rev_eth_addr; struct clk *pclk; struct clk *hclk; }; diff --git a/include/linux/platform_data/mv88e6xxx.h b/include/linux/platform_data/mv88e6xxx.h index 963730b44aea..21452a9365e1 100644 --- a/include/linux/platform_data/mv88e6xxx.h +++ b/include/linux/platform_data/mv88e6xxx.h @@ -13,6 +13,7 @@ struct dsa_mv88e6xxx_pdata { unsigned int enabled_ports; struct net_device *netdev; u32 eeprom_len; + int irq; }; #endif diff --git a/include/linux/platform_data/xilinx-ll-temac.h b/include/linux/platform_data/xilinx-ll-temac.h new file mode 100644 index 000000000000..368530f98176 --- /dev/null +++ b/include/linux/platform_data/xilinx-ll-temac.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_XILINX_LL_TEMAC_H +#define __LINUX_XILINX_LL_TEMAC_H + +#include <linux/if_ether.h> +#include <linux/phy.h> + +struct ll_temac_platform_data { + bool txcsum; /* Enable/disable TX checksum */ + bool rxcsum; /* Enable/disable RX checksum */ + u8 mac_addr[ETH_ALEN]; /* MAC address (6 bytes) */ + /* Clock frequency for input to MDIO clock generator */ + u32 mdio_clk_freq; + unsigned long long mdio_bus_id; /* Unique id for MDIO bus */ + int phy_addr; /* Address of the PHY to connect to */ + phy_interface_t phy_interface; /* PHY interface mode */ + bool reg_little_endian; /* Little endian TEMAC register access */ + bool dma_little_endian; /* Little endian DMA register access */ + /* Pre-initialized mutex to use for synchronizing indirect + * register access. When using both interfaces of a single + * TEMAC IP block, the same mutex should be passed here, as + * they share the same DCR bus bridge. 
+ */ + struct mutex *indirect_mutex; + /* DMA channel control setup */ + u8 tx_irq_timeout; /* TX Interrupt Delay Time-out */ + u8 tx_irq_count; /* TX Interrupt Coalescing Threshold Count */ + u8 rx_irq_timeout; /* RX Interrupt Delay Time-out */ + u8 rx_irq_count; /* RX Interrupt Coalescing Threshold Count */ +}; + +#endif /* __LINUX_XILINX_LL_TEMAC_H */ diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h index 763d613ce2c2..57467cbf4c5b 100644 --- a/include/linux/rhashtable-types.h +++ b/include/linux/rhashtable-types.h @@ -48,7 +48,6 @@ typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, * @head_offset: Offset of rhash_head in struct to be hashed * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking - * @locks_mul: Number of bucket locks to allocate per cpu (default: 32) * @automatic_shrinking: Enable automatic shrinking of tables * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) * @obj_hashfn: Function to hash object @@ -62,7 +61,6 @@ struct rhashtable_params { unsigned int max_size; u16 min_size; bool automatic_shrinking; - u8 locks_mul; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; rht_obj_cmpfn_t obj_cmpfn; diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index ae9c0f71f311..f7714d3b46bd 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -24,12 +24,27 @@ #include <linux/list_nulls.h> #include <linux/workqueue.h> #include <linux/rculist.h> +#include <linux/bit_spinlock.h> #include <linux/rhashtable-types.h> /* + * Objects in an rhashtable have an embedded struct rhash_head + * which is linked into a hash chain from the hash table - or one + * of two or more hash tables when the rhashtable is being resized. * The end of the chain is marked with a special nulls marker which has - * the least significant bit set. + * the least significant bit set but otherwise stores the address of + * the hash bucket. This allows us to be sure we've found the end + * of the right list. + * The value stored in the hash bucket has BIT(0) used as a lock bit. + * This bit must be atomically set before any changes are made to + * the chain. To avoid dereferencing this pointer without clearing + * the bit first, we use an opaque 'struct rhash_lock_head *' for the + * pointer stored in the bucket. This struct needs to be defined so + * that rcu_dereference() works on it, but it has no content so a + * cast is needed for it to be useful. This ensures it isn't + * used by mistake without clearing the lock bit first. */ +struct rhash_lock_head {}; /* Maximum chain length before rehash * @@ -52,8 +67,6 @@ * @nest: Number of bits of first-level nested table. 
* @rehash: Current bucket being rehashed * @hash_rnd: Random seed to fold into hash - * @locks_mask: Mask to apply before accessing locks[] - * @locks: Array of spinlocks protecting individual buckets * @walkers: List of active walkers * @rcu: RCU structure for freeing the table * @future_tbl: Table under construction during rehashing @@ -63,31 +76,34 @@ struct bucket_table { unsigned int size; unsigned int nest; - unsigned int rehash; u32 hash_rnd; - unsigned int locks_mask; - spinlock_t *locks; struct list_head walkers; struct rcu_head rcu; struct bucket_table __rcu *future_tbl; - struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; + struct lockdep_map dep_map; + + struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; /* * NULLS_MARKER() expects a hash value with the low * bits mostly likely to be significant, and it discards * the msb. - * We git it an address, in which the bottom 2 bits are + * We give it an address, in which the bottom bit is * always 0, and the msb might be significant. * So we shift the address down one bit to align with * expectations and avoid losing a significant bit. + * + * We never store the NULLS_MARKER in the hash table + * itself as we need the lsb for locking. + * Instead we store a NULL */ #define RHT_NULLS_MARKER(ptr) \ ((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1)) #define INIT_RHT_NULLS_HEAD(ptr) \ - ((ptr) = RHT_NULLS_MARKER(&(ptr))) + ((ptr) = NULL) static inline bool rht_is_a_nulls(const struct rhash_head *ptr) { @@ -207,25 +223,6 @@ static inline bool rht_grow_above_max(const struct rhashtable *ht, return atomic_read(&ht->nelems) >= ht->max_elems; } -/* The bucket lock is selected based on the hash and protects mutations - * on a group of hash buckets. - * - * A maximum of tbl->size/2 bucket locks is allocated. This ensures that - * a single lock always covers both buckets which may both contains - * entries which link to the same bucket of the old table during resizing. - * This allows to simplify the locking as locking the bucket in both - * tables during resize always guarantee protection. - * - * IMPORTANT: When holding the bucket lock of both the old and new table - * during expansions and shrinking, the old bucket lock must always be - * acquired first. 
- */ -static inline spinlock_t *rht_bucket_lock(const struct bucket_table *tbl, - unsigned int hash) -{ - return &tbl->locks[hash & tbl->locks_mask]; -} - #ifdef CONFIG_PROVE_LOCKING int lockdep_rht_mutex_is_held(struct rhashtable *ht); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash); @@ -264,11 +261,13 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, void *arg); void rhashtable_destroy(struct rhashtable *ht); -struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash); -struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, - struct bucket_table *tbl, +struct rhash_lock_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash); +struct rhash_lock_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl, unsigned int hash); +struct rhash_lock_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned int hash); #define rht_dereference(p, ht) \ rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) @@ -285,37 +284,136 @@ struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, #define rht_entry(tpos, pos, member) \ ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) -static inline struct rhash_head __rcu *const *rht_bucket( +static inline struct rhash_lock_head __rcu *const *rht_bucket( const struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : &tbl->buckets[hash]; } -static inline struct rhash_head __rcu **rht_bucket_var( +static inline struct rhash_lock_head __rcu **rht_bucket_var( struct bucket_table *tbl, unsigned int hash) { - return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : + return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) : &tbl->buckets[hash]; } -static inline struct rhash_head __rcu **rht_bucket_insert( +static inline struct rhash_lock_head __rcu **rht_bucket_insert( struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) : &tbl->buckets[hash]; } +/* + * We lock a bucket by setting BIT(0) in the pointer - this is always + * zero in real pointers. The NULLS mark is never stored in the bucket, + * rather we store NULL if the bucket is empty. + * bit_spin_locks do not handle contention well, but the whole point + * of the hashtable design is to achieve minimum per-bucket contention. + * A nested hash table might not have a bucket pointer. In that case + * we cannot get a lock. For remove and replace the bucket cannot be + * interesting and doesn't need locking. + * For insert we allocate the bucket if this is the last bucket_table, + * and then take the lock. + * Sometimes we unlock a bucket by writing a new pointer there. In that + * case we don't need to unlock, but we do need to reset state such as + * local_bh. For that we have rht_assign_unlock(). As rcu_assign_pointer() + * provides the same release semantics that bit_spin_unlock() provides, + * this is safe. + * When we write to a bucket without unlocking, we use rht_assign_locked(). 
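/*
 * A minimal sketch of the pattern described above, with a hypothetical
 * caller that already holds rcu_read_lock() and has computed the bucket
 * hash: take the bucket bit lock, link the new object at the head of the
 * chain, and let the final assignment double as the unlock.  This mirrors
 * what __rhashtable_insert_fast() below does on its fast path.
 */
static void example_insert_at_head(struct bucket_table *tbl,
				   struct rhash_lock_head __rcu **bkt,
				   struct rhash_head *obj,
				   unsigned int hash)
{
	struct rhash_head *head;

	rht_lock(tbl, bkt);			/* sets BIT(0), BH disabled */
	head = rht_ptr(bkt, tbl, hash);		/* old head, lock bit masked off */
	RCU_INIT_POINTER(obj->next, head);
	rht_assign_unlock(tbl, bkt, obj);	/* publish new head and unlock */
}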
+ */ + +static inline void rht_lock(struct bucket_table *tbl, + struct rhash_lock_head **bkt) +{ + local_bh_disable(); + bit_spin_lock(0, (unsigned long *)bkt); + lock_map_acquire(&tbl->dep_map); +} + +static inline void rht_lock_nested(struct bucket_table *tbl, + struct rhash_lock_head **bucket, + unsigned int subclass) +{ + local_bh_disable(); + bit_spin_lock(0, (unsigned long *)bucket); + lock_acquire_exclusive(&tbl->dep_map, subclass, 0, NULL, _THIS_IP_); +} + +static inline void rht_unlock(struct bucket_table *tbl, + struct rhash_lock_head **bkt) +{ + lock_map_release(&tbl->dep_map); + bit_spin_unlock(0, (unsigned long *)bkt); + local_bh_enable(); +} + +/* + * Where 'bkt' is a bucket and might be locked: + * rht_ptr() dereferences that pointer and clears the lock bit. + * rht_ptr_exclusive() dereferences in a context where exclusive + * access is guaranteed, such as when destroying the table. + */ +static inline struct rhash_head *rht_ptr( + struct rhash_lock_head __rcu * const *bkt, + struct bucket_table *tbl, + unsigned int hash) +{ + const struct rhash_lock_head *p = + rht_dereference_bucket_rcu(*bkt, tbl, hash); + + if ((((unsigned long)p) & ~BIT(0)) == 0) + return RHT_NULLS_MARKER(bkt); + return (void *)(((unsigned long)p) & ~BIT(0)); +} + +static inline struct rhash_head *rht_ptr_exclusive( + struct rhash_lock_head __rcu * const *bkt) +{ + const struct rhash_lock_head *p = + rcu_dereference_protected(*bkt, 1); + + if (!p) + return RHT_NULLS_MARKER(bkt); + return (void *)(((unsigned long)p) & ~BIT(0)); +} + +static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt, + struct rhash_head *obj) +{ + struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt; + + if (rht_is_a_nulls(obj)) + obj = NULL; + rcu_assign_pointer(*p, (void *)((unsigned long)obj | BIT(0))); +} + +static inline void rht_assign_unlock(struct bucket_table *tbl, + struct rhash_lock_head __rcu **bkt, + struct rhash_head *obj) +{ + struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt; + + if (rht_is_a_nulls(obj)) + obj = NULL; + lock_map_release(&tbl->dep_map); + rcu_assign_pointer(*p, obj); + preempt_enable(); + __release(bitlock); + local_bh_enable(); +} + /** - * rht_for_each_continue - continue iterating over hash chain + * rht_for_each_from - iterate over hash chain from given head * @pos: the &struct rhash_head to use as a loop cursor. - * @head: the previous &struct rhash_head to continue from + * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index */ -#define rht_for_each_continue(pos, head, tbl, hash) \ - for (pos = rht_dereference_bucket(head, tbl, hash); \ - !rht_is_a_nulls(pos); \ +#define rht_for_each_from(pos, head, tbl, hash) \ + for (pos = head; \ + !rht_is_a_nulls(pos); \ pos = rht_dereference_bucket((pos)->next, tbl, hash)) /** @@ -325,19 +423,20 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * @hash: the hash value / bucket index */ #define rht_for_each(pos, tbl, hash) \ - rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash) + rht_for_each_from(pos, rht_ptr(rht_bucket(tbl, hash), tbl, hash), \ + tbl, hash) /** - * rht_for_each_entry_continue - continue iterating over hash chain + * rht_for_each_entry_from - iterate over hash chain from given head * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. 
- * @head: the previous &struct rhash_head to continue from + * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. */ -#define rht_for_each_entry_continue(tpos, pos, head, tbl, hash, member) \ - for (pos = rht_dereference_bucket(head, tbl, hash); \ +#define rht_for_each_entry_from(tpos, pos, head, tbl, hash, member) \ + for (pos = head; \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = rht_dereference_bucket((pos)->next, tbl, hash)) @@ -350,8 +449,9 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * @member: name of the &struct rhash_head within the hashable struct. */ #define rht_for_each_entry(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash), \ - tbl, hash, member) + rht_for_each_entry_from(tpos, pos, \ + rht_ptr(rht_bucket(tbl, hash), tbl, hash), \ + tbl, hash, member) /** * rht_for_each_entry_safe - safely iterate over hash chain of given type @@ -366,7 +466,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * remove the loop cursor from the list. */ #define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ - for (pos = rht_dereference_bucket(*rht_bucket(tbl, hash), tbl, hash), \ + for (pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash), \ next = !rht_is_a_nulls(pos) ? \ rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ @@ -375,9 +475,9 @@ static inline struct rhash_head __rcu **rht_bucket_insert( rht_dereference_bucket(pos->next, tbl, hash) : NULL) /** - * rht_for_each_rcu_continue - continue iterating over rcu hash chain + * rht_for_each_rcu_from - iterate over rcu hash chain from given head * @pos: the &struct rhash_head to use as a loop cursor. - * @head: the previous &struct rhash_head to continue from + * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @@ -385,9 +485,9 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_rcu_continue(pos, head, tbl, hash) \ +#define rht_for_each_rcu_from(pos, head, tbl, hash) \ for (({barrier(); }), \ - pos = rht_dereference_bucket_rcu(head, tbl, hash); \ + pos = head; \ !rht_is_a_nulls(pos); \ pos = rcu_dereference_raw(pos->next)) @@ -401,14 +501,17 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_rcu(pos, tbl, hash) \ - rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash) +#define rht_for_each_rcu(pos, tbl, hash) \ + for (({barrier(); }), \ + pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash); \ + !rht_is_a_nulls(pos); \ + pos = rcu_dereference_raw(pos->next)) /** - * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain + * rht_for_each_entry_rcu_from - iterated over rcu hash chain from given head * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. - * @head: the previous &struct rhash_head to continue from + * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. 
@@ -417,9 +520,9 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_entry_rcu_continue(tpos, pos, head, tbl, hash, member) \ +#define rht_for_each_entry_rcu_from(tpos, pos, head, tbl, hash, member) \ for (({barrier(); }), \ - pos = rht_dereference_bucket_rcu(head, tbl, hash); \ + pos = head; \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = rht_dereference_bucket_rcu(pos->next, tbl, hash)) @@ -436,8 +539,10 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \ - tbl, hash, member) + rht_for_each_entry_rcu_from(tpos, pos, \ + rht_ptr(rht_bucket(tbl, hash), \ + tbl, hash), \ + tbl, hash, member) /** * rhl_for_each_rcu - iterate over rcu hash table list @@ -482,7 +587,7 @@ static inline struct rhash_head *__rhashtable_lookup( .ht = ht, .key = key, }; - struct rhash_head __rcu * const *head; + struct rhash_lock_head __rcu * const *bkt; struct bucket_table *tbl; struct rhash_head *he; unsigned int hash; @@ -490,9 +595,9 @@ static inline struct rhash_head *__rhashtable_lookup( tbl = rht_dereference_rcu(ht->tbl, ht); restart: hash = rht_key_hashfn(ht, tbl, key, params); - head = rht_bucket(tbl, hash); + bkt = rht_bucket(tbl, hash); do { - rht_for_each_rcu_continue(he, *head, tbl, hash) { + rht_for_each_rcu_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) { if (params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, he)) : rhashtable_compare(&arg, rht_obj(ht, he))) @@ -502,7 +607,7 @@ restart: /* An object might have been moved to a different hash chain, * while we walk along it - better check and retry. */ - } while (he != RHT_NULLS_MARKER(head)); + } while (he != RHT_NULLS_MARKER(bkt)); /* Ensure we see any new tables. 
*/ smp_rmb(); @@ -598,10 +703,10 @@ static inline void *__rhashtable_insert_fast( .ht = ht, .key = key, }; + struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct bucket_table *tbl; struct rhash_head *head; - spinlock_t *lock; unsigned int hash; int elasticity; void *data; @@ -610,23 +715,22 @@ static inline void *__rhashtable_insert_fast( tbl = rht_dereference_rcu(ht->tbl, ht); hash = rht_head_hashfn(ht, tbl, obj, params); - lock = rht_bucket_lock(tbl, hash); - spin_lock_bh(lock); + elasticity = RHT_ELASTICITY; + bkt = rht_bucket_insert(ht, tbl, hash); + data = ERR_PTR(-ENOMEM); + if (!bkt) + goto out; + pprev = NULL; + rht_lock(tbl, bkt); if (unlikely(rcu_access_pointer(tbl->future_tbl))) { slow_path: - spin_unlock_bh(lock); + rht_unlock(tbl, bkt); rcu_read_unlock(); return rhashtable_insert_slow(ht, key, obj); } - elasticity = RHT_ELASTICITY; - pprev = rht_bucket_insert(ht, tbl, hash); - data = ERR_PTR(-ENOMEM); - if (!pprev) - goto out; - - rht_for_each_continue(head, *pprev, tbl, hash) { + rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) { struct rhlist_head *plist; struct rhlist_head *list; @@ -642,7 +746,7 @@ slow_path: data = rht_obj(ht, head); if (!rhlist) - goto out; + goto out_unlock; list = container_of(obj, struct rhlist_head, rhead); @@ -651,9 +755,13 @@ slow_path: RCU_INIT_POINTER(list->next, plist); head = rht_dereference_bucket(head->next, tbl, hash); RCU_INIT_POINTER(list->rhead.next, head); - rcu_assign_pointer(*pprev, obj); - - goto good; + if (pprev) { + rcu_assign_pointer(*pprev, obj); + rht_unlock(tbl, bkt); + } else + rht_assign_unlock(tbl, bkt, obj); + data = NULL; + goto out; } if (elasticity <= 0) @@ -661,12 +769,13 @@ slow_path: data = ERR_PTR(-E2BIG); if (unlikely(rht_grow_above_max(ht, tbl))) - goto out; + goto out_unlock; if (unlikely(rht_grow_above_100(ht, tbl))) goto slow_path; - head = rht_dereference_bucket(*pprev, tbl, hash); + /* Inserting at head of list makes unlocking free. */ + head = rht_ptr(bkt, tbl, hash); RCU_INIT_POINTER(obj->next, head); if (rhlist) { @@ -676,20 +785,21 @@ slow_path: RCU_INIT_POINTER(list->next, NULL); } - rcu_assign_pointer(*pprev, obj); - atomic_inc(&ht->nelems); + rht_assign_unlock(tbl, bkt, obj); + if (rht_grow_above_75(ht, tbl)) schedule_work(&ht->run_work); -good: data = NULL; - out: - spin_unlock_bh(lock); rcu_read_unlock(); return data; + +out_unlock: + rht_unlock(tbl, bkt); + goto out; } /** @@ -698,9 +808,9 @@ out: * @obj: pointer to hash head inside object * @params: hash table parameters * - * Will take a per bucket spinlock to protect against mutual mutations + * Will take the per bucket bitlock to protect against mutual mutations * on the same bucket. Multiple insertions may occur in parallel unless - * they map to the same bucket lock. + * they map to the same bucket. * * It is safe to call this function from atomic context. * @@ -727,9 +837,9 @@ static inline int rhashtable_insert_fast( * @list: pointer to hash list head inside object * @params: hash table parameters * - * Will take a per bucket spinlock to protect against mutual mutations + * Will take the per bucket bitlock to protect against mutual mutations * on the same bucket. Multiple insertions may occur in parallel unless - * they map to the same bucket lock. + * they map to the same bucket. * * It is safe to call this function from atomic context. 
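/*
 * A minimal sketch of a caller of the fast-path helpers above, assuming a
 * hypothetical object type keyed by a u32.  The external API is unchanged
 * by this rework; only the per-bucket locking behind the helpers differs,
 * and the call remains usable from atomic context.
 */
struct example_obj {
	u32 key;
	struct rhash_head node;
};

static const struct rhashtable_params example_params = {
	.key_len	= sizeof(u32),
	.key_offset	= offsetof(struct example_obj, key),
	.head_offset	= offsetof(struct example_obj, node),
	.automatic_shrinking = true,
};

static int example_add(struct rhashtable *ht, struct example_obj *obj)
{
	return rhashtable_insert_fast(ht, &obj->node, example_params);
}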
* @@ -750,9 +860,9 @@ static inline int rhltable_insert_key( * @list: pointer to hash list head inside object * @params: hash table parameters * - * Will take a per bucket spinlock to protect against mutual mutations + * Will take the per bucket bitlock to protect against mutual mutations * on the same bucket. Multiple insertions may occur in parallel unless - * they map to the same bucket lock. + * they map to the same bucket. * * It is safe to call this function from atomic context. * @@ -776,12 +886,6 @@ static inline int rhltable_insert( * @obj: pointer to hash head inside object * @params: hash table parameters * - * Locks down the bucket chain in both the old and new table if a resize - * is in progress to ensure that writers can't remove from the old table - * and can't insert to the new table during the atomic operation of search - * and insertion. Searches for duplicates in both the old and new table if - * a resize is in progress. - * * This lookup function may only be used for fixed key hash table (key_len * parameter set). It will BUG() if used inappropriately. * @@ -837,12 +941,6 @@ static inline void *rhashtable_lookup_get_insert_fast( * @obj: pointer to hash head inside object * @params: hash table parameters * - * Locks down the bucket chain in both the old and new table if a resize - * is in progress to ensure that writers can't remove from the old table - * and can't insert to the new table during the atomic operation of search - * and insertion. Searches for duplicates in both the old and new table if - * a resize is in progress. - * * Lookups may occur in parallel with hashtable mutations and resizing. * * Will trigger an automatic deferred table resizing if residency in the @@ -891,19 +989,20 @@ static inline int __rhashtable_remove_fast_one( struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { + struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; - spinlock_t * lock; unsigned int hash; int err = -ENOENT; hash = rht_head_hashfn(ht, tbl, obj, params); - lock = rht_bucket_lock(tbl, hash); - - spin_lock_bh(lock); + bkt = rht_bucket_var(tbl, hash); + if (!bkt) + return -ENOENT; + pprev = NULL; + rht_lock(tbl, bkt); - pprev = rht_bucket_var(tbl, hash); - rht_for_each_continue(he, *pprev, tbl, hash) { + rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) { struct rhlist_head *list; list = container_of(he, struct rhlist_head, rhead); @@ -943,12 +1042,17 @@ static inline int __rhashtable_remove_fast_one( } } - rcu_assign_pointer(*pprev, obj); - break; + if (pprev) { + rcu_assign_pointer(*pprev, obj); + rht_unlock(tbl, bkt); + } else { + rht_assign_unlock(tbl, bkt, obj); + } + goto unlocked; } - spin_unlock_bh(lock); - + rht_unlock(tbl, bkt); +unlocked: if (err > 0) { atomic_dec(&ht->nelems); if (unlikely(ht->p.automatic_shrinking && @@ -1037,9 +1141,9 @@ static inline int __rhashtable_replace_fast( struct rhash_head *obj_old, struct rhash_head *obj_new, const struct rhashtable_params params) { + struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; - spinlock_t *lock; unsigned int hash; int err = -ENOENT; @@ -1050,25 +1154,33 @@ static inline int __rhashtable_replace_fast( if (hash != rht_head_hashfn(ht, tbl, obj_new, params)) return -EINVAL; - lock = rht_bucket_lock(tbl, hash); + bkt = rht_bucket_var(tbl, hash); + if (!bkt) + return -ENOENT; - spin_lock_bh(lock); + pprev = NULL; + rht_lock(tbl, bkt); - pprev = rht_bucket_var(tbl, hash); - rht_for_each_continue(he, 
*pprev, tbl, hash) { + rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) { if (he != obj_old) { pprev = &he->next; continue; } rcu_assign_pointer(obj_new->next, obj_old->next); - rcu_assign_pointer(*pprev, obj_new); + if (pprev) { + rcu_assign_pointer(*pprev, obj_new); + rht_unlock(tbl, bkt); + } else { + rht_assign_unlock(tbl, bkt, obj_new); + } err = 0; - break; + goto unlocked; } - spin_unlock_bh(lock); + rht_unlock(tbl, bkt); +unlocked: return err; } diff --git a/include/linux/siphash.h b/include/linux/siphash.h index fa7a6b9cedbf..bf21591a9e5e 100644 --- a/include/linux/siphash.h +++ b/include/linux/siphash.h @@ -21,6 +21,11 @@ typedef struct { u64 key[2]; } siphash_key_t; +static inline bool siphash_key_is_zero(const siphash_key_t *key) +{ + return !(key->key[0] | key->key[1]); +} + u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9027a8c4219f..6d58fa8a65fd 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -657,7 +657,6 @@ typedef unsigned char *sk_buff_data_t; * @tc_index: Traffic control index * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices - * @xmit_more: More SKBs are pending for this queue * @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves * @active_extensions: active extensions (skb_ext_id types) * @ndisc_nodetype: router type (from link layer) @@ -764,7 +763,6 @@ struct sk_buff { fclone:2, peeked:1, head_frag:1, - xmit_more:1, pfmemalloc:1; #ifdef CONFIG_SKB_EXTENSIONS __u8 active_extensions; @@ -1044,6 +1042,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags, int node); struct sk_buff *__build_skb(void *data, unsigned int frag_size); struct sk_buff *build_skb(void *data, unsigned int frag_size); +struct sk_buff *build_skb_around(struct sk_buff *skb, + void *data, unsigned int frag_size); /** * alloc_skb - allocate a network buffer @@ -1258,11 +1258,19 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, unsigned int key_count); #ifdef CONFIG_NET +int skb_flow_dissector_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr); int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr); #else +static inline int skb_flow_dissector_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + return -EOPNOTSUPP; +} + static inline int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) { @@ -1275,12 +1283,12 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) } #endif -struct bpf_flow_keys; -bool __skb_flow_bpf_dissect(struct bpf_prog *prog, - const struct sk_buff *skb, - struct flow_dissector *flow_dissector, - struct bpf_flow_keys *flow_keys); -bool __skb_flow_dissect(const struct sk_buff *skb, +struct bpf_flow_dissector; +bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, + __be16 proto, int nhoff, int hlen); + +bool __skb_flow_dissect(const struct net *net, + const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, void *data, __be16 proto, int nhoff, int hlen, @@ -1290,8 +1298,8 @@ static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void 
*target_container, unsigned int flags) { - return __skb_flow_dissect(skb, flow_dissector, target_container, - NULL, 0, 0, 0, flags); + return __skb_flow_dissect(NULL, skb, flow_dissector, + target_container, NULL, 0, 0, 0, flags); } static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, @@ -1299,18 +1307,19 @@ static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, unsigned int flags) { memset(flow, 0, sizeof(*flow)); - return __skb_flow_dissect(skb, &flow_keys_dissector, flow, - NULL, 0, 0, 0, flags); + return __skb_flow_dissect(NULL, skb, &flow_keys_dissector, + flow, NULL, 0, 0, 0, flags); } static inline bool -skb_flow_dissect_flow_keys_basic(const struct sk_buff *skb, +skb_flow_dissect_flow_keys_basic(const struct net *net, + const struct sk_buff *skb, struct flow_keys_basic *flow, void *data, __be16 proto, int nhoff, int hlen, unsigned int flags) { memset(flow, 0, sizeof(*flow)); - return __skb_flow_dissect(skb, &flow_keys_basic_dissector, flow, + return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow, data, proto, nhoff, hlen, flags); } @@ -2102,8 +2111,6 @@ void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, unsigned int truesize); -#define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags) -#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frag_list(skb)) #define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) #ifdef NET_SKBUFF_DATA_USES_OFFSET @@ -2490,7 +2497,8 @@ static inline void skb_probe_transport_header(struct sk_buff *skb) if (skb_transport_header_was_set(skb)) return; - if (skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0)) + if (skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, + NULL, 0, 0, 0, 0)) skb_set_transport_header(skb, keys.control.thoff); } @@ -3372,17 +3380,17 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, unsigned int flags, void (*destructor)(struct sock *sk, struct sk_buff *skb), - int *peeked, int *off, int *err, + int *off, int *err, struct sk_buff **last); struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags, void (*destructor)(struct sock *sk, struct sk_buff *skb), - int *peeked, int *off, int *err, + int *off, int *err, struct sk_buff **last); struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, void (*destructor)(struct sock *sk, struct sk_buff *skb), - int *peeked, int *off, int *err); + int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); __poll_t datagram_poll(struct file *file, struct socket *sock, diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 49ba9cde7e4b..b29950a19205 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -45,6 +45,7 @@ struct bpf_raw_event_map { struct tracepoint *tp; void *bpf_func; u32 num_args; + u32 writable_size; } __aligned(32); #endif diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 269ec27385e9..2f67ae854ff0 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -238,53 +238,6 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, void ipv6_mc_dad_complete(struct inet6_dev *idev); -/* A stub used by vxlan module. This is ugly, ideally these - * symbols should be built into the core kernel. 
- */ -struct ipv6_stub { - int (*ipv6_sock_mc_join)(struct sock *sk, int ifindex, - const struct in6_addr *addr); - int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex, - const struct in6_addr *addr); - int (*ipv6_dst_lookup)(struct net *net, struct sock *sk, - struct dst_entry **dst, struct flowi6 *fl6); - int (*ipv6_route_input)(struct sk_buff *skb); - - struct fib6_table *(*fib6_get_table)(struct net *net, u32 id); - struct fib6_info *(*fib6_lookup)(struct net *net, int oif, - struct flowi6 *fl6, int flags); - struct fib6_info *(*fib6_table_lookup)(struct net *net, - struct fib6_table *table, - int oif, struct flowi6 *fl6, - int flags); - struct fib6_info *(*fib6_multipath_select)(const struct net *net, - struct fib6_info *f6i, - struct flowi6 *fl6, int oif, - const struct sk_buff *skb, - int strict); - u32 (*ip6_mtu_from_fib6)(struct fib6_info *f6i, struct in6_addr *daddr, - struct in6_addr *saddr); - - void (*udpv6_encap_enable)(void); - void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr, - const struct in6_addr *solicited_addr, - bool router, bool solicited, bool override, bool inc_opt); - struct neigh_table *nd_tbl; -}; -extern const struct ipv6_stub *ipv6_stub __read_mostly; - -/* A stub used by bpf helpers. Similarly ugly as ipv6_stub */ -struct ipv6_bpf_stub { - int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len, - bool force_bind_address_no_port, bool with_lock); - struct sock *(*udp6_lib_lookup)(struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, __be16 dport, - int dif, int sdif, struct udp_table *tbl, - struct sk_buff *skb); -}; -extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; - /* * identify MLD packets for MLD filter exceptions */ @@ -425,6 +378,14 @@ static inline void in6_dev_hold(struct inet6_dev *idev) refcount_inc(&idev->refcnt); } +/* called with rcu_read_lock held */ +static inline bool ip6_ignore_linkdown(const struct net_device *dev) +{ + const struct inet6_dev *idev = __in6_dev_get(dev); + + return !!idev->cnf.ignore_routes_with_linkdown; +} + void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); static inline void in6_ifa_put(struct inet6_ifaddr *ifp) diff --git a/include/net/arp.h b/include/net/arp.h index 977aabfcdc03..c8f580a0e6b1 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -18,6 +18,7 @@ static inline u32 arp_hashfn(const void *pkey, const struct net_device *dev, u32 return val * hash_rnd[0]; } +#ifdef CONFIG_INET static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key) { if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) @@ -25,6 +26,13 @@ static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev return ___neigh_lookup_noref(&arp_tbl, neigh_key_eq32, arp_hashfn, &key, dev); } +#else +static inline +struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key) +{ + return NULL; +} +#endif static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 key) { diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index fbba43e9bef5..9a5330eed794 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -282,6 +282,7 @@ enum { HCI_FORCE_BREDR_SMP, HCI_FORCE_STATIC_ADDR, HCI_LL_RPA_RESOLUTION, + HCI_CMD_PENDING, __HCI_NUM_FLAGS, }; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 094e61e07030..05b1b96f4d9e 100644 --- a/include/net/bluetooth/hci_core.h +++ 
b/include/net/bluetooth/hci_core.h @@ -190,6 +190,9 @@ struct adv_info { #define HCI_MAX_SHORT_NAME_LENGTH 10 +/* Min encryption key size to match with SMP */ +#define HCI_MIN_ENC_KEY_SIZE 7 + /* Default LE RPA expiry time, 15 minutes */ #define HCI_DEFAULT_RPA_TIMEOUT (15 * 60) diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h new file mode 100644 index 000000000000..b9dcb02e756b --- /dev/null +++ b/include/net/bpf_sk_storage.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Facebook */ +#ifndef _BPF_SK_STORAGE_H +#define _BPF_SK_STORAGE_H + +struct sock; + +void bpf_sk_storage_free(struct sock *sk); + +extern const struct bpf_func_proto bpf_sk_storage_get_proto; +extern const struct bpf_func_proto bpf_sk_storage_delete_proto; + +#endif /* _BPF_SK_STORAGE_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 13bfeb712d36..87dae868707e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -485,6 +485,7 @@ struct vif_params { * with the get_key() callback, must be in little endian, * length given by @seq_len. * @seq_len: length of @seq. + * @mode: key install mode (RX_TX, NO_TX or SET_TX) */ struct key_params { const u8 *key; @@ -492,6 +493,7 @@ struct key_params { int key_len; int seq_len; u32 cipher; + enum nl80211_key_mode mode; }; /** @@ -973,6 +975,27 @@ enum station_parameters_apply_mask { STATION_PARAM_APPLY_UAPSD = BIT(0), STATION_PARAM_APPLY_CAPABILITY = BIT(1), STATION_PARAM_APPLY_PLINK_STATE = BIT(2), + STATION_PARAM_APPLY_STA_TXPOWER = BIT(3), +}; + +/** + * struct sta_txpwr - station txpower configuration + * + * Used to configure txpower for station. + * + * @power: tx power (in dBm) to be used for sending data traffic. If tx power + * is not provided, the default per-interface tx power setting will be + * overriding. Driver should be picking up the lowest tx power, either tx + * power per-interface or per-station. + * @type: In particular if TPC %type is NL80211_TX_POWER_LIMITED then tx power + * will be less than or equal to specified from userspace, whereas if TPC + * %type is NL80211_TX_POWER_AUTOMATIC then it indicates default tx power. + * NL80211_TX_POWER_FIXED is not a valid configuration option for + * per peer TPC. + */ +struct sta_txpwr { + s16 power; + enum nl80211_tx_power_setting type; }; /** @@ -1047,6 +1070,7 @@ struct station_parameters { const struct ieee80211_he_cap_elem *he_capa; u8 he_capa_len; u16 airtime_weight; + struct sta_txpwr txpwr; }; /** @@ -1327,6 +1351,7 @@ struct cfg80211_tid_stats { * @fcs_err_count: number of packets (MPDUs) received from this station with * an FCS error. This counter should be incremented only when TA of the * received packet with an FCS error matches the peer MAC address. + * @airtime_link_metric: mesh airtime link metric. */ struct station_info { u64 filled; @@ -1381,6 +1406,8 @@ struct station_info { u32 rx_mpdu_count; u32 fcs_err_count; + + u32 airtime_link_metric; }; #if IS_ENABLED(CONFIG_CFG80211) @@ -1832,11 +1859,19 @@ static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask) * @bssid: BSSID to be matched; may be all-zero BSSID in case of SSID match * or no match (RSSI only) * @rssi_thold: don't report scan results below this threshold (in s32 dBm) + * @per_band_rssi_thold: Minimum rssi threshold for each band to be applied + * for filtering out scan results received. 
Drivers advertize this support + * of band specific rssi based filtering through the feature capability + * %NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD. These band + * specific rssi thresholds take precedence over rssi_thold, if specified. + * If not specified for any band, it will be assigned with rssi_thold of + * corresponding matchset. */ struct cfg80211_match_set { struct cfg80211_ssid ssid; u8 bssid[ETH_ALEN]; s32 rssi_thold; + s32 per_band_rssi_thold[NUM_NL80211_BANDS]; }; /** @@ -3100,6 +3135,32 @@ struct cfg80211_pmsr_request { }; /** + * struct cfg80211_update_owe_info - OWE Information + * + * This structure provides information needed for the drivers to offload OWE + * (Opportunistic Wireless Encryption) processing to the user space. + * + * Commonly used across update_owe_info request and event interfaces. + * + * @peer: MAC address of the peer device for which the OWE processing + * has to be done. + * @status: status code, %WLAN_STATUS_SUCCESS for successful OWE info + * processing, use %WLAN_STATUS_UNSPECIFIED_FAILURE if user space + * cannot give you the real status code for failures. Used only for + * OWE update request command interface (user space to driver). + * @ie: IEs obtained from the peer or constructed by the user space. These are + * the IEs of the remote peer in the event from the host driver and + * the constructed IEs by the user space in the request interface. + * @ie_len: Length of IEs in octets. + */ +struct cfg80211_update_owe_info { + u8 peer[ETH_ALEN] __aligned(2); + u16 status; + const u8 *ie; + size_t ie_len; +}; + +/** * struct cfg80211_ops - backend description for wireless configuration * * This struct is registered by fullmac card drivers and/or wireless stacks @@ -3436,6 +3497,13 @@ struct cfg80211_pmsr_request { * Statistics should be cumulative, currently no way to reset is provided. * @start_pmsr: start peer measurement (e.g. FTM) * @abort_pmsr: abort peer measurement + * + * @update_owe_info: Provide updated OWE info to driver. Driver implementing SME + * but offloading OWE processing to the user space will get the updated + * DH IE through this interface. + * + * @probe_mesh_link: Probe direct Mesh peer's link quality by sending data frame + * and overrule HWMP path selection algorithm. 
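/*
 * A rough sketch of the driver side of the OWE offload interface described
 * above (function and variable names are hypothetical): the driver forwards
 * the peer's Diffie-Hellman IEs to user space, which later answers through
 * the update_owe_info callback.
 */
static void example_notify_owe(struct net_device *netdev, const u8 *peer,
			       const u8 *ie, size_t ie_len)
{
	struct cfg80211_update_owe_info owe_info = {};

	memcpy(owe_info.peer, peer, ETH_ALEN);
	owe_info.ie = ie;
	owe_info.ie_len = ie_len;

	/* Declared further down in this header. */
	cfg80211_update_owe_info_event(netdev, &owe_info, GFP_KERNEL);
}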
*/ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -3750,6 +3818,10 @@ struct cfg80211_ops { struct cfg80211_pmsr_request *request); void (*abort_pmsr)(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_pmsr_request *request); + int (*update_owe_info)(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_update_owe_info *owe_info); + int (*probe_mesh_link)(struct wiphy *wiphy, struct net_device *dev, + const u8 *buf, size_t len); }; /* @@ -5492,6 +5564,28 @@ static inline void cfg80211_gen_new_bssid(const u8 *bssid, u8 max_bssid, } /** + * cfg80211_is_element_inherited - returns if element ID should be inherited + * @element: element to check + * @non_inherit_element: non inheritance element + */ +bool cfg80211_is_element_inherited(const struct element *element, + const struct element *non_inherit_element); + +/** + * cfg80211_merge_profile - merges a MBSSID profile if it is split between IEs + * @ie: ies + * @ielen: length of IEs + * @mbssid_elem: current MBSSID element + * @sub_elem: current MBSSID subelement (profile) + * @merged_ie: location of the merged profile + * @max_copy_len: max merged profile length + */ +size_t cfg80211_merge_profile(const u8 *ie, size_t ielen, + const struct element *mbssid_elem, + const struct element *sub_elem, + u8 *merged_ie, size_t max_copy_len); + +/** * enum cfg80211_bss_frame_type - frame type that the BSS data came from * @CFG80211_BSS_FTYPE_UNKNOWN: driver doesn't know whether the data is * from a beacon or probe response @@ -7213,4 +7307,14 @@ void cfg80211_pmsr_complete(struct wireless_dev *wdev, #define wiphy_WARN(wiphy, format, args...) \ WARN(1, "wiphy: %s\n" format, wiphy_name(wiphy), ##args); +/** + * cfg80211_update_owe_info_event - Notify the peer's OWE info to user space + * @netdev: network device + * @owe_info: peer's owe info + * @gfp: allocation flags + */ +void cfg80211_update_owe_info_event(struct net_device *netdev, + struct cfg80211_update_owe_info *owe_info, + gfp_t gfp); + #endif /* __NET_CFG80211_H */ diff --git a/include/net/compat.h b/include/net/compat.h index 4c6d75612b6c..f277653c7e17 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -30,9 +30,6 @@ struct compat_cmsghdr { compat_int_t cmsg_type; }; -int compat_sock_get_timestamp(struct sock *, struct timeval __user *); -int compat_sock_get_timestampns(struct sock *, struct timespec __user *); - #else /* defined(CONFIG_COMPAT) */ /* * To avoid compiler warnings: diff --git a/include/net/devlink.h b/include/net/devlink.h index 63de99e09f04..1c4adfb4195a 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -16,6 +16,7 @@ #include <linux/gfp.h> #include <linux/list.h> #include <linux/netdevice.h> +#include <linux/spinlock.h> #include <net/net_namespace.h> #include <uapi/linux/devlink.h> @@ -31,6 +32,7 @@ struct devlink { struct list_head region_list; u32 snapshot_id; struct list_head reporter_list; + struct mutex reporters_lock; /* protects reporter_list */ struct devlink_dpipe_headers *dpipe_headers; const struct devlink_ops *ops; struct device *dev; @@ -40,11 +42,13 @@ struct devlink { }; struct devlink_port_attrs { - bool set; + u8 set:1, + split:1, + switch_port:1; enum devlink_port_flavour flavour; u32 port_number; /* same value as "split group" */ - bool split; u32 split_subport_number; + struct netdev_phys_item_id switch_id; }; struct devlink_port { @@ -53,6 +57,9 @@ struct devlink_port { struct devlink *devlink; unsigned index; bool registered; + spinlock_t type_lock; /* Protects 
type and type_dev + * pointer consistency. + */ enum devlink_port_type type; enum devlink_port_type desired_type; void *type_dev; @@ -485,13 +492,14 @@ struct devlink_ops { struct devlink_sb_pool_info *pool_info); int (*sb_pool_set)(struct devlink *devlink, unsigned int sb_index, u16 pool_index, u32 size, - enum devlink_sb_threshold_type threshold_type); + enum devlink_sb_threshold_type threshold_type, + struct netlink_ext_ack *extack); int (*sb_port_pool_get)(struct devlink_port *devlink_port, unsigned int sb_index, u16 pool_index, u32 *p_threshold); int (*sb_port_pool_set)(struct devlink_port *devlink_port, unsigned int sb_index, u16 pool_index, - u32 threshold); + u32 threshold, struct netlink_ext_ack *extack); int (*sb_tc_pool_bind_get)(struct devlink_port *devlink_port, unsigned int sb_index, u16 tc_index, @@ -501,7 +509,8 @@ struct devlink_ops { unsigned int sb_index, u16 tc_index, enum devlink_sb_pool_type pool_type, - u16 pool_index, u32 threshold); + u16 pool_index, u32 threshold, + struct netlink_ext_ack *extack); int (*sb_occ_snapshot)(struct devlink *devlink, unsigned int sb_index); int (*sb_occ_max_clear)(struct devlink *devlink, @@ -543,19 +552,25 @@ static inline struct devlink *priv_to_devlink(void *priv) return container_of(priv, struct devlink, priv); } +static inline struct devlink_port * +netdev_to_devlink_port(struct net_device *dev) +{ + if (dev->netdev_ops->ndo_get_devlink_port) + return dev->netdev_ops->ndo_get_devlink_port(dev); + return NULL; +} + static inline struct devlink *netdev_to_devlink(struct net_device *dev) { -#if IS_ENABLED(CONFIG_NET_DEVLINK) - if (dev->netdev_ops->ndo_get_devlink) - return dev->netdev_ops->ndo_get_devlink(dev); -#endif + struct devlink_port *devlink_port = netdev_to_devlink_port(dev); + + if (devlink_port) + return devlink_port->devlink; return NULL; } struct ib_device; -#if IS_ENABLED(CONFIG_NET_DEVLINK) - struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size); int devlink_register(struct devlink *devlink, struct device *dev); void devlink_unregister(struct devlink *devlink); @@ -572,9 +587,9 @@ void devlink_port_type_clear(struct devlink_port *devlink_port); void devlink_port_attrs_set(struct devlink_port *devlink_port, enum devlink_port_flavour flavour, u32 port_number, bool split, - u32 split_subport_number); -int devlink_port_get_phys_port_name(struct devlink_port *devlink_port, - char *name, size_t len); + u32 split_subport_number, + const unsigned char *switch_id, + unsigned char switch_id_len); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, @@ -724,510 +739,43 @@ void devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, enum devlink_health_reporter_state state); +#if IS_ENABLED(CONFIG_NET_DEVLINK) + void devlink_compat_running_version(struct net_device *dev, char *buf, size_t len); int devlink_compat_flash_update(struct net_device *dev, const char *file_name); +int devlink_compat_phys_port_name_get(struct net_device *dev, + char *name, size_t len); +int devlink_compat_switch_id_get(struct net_device *dev, + struct netdev_phys_item_id *ppid); #else -static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, - size_t priv_size) -{ - return kzalloc(sizeof(struct devlink) + priv_size, GFP_KERNEL); -} - -static inline int devlink_register(struct devlink *devlink, struct device *dev) -{ - return 0; -} - -static inline void devlink_unregister(struct devlink 
*devlink) -{ -} - -static inline void devlink_params_publish(struct devlink *devlink) -{ -} - -static inline void devlink_params_unpublish(struct devlink *devlink) -{ -} - -static inline void devlink_free(struct devlink *devlink) -{ - kfree(devlink); -} - -static inline int devlink_port_register(struct devlink *devlink, - struct devlink_port *devlink_port, - unsigned int port_index) -{ - return 0; -} - -static inline void devlink_port_unregister(struct devlink_port *devlink_port) -{ -} - -static inline void devlink_port_type_eth_set(struct devlink_port *devlink_port, - struct net_device *netdev) -{ -} - -static inline void devlink_port_type_ib_set(struct devlink_port *devlink_port, - struct ib_device *ibdev) -{ -} - -static inline void devlink_port_type_clear(struct devlink_port *devlink_port) -{ -} - -static inline void devlink_port_attrs_set(struct devlink_port *devlink_port, - enum devlink_port_flavour flavour, - u32 port_number, bool split, - u32 split_subport_number) -{ -} - -static inline int -devlink_port_get_phys_port_name(struct devlink_port *devlink_port, - char *name, size_t len) -{ - return -EOPNOTSUPP; -} - -static inline int devlink_sb_register(struct devlink *devlink, - unsigned int sb_index, u32 size, - u16 ingress_pools_count, - u16 egress_pools_count, - u16 ingress_tc_count, - u16 egress_tc_count) -{ - return 0; -} - -static inline void devlink_sb_unregister(struct devlink *devlink, - unsigned int sb_index) -{ -} - -static inline int -devlink_dpipe_table_register(struct devlink *devlink, - const char *table_name, - struct devlink_dpipe_table_ops *table_ops, - void *priv, bool counter_control_extern) -{ - return 0; -} - -static inline void devlink_dpipe_table_unregister(struct devlink *devlink, - const char *table_name) -{ -} - -static inline int devlink_dpipe_headers_register(struct devlink *devlink, - struct devlink_dpipe_headers * - dpipe_headers) -{ - return 0; -} - -static inline void devlink_dpipe_headers_unregister(struct devlink *devlink) -{ -} - -static inline bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, - const char *table_name) -{ - return false; -} - -static inline int -devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx) -{ - return 0; -} - -static inline int -devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx, - struct devlink_dpipe_entry *entry) -{ - return 0; -} - -static inline int -devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx) -{ - return 0; -} - -static inline void -devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry) -{ -} - -static inline int -devlink_dpipe_action_put(struct sk_buff *skb, - struct devlink_dpipe_action *action) -{ - return 0; -} - -static inline int -devlink_dpipe_match_put(struct sk_buff *skb, - struct devlink_dpipe_match *match) -{ - return 0; -} - -static inline int -devlink_resource_register(struct devlink *devlink, - const char *resource_name, - u64 resource_size, - u64 resource_id, - u64 parent_resource_id, - const struct devlink_resource_size_params *size_params) -{ - return 0; -} - static inline void -devlink_resources_unregister(struct devlink *devlink, - struct devlink_resource *resource) -{ -} - -static inline int -devlink_resource_size_get(struct devlink *devlink, u64 resource_id, - u64 *p_resource_size) -{ - return -EOPNOTSUPP; -} - -static inline int -devlink_dpipe_table_resource_set(struct devlink *devlink, - const char *table_name, u64 resource_id, - u64 resource_units) -{ - return -EOPNOTSUPP; -} - -static inline void 
-devlink_resource_occ_get_register(struct devlink *devlink, - u64 resource_id, - devlink_resource_occ_get_t *occ_get, - void *occ_get_priv) -{ -} - -static inline void -devlink_resource_occ_get_unregister(struct devlink *devlink, - u64 resource_id) -{ -} - -static inline int -devlink_params_register(struct devlink *devlink, - const struct devlink_param *params, - size_t params_count) -{ - return 0; -} - -static inline void -devlink_params_unregister(struct devlink *devlink, - const struct devlink_param *params, - size_t params_count) -{ - -} - -static inline int -devlink_port_params_register(struct devlink_port *devlink_port, - const struct devlink_param *params, - size_t params_count) -{ - return 0; -} - -static inline void -devlink_port_params_unregister(struct devlink_port *devlink_port, - const struct devlink_param *params, - size_t params_count) -{ -} - -static inline int -devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, - union devlink_param_value *init_val) +devlink_compat_running_version(struct net_device *dev, char *buf, size_t len) { - return -EOPNOTSUPP; } static inline int -devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, - union devlink_param_value init_val) +devlink_compat_flash_update(struct net_device *dev, const char *file_name) { return -EOPNOTSUPP; } static inline int -devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port, - u32 param_id, - union devlink_param_value *init_val) +devlink_compat_phys_port_name_get(struct net_device *dev, + char *name, size_t len) { return -EOPNOTSUPP; } static inline int -devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port, - u32 param_id, - union devlink_param_value init_val) +devlink_compat_switch_id_get(struct net_device *dev, + struct netdev_phys_item_id *ppid) { return -EOPNOTSUPP; } -static inline void -devlink_param_value_changed(struct devlink *devlink, u32 param_id) -{ -} - -static inline void -devlink_port_param_value_changed(struct devlink_port *devlink_port, - u32 param_id) -{ -} - -static inline void -devlink_param_value_str_fill(union devlink_param_value *dst_val, - const char *src) -{ -} - -static inline struct devlink_region * -devlink_region_create(struct devlink *devlink, - const char *region_name, - u32 region_max_snapshots, - u64 region_size) -{ - return NULL; -} - -static inline void -devlink_region_destroy(struct devlink_region *region) -{ -} - -static inline u32 -devlink_region_shapshot_id_get(struct devlink *devlink) -{ - return 0; -} - -static inline int -devlink_region_snapshot_create(struct devlink_region *region, u64 data_len, - u8 *data, u32 snapshot_id, - devlink_snapshot_data_dest_t *data_destructor) -{ - return 0; -} - -static inline int -devlink_info_driver_name_put(struct devlink_info_req *req, const char *name) -{ - return 0; -} - -static inline int -devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn) -{ - return 0; -} - -static inline int -devlink_info_version_fixed_put(struct devlink_info_req *req, - const char *version_name, - const char *version_value) -{ - return 0; -} - -static inline int -devlink_info_version_stored_put(struct devlink_info_req *req, - const char *version_name, - const char *version_value) -{ - return 0; -} - -static inline int -devlink_info_version_running_put(struct devlink_info_req *req, - const char *version_name, - const char *version_value) -{ - return 0; -} - -static inline int -devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg) -{ - return 0; -} - -static 
inline int -devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg) -{ - return 0; -} - -static inline int -devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name) -{ - return 0; -} - -static inline int -devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg) -{ - return 0; -} - -static inline int -devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, - const char *name) -{ - return 0; -} - -static inline int -devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg) -{ - return 0; -} - -static inline int -devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value) -{ - return 0; -} - -static inline int -devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value) -{ - return 0; -} - -static inline int -devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value) -{ - return 0; -} - -static inline int -devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value) -{ - return 0; -} - -static inline int -devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value) -{ - return 0; -} - -static inline int -devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, - u16 value_len) -{ - return 0; -} - -static inline int -devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, - bool value) -{ - return 0; -} - -static inline int -devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, - u8 value) -{ - return 0; -} - -static inline int -devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, - u32 value) -{ - return 0; -} - -static inline int -devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, - u64 value) -{ - return 0; -} - -static inline int -devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, - const char *value) -{ - return 0; -} - -static inline int -devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, - const void *value, u16 value_len) -{ - return 0; -} - -static inline struct devlink_health_reporter * -devlink_health_reporter_create(struct devlink *devlink, - const struct devlink_health_reporter_ops *ops, - u64 graceful_period, bool auto_recover, - void *priv) -{ - return NULL; -} - -static inline void -devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) -{ -} - -static inline void * -devlink_health_reporter_priv(struct devlink_health_reporter *reporter) -{ - return NULL; -} - -static inline int -devlink_health_report(struct devlink_health_reporter *reporter, - const char *msg, void *priv_ctx) -{ - return 0; -} - -static inline void -devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, - enum devlink_health_reporter_state state) -{ -} - -static inline void -devlink_compat_running_version(struct net_device *dev, char *buf, size_t len) -{ -} - -static inline int -devlink_compat_flash_update(struct net_device *dev, const char *file_name) -{ - return -EOPNOTSUPP; -} #endif #endif /* _NET_DEVLINK_H_ */ diff --git a/include/net/dsa.h b/include/net/dsa.h index ae480bba11f5..6aaaadd6a413 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -30,20 +30,36 @@ struct phy_device; struct fixed_phy_status; struct phylink_link_state; +#define DSA_TAG_PROTO_NONE_VALUE 0 +#define DSA_TAG_PROTO_BRCM_VALUE 1 +#define DSA_TAG_PROTO_BRCM_PREPEND_VALUE 2 +#define DSA_TAG_PROTO_DSA_VALUE 3 +#define DSA_TAG_PROTO_EDSA_VALUE 4 +#define DSA_TAG_PROTO_GSWIP_VALUE 5 +#define DSA_TAG_PROTO_KSZ9477_VALUE 6 +#define DSA_TAG_PROTO_KSZ9893_VALUE 7 +#define DSA_TAG_PROTO_LAN9303_VALUE 8 +#define DSA_TAG_PROTO_MTK_VALUE 9 +#define 
DSA_TAG_PROTO_QCA_VALUE 10 +#define DSA_TAG_PROTO_TRAILER_VALUE 11 +#define DSA_TAG_PROTO_8021Q_VALUE 12 +#define DSA_TAG_PROTO_SJA1105_VALUE 13 + enum dsa_tag_protocol { - DSA_TAG_PROTO_NONE = 0, - DSA_TAG_PROTO_BRCM, - DSA_TAG_PROTO_BRCM_PREPEND, - DSA_TAG_PROTO_DSA, - DSA_TAG_PROTO_EDSA, - DSA_TAG_PROTO_GSWIP, - DSA_TAG_PROTO_KSZ9477, - DSA_TAG_PROTO_KSZ9893, - DSA_TAG_PROTO_LAN9303, - DSA_TAG_PROTO_MTK, - DSA_TAG_PROTO_QCA, - DSA_TAG_PROTO_TRAILER, - DSA_TAG_LAST, /* MUST BE LAST */ + DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, + DSA_TAG_PROTO_BRCM = DSA_TAG_PROTO_BRCM_VALUE, + DSA_TAG_PROTO_BRCM_PREPEND = DSA_TAG_PROTO_BRCM_PREPEND_VALUE, + DSA_TAG_PROTO_DSA = DSA_TAG_PROTO_DSA_VALUE, + DSA_TAG_PROTO_EDSA = DSA_TAG_PROTO_EDSA_VALUE, + DSA_TAG_PROTO_GSWIP = DSA_TAG_PROTO_GSWIP_VALUE, + DSA_TAG_PROTO_KSZ9477 = DSA_TAG_PROTO_KSZ9477_VALUE, + DSA_TAG_PROTO_KSZ9893 = DSA_TAG_PROTO_KSZ9893_VALUE, + DSA_TAG_PROTO_LAN9303 = DSA_TAG_PROTO_LAN9303_VALUE, + DSA_TAG_PROTO_MTK = DSA_TAG_PROTO_MTK_VALUE, + DSA_TAG_PROTO_QCA = DSA_TAG_PROTO_QCA_VALUE, + DSA_TAG_PROTO_TRAILER = DSA_TAG_PROTO_TRAILER_VALUE, + DSA_TAG_PROTO_8021Q = DSA_TAG_PROTO_8021Q_VALUE, + DSA_TAG_PROTO_SJA1105 = DSA_TAG_PROTO_SJA1105_VALUE, }; struct packet_type; @@ -55,9 +71,52 @@ struct dsa_device_ops { struct packet_type *pt); int (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, int *offset); + /* Used to determine which traffic should match the DSA filter in + * eth_type_trans, and which, if any, should bypass it and be processed + * as regular on the master net device. + */ + bool (*filter)(const struct sk_buff *skb, struct net_device *dev); unsigned int overhead; + const char *name; + enum dsa_tag_protocol proto; +}; + +#define DSA_TAG_DRIVER_ALIAS "dsa_tag-" +#define MODULE_ALIAS_DSA_TAG_DRIVER(__proto) \ + MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS __stringify(__proto##_VALUE)) + +struct dsa_skb_cb { + struct sk_buff *clone; + bool deferred_xmit; +}; + +struct __dsa_skb_cb { + struct dsa_skb_cb cb; + u8 priv[48 - sizeof(struct dsa_skb_cb)]; }; +#define __DSA_SKB_CB(skb) ((struct __dsa_skb_cb *)((skb)->cb)) + +#define DSA_SKB_CB(skb) ((struct dsa_skb_cb *)((skb)->cb)) + +#define DSA_SKB_CB_COPY(nskb, skb) \ + { *__DSA_SKB_CB(nskb) = *__DSA_SKB_CB(skb); } + +#define DSA_SKB_CB_ZERO(skb) \ + { *__DSA_SKB_CB(skb) = (struct __dsa_skb_cb) {0}; } + +#define DSA_SKB_CB_PRIV(skb) \ + ((void *)(skb)->cb + offsetof(struct __dsa_skb_cb, priv)) + +#define DSA_SKB_CB_CLONE(_clone, _skb) \ + { \ + struct sk_buff *clone = _clone; \ + struct sk_buff *skb = _skb; \ + \ + DSA_SKB_CB_COPY(clone, skb); \ + DSA_SKB_CB(skb)->clone = clone; \ + } + struct dsa_switch_tree { struct list_head list; @@ -128,6 +187,7 @@ struct dsa_port { struct dsa_switch_tree *dst; struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt); + bool (*filter)(const struct sk_buff *skb, struct net_device *dev); enum { DSA_PORT_TYPE_UNUSED = 0, @@ -140,12 +200,24 @@ struct dsa_port { unsigned int index; const char *name; const struct dsa_port *cpu_dp; + const char *mac; struct device_node *dn; unsigned int ageing_time; + bool vlan_filtering; u8 stp_state; struct net_device *bridge_dev; struct devlink_port devlink_port; struct phylink *pl; + + struct work_struct xmit_work; + struct sk_buff_head xmit_queue; + + /* + * Give the switch driver somewhere to hang its per-port private data + * structures (accessible from the tagger). 
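The dsa_skb_cb control block introduced here is what lets a switch driver associate a cloned frame (for instance a PTP message waiting for a hardware TX timestamp) with the original skb. A minimal tagger-side sketch, with the wrapper function invented for illustration; only the dsa.h macros are taken from the header:

#include <linux/skbuff.h>
#include <net/dsa.h>

static void example_remember_clone(struct sk_buff *skb)
{
	struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);

	if (!clone)
		return;

	/* Copy the original's DSA control block into the clone... */
	DSA_SKB_CB_COPY(clone, skb);
	/* ...and record the clone in the original, so the switch driver can
	 * find it again later, e.g. to attach a hardware TX timestamp before
	 * completing it.
	 */
	DSA_SKB_CB(skb)->clone = clone;
}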
+ */ + void *priv; + /* * Original copy of the master netdev ethtool_ops */ @@ -208,6 +280,16 @@ struct dsa_switch { /* Number of switch port queues */ unsigned int num_tx_queues; + /* Disallow bridge core from requesting different VLAN awareness + * settings on ports if not hardware-supported + */ + bool vlan_filtering_is_global; + + /* In case vlan_filtering_is_global is set, the VLAN awareness state + * should be retrieved from here and not from the per-port settings. + */ + bool vlan_filtering; + unsigned long *bitmap; unsigned long _bitmap; @@ -275,18 +357,19 @@ static inline unsigned int dsa_upstream_port(struct dsa_switch *ds, int port) return dsa_towards_port(ds, cpu_dp->ds->index, cpu_dp->index); } +static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp) +{ + const struct dsa_switch *ds = dp->ds; + + if (ds->vlan_filtering_is_global) + return ds->vlan_filtering; + else + return dp->vlan_filtering; +} + typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, bool is_static, void *data); struct dsa_switch_ops { -#if IS_ENABLED(CONFIG_NET_DSA_LEGACY) - /* - * Legacy probing. - */ - const char *(*probe)(struct device *dsa_dev, - struct device *host_dev, int sw_addr, - void **priv); -#endif - enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds, int port); @@ -469,6 +552,12 @@ struct dsa_switch_ops { struct sk_buff *clone, unsigned int type); bool (*port_rxtstamp)(struct dsa_switch *ds, int port, struct sk_buff *skb, unsigned int type); + + /* + * Deferred frame Tx + */ + netdev_tx_t (*port_deferred_xmit)(struct dsa_switch *ds, int port, + struct sk_buff *skb); }; struct dsa_switch_driver { @@ -476,20 +565,6 @@ struct dsa_switch_driver { const struct dsa_switch_ops *ops; }; -#if IS_ENABLED(CONFIG_NET_DSA_LEGACY) -/* Legacy driver registration */ -void register_switch_driver(struct dsa_switch_driver *type); -void unregister_switch_driver(struct dsa_switch_driver *type); -struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev); - -#else -static inline void register_switch_driver(struct dsa_switch_driver *type) { } -static inline void unregister_switch_driver(struct dsa_switch_driver *type) { } -static inline struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev) -{ - return NULL; -} -#endif struct net_device *dsa_dev_to_net_device(struct device *dev); /* Keep inline for faster access in hot path */ @@ -501,6 +576,15 @@ static inline bool netdev_uses_dsa(struct net_device *dev) return false; } +static inline bool dsa_can_decode(const struct sk_buff *skb, + struct net_device *dev) +{ +#if IS_ENABLED(CONFIG_NET_DSA) + return !dev->dsa_ptr->filter || dev->dsa_ptr->filter(skb, dev); +#endif + return false; +} + struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n); void dsa_unregister_switch(struct dsa_switch *ds); int dsa_register_switch(struct dsa_switch *ds); @@ -569,9 +653,76 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev, #define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff) +netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev); int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data); int dsa_port_get_ethtool_phy_stats(struct dsa_port *dp, uint64_t *data); int dsa_port_get_phy_sset_count(struct dsa_port *dp); void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up); +struct dsa_tag_driver { + const struct dsa_device_ops *ops; + struct list_head list; + struct module *owner; +}; + +void dsa_tag_drivers_register(struct dsa_tag_driver *dsa_tag_driver_array[], + 
unsigned int count, + struct module *owner); +void dsa_tag_drivers_unregister(struct dsa_tag_driver *dsa_tag_driver_array[], + unsigned int count); + +#define dsa_tag_driver_module_drivers(__dsa_tag_drivers_array, __count) \ +static int __init dsa_tag_driver_module_init(void) \ +{ \ + dsa_tag_drivers_register(__dsa_tag_drivers_array, __count, \ + THIS_MODULE); \ + return 0; \ +} \ +module_init(dsa_tag_driver_module_init); \ + \ +static void __exit dsa_tag_driver_module_exit(void) \ +{ \ + dsa_tag_drivers_unregister(__dsa_tag_drivers_array, __count); \ +} \ +module_exit(dsa_tag_driver_module_exit) + +/** + * module_dsa_tag_drivers() - Helper macro for registering DSA tag + * drivers + * @__ops_array: Array of tag driver structures + * + * Helper macro for DSA tag drivers which do not do anything special + * in module init/exit. Each module may only use this macro once, and + * calling it replaces module_init() and module_exit(). + */ +#define module_dsa_tag_drivers(__ops_array) \ +dsa_tag_driver_module_drivers(__ops_array, ARRAY_SIZE(__ops_array)) + +#define DSA_TAG_DRIVER_NAME(__ops) dsa_tag_driver ## _ ## __ops + +/* Create a static structure we can build a linked list of dsa_tag + * drivers + */ +#define DSA_TAG_DRIVER(__ops) \ +static struct dsa_tag_driver DSA_TAG_DRIVER_NAME(__ops) = { \ + .ops = &__ops, \ +} + +/** + * module_dsa_tag_driver() - Helper macro for registering a single DSA tag + * driver + * @__ops: Single tag driver structures + * + * Helper macro for DSA tag drivers which do not do anything special + * in module init/exit. Each module may only use this macro once, and + * calling it replaces module_init() and module_exit(). + */ +#define module_dsa_tag_driver(__ops) \ +DSA_TAG_DRIVER(__ops); \ + \ +static struct dsa_tag_driver *dsa_tag_driver_array[] = { \ + &DSA_TAG_DRIVER_NAME(__ops) \ +}; \ +module_dsa_tag_drivers(dsa_tag_driver_array) #endif + diff --git a/include/net/dst.h b/include/net/dst.h index 6cf0870414c7..12b31c602cb0 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -19,17 +19,6 @@ #include <net/neighbour.h> #include <asm/processor.h> -#define DST_GC_MIN (HZ/10) -#define DST_GC_INC (HZ/2) -#define DST_GC_MAX (120*HZ) - -/* Each dst_entry has reference count and sits in some parent list(s). - * When it is removed from parent list, it is "freed" (dst_free). - * After this it enters dead state (dst->obsolete > 0) and if its refcnt - * is zero, it can be destroyed immediately, otherwise it is added - to gc list and garbage collector periodically checks the refcnt. 
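Taken together, the dsa.h registration macros above reduce a tagging-protocol module to little more than its dsa_device_ops. A rough sketch of a hypothetical tagger using them; the ops, callback bodies and overhead are invented, only the dsa.h symbols are real, and an existing protocol value is reused purely for illustration:

#include <linux/module.h>
#include <linux/netdevice.h>
#include <net/dsa.h>

static struct sk_buff *example_xmit(struct sk_buff *skb,
				    struct net_device *dev)
{
	return skb;	/* a real tagger would insert its header here */
}

static struct sk_buff *example_rcv(struct sk_buff *skb, struct net_device *dev,
				   struct packet_type *pt)
{
	return skb;	/* a real tagger would strip the tag and set skb->dev */
}

static const struct dsa_device_ops example_netdev_ops = {
	.name		= "example",
	.proto		= DSA_TAG_PROTO_TRAILER,	/* reused for the sketch */
	.xmit		= example_xmit,
	.rcv		= example_rcv,
	.overhead	= 4,
};

MODULE_LICENSE("GPL");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_TRAILER);

/* Expands to a static struct dsa_tag_driver plus module_init/module_exit
 * that call dsa_tag_drivers_register()/_unregister() with THIS_MODULE.
 */
module_dsa_tag_driver(example_netdev_ops);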
- */ - struct sk_buff; struct dst_entry { diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h index c91ec732afd6..c49d7bfb5c30 100644 --- a/include/net/fib_notifier.h +++ b/include/net/fib_notifier.h @@ -2,10 +2,11 @@ #define __NET_FIB_NOTIFIER_H #include <linux/types.h> -#include <linux/module.h> #include <linux/notifier.h> #include <net/net_namespace.h> +struct module; + struct fib_notifier_info { struct net *net; int family; diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 2b26979efb48..7c5a8d9a8d2a 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -305,4 +305,11 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec return ((char *)target_container) + flow_dissector->offset[key_id]; } +struct bpf_flow_dissector { + struct bpf_flow_keys *flow_keys; + const struct sk_buff *skb; + void *data; + void *data_end; +}; + #endif diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index d035183c8d03..6200900434e1 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -118,6 +118,8 @@ enum flow_action_id { FLOW_ACTION_MARK, FLOW_ACTION_WAKE, FLOW_ACTION_QUEUE, + FLOW_ACTION_SAMPLE, + FLOW_ACTION_POLICE, }; /* This is mirroring enum pedit_header_type definition for easy mapping between @@ -157,6 +159,16 @@ struct flow_action_entry { u32 index; u8 vf; } queue; + struct { /* FLOW_ACTION_SAMPLE */ + struct psample_group *psample_group; + u32 rate; + u32 trunc_size; + bool truncate; + } sample; + struct { /* FLOW_ACTION_POLICE */ + s64 burst; + u64 rate_bytes_ps; + } police; }; }; @@ -170,6 +182,17 @@ static inline bool flow_action_has_entries(const struct flow_action *action) { return action->num_entries; } +/** + * flow_offload_has_one_action() - check if exactly one action is present + * @action: tc filter flow offload action + * + * Returns true if exactly one action is present. 
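A driver consuming the new FLOW_ACTION_SAMPLE and FLOW_ACTION_POLICE entries would typically walk the action array with flow_action_for_each() (defined just below) and reject anything it cannot offload. A hedged sketch; the function name and error policy are invented, the fields come from the struct above:

#include <linux/errno.h>
#include <net/flow_offload.h>

static int example_parse_actions(const struct flow_action *action)
{
	const struct flow_action_entry *act;
	int i;

	if (!flow_action_has_entries(action))
		return -EINVAL;

	flow_action_for_each(i, act, action) {
		switch (act->id) {
		case FLOW_ACTION_SAMPLE:
			/* act->sample.psample_group, .rate, .trunc_size and
			 * .truncate describe the sampling request
			 */
			break;
		case FLOW_ACTION_POLICE:
			/* act->police.rate_bytes_ps and .burst carry the
			 * policer configuration
			 */
			break;
		default:
			return -EOPNOTSUPP;
		}
	}

	return 0;
}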
+ */ +static inline bool flow_offload_has_one_action(const struct flow_action *action) +{ + return action->num_entries == 1; +} + #define flow_action_for_each(__i, __act, __actions) \ for (__i = 0, __act = &(__actions)->entries[0]; __i < (__actions)->num_entries; __act = &(__actions)->entries[++__i]) diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index be7c0fab3478..2caa86660ab0 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -107,21 +107,23 @@ begin: return skb; } +static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb) +{ + u32 hash = skb_get_hash_perturb(skb, fq->perturbation); + + return reciprocal_scale(hash, fq->flows_cnt); +} + static struct fq_flow *fq_flow_classify(struct fq *fq, - struct fq_tin *tin, + struct fq_tin *tin, u32 idx, struct sk_buff *skb, fq_flow_get_default_t get_default_func) { struct fq_flow *flow; - u32 hash; - u32 idx; lockdep_assert_held(&fq->lock); - hash = skb_get_hash_perturb(skb, fq->perturbation); - idx = reciprocal_scale(hash, fq->flows_cnt); flow = &fq->flows[idx]; - if (flow->tin && flow->tin != tin) { flow = get_default_func(fq, tin, idx, skb); tin->collisions++; @@ -153,7 +155,7 @@ static void fq_recalc_backlog(struct fq *fq, } static void fq_tin_enqueue(struct fq *fq, - struct fq_tin *tin, + struct fq_tin *tin, u32 idx, struct sk_buff *skb, fq_skb_free_t free_func, fq_flow_get_default_t get_default_func) @@ -163,7 +165,7 @@ static void fq_tin_enqueue(struct fq *fq, lockdep_assert_held(&fq->lock); - flow = fq_flow_classify(fq, tin, skb, get_default_func); + flow = fq_flow_classify(fq, tin, idx, skb, get_default_func); flow->tin = tin; flow->backlog += skb->len; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index aa2e5888f18d..9292f1c588b7 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -26,6 +26,7 @@ struct genl_info; * @name: name of family * @version: protocol version * @maxattr: maximum number of attributes supported + * @policy: netlink policy * @netnsok: set to true if the family can handle network * namespaces and should be presented in all of them * @parallel_ops: operations can be called in parallel and aren't @@ -56,6 +57,7 @@ struct genl_family { unsigned int maxattr; bool netnsok; bool parallel_ops; + const struct nla_policy *policy; int (*pre_doit)(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info); @@ -119,19 +121,23 @@ static inline int genl_err_attr(struct genl_info *info, int err, return err; } +enum genl_validate_flags { + GENL_DONT_VALIDATE_STRICT = BIT(0), + GENL_DONT_VALIDATE_DUMP = BIT(1), + GENL_DONT_VALIDATE_DUMP_STRICT = BIT(2), +}; + /** * struct genl_ops - generic netlink operations * @cmd: command identifier * @internal_flags: flags used by the family * @flags: flags - * @policy: attribute validation policy * @doit: standard command callback * @start: start callback for dumps * @dumpit: callback for dumpers * @done: completion callback for dumps */ struct genl_ops { - const struct nla_policy *policy; int (*doit)(struct sk_buff *skb, struct genl_info *info); int (*start)(struct netlink_callback *cb); @@ -141,6 +147,7 @@ struct genl_ops { u8 cmd; u8 internal_flags; u8 flags; + u8 validate; }; int genl_register_family(struct genl_family *family); @@ -165,6 +172,25 @@ static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr) } /** + * genlmsg_parse_deprecated - parse attributes of a genetlink message + * @nlh: netlink message header + * @family: genetlink message family + * @tb: destination array with maxtype+1 elements + * @maxtype: 
maximum attribute type to be expected + * @policy: validation policy + * @extack: extended ACK report struct + */ +static inline int genlmsg_parse_deprecated(const struct nlmsghdr *nlh, + const struct genl_family *family, + struct nlattr *tb[], int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nlmsg_parse(nlh, family->hdrsize + GENL_HDRLEN, tb, maxtype, + policy, NL_VALIDATE_LIBERAL, extack); +} + +/** * genlmsg_parse - parse attributes of a genetlink message * @nlh: netlink message header * @family: genetlink message family @@ -179,8 +205,8 @@ static inline int genlmsg_parse(const struct nlmsghdr *nlh, const struct nla_policy *policy, struct netlink_ext_ack *extack) { - return nlmsg_parse(nlh, family->hdrsize + GENL_HDRLEN, tb, maxtype, - policy, extack); + return __nlmsg_parse(nlh, family->hdrsize + GENL_HDRLEN, tb, maxtype, + policy, NL_VALIDATE_STRICT, extack); } /** diff --git a/include/net/geneve.h b/include/net/geneve.h index fc6a7e0a874a..bced0b1d9fe4 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -4,6 +4,8 @@ #include <net/udp_tunnel.h> +#define GENEVE_UDP_PORT 6081 + /* Geneve Header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |Ver| Opt Len |O|C| Rsvd. | Protocol Type | diff --git a/include/net/ife.h b/include/net/ife.h index e117617e3c34..7e2538d8585b 100644 --- a/include/net/ife.h +++ b/include/net/ife.h @@ -4,7 +4,6 @@ #include <linux/etherdevice.h> #include <linux/rtnetlink.h> -#include <linux/module.h> #include <uapi/linux/ife.h> #if IS_ENABLED(CONFIG_NET_IFE) diff --git a/include/net/ip.h b/include/net/ip.h index 583526aad1d0..2d3cce7c3e8a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -38,6 +38,10 @@ #define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ #define IPV4_MIN_MTU 68 /* RFC 791 */ +extern unsigned int sysctl_fib_sync_mem; +extern unsigned int sysctl_fib_sync_mem_min; +extern unsigned int sysctl_fib_sync_mem_max; + struct sock; struct inet_skb_parm { diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 84097010237c..40105738e2f6 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -19,6 +19,7 @@ #include <linux/notifier.h> #include <net/dst.h> #include <net/flow.h> +#include <net/ip_fib.h> #include <net/netlink.h> #include <net/inetpeer.h> #include <net/fib_notifier.h> @@ -50,7 +51,8 @@ struct fib6_config { u32 fc_protocol; u16 fc_type; /* only 8 bits are used */ u16 fc_delete_all_nh : 1, - __unused : 15; + fc_ignore_dev_down:1, + __unused : 14; struct in6_addr fc_dst; struct in6_addr fc_src; @@ -124,13 +126,11 @@ struct rt6_exception { #define FIB6_MAX_DEPTH 5 struct fib6_nh { - struct in6_addr nh_gw; - struct net_device *nh_dev; - struct lwtunnel_state *nh_lwtstate; + struct fib_nh_common nh_common; - unsigned int nh_flags; - atomic_t nh_upper_bound; - int nh_weight; +#ifdef CONFIG_IPV6_ROUTER_PREF + unsigned long last_probe; +#endif }; struct fib6_info { @@ -146,7 +146,7 @@ struct fib6_info { struct list_head fib6_siblings; unsigned int fib6_nsiblings; - atomic_t fib6_ref; + refcount_t fib6_ref; unsigned long expires; struct dst_metrics *fib6_metrics; #define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1] @@ -159,10 +159,6 @@ struct fib6_info { struct rt6_info * __percpu *rt6i_pcpu; struct rt6_exception_bucket __rcu *rt6i_exception_bucket; -#ifdef CONFIG_IPV6_ROUTER_PREF - unsigned long last_probe; -#endif - u32 fib6_metric; u8 fib6_protocol; u8 fib6_type; @@ -194,6 +190,14 @@ struct rt6_info { unsigned short rt6i_nfheader_len; }; +struct 
fib6_result { + struct fib6_nh *nh; + struct fib6_info *f6i; + u32 fib6_flags; + u8 fib6_type; + struct rt6_info *rt6; +}; + #define for_each_fib6_node_rt_rcu(fn) \ for (rt = rcu_dereference((fn)->leaf); rt; \ rt = rcu_dereference(rt->fib6_next)) @@ -281,17 +285,17 @@ void fib6_info_destroy_rcu(struct rcu_head *head); static inline void fib6_info_hold(struct fib6_info *f6i) { - atomic_inc(&f6i->fib6_ref); + refcount_inc(&f6i->fib6_ref); } static inline bool fib6_info_hold_safe(struct fib6_info *f6i) { - return atomic_inc_not_zero(&f6i->fib6_ref); + return refcount_inc_not_zero(&f6i->fib6_ref); } static inline void fib6_info_release(struct fib6_info *f6i) { - if (f6i && atomic_dec_and_test(&f6i->fib6_ref)) + if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) call_rcu(&f6i->rcu, fib6_info_destroy_rcu); } @@ -388,18 +392,17 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, /* called with rcu lock held; can return error pointer * caller needs to select path */ -struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, - int flags); +int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, + struct fib6_result *res, int flags); /* called with rcu lock held; caller needs to select path */ -struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table, - int oif, struct flowi6 *fl6, int strict); - -struct fib6_info *fib6_multipath_select(const struct net *net, - struct fib6_info *match, - struct flowi6 *fl6, int oif, - const struct sk_buff *skb, int strict); +int fib6_table_lookup(struct net *net, struct fib6_table *table, + int oif, struct flowi6 *fl6, struct fib6_result *res, + int strict); +void fib6_select_path(const struct net *net, struct fib6_result *res, + struct flowi6 *fl6, int oif, bool have_oif_match, + const struct sk_buff *skb, int strict); struct fib6_node *fib6_node_lookup(struct fib6_node *root, const struct in6_addr *daddr, const struct in6_addr *saddr); @@ -440,14 +443,13 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr) static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i) { - return f6i->fib6_nh.nh_dev; + return f6i->fib6_nh.fib_nh_dev; } -static inline -struct lwtunnel_state *fib6_info_nh_lwt(const struct fib6_info *f6i) -{ - return f6i->fib6_nh.nh_lwtstate; -} +int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, + struct fib6_config *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack); +void fib6_nh_release(struct fib6_nh *fib6_nh); void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int flags); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 7ab119936e69..4790beaa86e0 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -68,8 +68,9 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i) { - return (f6i->fib6_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == - RTF_GATEWAY; + /* the RTF_ADDRCONF flag filters out RA's */ + return !(f6i->fib6_flags & RTF_ADDRCONF) && + f6i->fib6_nh.fib_nh_gw_family; } void ip6_route_input(struct sk_buff *skb); @@ -181,7 +182,7 @@ int rt6_dump_route(struct fib6_info *f6i, void *p_arg); void rt6_mtu_change(struct net_device *dev, unsigned int mtu); void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); -void rt6_sync_up(struct net_device *dev, unsigned int nh_flags); +void rt6_sync_up(struct net_device *dev, 
unsigned char nh_flags); void rt6_disable_ip(struct net_device *dev, unsigned long event); void rt6_sync_down_dev(struct net_device *dev, unsigned long event); void rt6_multipath_rebalance(struct fib6_info *f6i); @@ -274,9 +275,11 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b) { - return a->fib6_nh.nh_dev == b->fib6_nh.nh_dev && - ipv6_addr_equal(&a->fib6_nh.nh_gw, &b->fib6_nh.nh_gw) && - !lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate); + struct fib6_nh *nha = &a->fib6_nh, *nhb = &b->fib6_nh; + + return nha->fib_nh_dev == nhb->fib_nh_dev && + ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) && + !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws); } static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) @@ -300,8 +303,9 @@ static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) return mtu; } -u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr, - struct in6_addr *saddr); +u32 ip6_mtu_from_fib6(const struct fib6_result *res, + const struct in6_addr *daddr, + const struct in6_addr *saddr); struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, struct net_device *dev, struct sk_buff *skb, diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 9c8214d2116d..d0e28f4ab099 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -32,10 +32,14 @@ struct fib_config { u8 fc_protocol; u8 fc_scope; u8 fc_type; - /* 3 bytes unused */ + u8 fc_gw_family; + /* 2 bytes unused */ u32 fc_table; __be32 fc_dst; - __be32 fc_gw; + union { + __be32 fc_gw4; + struct in6_addr fc_gw6; + }; int fc_oif; u32 fc_flags; u32 fc_priority; @@ -76,27 +80,49 @@ struct fnhe_hash_bucket { #define FNHE_HASH_SIZE (1 << FNHE_HASH_SHIFT) #define FNHE_RECLAIM_DEPTH 5 +struct fib_nh_common { + struct net_device *nhc_dev; + int nhc_oif; + unsigned char nhc_scope; + u8 nhc_family; + u8 nhc_gw_family; + unsigned char nhc_flags; + struct lwtunnel_state *nhc_lwtstate; + + union { + __be32 ipv4; + struct in6_addr ipv6; + } nhc_gw; + + int nhc_weight; + atomic_t nhc_upper_bound; + + /* v4 specific, but allows fib6_nh with v4 routes */ + struct rtable __rcu * __percpu *nhc_pcpu_rth_output; + struct rtable __rcu *nhc_rth_input; + struct fnhe_hash_bucket __rcu *nhc_exceptions; +}; + struct fib_nh { - struct net_device *nh_dev; + struct fib_nh_common nh_common; struct hlist_node nh_hash; struct fib_info *nh_parent; - unsigned int nh_flags; - unsigned char nh_scope; -#ifdef CONFIG_IP_ROUTE_MULTIPATH - int nh_weight; - atomic_t nh_upper_bound; -#endif #ifdef CONFIG_IP_ROUTE_CLASSID __u32 nh_tclassid; #endif - int nh_oif; - __be32 nh_gw; __be32 nh_saddr; int nh_saddr_genid; - struct rtable __rcu * __percpu *nh_pcpu_rth_output; - struct rtable __rcu *nh_rth_input; - struct fnhe_hash_bucket __rcu *nh_exceptions; - struct lwtunnel_state *nh_lwtstate; +#define fib_nh_family nh_common.nhc_family +#define fib_nh_dev nh_common.nhc_dev +#define fib_nh_oif nh_common.nhc_oif +#define fib_nh_flags nh_common.nhc_flags +#define fib_nh_lws nh_common.nhc_lwtstate +#define fib_nh_scope nh_common.nhc_scope +#define fib_nh_gw_family nh_common.nhc_gw_family +#define fib_nh_gw4 nh_common.nhc_gw.ipv4 +#define fib_nh_gw6 nh_common.nhc_gw.ipv6 +#define fib_nh_weight nh_common.nhc_weight +#define fib_nh_upper_bound nh_common.nhc_upper_bound }; /* @@ -123,9 +149,10 @@ struct fib_info { #define fib_rtt fib_metrics->metrics[RTAX_RTT-1] #define fib_advmss 
fib_metrics->metrics[RTAX_ADVMSS-1] int fib_nhs; + bool fib_nh_is_v6; struct rcu_head rcu; struct fib_nh fib_nh[0]; -#define fib_dev fib_nh[0].nh_dev +#define fib_dev fib_nh[0].fib_nh_dev }; @@ -135,15 +162,16 @@ struct fib_rule; struct fib_table; struct fib_result { - __be32 prefix; - unsigned char prefixlen; - unsigned char nh_sel; - unsigned char type; - unsigned char scope; - u32 tclassid; - struct fib_info *fi; - struct fib_table *table; - struct hlist_head *fa_head; + __be32 prefix; + unsigned char prefixlen; + unsigned char nh_sel; + unsigned char type; + unsigned char scope; + u32 tclassid; + struct fib_nh_common *nhc; + struct fib_info *fi; + struct fib_table *table; + struct hlist_head *fa_head; }; struct fib_result_nl { @@ -161,11 +189,10 @@ struct fib_result_nl { int err; }; -#ifdef CONFIG_IP_ROUTE_MULTIPATH -#define FIB_RES_NH(res) ((res).fi->fib_nh[(res).nh_sel]) -#else /* CONFIG_IP_ROUTE_MULTIPATH */ -#define FIB_RES_NH(res) ((res).fi->fib_nh[0]) -#endif /* CONFIG_IP_ROUTE_MULTIPATH */ +static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) +{ + return &fi->fib_nh[nhsel].nh_common; +} #ifdef CONFIG_IP_MULTIPLE_TABLES #define FIB_TABLE_HASHSZ 256 @@ -174,18 +201,11 @@ struct fib_result_nl { #endif __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); +__be32 fib_result_prefsrc(struct net *net, struct fib_result *res); -#define FIB_RES_SADDR(net, res) \ - ((FIB_RES_NH(res).nh_saddr_genid == \ - atomic_read(&(net)->ipv4.dev_addr_genid)) ? \ - FIB_RES_NH(res).nh_saddr : \ - fib_info_update_nh_saddr((net), &FIB_RES_NH(res))) -#define FIB_RES_GW(res) (FIB_RES_NH(res).nh_gw) -#define FIB_RES_DEV(res) (FIB_RES_NH(res).nh_dev) -#define FIB_RES_OIF(res) (FIB_RES_NH(res).nh_oif) - -#define FIB_RES_PREFSRC(net, res) ((res).fi->fib_prefsrc ? 
: \ - FIB_RES_SADDR(net, res)) +#define FIB_RES_NHC(res) ((res).nhc) +#define FIB_RES_DEV(res) (FIB_RES_NHC(res)->nhc_dev) +#define FIB_RES_OIF(res) (FIB_RES_NHC(res)->nhc_oif) struct fib_entry_notifier_info { struct fib_notifier_info info; /* must be first */ @@ -383,6 +403,8 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net, /* Exported by fib_frontend.c */ extern const struct nla_policy rtm_ipv4_policy[]; void ip_fib_init(void); +int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla, + struct netlink_ext_ack *extack); __be32 fib_compute_spec_dst(struct sk_buff *skb); bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev); int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, @@ -405,7 +427,7 @@ int fib_unmerge(struct net *net); int ip_fib_check_default(__be32 gw, struct net_device *dev); int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); int fib_sync_down_addr(struct net_device *dev, __be32 local); -int fib_sync_up(struct net_device *dev, unsigned int nh_flags); +int fib_sync_up(struct net_device *dev, unsigned char nh_flags); void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -416,6 +438,15 @@ void fib_select_multipath(struct fib_result *res, int hash); void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, const struct sk_buff *skb); +int fib_nh_init(struct net *net, struct fib_nh *fib_nh, + struct fib_config *cfg, int nh_weight, + struct netlink_ext_ack *extack); +void fib_nh_release(struct net *net, struct fib_nh *fib_nh); +int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *fc_encap, + u16 fc_encap_type, void *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack); +void fib_nh_common_release(struct fib_nh_common *nhc); + /* Exported by fib_trie.c */ void fib_trie_init(void); struct fib_table *fib_trie_table(u32 id, struct fib_table *alias); @@ -423,10 +454,12 @@ struct fib_table *fib_trie_table(u32 id, struct fib_table *alias); static inline void fib_combine_itag(u32 *itag, const struct fib_result *res) { #ifdef CONFIG_IP_ROUTE_CLASSID + struct fib_nh_common *nhc = res->nhc; + struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common); #ifdef CONFIG_IP_MULTIPLE_TABLES u32 rtag; #endif - *itag = FIB_RES_NH(*res).nh_tclassid<<16; + *itag = nh->nh_tclassid << 16; #ifdef CONFIG_IP_MULTIPLE_TABLES rtag = res->tclassid; if (*itag == 0) @@ -467,4 +500,9 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr); int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct fib_dump_filter *filter, struct netlink_callback *cb); + +int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh, + unsigned char *flags, bool skip_oif); +int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh, + int nh_weight); #endif /* _NET_FIB_H */ diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 047f9a5ccaad..2ac40135b576 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -600,6 +600,9 @@ struct ip_vs_dest_user_kern { /* Address family of addr */ u16 af; + + u16 tun_type; /* tunnel type */ + __be16 tun_port; /* tunnel port */ }; @@ -660,6 +663,8 @@ struct ip_vs_dest { atomic_t conn_flags; /* flags to copy to conn */ atomic_t weight; /* server weight */ atomic_t last_weight; /* server latest weight */ + __u16 tun_type; /* tunnel type */ + __be16 tun_port; /* tunnel port */ refcount_t refcnt; /* reference counter */ struct ip_vs_stats stats; /* statistics 
*/ diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 28aa9b30aece..1f77fb4dc79d 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -94,7 +94,6 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) goto out; head->dev = dev; - skb_get(head); spin_unlock(&fq->q.lock); icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h new file mode 100644 index 000000000000..6c0c4fde16f8 --- /dev/null +++ b/include/net/ipv6_stubs.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _IPV6_STUBS_H +#define _IPV6_STUBS_H + +#include <linux/in6.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <net/dst.h> +#include <net/flow.h> +#include <net/neighbour.h> +#include <net/sock.h> + +/* structs from net/ip6_fib.h */ +struct fib6_info; +struct fib6_nh; +struct fib6_config; +struct fib6_result; + +/* This is ugly, ideally these symbols should be built + * into the core kernel. + */ +struct ipv6_stub { + int (*ipv6_sock_mc_join)(struct sock *sk, int ifindex, + const struct in6_addr *addr); + int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex, + const struct in6_addr *addr); + int (*ipv6_dst_lookup)(struct net *net, struct sock *sk, + struct dst_entry **dst, struct flowi6 *fl6); + int (*ipv6_route_input)(struct sk_buff *skb); + + struct fib6_table *(*fib6_get_table)(struct net *net, u32 id); + int (*fib6_lookup)(struct net *net, int oif, struct flowi6 *fl6, + struct fib6_result *res, int flags); + int (*fib6_table_lookup)(struct net *net, struct fib6_table *table, + int oif, struct flowi6 *fl6, + struct fib6_result *res, int flags); + void (*fib6_select_path)(const struct net *net, struct fib6_result *res, + struct flowi6 *fl6, int oif, bool oif_match, + const struct sk_buff *skb, int strict); + u32 (*ip6_mtu_from_fib6)(const struct fib6_result *res, + const struct in6_addr *daddr, + const struct in6_addr *saddr); + + int (*fib6_nh_init)(struct net *net, struct fib6_nh *fib6_nh, + struct fib6_config *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack); + void (*fib6_nh_release)(struct fib6_nh *fib6_nh); + void (*udpv6_encap_enable)(void); + void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr, + const struct in6_addr *solicited_addr, + bool router, bool solicited, bool override, bool inc_opt); + struct neigh_table *nd_tbl; +}; +extern const struct ipv6_stub *ipv6_stub __read_mostly; + +/* A stub used by bpf helpers. 
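For code living outside the IPv6 core, a route lookup through this stub now fills a fib6_result rather than returning a fib6_info directly. A simplified sketch of such a caller, with locking and flag handling reduced to the minimum and the wrapper function invented:

#include <net/ip6_fib.h>
#include <net/ipv6_stubs.h>

static int example_lookup_v6(struct net *net, struct flowi6 *fl6)
{
	struct fib6_result res = {};
	int err;

	rcu_read_lock();
	err = ipv6_stub->fib6_lookup(net, fl6->flowi6_oif, fl6, &res, 0);
	if (!err)
		ipv6_stub->fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
					    false, NULL, 0);
	/* res.f6i and res.nh now describe the chosen route and nexthop */
	rcu_read_unlock();

	return err;
}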
Similarly ugly as ipv6_stub */ +struct ipv6_bpf_stub { + int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len, + bool force_bind_address_no_port, bool with_lock); + struct sock *(*udp6_lib_lookup)(struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, __be16 dport, + int dif, int sdif, struct udp_table *tbl, + struct sk_buff *skb); +}; +extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; + +#endif diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 671113bcb2cc..5d6c5b1fc695 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -118,8 +118,8 @@ int lwtunnel_build_state(u16 encap_type, unsigned int family, const void *cfg, struct lwtunnel_state **lws, struct netlink_ext_ack *extack); -int lwtunnel_fill_encap(struct sk_buff *skb, - struct lwtunnel_state *lwtstate); +int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate, + int encap_attr, int encap_type_attr); int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate); struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len); int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b); @@ -219,7 +219,8 @@ static inline int lwtunnel_build_state(u16 encap_type, } static inline int lwtunnel_fill_encap(struct sk_buff *skb, - struct lwtunnel_state *lwtstate) + struct lwtunnel_state *lwtstate, + int encap_attr, int encap_type_attr) { return 0; } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 112dc18c658f..72080d9d617e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -807,6 +807,7 @@ enum mac80211_tx_info_flags { * @IEEE80211_TX_CTRL_RATE_INJECT: This frame is injected with rate information * @IEEE80211_TX_CTRL_AMSDU: This frame is an A-MSDU frame * @IEEE80211_TX_CTRL_FAST_XMIT: This frame is going through the fast_xmit path + * @IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP: This frame skips mesh path lookup * * These flags are used in tx_info->control.flags. */ @@ -816,6 +817,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_RATE_INJECT = BIT(2), IEEE80211_TX_CTRL_AMSDU = BIT(3), IEEE80211_TX_CTRL_FAST_XMIT = BIT(4), + IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP = BIT(5), }; /* @@ -1697,6 +1699,7 @@ struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif); * @IEEE80211_KEY_FLAG_PUT_MIC_SPACE: This flag should be set by the driver for * a TKIP key if it only requires MIC space. Do not set together with * @IEEE80211_KEY_FLAG_GENERATE_MMIC on the same key. + * @IEEE80211_KEY_FLAG_NO_AUTO_TX: Key needs explicit Tx activation. */ enum ieee80211_key_flags { IEEE80211_KEY_FLAG_GENERATE_IV_MGMT = BIT(0), @@ -1708,6 +1711,7 @@ enum ieee80211_key_flags { IEEE80211_KEY_FLAG_RX_MGMT = BIT(6), IEEE80211_KEY_FLAG_RESERVE_TAILROOM = BIT(7), IEEE80211_KEY_FLAG_PUT_MIC_SPACE = BIT(8), + IEEE80211_KEY_FLAG_NO_AUTO_TX = BIT(9), }; /** @@ -1888,6 +1892,24 @@ struct ieee80211_sta_rates { }; /** + * struct ieee80211_sta_txpwr - station txpower configuration + * + * Used to configure txpower for station. + * + * @power: indicates the tx power, in dBm, to be used when sending data frames + * to the STA. + * @type: In particular if TPC %type is NL80211_TX_POWER_LIMITED then tx power + * will be less than or equal to specified from userspace, whereas if TPC + * %type is NL80211_TX_POWER_AUTOMATIC then it indicates default tx power. + * NL80211_TX_POWER_FIXED is not a valid configuration option for + * per peer TPC. 
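With the extra encap_attr/encap_type_attr arguments added to lwtunnel_fill_encap() above, each caller now decides which netlink attribute types the encap is dumped into. A small sketch, assuming the classic RTA_ENCAP/RTA_ENCAP_TYPE pair used by route dumps; the helper itself is invented:

#include <linux/rtnetlink.h>
#include <net/lwtunnel.h>

static int example_dump_encap(struct sk_buff *skb, struct lwtunnel_state *lws)
{
	if (!lws)
		return 0;

	/* other callers could pass e.g. nested nexthop attribute types */
	return lwtunnel_fill_encap(skb, lws, RTA_ENCAP, RTA_ENCAP_TYPE);
}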
+ */ +struct ieee80211_sta_txpwr { + s16 power; + enum nl80211_tx_power_setting type; +}; + +/** * struct ieee80211_sta - station table entry * * A station table entry represents a station we are possibly @@ -1973,6 +1995,7 @@ struct ieee80211_sta { bool support_p2p_ps; u16 max_rc_amsdu_len; u16 max_tid_amsdu_len[IEEE80211_NUM_TIDS]; + struct ieee80211_sta_txpwr txpwr; struct ieee80211_txq *txq[IEEE80211_NUM_TIDS + 1]; @@ -2243,6 +2266,9 @@ struct ieee80211_txq { * @IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID: Hardware supports multi BSSID * only for HE APs. Applies if @IEEE80211_HW_SUPPORTS_MULTI_BSSID is set. * + * @IEEE80211_HW_EXT_KEY_ID_NATIVE: Driver and hardware are supporting Extended + * Key ID and can handle two unicast keys per station for Rx and Tx. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2294,6 +2320,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_TX_STATUS_NO_AMPDU_LEN, IEEE80211_HW_SUPPORTS_MULTI_BSSID, IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID, + IEEE80211_HW_EXT_KEY_ID_NATIVE, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -3794,6 +3821,9 @@ struct ieee80211_ops { #endif void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd, struct ieee80211_sta *sta); + int (*sta_set_txpwr)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta); int (*sta_state)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, enum ieee80211_sta_state old_state, diff --git a/include/net/ndisc.h b/include/net/ndisc.h index ddfbb591e2c5..366150053043 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -2,6 +2,8 @@ #ifndef _NDISC_H #define _NDISC_H +#include <net/ipv6_stubs.h> + /* * ICMP codes for neighbour discovery messages */ @@ -379,6 +381,14 @@ static inline struct neighbour *__ipv6_neigh_lookup_noref(struct net_device *dev return ___neigh_lookup_noref(&nd_tbl, neigh_key_eq128, ndisc_hashfn, pkey, dev); } +static inline +struct neighbour *__ipv6_neigh_lookup_noref_stub(struct net_device *dev, + const void *pkey) +{ + return ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128, + ndisc_hashfn, pkey, dev); +} + static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, const void *pkey) { struct neighbour *n; @@ -409,6 +419,36 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev, rcu_read_unlock_bh(); } +static inline void __ipv6_confirm_neigh_stub(struct net_device *dev, + const void *pkey) +{ + struct neighbour *n; + + rcu_read_lock_bh(); + n = __ipv6_neigh_lookup_noref_stub(dev, pkey); + if (n) { + unsigned long now = jiffies; + + /* avoid dirtying neighbour */ + if (n->confirmed != now) + n->confirmed = now; + } + rcu_read_unlock_bh(); +} + +/* uses ipv6_stub and is meant for use outside of IPv6 core */ +static inline struct neighbour *ip_neigh_gw6(struct net_device *dev, + const void *addr) +{ + struct neighbour *neigh; + + neigh = __ipv6_neigh_lookup_noref_stub(dev, addr); + if (unlikely(!neigh)) + neigh = __neigh_create(ipv6_stub->nd_tbl, addr, dev, false); + + return neigh; +} + int ndisc_init(void); int ndisc_late_init(void); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 7c1ab9edba03..50a67bd6a434 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -205,6 +205,8 @@ struct neigh_table { int (*pconstructor)(struct pneigh_entry *); void (*pdestructor)(struct pneigh_entry *); void (*proxy_redo)(struct sk_buff *skb); + bool (*allow_add)(const struct 
net_device *dev, + struct netlink_ext_ack *extack); char *id; struct neigh_parms parms; struct list_head parms_list; @@ -498,11 +500,12 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb return dev_queue_xmit(skb); } -static inline int neigh_output(struct neighbour *n, struct sk_buff *skb) +static inline int neigh_output(struct neighbour *n, struct sk_buff *skb, + bool skip_cache) { const struct hh_cache *hh = &n->hh; - if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) + if ((n->nud_state & NUD_CONNECTED) && hh->hh_len && !skip_cache) return neigh_hh_output(hh, skb); else return n->output(n, skb); diff --git a/include/net/netfilter/ipv4/nf_nat_masquerade.h b/include/net/netfilter/ipv4/nf_nat_masquerade.h deleted file mode 100644 index 13d55206bb9f..000000000000 --- a/include/net/netfilter/ipv4/nf_nat_masquerade.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NF_NAT_MASQUERADE_IPV4_H_ -#define _NF_NAT_MASQUERADE_IPV4_H_ - -#include <net/netfilter/nf_nat.h> - -unsigned int -nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, - const struct nf_nat_range2 *range, - const struct net_device *out); - -int nf_nat_masquerade_ipv4_register_notifier(void); -void nf_nat_masquerade_ipv4_unregister_notifier(void); - -#endif /*_NF_NAT_MASQUERADE_IPV4_H_ */ diff --git a/include/net/netfilter/ipv6/nf_nat_masquerade.h b/include/net/netfilter/ipv6/nf_nat_masquerade.h deleted file mode 100644 index 2917bf95c437..000000000000 --- a/include/net/netfilter/ipv6/nf_nat_masquerade.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NF_NAT_MASQUERADE_IPV6_H_ -#define _NF_NAT_MASQUERADE_IPV6_H_ - -unsigned int -nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, - const struct net_device *out); -int nf_nat_masquerade_ipv6_register_notifier(void); -void nf_nat_masquerade_ipv6_unregister_notifier(void); - -#endif /* _NF_NAT_MASQUERADE_IPV6_H_ */ diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 006e430d1cdf..93ce6b0daaba 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -48,7 +48,7 @@ struct nf_conntrack_expect { /* Expectation class */ unsigned int class; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) union nf_inet_addr saved_addr; /* This is the original per-proto part, used to map the * expected connection the way the recipient expects. 
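The nhc_gw_family field added to fib_nh_common in ip_fib.h above is what makes a helper like ip_neigh_gw6() useful from the IPv4 side: the nexthop decides whether the gateway neighbour comes from the ND or the ARP table. A sketch of that dispatch; the wrapper is invented and the caller is assumed to hold rcu_read_lock_bh():

#include <linux/socket.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/ndisc.h>

static struct neighbour *example_neigh_for_gw(const struct fib_nh_common *nhc,
					      struct net_device *dev)
{
	if (nhc->nhc_gw_family == AF_INET6)
		return ip_neigh_gw6(dev, &nhc->nhc_gw.ipv6);

	/* IPv4 gateway: create or look up the ARP entry directly */
	return __neigh_create(&arp_tbl, &nhc->nhc_gw.ipv4, dev, false);
}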
*/ diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index ec52a8dc32fd..44b5a00a9c64 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -15,6 +15,11 @@ #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_expect.h> +#define NF_NAT_HELPER_PREFIX "ip_nat_" +#define NF_NAT_HELPER_NAME(name) NF_NAT_HELPER_PREFIX name +#define MODULE_ALIAS_NF_NAT_HELPER(name) \ + MODULE_ALIAS(NF_NAT_HELPER_NAME(name)) + struct module; enum nf_ct_helper_flags { @@ -54,6 +59,8 @@ struct nf_conntrack_helper { unsigned int queue_num; /* length of userspace private data stored in nf_conn_help->data */ u16 data_len; + /* name of NAT helper module */ + char nat_mod_name[NF_CT_HELPER_NAME_LEN]; }; /* Must be kept in sync with the classes defined by helpers */ @@ -153,4 +160,21 @@ nf_ct_helper_expectfn_find_by_symbol(const void *symbol); extern struct hlist_head *nf_ct_helper_hash; extern unsigned int nf_ct_helper_hsize; +struct nf_conntrack_nat_helper { + struct list_head list; + char mod_name[NF_CT_HELPER_NAME_LEN]; /* module name */ + struct module *module; /* pointer to self */ +}; + +#define NF_CT_NAT_HELPER_INIT(name) \ + { \ + .mod_name = NF_NAT_HELPER_NAME(name), \ + .module = THIS_MODULE \ + } + +void nf_nat_helper_register(struct nf_conntrack_nat_helper *nat); +void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat); +int nf_nat_helper_try_module_get(const char *name, u16 l3num, + u8 protonum); +void nf_nat_helper_put(struct nf_conntrack_helper *helper); #endif /*_NF_CONNTRACK_HELPER_H*/ diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index 3394d75e1c80..00a8fbb2d735 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -88,6 +88,9 @@ static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct) int nf_conntrack_timeout_init(void); void nf_conntrack_timeout_fini(void); void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout); +int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, u8 l3num, u8 l4num, + const char *timeout_name); +void nf_ct_destroy_timeout(struct nf_conn *ct); #else static inline int nf_conntrack_timeout_init(void) { @@ -98,6 +101,18 @@ static inline void nf_conntrack_timeout_fini(void) { return; } + +static inline int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, + u8 l3num, u8 l4num, + const char *timeout_name) +{ + return -EOPNOTSUPP; +} + +static inline void nf_ct_destroy_timeout(struct nf_conn *ct) +{ + return; +} #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ #ifdef CONFIG_NF_CONNTRACK_TIMEOUT diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index cf332c4e0b32..423cda2c6542 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -69,9 +69,9 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum, #endif } -int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops, +int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, const struct nf_hook_ops *nat_ops, unsigned int ops_count); -void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops, +void nf_nat_unregister_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, unsigned int ops_count); unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, @@ -98,6 +98,9 @@ void nf_nat_ipv4_unregister_fn(struct net 
*net, const struct nf_hook_ops *ops); int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops); void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops); +int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops); +void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops); + unsigned int nf_nat_inet_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); diff --git a/include/net/netfilter/nf_nat_masquerade.h b/include/net/netfilter/nf_nat_masquerade.h new file mode 100644 index 000000000000..54a14d643c34 --- /dev/null +++ b/include/net/netfilter/nf_nat_masquerade.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_NAT_MASQUERADE_H_ +#define _NF_NAT_MASQUERADE_H_ + +#include <net/netfilter/nf_nat.h> + +unsigned int +nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, + const struct nf_nat_range2 *range, + const struct net_device *out); + +int nf_nat_masquerade_inet_register_notifiers(void); +void nf_nat_masquerade_inet_unregister_notifiers(void); + +unsigned int +nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, + const struct net_device *out); + +#endif /*_NF_NAT_MASQUERADE_H_ */ diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index a50a69f5334c..7239105d9d2e 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -119,4 +119,7 @@ nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, return queue; } +int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, + const struct nf_hook_entries *entries, unsigned int index, + unsigned int verdict); #endif /* _NF_QUEUE_H */ diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3e9ab643eedf..5b8624ae4a27 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -2,7 +2,6 @@ #ifndef _NET_NF_TABLES_H #define _NET_NF_TABLES_H -#include <linux/module.h> #include <linux/list.h> #include <linux/netfilter.h> #include <linux/netfilter/nfnetlink.h> @@ -13,6 +12,8 @@ #include <net/netfilter/nf_flow_table.h> #include <net/netlink.h> +struct module; + #define NFT_JUMP_STACK_SIZE 16 struct nft_pktinfo { @@ -475,8 +476,6 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, enum nft_trans_phase phase); int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); -void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding, bool commit); void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set); /** @@ -808,23 +807,6 @@ void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); int nft_expr_dump(struct sk_buff *skb, unsigned int attr, const struct nft_expr *expr); -static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) -{ - int err; - - if (src->ops->clone) { - dst->ops = src->ops; - err = src->ops->clone(dst, src); - if (err < 0) - return err; - } else { - memcpy(dst, src, src->ops->size); - } - - __module_get(src->ops->type->owner); - return 0; -} - /** * struct nft_rule - nf_tables rule * @@ -1411,4 +1393,6 @@ struct nft_trans_flowtable { int __init nft_chain_filter_init(void); void nft_chain_filter_fini(void); +void __init nft_chain_route_init(void); +void nft_chain_route_fini(void); #endif /* _NET_NF_TABLES_H */ diff --git a/include/net/netlink.h b/include/net/netlink.h 
index 23f27b0b3cef..395b4406f4b0 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -183,6 +183,7 @@ enum { NLA_REJECT, NLA_EXACT_LEN, NLA_EXACT_LEN_WARN, + NLA_MIN_LEN, __NLA_TYPE_MAX, }; @@ -212,6 +213,7 @@ enum nla_policy_validation { * NLA_NUL_STRING Maximum length of string (excluding NUL) * NLA_FLAG Unused * NLA_BINARY Maximum length of attribute payload + * NLA_MIN_LEN Minimum length of attribute payload * NLA_NESTED, * NLA_NESTED_ARRAY Length verification is done by checking len of * nested header (or empty); len field is used if @@ -230,6 +232,7 @@ enum nla_policy_validation { * it is rejected. * NLA_EXACT_LEN_WARN Attribute should have exactly this length, a warning * is logged if it is longer, shorter is rejected. + * NLA_MIN_LEN Minimum length of attribute payload * All other Minimum length of attribute payload * * Meaning of `validation_data' field: @@ -281,7 +284,7 @@ enum nla_policy_validation { * static const struct nla_policy my_policy[ATTR_MAX+1] = { * [ATTR_FOO] = { .type = NLA_U16 }, * [ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ }, - * [ATTR_BAZ] = { .len = sizeof(struct mystruct) }, + * [ATTR_BAZ] = { .type = NLA_EXACT_LEN, .len = sizeof(struct mystruct) }, * [ATTR_GOO] = { .type = NLA_BITFIELD32, .validation_data = &myvalidflags }, * }; */ @@ -296,12 +299,31 @@ struct nla_policy { }; int (*validate)(const struct nlattr *attr, struct netlink_ext_ack *extack); + /* This entry is special, and used for the attribute at index 0 + * only, and specifies special data about the policy, namely it + * specifies the "boundary type" where strict length validation + * starts for any attribute types >= this value, also, strict + * nesting validation starts here. + * + * Additionally, it means that NLA_UNSPEC is actually NLA_REJECT + * for any types >= this, so need to use NLA_MIN_LEN to get the + * previous pure { .len = xyz } behaviour. The advantage of this + * is that types not specified in the policy will be rejected. + * + * For completely new families it should be set to 1 so that the + * validation is enforced for all attributes. For existing ones + * it should be set at least when new attributes are added to + * the enum used by the policy, and be set to the new value that + * was added to enforce strict validation from thereon. + */ + u16 strict_start_type; }; }; #define NLA_POLICY_EXACT_LEN(_len) { .type = NLA_EXACT_LEN, .len = _len } #define NLA_POLICY_EXACT_LEN_WARN(_len) { .type = NLA_EXACT_LEN_WARN, \ .len = _len } +#define NLA_POLICY_MIN_LEN(_len) { .type = NLA_MIN_LEN, .len = _len } #define NLA_POLICY_ETH_ADDR NLA_POLICY_EXACT_LEN(ETH_ALEN) #define NLA_POLICY_ETH_ADDR_COMPAT NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN) @@ -365,21 +387,52 @@ struct nl_info { bool skip_notify; }; +/** + * enum netlink_validation - netlink message/attribute validation levels + * @NL_VALIDATE_LIBERAL: Old-style "be liberal" validation, not caring about + * extra data at the end of the message, attributes being longer than + * they should be, or unknown attributes being present. + * @NL_VALIDATE_TRAILING: Reject junk data encountered after attribute parsing. + * @NL_VALIDATE_MAXTYPE: Reject attributes > max type; Together with _TRAILING + * this is equivalent to the old nla_parse_strict()/nlmsg_parse_strict(). + * @NL_VALIDATE_UNSPEC: Reject attributes with NLA_UNSPEC in the policy. + * This can safely be set by the kernel when the given policy has no + * NLA_UNSPEC anymore, and can thus be used to ensure policy entries + * are enforced going forward. 
+ * @NL_VALIDATE_STRICT_ATTRS: strict attribute policy parsing (e.g. + * U8, U16, U32 must have exact size, etc.) + * @NL_VALIDATE_NESTED: Check that NLA_F_NESTED is set for NLA_NESTED(_ARRAY) + * and unset for other policies. + */ +enum netlink_validation { + NL_VALIDATE_LIBERAL = 0, + NL_VALIDATE_TRAILING = BIT(0), + NL_VALIDATE_MAXTYPE = BIT(1), + NL_VALIDATE_UNSPEC = BIT(2), + NL_VALIDATE_STRICT_ATTRS = BIT(3), + NL_VALIDATE_NESTED = BIT(4), +}; + +#define NL_VALIDATE_DEPRECATED_STRICT (NL_VALIDATE_TRAILING |\ + NL_VALIDATE_MAXTYPE) +#define NL_VALIDATE_STRICT (NL_VALIDATE_TRAILING |\ + NL_VALIDATE_MAXTYPE |\ + NL_VALIDATE_UNSPEC |\ + NL_VALIDATE_STRICT_ATTRS |\ + NL_VALIDATE_NESTED) + int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, struct nlmsghdr *, struct netlink_ext_ack *)); int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, unsigned int group, int report, gfp_t flags); -int nla_validate(const struct nlattr *head, int len, int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack); -int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, - int len, const struct nla_policy *policy, - struct netlink_ext_ack *extack); -int nla_parse_strict(struct nlattr **tb, int maxtype, const struct nlattr *head, - int len, const struct nla_policy *policy, - struct netlink_ext_ack *extack); +int __nla_validate(const struct nlattr *head, int len, int maxtype, + const struct nla_policy *policy, unsigned int validate, + struct netlink_ext_ack *extack); +int __nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, + int len, const struct nla_policy *policy, unsigned int validate, + struct netlink_ext_ack *extack); int nla_policy_len(const struct nla_policy *, int); struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype); size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize); @@ -508,42 +561,167 @@ nlmsg_next(const struct nlmsghdr *nlh, int *remaining) } /** - * nlmsg_parse - parse attributes of a netlink message + * nla_parse - Parse a stream of attributes into a tb buffer + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @head: head of attribute stream + * @len: length of attribute stream + * @policy: validation policy + * @extack: extended ACK pointer + * + * Parses a stream of attributes and stores a pointer to each attribute in + * the tb array accessible via the attribute type. Attributes with a type + * exceeding maxtype will be rejected, policy must be specified, attributes + * will be validated in the strictest way possible. + * + * Returns 0 on success or a negative error code. + */ +static inline int nla_parse(struct nlattr **tb, int maxtype, + const struct nlattr *head, int len, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nla_parse(tb, maxtype, head, len, policy, + NL_VALIDATE_STRICT, extack); +} + +/** + * nla_parse_deprecated - Parse a stream of attributes into a tb buffer + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @head: head of attribute stream + * @len: length of attribute stream + * @policy: validation policy + * @extack: extended ACK pointer + * + * Parses a stream of attributes and stores a pointer to each attribute in + * the tb array accessible via the attribute type. 
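A minimal sketch of how the new policy fields combine, assuming a hypothetical FOO netlink family (all FOO_* identifiers are invented for illustration): entry 0 carries strict_start_type, attributes below that boundary keep the old pure-length semantics via NLA_POLICY_MIN_LEN, and everything from the boundary onward is validated strictly by nla_parse().

#include <net/netlink.h>
#include <linux/if_ether.h>

/* FOO_ATTR_COUNTER is the first attribute added after this family
 * switched to strict validation, so strict_start_type points at it.
 */
enum {
        FOO_ATTR_UNSPEC,
        FOO_ATTR_ADDR,          /* opaque blob, at least ETH_ALEN bytes */
        FOO_ATTR_LABEL,         /* NUL-terminated string */
        FOO_ATTR_COUNTER,       /* strictly validated from here on */
        __FOO_ATTR_MAX,
};
#define FOO_ATTR_MAX (__FOO_ATTR_MAX - 1)

static const struct nla_policy foo_policy[FOO_ATTR_MAX + 1] = {
        [0]                 = { .strict_start_type = FOO_ATTR_COUNTER },
        [FOO_ATTR_ADDR]     = NLA_POLICY_MIN_LEN(ETH_ALEN),
        [FOO_ATTR_LABEL]    = { .type = NLA_NUL_STRING, .len = 31 },
        [FOO_ATTR_COUNTER]  = { .type = NLA_U32 },
};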
Attributes with a type + * exceeding maxtype will be ignored and attributes from the policy are not + * always strictly validated (only for new attributes). + * + * Returns 0 on success or a negative error code. + */ +static inline int nla_parse_deprecated(struct nlattr **tb, int maxtype, + const struct nlattr *head, int len, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nla_parse(tb, maxtype, head, len, policy, + NL_VALIDATE_LIBERAL, extack); +} + +/** + * nla_parse_deprecated_strict - Parse a stream of attributes into a tb buffer + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @head: head of attribute stream + * @len: length of attribute stream + * @policy: validation policy + * @extack: extended ACK pointer + * + * Parses a stream of attributes and stores a pointer to each attribute in + * the tb array accessible via the attribute type. Attributes with a type + * exceeding maxtype will be rejected as well as trailing data, but the + * policy is not completely strictly validated (only for new attributes). + * + * Returns 0 on success or a negative error code. + */ +static inline int nla_parse_deprecated_strict(struct nlattr **tb, int maxtype, + const struct nlattr *head, + int len, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nla_parse(tb, maxtype, head, len, policy, + NL_VALIDATE_DEPRECATED_STRICT, extack); +} + +/** + * __nlmsg_parse - parse attributes of a netlink message * @nlh: netlink message header * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @policy: validation policy + * @validate: validation strictness * @extack: extended ACK report struct * * See nla_parse() */ -static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, - struct nlattr *tb[], int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack) +static inline int __nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, + struct nlattr *tb[], int maxtype, + const struct nla_policy *policy, + unsigned int validate, + struct netlink_ext_ack *extack) { if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) { NL_SET_ERR_MSG(extack, "Invalid header length"); return -EINVAL; } - return nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), - nlmsg_attrlen(nlh, hdrlen), policy, extack); + return __nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), policy, validate, + extack); } -static inline int nlmsg_parse_strict(const struct nlmsghdr *nlh, int hdrlen, - struct nlattr *tb[], int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack) +/** + * nlmsg_parse - parse attributes of a netlink message + * @nlh: netlink message header + * @hdrlen: length of family specific header + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @validate: validation strictness + * @extack: extended ACK report struct + * + * See nla_parse() + */ +static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, + struct nlattr *tb[], int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { - if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) { - NL_SET_ERR_MSG(extack, "Invalid header length"); - return -EINVAL; - } + return __nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), policy, + NL_VALIDATE_STRICT, extack); +} - return 
nla_parse_strict(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), - nlmsg_attrlen(nlh, hdrlen), policy, extack); +/** + * nlmsg_parse_deprecated - parse attributes of a netlink message + * @nlh: netlink message header + * @hdrlen: length of family specific header + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @extack: extended ACK report struct + * + * See nla_parse_deprecated() + */ +static inline int nlmsg_parse_deprecated(const struct nlmsghdr *nlh, int hdrlen, + struct nlattr *tb[], int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nlmsg_parse(nlh, hdrlen, tb, maxtype, policy, + NL_VALIDATE_LIBERAL, extack); +} + +/** + * nlmsg_parse_deprecated_strict - parse attributes of a netlink message + * @nlh: netlink message header + * @hdrlen: length of family specific header + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @extack: extended ACK report struct + * + * See nla_parse_deprecated_strict() + */ +static inline int +nlmsg_parse_deprecated_strict(const struct nlmsghdr *nlh, int hdrlen, + struct nlattr *tb[], int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nlmsg_parse(nlh, hdrlen, tb, maxtype, policy, + NL_VALIDATE_DEPRECATED_STRICT, extack); } /** @@ -562,26 +740,75 @@ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh, } /** - * nlmsg_validate - validate a netlink message including attributes + * nla_validate_deprecated - Validate a stream of attributes + * @head: head of attribute stream + * @len: length of attribute stream + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + * @validate: validation strictness + * @extack: extended ACK report struct + * + * Validates all attributes in the specified attribute stream against the + * specified policy. Validation is done in liberal mode. + * See documenation of struct nla_policy for more details. + * + * Returns 0 on success or a negative error code. + */ +static inline int nla_validate_deprecated(const struct nlattr *head, int len, + int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nla_validate(head, len, maxtype, policy, NL_VALIDATE_LIBERAL, + extack); +} + +/** + * nla_validate - Validate a stream of attributes + * @head: head of attribute stream + * @len: length of attribute stream + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + * @validate: validation strictness + * @extack: extended ACK report struct + * + * Validates all attributes in the specified attribute stream against the + * specified policy. Validation is done in strict mode. + * See documenation of struct nla_policy for more details. + * + * Returns 0 on success or a negative error code. 
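For an existing family the conversion is mostly mechanical: commands that predate strict validation keep their old behaviour through the _deprecated() variants, while new commands use the strict entry points. A hedged sketch of a request handler for the hypothetical FOO family from the previous sketch (foo_doit() and the zero-length family header are likewise invented):

static int foo_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                    struct netlink_ext_ack *extack)
{
        struct nlattr *tb[FOO_ATTR_MAX + 1];
        int err;

        /* Liberal parse, equivalent to what nlmsg_parse() did before
         * this change; switch to nlmsg_parse() once userspace is known
         * to send well-formed strict messages.
         */
        err = nlmsg_parse_deprecated(nlh, 0, tb, FOO_ATTR_MAX,
                                     foo_policy, extack);
        if (err < 0)
                return err;

        if (!tb[FOO_ATTR_ADDR]) {
                NL_SET_ERR_MSG(extack, "FOO_ATTR_ADDR is mandatory");
                return -EINVAL;
        }

        return 0;
}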
+ */ +static inline int nla_validate(const struct nlattr *head, int len, int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nla_validate(head, len, maxtype, policy, NL_VALIDATE_STRICT, + extack); +} + +/** + * nlmsg_validate_deprecated - validate a netlink message including attributes * @nlh: netlinket message header * @hdrlen: length of familiy specific header * @maxtype: maximum attribute type to be expected * @policy: validation policy * @extack: extended ACK report struct */ -static inline int nlmsg_validate(const struct nlmsghdr *nlh, - int hdrlen, int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack) +static inline int nlmsg_validate_deprecated(const struct nlmsghdr *nlh, + int hdrlen, int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) return -EINVAL; - return nla_validate(nlmsg_attrdata(nlh, hdrlen), - nlmsg_attrlen(nlh, hdrlen), maxtype, policy, - extack); + return __nla_validate(nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), maxtype, + policy, NL_VALIDATE_LIBERAL, extack); } + + /** * nlmsg_report - need to report back to application? * @nlh: netlink message header @@ -909,8 +1136,32 @@ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack) { - return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy, - extack); + if (!(nla->nla_type & NLA_F_NESTED)) { + NL_SET_ERR_MSG_ATTR(extack, nla, "NLA_F_NESTED is missing"); + return -EINVAL; + } + + return __nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy, + NL_VALIDATE_STRICT, extack); +} + +/** + * nla_parse_nested_deprecated - parse nested attributes + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @nla: attribute containing the nested attributes + * @policy: validation policy + * @extack: extended ACK report struct + * + * See nla_parse_deprecated() + */ +static inline int nla_parse_nested_deprecated(struct nlattr *tb[], int maxtype, + const struct nlattr *nla, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy, + NL_VALIDATE_LIBERAL, extack); } /** @@ -1415,13 +1666,18 @@ static inline void *nla_memdup(const struct nlattr *src, gfp_t gfp) } /** - * nla_nest_start - Start a new level of nested attributes + * nla_nest_start_noflag - Start a new level of nested attributes * @skb: socket buffer to add attributes to * @attrtype: attribute type of container * - * Returns the container attribute + * This function exists for backward compatibility to use in APIs which never + * marked their nest attributes with NLA_F_NESTED flag. New APIs should use + * nla_nest_start() which sets the flag. + * + * Returns the container attribute or NULL on error */ -static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) +static inline struct nlattr *nla_nest_start_noflag(struct sk_buff *skb, + int attrtype) { struct nlattr *start = (struct nlattr *)skb_tail_pointer(skb); @@ -1432,6 +1688,21 @@ static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) } /** + * nla_nest_start - Start a new level of nested attributes, with NLA_F_NESTED + * @skb: socket buffer to add attributes to + * @attrtype: attribute type of container + * + * Unlike nla_nest_start_noflag(), mark the nest attribute with NLA_F_NESTED + * flag. 
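Since nla_parse_nested() now insists on NLA_F_NESTED, the dump and parse sides have to agree on the flag; nla_nest_start() provides it automatically. A short sketch, again with invented FOO_* names (FOO_ATTR_STATS is assumed to be a nested container in the hypothetical policy):

static int foo_fill_stats(struct sk_buff *msg)
{
        struct nlattr *nest;

        nest = nla_nest_start(msg, FOO_ATTR_STATS); /* sets NLA_F_NESTED */
        if (!nest)
                return -EMSGSIZE;

        if (nla_put_u32(msg, FOO_ATTR_COUNTER, 42)) {
                nla_nest_cancel(msg, nest);
                return -EMSGSIZE;
        }

        nla_nest_end(msg, nest);
        return 0;
}

On the receive side, nla_parse_nested(tb, FOO_ATTR_MAX, attrs[FOO_ATTR_STATS], foo_policy, extack) would then succeed, whereas a container built with nla_nest_start_noflag() would have to be parsed with nla_parse_nested_deprecated() to avoid the -EINVAL from the missing flag.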
This is the preferred function to use in new code. + * + * Returns the container attribute or NULL on error + */ +static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) +{ + return nla_nest_start_noflag(skb, attrtype | NLA_F_NESTED); +} + +/** * nla_nest_end - Finalize nesting of attributes * @skb: socket buffer the attributes are stored in * @start: container attribute @@ -1465,6 +1736,7 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) * @start: container attribute * @maxtype: maximum attribute type to be expected * @policy: validation policy + * @validate: validation strictness * @extack: extended ACK report struct * * Validates all attributes in the nested attribute stream against the @@ -1473,12 +1745,22 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) * * Returns 0 on success or a negative error code. */ -static inline int nla_validate_nested(const struct nlattr *start, int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack) +static inline int __nla_validate_nested(const struct nlattr *start, int maxtype, + const struct nla_policy *policy, + unsigned int validate, + struct netlink_ext_ack *extack) +{ + return __nla_validate(nla_data(start), nla_len(start), maxtype, policy, + validate, extack); +} + +static inline int +nla_validate_nested_deprecated(const struct nlattr *start, int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { - return nla_validate(nla_data(start), nla_len(start), maxtype, policy, - extack); + return __nla_validate_nested(start, maxtype, policy, + NL_VALIDATE_LIBERAL, extack); } /** diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index f19b53130bf7..806454e767bf 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -24,9 +24,9 @@ struct nf_generic_net { struct nf_tcp_net { unsigned int timeouts[TCP_CONNTRACK_TIMEOUT_MAX]; - unsigned int tcp_loose; - unsigned int tcp_be_liberal; - unsigned int tcp_max_retrans; + int tcp_loose; + int tcp_be_liberal; + int tcp_max_retrans; }; enum udp_conntrack { diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 104a6669e344..7698460a3dd1 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -9,6 +9,7 @@ #include <linux/uidgid.h> #include <net/inet_frag.h> #include <linux/rcupdate.h> +#include <linux/siphash.h> struct tcpm_hash_bucket; struct ctl_table_header; @@ -217,5 +218,6 @@ struct netns_ipv4 { unsigned int ipmr_seq; /* protected by rtnl_mutex */ atomic_t rt_genid; + siphash_key_t ip_id_key; }; #endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index b028a1dc150d..5e61b5a8635d 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -8,6 +8,7 @@ #ifndef __NETNS_IPV6_H__ #define __NETNS_IPV6_H__ #include <net/dst_ops.h> +#include <uapi/linux/icmpv6.h> struct ctl_table_header; @@ -33,6 +34,10 @@ struct netns_sysctl_ipv6 { int auto_flowlabels; int icmpv6_time; int icmpv6_echo_ignore_all; + int icmpv6_echo_ignore_multicast; + int icmpv6_echo_ignore_anycast; + DECLARE_BITMAP(icmpv6_ratemask, ICMPV6_MSG_MAX + 1); + unsigned long *icmpv6_ratemask_ptr; int anycast_src_echo_reply; int ip_nonlocal_bind; int fwmark_reflect; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index d5e7a1af346f..514e3c80ecc1 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -100,6 +100,11 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct 
tcf_result *res, bool compat_mode); #else +static inline bool tcf_block_shared(struct tcf_block *block) +{ + return false; +} + static inline int tcf_block_get(struct tcf_block **p_block, struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, @@ -372,30 +377,6 @@ static inline bool tcf_exts_has_actions(struct tcf_exts *exts) } /** - * tcf_exts_has_one_action - check if exactly one action is present - * @exts: tc filter extensions handle - * - * Returns true if exactly one action is present. - */ -static inline bool tcf_exts_has_one_action(struct tcf_exts *exts) -{ -#ifdef CONFIG_NET_CLS_ACT - return exts->nr_actions == 1; -#else - return false; -#endif -} - -static inline struct tc_action *tcf_exts_first_action(struct tcf_exts *exts) -{ -#ifdef CONFIG_NET_CLS_ACT - return exts->actions[0]; -#else - return NULL; -#endif -} - -/** * tcf_exts_exec - execute tc filter extensions * @skb: socket buffer * @exts: tc filter extensions handle @@ -784,12 +765,14 @@ tc_cls_flower_offload_flow_rule(struct tc_cls_flower_offload *tc_flow_cmd) enum tc_matchall_command { TC_CLSMATCHALL_REPLACE, TC_CLSMATCHALL_DESTROY, + TC_CLSMATCHALL_STATS, }; struct tc_cls_matchall_offload { struct tc_cls_common_offload common; enum tc_matchall_command command; - struct tcf_exts *exts; + struct flow_rule *rule; + struct flow_stats stats; unsigned long cookie; }; diff --git a/include/net/psample.h b/include/net/psample.h index 9b80f814ab04..37a4df2325b2 100644 --- a/include/net/psample.h +++ b/include/net/psample.h @@ -3,7 +3,6 @@ #define __NET_PSAMPLE_H #include <uapi/linux/psample.h> -#include <linux/module.h> #include <linux/list.h> struct psample_group { diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 21a5243fecd1..9dfd7960d90a 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -106,10 +106,8 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, return req; } -static inline void reqsk_free(struct request_sock *req) +static inline void __reqsk_free(struct request_sock *req) { - WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); - req->rsk_ops->destructor(req); if (req->rsk_listener) sock_put(req->rsk_listener); @@ -117,6 +115,12 @@ static inline void reqsk_free(struct request_sock *req) kmem_cache_free(req->rsk_ops->slab, req); } +static inline void reqsk_free(struct request_sock *req) +{ + WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); + __reqsk_free(req); +} + static inline void reqsk_put(struct request_sock *req) { if (refcount_dec_and_test(&req->rsk_refcnt)) diff --git a/include/net/route.h b/include/net/route.h index 9883dc82f723..96f6c9ae33c2 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -29,6 +29,8 @@ #include <net/flow.h> #include <net/inet_sock.h> #include <net/ip_fib.h> +#include <net/arp.h> +#include <net/ndisc.h> #include <linux/in_route.h> #include <linux/rtnetlink.h> #include <linux/rcupdate.h> @@ -55,12 +57,15 @@ struct rtable { unsigned int rt_flags; __u16 rt_type; __u8 rt_is_input; - __u8 rt_uses_gateway; + u8 rt_gw_family; int rt_iif; /* Info on neighbour */ - __be32 rt_gateway; + union { + __be32 rt_gw4; + struct in6_addr rt_gw6; + }; /* Miscellaneous cached information */ u32 rt_mtu_locked:1, @@ -82,8 +87,8 @@ static inline bool rt_is_output_route(const struct rtable *rt) static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr) { - if (rt->rt_gateway) - return rt->rt_gateway; + if (rt->rt_gw_family == AF_INET) + return rt->rt_gw4; return daddr; } @@ -347,4 +352,34 @@ static inline int 
ip4_dst_hoplimit(const struct dst_entry *dst) return hoplimit; } +static inline struct neighbour *ip_neigh_gw4(struct net_device *dev, + __be32 daddr) +{ + struct neighbour *neigh; + + neigh = __ipv4_neigh_lookup_noref(dev, daddr); + if (unlikely(!neigh)) + neigh = __neigh_create(&arp_tbl, &daddr, dev, false); + + return neigh; +} + +static inline struct neighbour *ip_neigh_for_gw(struct rtable *rt, + struct sk_buff *skb, + bool *is_v6gw) +{ + struct net_device *dev = rt->dst.dev; + struct neighbour *neigh; + + if (likely(rt->rt_gw_family == AF_INET)) { + neigh = ip_neigh_gw4(dev, rt->rt_gw4); + } else if (rt->rt_gw_family == AF_INET6) { + neigh = ip_neigh_gw6(dev, &rt->rt_gw6); + *is_v6gw = true; + } else { + neigh = ip_neigh_gw4(dev, ip_hdr(skb)->daddr); + } + return neigh; +} + #endif /* _ROUTE_H */ diff --git a/include/net/nexthop.h b/include/net/rtnh.h index 902ff382a6dc..aa2cfc508f7c 100644 --- a/include/net/nexthop.h +++ b/include/net/rtnh.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __NET_NEXTHOP_H -#define __NET_NEXTHOP_H +#ifndef __NET_RTNH_H +#define __NET_RTNH_H #include <linux/rtnetlink.h> #include <net/netlink.h> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a2b38b3deeca..21f434f3ac9e 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -52,10 +52,7 @@ struct qdisc_size_table { struct qdisc_skb_head { struct sk_buff *head; struct sk_buff *tail; - union { - u32 qlen; - atomic_t atomic_qlen; - }; + __u32 qlen; spinlock_t lock; }; @@ -113,6 +110,9 @@ struct Qdisc { spinlock_t busylock ____cacheline_aligned_in_smp; spinlock_t seqlock; + + /* for NOLOCK qdisc, true if there are no enqueued skbs */ + bool empty; struct rcu_head rcu; }; @@ -143,11 +143,24 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc) return (raw_read_seqcount(&qdisc->running) & 1) ? 
true : false; } +static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) +{ + return q->flags & TCQ_F_CPUSTATS; +} + +static inline bool qdisc_is_empty(const struct Qdisc *qdisc) +{ + if (qdisc_is_percpu_stats(qdisc)) + return qdisc->empty; + return !qdisc->q.qlen; +} + static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) { if (!spin_trylock(&qdisc->seqlock)) return false; + qdisc->empty = false; } else if (qdisc_is_running(qdisc)) { return false; } @@ -351,13 +364,10 @@ struct tcf_proto { }; struct qdisc_skb_cb { - union { - struct { - unsigned int pkt_len; - u16 slave_dev_queue_mapping; - u16 tc_classid; - }; - struct bpf_flow_keys *flow_keys; + struct { + unsigned int pkt_len; + u16 slave_dev_queue_mapping; + u16 tc_classid; }; #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; @@ -470,19 +480,27 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) BUILD_BUG_ON(sizeof(qcb->data) < sz); } +static inline int qdisc_qlen_cpu(const struct Qdisc *q) +{ + return this_cpu_ptr(q->cpu_qstats)->qlen; +} + static inline int qdisc_qlen(const struct Qdisc *q) { return q->q.qlen; } -static inline u32 qdisc_qlen_sum(const struct Qdisc *q) +static inline int qdisc_qlen_sum(const struct Qdisc *q) { - u32 qlen = q->qstats.qlen; + __u32 qlen = q->qstats.qlen; + int i; - if (q->flags & TCQ_F_NOLOCK) - qlen += atomic_read(&q->q.atomic_qlen); - else + if (qdisc_is_percpu_stats(q)) { + for_each_possible_cpu(i) + qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen; + } else { qlen += q->q.qlen; + } return qlen; } @@ -736,7 +754,7 @@ static inline bool qdisc_all_tx_empty(const struct net_device *dev) struct netdev_queue *txq = netdev_get_tx_queue(dev, i); const struct Qdisc *q = rcu_dereference(txq->qdisc); - if (q->q.qlen) { + if (!qdisc_is_empty(q)) { rcu_read_unlock(); return false; } @@ -806,11 +824,6 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, return sch->enqueue(skb, sch, to_free); } -static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) -{ - return q->flags & TCQ_F_CPUSTATS; -} - static inline void _bstats_update(struct gnet_stats_basic_packed *bstats, __u64 bytes, __u32 packets) { @@ -878,14 +891,14 @@ static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch, this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb)); } -static inline void qdisc_qstats_atomic_qlen_inc(struct Qdisc *sch) +static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch) { - atomic_inc(&sch->q.atomic_qlen); + this_cpu_inc(sch->cpu_qstats->qlen); } -static inline void qdisc_qstats_atomic_qlen_dec(struct Qdisc *sch) +static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch) { - atomic_dec(&sch->q.atomic_qlen); + this_cpu_dec(sch->cpu_qstats->qlen); } static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch) @@ -1095,6 +1108,32 @@ static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch) return skb; } +static inline void qdisc_update_stats_at_dequeue(struct Qdisc *sch, + struct sk_buff *skb) +{ + if (qdisc_is_percpu_stats(sch)) { + qdisc_qstats_cpu_backlog_dec(sch, skb); + qdisc_bstats_cpu_update(sch, skb); + qdisc_qstats_cpu_qlen_dec(sch); + } else { + qdisc_qstats_backlog_dec(sch, skb); + qdisc_bstats_update(sch, skb); + sch->q.qlen--; + } +} + +static inline void qdisc_update_stats_at_enqueue(struct Qdisc *sch, + unsigned int pkt_len) +{ + if (qdisc_is_percpu_stats(sch)) { + qdisc_qstats_cpu_qlen_inc(sch); + this_cpu_add(sch->cpu_qstats->backlog, pkt_len); + } else 
{ + sch->qstats.backlog += pkt_len; + sch->q.qlen++; + } +} + /* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) { @@ -1102,8 +1141,13 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) if (skb) { skb = __skb_dequeue(&sch->gso_skb); - qdisc_qstats_backlog_dec(sch, skb); - sch->q.qlen--; + if (qdisc_is_percpu_stats(sch)) { + qdisc_qstats_cpu_backlog_dec(sch, skb); + qdisc_qstats_cpu_qlen_dec(sch); + } else { + qdisc_qstats_backlog_dec(sch, skb); + sch->q.qlen--; + } } else { skb = sch->dequeue(sch); } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 1d13ec3f2707..eefdfa5abf6e 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -421,7 +421,7 @@ static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) /* * This mimics the behavior of skb_set_owner_r */ - sk->sk_forward_alloc -= event->rmem_len; + sk_mem_charge(sk, event->rmem_len); } /* Tests if the list has one and only one entry. */ diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h index bb0ecba3db2b..f4ac7117ff29 100644 --- a/include/net/sctp/ulpqueue.h +++ b/include/net/sctp/ulpqueue.h @@ -59,7 +59,7 @@ void sctp_ulpq_free(struct sctp_ulpq *); int sctp_ulpq_tail_data(struct sctp_ulpq *, struct sctp_chunk *, gfp_t); /* Add a new event for propagation to the ULP. */ -int sctp_ulpq_tail_event(struct sctp_ulpq *, struct sctp_ulpevent *ev); +int sctp_ulpq_tail_event(struct sctp_ulpq *, struct sk_buff_head *skb_list); /* Renege previously received chunks. */ void sctp_ulpq_renege(struct sctp_ulpq *, struct sctp_chunk *, gfp_t); diff --git a/include/net/sock.h b/include/net/sock.h index 341f8bafa0cf..4d208c0f9c14 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -236,6 +236,8 @@ struct sock_common { /* public: */ }; +struct bpf_sk_storage; + /** * struct sock - network layer representation of sockets * @__sk_common: shared layout with inet_timewait_sock @@ -368,6 +370,7 @@ struct sock { atomic_t sk_drops; int sk_rcvlowat; struct sk_buff_head sk_error_queue; + struct sk_buff *sk_rx_skb_cache; struct sk_buff_head sk_receive_queue; /* * The backlog queue is special, it is always used with @@ -414,6 +417,7 @@ struct sock { struct sk_buff *sk_send_head; struct rb_root tcp_rtx_queue; }; + struct sk_buff *sk_tx_skb_cache; struct sk_buff_head sk_write_queue; __s32 sk_peek_off; int sk_write_pending; @@ -508,6 +512,9 @@ struct sock { #endif void (*sk_destruct)(struct sock *sk); struct sock_reuseport __rcu *sk_reuseport_cb; +#ifdef CONFIG_BPF_SYSCALL + struct bpf_sk_storage __rcu *sk_bpf_storage; +#endif struct rcu_head sk_rcu; }; @@ -966,7 +973,7 @@ static inline void sock_rps_record_flow_hash(__u32 hash) static inline void sock_rps_record_flow(const struct sock *sk) { #ifdef CONFIG_RPS - if (static_key_false(&rfs_needed)) { + if (static_branch_unlikely(&rfs_needed)) { /* Reading sk->sk_rxhash might incur an expensive cache line * miss. 
* @@ -1466,6 +1473,11 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) sock_set_flag(sk, SOCK_QUEUE_SHRUNK); sk->sk_wmem_queued -= skb->truesize; sk_mem_uncharge(sk, skb->truesize); + if (!sk->sk_tx_skb_cache) { + skb_zcopy_clear(skb, true); + sk->sk_tx_skb_cache = skb; + return; + } __kfree_skb(skb); } @@ -1607,6 +1619,8 @@ int sock_setsockopt(struct socket *sock, int level, int op, int sock_getsockopt(struct socket *sock, int level, int op, char __user *optval, int __user *optlen); +int sock_gettstamp(struct socket *sock, void __user *userstamp, + bool timeval, bool time32); struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, int noblock, int *errcode); struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, @@ -2427,6 +2441,15 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) { __skb_unlink(skb, &sk->sk_receive_queue); + if ( +#ifdef CONFIG_RPS + !static_branch_unlikely(&rps_needed) && +#endif + !sk->sk_rx_skb_cache) { + sk->sk_rx_skb_cache = skb; + skb_orphan(skb); + return; + } __kfree_skb(skb); } @@ -2487,8 +2510,6 @@ static inline bool sk_listener(const struct sock *sk) } void sock_enable_timestamp(struct sock *sk, int flag); -int sock_get_timestamp(struct sock *, struct timeval __user *); -int sock_get_timestampns(struct sock *, struct timespec __user *); int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, int type); diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index 86d13b01b39d..c7f24a2da1ca 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -5,7 +5,8 @@ #include <net/act_api.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> -#include <linux/module.h> + +struct module; struct tcf_ife_params { u8 eth_dst[ETH_ALEN]; diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h new file mode 100644 index 000000000000..8b9ef3664262 --- /dev/null +++ b/include/net/tc_act/tc_police.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NET_TC_POLICE_H +#define __NET_TC_POLICE_H + +#include <net/act_api.h> + +struct tcf_police_params { + int tcfp_result; + u32 tcfp_ewma_rate; + s64 tcfp_burst; + u32 tcfp_mtu; + s64 tcfp_mtu_ptoks; + struct psched_ratecfg rate; + bool rate_present; + struct psched_ratecfg peak; + bool peak_present; + struct rcu_head rcu; +}; + +struct tcf_police { + struct tc_action common; + struct tcf_police_params __rcu *params; + + spinlock_t tcfp_lock ____cacheline_aligned_in_smp; + s64 tcfp_toks; + s64 tcfp_ptoks; + s64 tcfp_t_c; +}; + +#define to_police(pc) ((struct tcf_police *)pc) + +/* old policer structure from before tc actions */ +struct tc_police_compat { + u32 index; + int action; + u32 limit; + u32 burst; + u32 mtu; + struct tc_ratespec rate; + struct tc_ratespec peakrate; +}; + +static inline bool is_tcf_police(const struct tc_action *act) +{ +#ifdef CONFIG_NET_CLS_ACT + if (act->ops && act->ops->id == TCA_ID_POLICE) + return true; +#endif + return false; +} + +static inline u64 tcf_police_rate_bytes_ps(const struct tc_action *act) +{ + struct tcf_police *police = to_police(act); + struct tcf_police_params *params; + + params = rcu_dereference_bh(police->params); + return params->rate.rate_bytes_ps; +} + +static inline s64 tcf_police_tcfp_burst(const struct tc_action *act) +{ + struct tcf_police *police = to_police(act); + struct tcf_police_params *params; + + 
params = rcu_dereference_bh(police->params); + return params->tcfp_burst; +} + +#endif /* __NET_TC_POLICE_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 68ee02523b87..7cf1181630a3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1314,7 +1314,7 @@ static inline void tcp_update_wl(struct tcp_sock *tp, u32 seq) static inline __sum16 tcp_v4_check(int len, __be32 saddr, __be32 daddr, __wsum base) { - return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base); + return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_TCP, base); } static inline bool tcp_checksum_complete(struct sk_buff *skb) diff --git a/include/net/tls.h b/include/net/tls.h index 5934246b2c6f..39ea62f0c1f6 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -60,6 +60,17 @@ #define TLS_AAD_SPACE_SIZE 13 #define TLS_DEVICE_NAME_MAX 32 +#define MAX_IV_SIZE 16 + +/* For AES-CCM, the full 16-bytes of IV is made of '4' fields of given sizes. + * + * IV[16] = b0[1] || implicit nonce[4] || explicit nonce[8] || length[3] + * + * The field 'length' is encoded in field 'b0' as '(length width - 1)'. + * Hence b0 contains (3 - 1) = 2. + */ +#define TLS_AES_CCM_IV_B0_BYTE 2 + /* * This structure defines the routines for Inline TLS driver. * The following routines are optional and filled with a @@ -123,8 +134,7 @@ struct tls_rec { struct scatterlist sg_content_type; char aad_space[TLS_AAD_SPACE_SIZE]; - u8 iv_data[TLS_CIPHER_AES_GCM_128_IV_SIZE + - TLS_CIPHER_AES_GCM_128_SALT_SIZE]; + u8 iv_data[MAX_IV_SIZE]; struct aead_request aead_req; u8 aead_req_ctx[]; }; @@ -219,6 +229,7 @@ struct tls_prot_info { u16 tag_size; u16 overhead_size; u16 iv_size; + u16 salt_size; u16 rec_seq_size; u16 aad_size; u16 tail_size; @@ -266,6 +277,23 @@ struct tls_context { void (*unhash)(struct sock *sk); }; +enum tls_offload_ctx_dir { + TLS_OFFLOAD_CTX_DIR_RX, + TLS_OFFLOAD_CTX_DIR_TX, +}; + +struct tlsdev_ops { + int (*tls_dev_add)(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn); + void (*tls_dev_del)(struct net_device *netdev, + struct tls_context *ctx, + enum tls_offload_ctx_dir direction); + void (*tls_dev_resync_rx)(struct net_device *netdev, + struct sock *sk, u32 seq, u64 rcd_sn); +}; + struct tls_offload_context_rx { /* sw must be the first member of tls_offload_context_rx */ struct tls_sw_context_rx sw; @@ -306,7 +334,6 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_device_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); -void tls_device_sk_destruct(struct sock *sk); void tls_device_free_resources_tx(struct sock *sk); void tls_device_init(void); void tls_device_cleanup(void); @@ -325,7 +352,6 @@ static inline u32 tls_record_start_seq(struct tls_record_info *rec) return rec->end_seq - rec->len; } -void tls_sk_destruct(struct sock *sk, struct tls_context *ctx); int tls_push_sg(struct sock *sk, struct tls_context *ctx, struct scatterlist *sg, u16 first_offset, int flags); @@ -536,7 +562,7 @@ static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); - atomic64_set(&rx_ctx->resync_req, ((((uint64_t)seq) << 32) | 1)); + atomic64_set(&rx_ctx->resync_req, ((u64)ntohl(seq) << 32) | 1); } diff --git a/include/net/udp.h b/include/net/udp.h index 
fd6d948755c8..d8ce937bc395 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -269,13 +269,13 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, - int noblock, int *peeked, int *off, int *err); + int noblock, int *off, int *err); static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, int noblock, int *err) { - int peeked, off = 0; + int off = 0; - return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err); + return __skb_recv_udp(sk, flags, noblock, &off, err); } int udp_v4_early_demux(struct sk_buff *skb); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index b8137953fea3..4b1f95e08307 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -7,7 +7,7 @@ #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> -#include <net/addrconf.h> +#include <net/ipv6_stubs.h> #endif struct udp_port_cfg { diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 00254a58824b..83b5999a2587 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -8,6 +8,8 @@ #include <net/rtnetlink.h> #include <net/switchdev.h> +#define IANA_VXLAN_UDP_PORT 4789 + /* VXLAN protocol (RFC 7348) header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |R|R|R|R|I|R|R|R| Reserved | diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 99f722c4d804..a2907873ed56 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -132,6 +132,17 @@ struct xfrm_state_offload { u8 flags; }; +struct xfrm_mode { + u8 encap; + u8 family; + u8 flags; +}; + +/* Flags for xfrm_mode. */ +enum { + XFRM_MODE_FLAG_TUNNEL = 1, +}; + /* Full description of state of transformer. */ struct xfrm_state { possible_net_t xs_net; @@ -234,9 +245,9 @@ struct xfrm_state { /* Reference to data common to all the instances of this * transformer. */ const struct xfrm_type *type; - struct xfrm_mode *inner_mode; - struct xfrm_mode *inner_mode_iaf; - struct xfrm_mode *outer_mode; + struct xfrm_mode inner_mode; + struct xfrm_mode inner_mode_iaf; + struct xfrm_mode outer_mode; const struct xfrm_type_offload *type_offload; @@ -316,13 +327,6 @@ struct xfrm_policy_afinfo { xfrm_address_t *saddr, xfrm_address_t *daddr, u32 mark); - void (*decode_session)(struct sk_buff *skb, - struct flowi *fl, - int reverse); - int (*get_tos)(const struct flowi *fl); - int (*init_path)(struct xfrm_dst *path, - struct dst_entry *dst, - int nfheader_len); int (*fill_dst)(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl); @@ -348,7 +352,6 @@ struct xfrm_state_afinfo { struct module *owner; const struct xfrm_type *type_map[IPPROTO_MAX]; const struct xfrm_type_offload *type_offload_map[IPPROTO_MAX]; - struct xfrm_mode *mode_map[XFRM_MODE_MAX]; int (*init_flags)(struct xfrm_state *x); void (*init_tempsel)(struct xfrm_selector *sel, @@ -423,78 +426,6 @@ struct xfrm_type_offload { int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family); int xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family); -struct xfrm_mode { - /* - * Remove encapsulation header. - * - * The IP header will be moved over the top of the encapsulation - * header. - * - * On entry, the transport header shall point to where the IP header - * should be and the network header shall be set to where the IP - * header currently is. 
skb->data shall point to the start of the - * payload. - */ - int (*input2)(struct xfrm_state *x, struct sk_buff *skb); - - /* - * This is the actual input entry point. - * - * For transport mode and equivalent this would be identical to - * input2 (which does not need to be set). While tunnel mode - * and equivalent would set this to the tunnel encapsulation function - * xfrm4_prepare_input that would in turn call input2. - */ - int (*input)(struct xfrm_state *x, struct sk_buff *skb); - - /* - * Add encapsulation header. - * - * On exit, the transport header will be set to the start of the - * encapsulation header to be filled in by x->type->output and - * the mac header will be set to the nextheader (protocol for - * IPv4) field of the extension header directly preceding the - * encapsulation header, or in its absence, that of the top IP - * header. The value of the network header will always point - * to the top IP header while skb->data will point to the payload. - */ - int (*output2)(struct xfrm_state *x,struct sk_buff *skb); - - /* - * This is the actual output entry point. - * - * For transport mode and equivalent this would be identical to - * output2 (which does not need to be set). While tunnel mode - * and equivalent would set this to a tunnel encapsulation function - * (xfrm4_prepare_output or xfrm6_prepare_output) that would in turn - * call output2. - */ - int (*output)(struct xfrm_state *x, struct sk_buff *skb); - - /* - * Adjust pointers into the packet and do GSO segmentation. - */ - struct sk_buff *(*gso_segment)(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features); - - /* - * Adjust pointers into the packet when IPsec is done at layer2. - */ - void (*xmit)(struct xfrm_state *x, struct sk_buff *skb); - - struct xfrm_state_afinfo *afinfo; - struct module *owner; - unsigned int encap; - int flags; -}; - -/* Flags for xfrm_mode. 
*/ -enum { - XFRM_MODE_FLAG_TUNNEL = 1, -}; - -int xfrm_register_mode(struct xfrm_mode *mode, int family); -int xfrm_unregister_mode(struct xfrm_mode *mode, int family); - static inline int xfrm_af2proto(unsigned int family) { switch(family) { @@ -507,13 +438,13 @@ static inline int xfrm_af2proto(unsigned int family) } } -static inline struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipproto) +static inline const struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipproto) { if ((ipproto == IPPROTO_IPIP && x->props.family == AF_INET) || (ipproto == IPPROTO_IPV6 && x->props.family == AF_INET6)) - return x->inner_mode; + return &x->inner_mode; else - return x->inner_mode_iaf; + return &x->inner_mode_iaf; } struct xfrm_tmpl { @@ -1623,7 +1554,6 @@ int xfrm_init_replay(struct xfrm_state *x); int xfrm_state_mtu(struct xfrm_state *x, int mtu); int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload); int xfrm_init_state(struct xfrm_state *x); -int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm_input_resume(struct sk_buff *skb, int nexthdr); int xfrm_trans_queue(struct sk_buff *skb, @@ -1631,7 +1561,11 @@ int xfrm_trans_queue(struct sk_buff *skb, struct sk_buff *)); int xfrm_output_resume(struct sk_buff *skb, int err); int xfrm_output(struct sock *sk, struct sk_buff *skb); -int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); + +#if IS_ENABLED(CONFIG_NET_PKTGEN) +int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb); +#endif + void xfrm_local_error(struct sk_buff *skb, int mtu); int xfrm4_extract_header(struct sk_buff *skb); int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb); @@ -1650,10 +1584,8 @@ static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) } int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb); -int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb); int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb); -int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err); int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol); int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char protocol); int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); @@ -1669,7 +1601,6 @@ int xfrm6_rcv(struct sk_buff *skb); int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); void xfrm6_local_error(struct sk_buff *skb, u32 mtu); -int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err); int xfrm6_protocol_register(struct xfrm6_protocol *handler, unsigned char protocol); int xfrm6_protocol_deregister(struct xfrm6_protocol *handler, unsigned char protocol); int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family); @@ -1677,7 +1608,6 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family) __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr); __be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr); int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); -int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb); int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb); int 
xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, @@ -2069,7 +1999,7 @@ static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x, tunnel = true; break; } - if (tunnel && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)) + if (tunnel && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL)) return -EINVAL; return 0; diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index 505dae0bed80..d6e556c0a085 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -69,8 +69,7 @@ __bpf_trace_##call(void *__data, proto) \ * to make sure that if the tracepoint handling changes, the * bpf probe will fail to compile unless it too is updated. */ -#undef DEFINE_EVENT -#define DEFINE_EVENT(template, call, proto, args) \ +#define __DEFINE_EVENT(template, call, proto, args, size) \ static inline void bpf_test_probe_##call(void) \ { \ check_trace_callback_type_##call(__bpf_trace_##template); \ @@ -81,12 +80,36 @@ __bpf_trace_tp_map_##call = { \ .tp = &__tracepoint_##call, \ .bpf_func = (void *)__bpf_trace_##template, \ .num_args = COUNT_ARGS(args), \ + .writable_size = size, \ }; +#define FIRST(x, ...) x + +#undef DEFINE_EVENT_WRITABLE +#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \ +static inline void bpf_test_buffer_##call(void) \ +{ \ + /* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \ + * BUILD_BUG_ON_ZERO() uses a different mechanism that is not \ + * dead-code-eliminated. \ + */ \ + FIRST(proto); \ + (void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args))); \ +} \ +__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size) + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ + __DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0) #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef DEFINE_EVENT_WRITABLE +#undef __DEFINE_EVENT +#undef FIRST + #endif /* CONFIG_BPF_EVENTS */ diff --git a/include/trace/events/bpf_test_run.h b/include/trace/events/bpf_test_run.h new file mode 100644 index 000000000000..265447e3f71a --- /dev/null +++ b/include/trace/events/bpf_test_run.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bpf_test_run + +#if !defined(_TRACE_BPF_TEST_RUN_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BPF_TEST_RUN_H + +#include <linux/tracepoint.h> + +DECLARE_EVENT_CLASS(bpf_test_finish, + + TP_PROTO(int *err), + + TP_ARGS(err), + + TP_STRUCT__entry( + __field(int, err) + ), + + TP_fast_assign( + __entry->err = *err; + ), + + TP_printk("bpf_test_finish with err=%d", __entry->err) +); + +#ifdef DEFINE_EVENT_WRITABLE +#undef BPF_TEST_RUN_DEFINE_EVENT +#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size) \ + DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto), \ + PARAMS(args), size) +#else +#undef BPF_TEST_RUN_DEFINE_EVENT +#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size) \ + DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args)) +#endif + +BPF_TEST_RUN_DEFINE_EVENT(bpf_test_finish, bpf_test_finish, + + TP_PROTO(int *err), + + TP_ARGS(err), + + sizeof(int) +); + +#endif + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h index 6271bab63bfb..6f2a4dc35e37 100644 --- a/include/trace/events/fib.h +++ b/include/trace/events/fib.h @@ 
-13,9 +13,9 @@ TRACE_EVENT(fib_table_lookup, TP_PROTO(u32 tb_id, const struct flowi4 *flp, - const struct fib_nh *nh, int err), + const struct fib_nh_common *nhc, int err), - TP_ARGS(tb_id, flp, nh, err), + TP_ARGS(tb_id, flp, nhc, err), TP_STRUCT__entry( __field( u32, tb_id ) @@ -28,14 +28,17 @@ TRACE_EVENT(fib_table_lookup, __field( __u8, flags ) __array( __u8, src, 4 ) __array( __u8, dst, 4 ) - __array( __u8, gw, 4 ) - __array( __u8, saddr, 4 ) + __array( __u8, gw4, 4 ) + __array( __u8, gw6, 16 ) __field( u16, sport ) __field( u16, dport ) __dynamic_array(char, name, IFNAMSIZ ) ), TP_fast_assign( + struct in6_addr in6_zero = {}; + struct net_device *dev; + struct in6_addr *in6; __be32 *p32; __entry->tb_id = tb_id; @@ -62,30 +65,37 @@ TRACE_EVENT(fib_table_lookup, __entry->dport = 0; } - if (nh) { - p32 = (__be32 *) __entry->saddr; - *p32 = nh->nh_saddr; + dev = nhc ? nhc->nhc_dev : NULL; + __assign_str(name, dev ? dev->name : "-"); - p32 = (__be32 *) __entry->gw; - *p32 = nh->nh_gw; + if (nhc) { + if (nhc->nhc_gw_family == AF_INET) { + p32 = (__be32 *) __entry->gw4; + *p32 = nhc->nhc_gw.ipv4; - __assign_str(name, nh->nh_dev ? nh->nh_dev->name : "-"); - } else { - p32 = (__be32 *) __entry->saddr; - *p32 = 0; + in6 = (struct in6_addr *)__entry->gw6; + *in6 = in6_zero; + } else if (nhc->nhc_gw_family == AF_INET6) { + p32 = (__be32 *) __entry->gw4; + *p32 = 0; - p32 = (__be32 *) __entry->gw; + in6 = (struct in6_addr *)__entry->gw6; + *in6 = nhc->nhc_gw.ipv6; + } + } else { + p32 = (__be32 *) __entry->gw4; *p32 = 0; - __assign_str(name, "-"); + in6 = (struct in6_addr *)__entry->gw6; + *in6 = in6_zero; } ), - TP_printk("table %u oif %d iif %d proto %u %pI4/%u -> %pI4/%u tos %d scope %d flags %x ==> dev %s gw %pI4 src %pI4 err %d", + TP_printk("table %u oif %d iif %d proto %u %pI4/%u -> %pI4/%u tos %d scope %d flags %x ==> dev %s gw %pI4/%pI6c err %d", __entry->tb_id, __entry->oif, __entry->iif, __entry->proto, __entry->src, __entry->sport, __entry->dst, __entry->dport, __entry->tos, __entry->scope, __entry->flags, - __get_str(name), __entry->gw, __entry->saddr, __entry->err) + __get_str(name), __entry->gw4, __entry->gw6, __entry->err) ); #endif /* _TRACE_FIB_H */ diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h index b088b54d699c..c6abdcc77c12 100644 --- a/include/trace/events/fib6.h +++ b/include/trace/events/fib6.h @@ -12,10 +12,10 @@ TRACE_EVENT(fib6_table_lookup, - TP_PROTO(const struct net *net, const struct fib6_info *f6i, + TP_PROTO(const struct net *net, const struct fib6_result *res, struct fib6_table *table, const struct flowi6 *flp), - TP_ARGS(net, f6i, table, flp), + TP_ARGS(net, res, table, flp), TP_STRUCT__entry( __field( u32, tb_id ) @@ -39,7 +39,7 @@ TRACE_EVENT(fib6_table_lookup, struct in6_addr *in6; __entry->tb_id = table->tb6_id; - __entry->err = ip6_rt_type_to_error(f6i->fib6_type); + __entry->err = ip6_rt_type_to_error(res->fib6_type); __entry->oif = flp->flowi6_oif; __entry->iif = flp->flowi6_iif; __entry->tos = ip6_tclass(flp->flowlabel); @@ -62,20 +62,20 @@ TRACE_EVENT(fib6_table_lookup, __entry->dport = 0; } - if (f6i->fib6_nh.nh_dev) { - __assign_str(name, f6i->fib6_nh.nh_dev); + if (res->nh && res->nh->fib_nh_dev) { + __assign_str(name, res->nh->fib_nh_dev); } else { __assign_str(name, "-"); } - if (f6i == net->ipv6.fib6_null_entry) { + if (res->f6i == net->ipv6.fib6_null_entry) { struct in6_addr in6_zero = {}; in6 = (struct in6_addr *)__entry->gw; *in6 = in6_zero; - } else if (f6i) { + } else if (res->nh) { in6 = (struct in6_addr 
*)__entry->gw; - *in6 = f6i->fib6_nh.nh_gw; + *in6 = res->nh->fib_nh_gw6; } ), diff --git a/include/trace/events/mlxsw.h b/include/trace/events/mlxsw.h index 6a4cfaef33a2..19a25ed323a5 100644 --- a/include/trace/events/mlxsw.h +++ b/include/trace/events/mlxsw.h @@ -93,7 +93,7 @@ TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_migrate_end, __entry->mlxsw_sp, __entry->vregion) ); -TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_rehash_dis, +TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_rehash_rollback_failed, TP_PROTO(const struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_acl_tcam_vregion *vregion), diff --git a/include/trace/events/nbd.h b/include/trace/events/nbd.h new file mode 100644 index 000000000000..9849956f34d8 --- /dev/null +++ b/include/trace/events/nbd.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM nbd + +#if !defined(_TRACE_NBD_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NBD_H + +#include <linux/tracepoint.h> + +DECLARE_EVENT_CLASS(nbd_transport_event, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle), + + TP_STRUCT__entry( + __field(struct request *, req) + __field(u64, handle) + ), + + TP_fast_assign( + __entry->req = req; + __entry->handle = handle; + ), + + TP_printk( + "nbd transport event: request %p, handle 0x%016llx", + __entry->req, + __entry->handle + ) +); + +DEFINE_EVENT(nbd_transport_event, nbd_header_sent, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle) +); + +DEFINE_EVENT(nbd_transport_event, nbd_payload_sent, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle) +); + +DEFINE_EVENT(nbd_transport_event, nbd_header_received, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle) +); + +DEFINE_EVENT(nbd_transport_event, nbd_payload_received, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle) +); + +DECLARE_EVENT_CLASS(nbd_send_request, + + TP_PROTO(struct nbd_request *nbd_request, int index, + struct request *rq), + + TP_ARGS(nbd_request, index, rq), + + TP_STRUCT__entry( + __field(struct nbd_request *, nbd_request) + __field(u64, dev_index) + __field(struct request *, request) + ), + + TP_fast_assign( + __entry->nbd_request = 0; + __entry->dev_index = index; + __entry->request = rq; + ), + + TP_printk("nbd%lld: request %p", __entry->dev_index, __entry->request) +); + +#ifdef DEFINE_EVENT_WRITABLE +#undef NBD_DEFINE_EVENT +#define NBD_DEFINE_EVENT(template, call, proto, args, size) \ + DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto), \ + PARAMS(args), size) +#else +#undef NBD_DEFINE_EVENT +#define NBD_DEFINE_EVENT(template, call, proto, args, size) \ + DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args)) +#endif + +NBD_DEFINE_EVENT(nbd_send_request, nbd_send_request, + + TP_PROTO(struct nbd_request *nbd_request, int index, + struct request *rq), + + TP_ARGS(nbd_request, index, rq), + + sizeof(struct nbd_request) +); + +#endif + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/net.h b/include/trace/events/net.h index 1efd7d9b25fe..2399073c3afc 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -95,6 +95,29 @@ TRACE_EVENT(net_dev_xmit, __get_str(name), __entry->skbaddr, __entry->len, __entry->rc) ); +TRACE_EVENT(net_dev_xmit_timeout, + + TP_PROTO(struct net_device *dev, + int queue_index), + + TP_ARGS(dev, queue_index), + + TP_STRUCT__entry( + __string( name, dev->name ) + __string( driver, netdev_drivername(dev)) + __field( int, 
queue_index ) + ), + + TP_fast_assign( + __assign_str(name, dev->name); + __assign_str(driver, netdev_drivername(dev)); + __entry->queue_index = queue_index; + ), + + TP_printk("dev=%s driver=%s queue=%d", + __get_str(name), __get_str(driver), __entry->queue_index) +); + DECLARE_EVENT_CLASS(net_dev_template, TP_PROTO(struct sk_buff *skb), diff --git a/include/uapi/asm-generic/sockios.h b/include/uapi/asm-generic/sockios.h index 64f658c7cec2..44fa3ed70483 100644 --- a/include/uapi/asm-generic/sockios.h +++ b/include/uapi/asm-generic/sockios.h @@ -8,7 +8,7 @@ #define FIOGETOWN 0x8903 #define SIOCGPGRP 0x8904 #define SIOCATMARK 0x8905 -#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ -#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ +#define SIOCGSTAMP_OLD 0x8906 /* Get stamp (timeval) */ +#define SIOCGSTAMPNS_OLD 0x8907 /* Get stamp (timespec) */ #endif /* __ASM_GENERIC_SOCKIOS_H */ diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h index c99336f4eefe..4ebc2135e950 100644 --- a/include/uapi/linux/batadv_packet.h +++ b/include/uapi/linux/batadv_packet.h @@ -2,18 +2,6 @@ /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _UAPI_LINUX_BATADV_PACKET_H_ diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 305bf316dd03..67f4636758af 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -2,24 +2,6 @@ /* Copyright (C) 2016-2019 B.A.T.M.A.N. contributors: * * Matthias Schiffer - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. */ #ifndef _UAPI_LINUX_BATMAN_ADV_H_ @@ -491,6 +473,13 @@ enum batadv_nl_attrs { */ BATADV_ATTR_THROUGHPUT_OVERRIDE, + /** + * @BATADV_ATTR_MULTICAST_FANOUT: defines the maximum number of packet + * copies that may be generated for a multicast-to-unicast conversion. 
+ * Once this limit is exceeded distribution will fall back to broadcast. + */ + BATADV_ATTR_MULTICAST_FANOUT, + /* add attributes above here, update the policy in netlink.c */ /** diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 929c8e537a14..72336bac7573 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -105,6 +105,7 @@ enum bpf_cmd { BPF_BTF_GET_FD_BY_ID, BPF_TASK_FD_QUERY, BPF_MAP_LOOKUP_AND_DELETE_ELEM, + BPF_MAP_FREEZE, }; enum bpf_map_type { @@ -132,6 +133,7 @@ enum bpf_map_type { BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_STACK, + BPF_MAP_TYPE_SK_STORAGE, }; /* Note that tracing related programs such as @@ -166,6 +168,8 @@ enum bpf_prog_type { BPF_PROG_TYPE_LIRC_MODE2, BPF_PROG_TYPE_SK_REUSEPORT, BPF_PROG_TYPE_FLOW_DISSECTOR, + BPF_PROG_TYPE_CGROUP_SYSCTL, + BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, }; enum bpf_attach_type { @@ -187,6 +191,7 @@ enum bpf_attach_type { BPF_CGROUP_UDP6_SENDMSG, BPF_LIRC_MODE2, BPF_FLOW_DISSECTOR, + BPF_CGROUP_SYSCTL, __MAX_BPF_ATTACH_TYPE }; @@ -255,8 +260,19 @@ enum bpf_attach_type { */ #define BPF_F_ANY_ALIGNMENT (1U << 1) -/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ +/* When BPF ldimm64's insn[0].src_reg != 0 then this can have + * two extensions: + * + * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE + * insn[0].imm: map fd map fd + * insn[1].imm: 0 offset into value + * insn[0].off: 0 0 + * insn[1].off: 0 0 + * ldimm64 rewrite: address of map address of map[0]+offset + * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE + */ #define BPF_PSEUDO_MAP_FD 1 +#define BPF_PSEUDO_MAP_VALUE 2 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative * offset to another bpf function @@ -283,7 +299,7 @@ enum bpf_attach_type { #define BPF_OBJ_NAME_LEN 16U -/* Flags for accessing BPF object */ +/* Flags for accessing BPF object from syscall side. */ #define BPF_F_RDONLY (1U << 3) #define BPF_F_WRONLY (1U << 4) @@ -293,6 +309,10 @@ enum bpf_attach_type { /* Zero-initialize hash function seed. This should only be used for testing. */ #define BPF_F_ZERO_SEED (1U << 6) +/* Flags for accessing BPF object from program side. */ +#define BPF_F_RDONLY_PROG (1U << 7) +#define BPF_F_WRONLY_PROG (1U << 8) + /* flags for BPF_PROG_QUERY */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) @@ -396,6 +416,13 @@ union bpf_attr { __aligned_u64 data_out; __u32 repeat; __u32 duration; + __u32 ctx_size_in; /* input: len of ctx_in */ + __u32 ctx_size_out; /* input/output: len of ctx_out + * returns ENOSPC if ctx_out + * is too small. + */ + __aligned_u64 ctx_in; + __aligned_u64 ctx_out; } test; struct { /* anonymous struct used by BPF_*_GET_*_ID */ @@ -1478,13 +1505,31 @@ union bpf_attr { * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. * - * There is a single supported mode at this time: + * There are two supported modes at this time: + * + * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer + * (room space is added or removed below the layer 2 header). * * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer * (room space is added or removed below the layer 3 header). * - * All values for *flags* are reserved for future usage, and must - * be left at zero. + * The following flags are supported at this time: + * + * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size. + * Adjusting mss in this way is not allowed for datagrams. 
+ * + * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 **: + * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 **: + * Any new space is reserved to hold a tunnel header. + * Configure skb offsets and other fields accordingly. + * + * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE **: + * * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **: + * Use with ENCAP_L3 flags to further specify the tunnel type. + * + * * **BPF_F_ADJ_ROOM_ENCAP_L2(len) **: + * Use with ENCAP_L3/L4 flags to further specify the tunnel + * type; **len** is the length of the inner MAC header. * * A call to this helper is susceptible to change the underlaying * packet buffer. Therefore, at load time, all checks on pointers @@ -1694,12 +1739,19 @@ union bpf_attr { * error if an eBPF program tries to set a callback that is not * supported in the current kernel. * - * The supported callback values that *argval* can combine are: + * *argval* is a flag array which can combine these flags: * * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) * + * Therefore, this function can be used to clear a callback flag by + * setting the appropriate bit to zero. e.g. to disable the RTO + * callback: + * + * **bpf_sock_ops_cb_flags_set(bpf_sock,** + * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)** + * * Here are some examples of where one could call such eBPF * program: * @@ -2431,6 +2483,190 @@ union bpf_attr { * Return * A **struct bpf_sock** pointer on success, or **NULL** in * case of failure. + * + * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) + * Description + * Look for TCP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-**NULL**, released via **bpf_sk_release**\ (). + * + * This function is identical to bpf_sk_lookup_tcp, except that it + * also returns timewait or request sockets. Use bpf_sk_fullsock + * or bpf_tcp_socket to access the full structure. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. + * + * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * Description + * Check whether iph and th contain a valid SYN cookie ACK for + * the listening socket in sk. + * + * iph points to the start of the IPv4 or IPv6 header, while + * iph_len contains sizeof(struct iphdr) or sizeof(struct ip6hdr). + * + * th points to the start of the TCP header, while th_len contains + * sizeof(struct tcphdr). + * + * Return + * 0 if iph and th are a valid SYN cookie ACK, or a negative error + * otherwise. + * + * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) + * Description + * Get name of sysctl in /proc/sys/ and copy it into provided by + * program buffer *buf* of size *buf_len*. + * + * The buffer is always NUL terminated, unless it's zero-sized. + * + * If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is + * copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name + * only (e.g. "tcp_mem"). + * Return + * Number of character copied (not including the trailing NUL). 
+ * + * **-E2BIG** if the buffer wasn't big enough (*buf* will contain + * truncated name in this case). + * + * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * Description + * Get current value of sysctl as it is presented in /proc/sys + * (incl. newline, etc), and copy it as a string into provided + * by program buffer *buf* of size *buf_len*. + * + * The whole value is copied, no matter what file position user + * space issued e.g. sys_read at. + * + * The buffer is always NUL terminated, unless it's zero-sized. + * Return + * Number of character copied (not including the trailing NUL). + * + * **-E2BIG** if the buffer wasn't big enough (*buf* will contain + * truncated name in this case). + * + * **-EINVAL** if current value was unavailable, e.g. because + * sysctl is uninitialized and read returns -EIO for it. + * + * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * Description + * Get new value being written by user space to sysctl (before + * the actual write happens) and copy it as a string into + * provided by program buffer *buf* of size *buf_len*. + * + * User space may write new value at file position > 0. + * + * The buffer is always NUL terminated, unless it's zero-sized. + * Return + * Number of character copied (not including the trailing NUL). + * + * **-E2BIG** if the buffer wasn't big enough (*buf* will contain + * truncated name in this case). + * + * **-EINVAL** if sysctl is being read. + * + * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) + * Description + * Override new value being written by user space to sysctl with + * value provided by program in buffer *buf* of size *buf_len*. + * + * *buf* should contain a string in same form as provided by user + * space on sysctl write. + * + * User space may write new value at file position > 0. To override + * the whole sysctl value file position should be set to zero. + * Return + * 0 on success. + * + * **-E2BIG** if the *buf_len* is too big. + * + * **-EINVAL** if sysctl is being read. + * + * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) + * Description + * Convert the initial part of the string from buffer *buf* of + * size *buf_len* to a long integer according to the given base + * and save the result in *res*. + * + * The string may begin with an arbitrary amount of white space + * (as determined by isspace(3)) followed by a single optional '-' + * sign. + * + * Five least significant bits of *flags* encode base, other bits + * are currently unused. + * + * Base must be either 8, 10, 16 or 0 to detect it automatically + * similar to user space strtol(3). + * Return + * Number of characters consumed on success. Must be positive but + * no more than buf_len. + * + * **-EINVAL** if no valid digits were found or unsupported base + * was provided. + * + * **-ERANGE** if resulting value was out of range. + * + * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) + * Description + * Convert the initial part of the string from buffer *buf* of + * size *buf_len* to an unsigned long integer according to the + * given base and save the result in *res*. + * + * The string may begin with an arbitrary amount of white space + * (as determined by isspace(3)). + * + * Five least significant bits of *flags* encode base, other bits + * are currently unused. + * + * Base must be either 8, 10, 16 or 0 to detect it automatically + * similar to user space strtoul(3). 
+ * Return + * Number of characters consumed on success. Must be positive but + * no more than buf_len. + * + * **-EINVAL** if no valid digits were found or unsupported base + * was provided. + * + * **-ERANGE** if resulting value was out of range. + * + * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags) + * Description + * Get a bpf-local-storage from a sk. + * + * Logically, it could be thought of getting the value from + * a *map* with *sk* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem(map, &sk)** except this + * helper enforces the key must be a **bpf_fullsock()** + * and the map must be a BPF_MAP_TYPE_SK_STORAGE also. + * + * Underneath, the value is stored locally at *sk* instead of + * the map. The *map* is used as the bpf-local-storage **type**. + * The bpf-local-storage **type** (i.e. the *map*) is searched + * against all bpf-local-storages residing at sk. + * + * An optional *flags* (BPF_SK_STORAGE_GET_F_CREATE) can be + * used such that a new bpf-local-storage will be + * created if one does not exist. *value* can be used + * together with BPF_SK_STORAGE_GET_F_CREATE to specify + * the initial value of a bpf-local-storage. If *value* is + * NULL, the new bpf-local-storage will be zero initialized. + * Return + * A bpf-local-storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf-local-storage. + * + * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) + * Description + * Delete a bpf-local-storage from a sk. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf-local-storage cannot be found. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2531,7 +2767,17 @@ union bpf_attr { FN(sk_fullsock), \ FN(tcp_sock), \ FN(skb_ecn_set_ce), \ - FN(get_listener_sock), + FN(get_listener_sock), \ + FN(skc_lookup_tcp), \ + FN(tcp_check_syncookie), \ + FN(sysctl_get_name), \ + FN(sysctl_get_current_value), \ + FN(sysctl_get_new_value), \ + FN(sysctl_set_new_value), \ + FN(strtol), \ + FN(strtoul), \ + FN(sk_storage_get), \ + FN(sk_storage_delete), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2590,9 +2836,30 @@ enum bpf_func_id { /* Current network namespace */ #define BPF_F_CURRENT_NETNS (-1L) +/* BPF_FUNC_skb_adjust_room flags. */ +#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0) + +#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff +#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56 + +#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1) +#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2) +#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3) +#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4) +#define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \ + BPF_ADJ_ROOM_ENCAP_L2_MASK) \ + << BPF_ADJ_ROOM_ENCAP_L2_SHIFT) + +/* BPF_FUNC_sysctl_get_name flags. */ +#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0) + +/* BPF_FUNC_sk_storage_get flags */ +#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, + BPF_ADJ_ROOM_MAC, }; /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ @@ -3218,4 +3485,14 @@ struct bpf_line_info { struct bpf_spin_lock { __u32 val; }; + +struct bpf_sysctl { + __u32 write; /* Sysctl is being read (= 0) or written (= 1). + * Allows 1,2,4-byte read, but no write. + */ + __u32 file_pos; /* Sysctl file position to read from, write to. + * Allows 1,2,4-byte read an 4-byte write. 
+ */ +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index 7b7475ef2f17..9310652ca4f9 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -39,11 +39,11 @@ struct btf_type { * struct, union and fwd */ __u32 info; - /* "size" is used by INT, ENUM, STRUCT and UNION. + /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC. * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC and FUNC_PROTO. + * FUNC, FUNC_PROTO and VAR. * "type" is a type_id referring to another type. */ union { @@ -70,8 +70,10 @@ struct btf_type { #define BTF_KIND_RESTRICT 11 /* Restrict */ #define BTF_KIND_FUNC 12 /* Function */ #define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ -#define BTF_KIND_MAX 13 -#define NR_BTF_KINDS 14 +#define BTF_KIND_VAR 14 /* Variable */ +#define BTF_KIND_DATASEC 15 /* Section */ +#define BTF_KIND_MAX BTF_KIND_DATASEC +#define NR_BTF_KINDS (BTF_KIND_MAX + 1) /* For some specific BTF_KIND, "struct btf_type" is immediately * followed by extra data. @@ -138,4 +140,26 @@ struct btf_param { __u32 type; }; +enum { + BTF_VAR_STATIC = 0, + BTF_VAR_GLOBAL_ALLOCATED, +}; + +/* BTF_KIND_VAR is followed by a single "struct btf_var" to describe + * additional information related to the variable such as its linkage. + */ +struct btf_var { + __u32 linkage; +}; + +/* BTF_KIND_DATASEC is followed by multiple "struct btf_var_secinfo" + * to describe all BTF_KIND_VAR types it contains along with it's + * in-section offset as well as size. + */ +struct btf_var_secinfo { + __u32 type; + __u32 offset; + __u32 size; +}; + #endif /* _UAPI__LINUX_BTF_H__ */ diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index d473e5ed044c..3534ce157ae9 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -252,9 +252,17 @@ struct ethtool_tunable { #define DOWNSHIFT_DEV_DEFAULT_COUNT 0xff #define DOWNSHIFT_DEV_DISABLE 0 +/* Time in msecs after which link is reported as down + * 0 = lowest time supported by the PHY + * 0xff = off, link down detection according to standard + */ +#define ETHTOOL_PHY_FAST_LINK_DOWN_ON 0 +#define ETHTOOL_PHY_FAST_LINK_DOWN_OFF 0xff + enum phy_tunable_id { ETHTOOL_PHY_ID_UNSPEC, ETHTOOL_PHY_DOWNSHIFT, + ETHTOOL_PHY_FAST_LINK_DOWN, /* * Add your fresh new phy tunable attribute above and remember to update * phy_tunable_strings[] in net/core/ethtool.c @@ -1704,6 +1712,9 @@ static inline int ethtool_validate_duplex(__u8 duplex) #define ETH_MODULE_SFF_8436 0x4 #define ETH_MODULE_SFF_8436_LEN 256 +#define ETH_MODULE_SFF_8636_MAX_LEN 640 +#define ETH_MODULE_SFF_8436_MAX_LEN 640 + /* Reset flags */ /* The reset() operation must clear the flags for the components which * were actually reset. 
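The bpf.h additions above compose into a single flow: a BPF_PROG_TYPE_CGROUP_SYSCTL program attached with BPF_CGROUP_SYSCTL receives a struct bpf_sysctl context, inspects the access with the new sysctl helpers, and returns 1 to allow or 0 to reject it. A minimal sketch under stated assumptions (libbpf/selftest-style include and SEC() conventions, an illustrative program name and 4096 limit, none of which are part of this diff):

/* Sketch of a cgroup sysctl program: allow all reads, and only allow
 * writes whose new value parses to <= 4096. Illustrative only.
 */
#include <linux/bpf.h>
#include "bpf_helpers.h"	/* SEC() and helper declarations, libbpf style */

SEC("cgroup/sysctl")
int sysctl_guard(struct bpf_sysctl *ctx)
{
	char name[32], value[32];
	unsigned long newval;
	int len;

	if (!ctx->write)
		return 1;	/* reads are always allowed */

	/* BPF_F_SYSCTL_BASE_NAME copies "tcp_mem" rather than
	 * "net/ipv4/tcp_mem"; a real program would match on it here.
	 */
	len = bpf_sysctl_get_name(ctx, name, sizeof(name), BPF_F_SYSCTL_BASE_NAME);
	if (len < 0)
		return 0;

	/* string form of the value user space is about to write */
	len = bpf_sysctl_get_new_value(ctx, value, sizeof(value));
	if (len <= 0 || len >= sizeof(value))
		return 0;

	/* low 5 bits of flags carry the base; 0 auto-detects like strtoul(3) */
	if (bpf_strtoul(value, len, 0, &newval) < 0)
		return 0;

	return newval <= 4096;	/* 1 allows the write, 0 rejects it with -EPERM */
}

char _license[] SEC("license") = "GPL";

Once loaded, such a program is attached to a cgroup with the new BPF_CGROUP_SYSCTL attach type, for example via bpftool's cgroup attach subcommand, assuming a bpftool recent enough to know the "sysctl" attach type.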
On successful return, the flags indicate the diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h index f2ea833a2812..87c2c9f08803 100644 --- a/include/uapi/linux/fou.h +++ b/include/uapi/linux/fou.h @@ -16,6 +16,12 @@ enum { FOU_ATTR_IPPROTO, /* u8 */ FOU_ATTR_TYPE, /* u8 */ FOU_ATTR_REMCSUM_NOPARTIAL, /* flag */ + FOU_ATTR_LOCAL_V4, /* u32 */ + FOU_ATTR_LOCAL_V6, /* in6_addr */ + FOU_ATTR_PEER_V4, /* u32 */ + FOU_ATTR_PEER_V6, /* in6_addr */ + FOU_ATTR_PEER_PORT, /* u16 */ + FOU_ATTR_IFINDEX, /* s32 */ __FOU_ATTR_MAX, }; diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h index 325395f56bfa..2622b5a3e616 100644 --- a/include/uapi/linux/icmpv6.h +++ b/include/uapi/linux/icmpv6.h @@ -90,6 +90,8 @@ struct icmp6hdr { #define ICMPV6_TIME_EXCEED 3 #define ICMPV6_PARAMPROB 4 +#define ICMPV6_ERRMSG_MAX 127 + #define ICMPV6_INFOMSG_MASK 0x80 #define ICMPV6_ECHO_REQUEST 128 @@ -110,6 +112,8 @@ struct icmp6hdr { #define ICMPV6_MRDISC_ADV 151 +#define ICMPV6_MSG_MAX 255 + /* * Codes for Destination Unreachable */ diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 3a45b4ad71a3..3158ba672b72 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -109,6 +109,7 @@ #define ETH_P_QINQ2 0x9200 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_QINQ3 0x9300 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ +#define ETH_P_DSA_8021Q 0xDADB /* Fake VLAN Header for DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_IFE 0xED3E /* ForCES inter-FE LFB type */ #define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */ diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 23a6753b37df..454ae31b93c7 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -60,6 +60,7 @@ #define TUNSETSTEERINGEBPF _IOR('T', 224, int) #define TUNSETFILTEREBPF _IOR('T', 225, int) #define TUNSETCARRIER _IOW('T', 226, int) +#define TUNGETDEVNETNS _IO('T', 227) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 diff --git a/include/uapi/linux/if_vlan.h b/include/uapi/linux/if_vlan.h index 7a0e8bd65b6b..90a2c89afc8f 100644 --- a/include/uapi/linux/if_vlan.h +++ b/include/uapi/linux/if_vlan.h @@ -32,10 +32,11 @@ enum vlan_ioctl_cmds { }; enum vlan_flags { - VLAN_FLAG_REORDER_HDR = 0x1, - VLAN_FLAG_GVRP = 0x2, - VLAN_FLAG_LOOSE_BINDING = 0x4, - VLAN_FLAG_MVRP = 0x8, + VLAN_FLAG_REORDER_HDR = 0x1, + VLAN_FLAG_GVRP = 0x2, + VLAN_FLAG_LOOSE_BINDING = 0x4, + VLAN_FLAG_MVRP = 0x8, + VLAN_FLAG_BRIDGE_BINDING = 0x10, }; enum vlan_name_types { diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h index 1c916b2f89dc..e34f436fc79d 100644 --- a/include/uapi/linux/ip_vs.h +++ b/include/uapi/linux/ip_vs.h @@ -124,6 +124,13 @@ #define IP_VS_PEDATA_MAXLEN 255 +/* Tunnel types */ +enum { + IP_VS_CONN_F_TUNNEL_TYPE_IPIP = 0, /* IPIP */ + IP_VS_CONN_F_TUNNEL_TYPE_GUE, /* GUE */ + IP_VS_CONN_F_TUNNEL_TYPE_MAX, +}; + /* * The struct ip_vs_service_user and struct ip_vs_dest_user are * used to set IPVS rules through setsockopt. 
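Tying together the BPF_MAP_TYPE_SK_STORAGE pieces from the bpf.h hunk above: the map only supplies the value type and flags, while the data itself lives on each socket, created lazily by bpf_sk_storage_get() and dropped by bpf_sk_storage_delete() or socket destruction. A rough sketch of a cgroup egress program counting packets per socket; the BTF-defined map syntax follows current libbpf conventions and struct sk_stats is illustrative, not part of this diff:

/* Sketch only: per-socket storage created on first use, bumped per packet. */
#include <linux/bpf.h>
#include "bpf_helpers.h"

struct sk_stats {
	__u64 pkts;
};

struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);	/* required for sk storage */
	__type(key, int);
	__type(value, struct sk_stats);
} sk_stats_map SEC(".maps");

SEC("cgroup_skb/egress")
int count_egress(struct __sk_buff *skb)
{
	struct bpf_sock *sk = skb->sk;
	struct sk_stats *stats;

	if (!sk)
		return 1;
	sk = bpf_sk_fullsock(sk);	/* helper requires a full socket */
	if (!sk)
		return 1;

	/* NULL value + F_CREATE: zero-initialize the storage on first use */
	stats = bpf_sk_storage_get(&sk_stats_map, sk, NULL,
				   BPF_SK_STORAGE_GET_F_CREATE);
	if (stats)
		stats->pkts++;

	return 1;	/* keep the packet */
}

char _license[] SEC("license") = "GPL";

From user space, a given socket's entry can then be read with bpf_map_lookup_elem() on the map fd, using the socket's fd as the key.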
@@ -392,6 +399,10 @@ enum { IPVS_DEST_ATTR_STATS64, /* nested attribute for dest stats */ + IPVS_DEST_ATTR_TUN_TYPE, /* tunnel type */ + + IPVS_DEST_ATTR_TUN_PORT, /* tunnel port */ + __IPVS_DEST_ATTR_MAX, }; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index a66c8de006cc..f0cf7b0f4f35 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -967,6 +967,7 @@ enum nft_socket_keys { * @NFT_CT_SRC_IP6: conntrack layer 3 protocol source (IPv6 address) * @NFT_CT_DST_IP6: conntrack layer 3 protocol destination (IPv6 address) * @NFT_CT_TIMEOUT: connection tracking timeout policy assigned to conntrack + * @NFT_CT_ID: conntrack id */ enum nft_ct_keys { NFT_CT_STATE, @@ -993,6 +994,7 @@ enum nft_ct_keys { NFT_CT_SRC_IP6, NFT_CT_DST_IP6, NFT_CT_TIMEOUT, + NFT_CT_ID, __NFT_CT_MAX }; #define NFT_CT_MAX (__NFT_CT_MAX - 1) @@ -1522,15 +1524,21 @@ enum nft_flowtable_hook_attributes { * * @NFTA_OSF_DREG: destination register (NLA_U32: nft_registers) * @NFTA_OSF_TTL: Value of the TTL osf option (NLA_U8) + * @NFTA_OSF_FLAGS: flags (NLA_U32) */ enum nft_osf_attributes { NFTA_OSF_UNSPEC, NFTA_OSF_DREG, NFTA_OSF_TTL, + NFTA_OSF_FLAGS, __NFTA_OSF_MAX, }; #define NFTA_OSF_MAX (__NFTA_OSF_MAX - 1) +enum nft_osf_flags { + NFT_OSF_F_VERSION = (1 << 0), +}; + /** * enum nft_device_attributes - nf_tables device netlink attributes * diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index dd4f86ee286e..6f09d1500960 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -11,7 +11,7 @@ * Copyright 2008 Jouni Malinen <jouni.malinen@atheros.com> * Copyright 2008 Colin McCabe <colin@cozybit.com> * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2019 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -1065,6 +1065,26 @@ * indicated by %NL80211_ATTR_WIPHY_FREQ and other attributes * determining the width and type. * + * @NL80211_CMD_UPDATE_OWE_INFO: This interface allows the host driver to + * offload OWE processing to user space. This intends to support + * OWE AKM by the host drivers that implement SME but rely + * on the user space for the cryptographic/DH IE processing in AP mode. + * + * @NL80211_CMD_PROBE_MESH_LINK: The requirement for mesh link metric + * refreshing, is that from one mesh point we be able to send some data + * frames to other mesh points which are not currently selected as a + * primary traffic path, but which are only 1 hop away. The absence of + * the primary path to the chosen node makes it necessary to apply some + * form of marking on a chosen packet stream so that the packets can be + * properly steered to the selected node for testing, and not by the + * regular mesh path lookup. Further, the packets must be of type data + * so that the rate control (often embedded in firmware) is used for + * rate selection. + * + * Here attribute %NL80211_ATTR_MAC is used to specify connected mesh + * peer MAC address and %NL80211_ATTR_FRAME is used to specify the frame + * content. The frame is ethernet data. 
+ * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1285,6 +1305,10 @@ enum nl80211_commands { NL80211_CMD_NOTIFY_RADAR, + NL80211_CMD_UPDATE_OWE_INFO, + + NL80211_CMD_PROBE_MESH_LINK, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2308,6 +2332,15 @@ enum nl80211_commands { * @NL80211_ATTR_AIRTIME_WEIGHT: Station's weight when scheduled by the airtime * scheduler. * + * @NL80211_ATTR_STA_TX_POWER_SETTING: Transmit power setting type (u8) for + * station associated with the AP. See &enum nl80211_tx_power_setting for + * possible values. + * @NL80211_ATTR_STA_TX_POWER: Transmit power level (s16) in dBm units. This + * allows to set Tx power for a station. If this attribute is not included, + * the default per-interface tx power setting will be overriding. Driver + * should be picking up the lowest tx power, either tx power per-interface + * or per-station. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2758,6 +2791,8 @@ enum nl80211_attrs { NL80211_ATTR_PEER_MEASUREMENTS, NL80211_ATTR_AIRTIME_WEIGHT, + NL80211_ATTR_STA_TX_POWER_SETTING, + NL80211_ATTR_STA_TX_POWER, /* add attributes here, update the policy in nl80211.c */ @@ -2802,7 +2837,7 @@ enum nl80211_attrs { #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_HT_RATES 77 -#define NL80211_MAX_SUPP_REG_RULES 64 +#define NL80211_MAX_SUPP_REG_RULES 128 #define NL80211_TKIP_DATA_OFFSET_ENCR_KEY 0 #define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY 16 #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24 @@ -3139,6 +3174,7 @@ enum nl80211_sta_bss_param { * @NL80211_STA_INFO_TX_DURATION: aggregate PPDU duration for all frames * sent to the station (u64, usec) * @NL80211_STA_INFO_AIRTIME_WEIGHT: current airtime weight for station (u16) + * @NL80211_STA_INFO_AIRTIME_LINK_METRIC: airtime link metric for mesh station * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -3184,6 +3220,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_CONNECTED_TO_GATE, NL80211_STA_INFO_TX_DURATION, NL80211_STA_INFO_AIRTIME_WEIGHT, + NL80211_STA_INFO_AIRTIME_LINK_METRIC, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, @@ -3638,6 +3675,14 @@ enum nl80211_reg_rule_attr { * value as specified by &struct nl80211_bss_select_rssi_adjust. * @NL80211_SCHED_SCAN_MATCH_ATTR_BSSID: BSSID to be used for matching * (this cannot be used together with SSID). + * @NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI: Nested attribute that carries the + * band specific minimum rssi thresholds for the bands defined in + * enum nl80211_band. The minimum rssi threshold value(s32) specific to a + * band shall be encapsulated in attribute with type value equals to one + * of the NL80211_BAND_* defined in enum nl80211_band. For example, the + * minimum rssi threshold value for 2.4GHZ band shall be encapsulated + * within an attribute of type NL80211_BAND_2GHZ. And one or more of such + * attributes will be nested within this attribute. 
* @NL80211_SCHED_SCAN_MATCH_ATTR_MAX: highest scheduled scan filter * attribute number currently defined * @__NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST: internal use @@ -3650,6 +3695,7 @@ enum nl80211_sched_scan_match_attr { NL80211_SCHED_SCAN_MATCH_ATTR_RELATIVE_RSSI, NL80211_SCHED_SCAN_MATCH_ATTR_RSSI_ADJUST, NL80211_SCHED_SCAN_MATCH_ATTR_BSSID, + NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI, /* keep last */ __NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST, @@ -4135,6 +4181,27 @@ enum nl80211_channel_type { }; /** + * enum nl80211_key_mode - Key mode + * + * @NL80211_KEY_RX_TX: (Default) + * Key can be used for Rx and Tx immediately + * + * The following modes can only be selected for unicast keys and when the + * driver supports @NL80211_EXT_FEATURE_EXT_KEY_ID: + * + * @NL80211_KEY_NO_TX: Only allowed in combination with @NL80211_CMD_NEW_KEY: + * Unicast key can only be used for Rx, Tx not allowed, yet + * @NL80211_KEY_SET_TX: Only allowed in combination with @NL80211_CMD_SET_KEY: + * The unicast key identified by idx and mac is cleared for Tx and becomes + * the preferred Tx key for the station. + */ +enum nl80211_key_mode { + NL80211_KEY_RX_TX, + NL80211_KEY_NO_TX, + NL80211_KEY_SET_TX +}; + +/** * enum nl80211_chan_width - channel width definitions * * These values are used with the %NL80211_ATTR_CHANNEL_WIDTH @@ -4377,6 +4444,9 @@ enum nl80211_key_default_types { * @NL80211_KEY_DEFAULT_TYPES: A nested attribute containing flags * attributes, specifying what a key should be set as default as. * See &enum nl80211_key_default_types. + * @NL80211_KEY_MODE: the mode from enum nl80211_key_mode. + * Defaults to @NL80211_KEY_RX_TX. + * * @__NL80211_KEY_AFTER_LAST: internal * @NL80211_KEY_MAX: highest key attribute */ @@ -4390,6 +4460,7 @@ enum nl80211_key_attributes { NL80211_KEY_DEFAULT_MGMT, NL80211_KEY_TYPE, NL80211_KEY_DEFAULT_TYPES, + NL80211_KEY_MODE, /* keep last */ __NL80211_KEY_AFTER_LAST, @@ -5335,6 +5406,8 @@ enum nl80211_feature_flags { * able to rekey an in-use key correctly. Userspace must not rekey PTK keys * if this flag is not set. Ignoring this can leak clear text packets and/or * freeze the connection. + * @NL80211_EXT_FEATURE_EXT_KEY_ID: Driver supports "Extended Key ID for + * Individually Addressed Frames" from IEEE802.11-2016. * * @NL80211_EXT_FEATURE_AIRTIME_FAIRNESS: Driver supports getting airtime * fairness for transmitted packets and has enabled airtime fairness @@ -5343,6 +5416,12 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_AP_PMKSA_CACHING: Driver/device supports PMKSA caching * (set/del PMKSA operations) in AP mode. * + * @NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD: Driver supports + * filtering of sched scan results using band specific RSSI thresholds. + * + * @NL80211_EXT_FEATURE_STA_TX_PWR: This driver supports controlling tx power + * to a station. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. 
*/ @@ -5384,6 +5463,9 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER, NL80211_EXT_FEATURE_AIRTIME_FAIRNESS, NL80211_EXT_FEATURE_AP_PMKSA_CACHING, + NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD, + NL80211_EXT_FEATURE_EXT_KEY_ID, + NL80211_EXT_FEATURE_STA_TX_PWR, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index dbe0cbe4f1b7..f271f1ec50ae 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -364,6 +364,7 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_IPV6_DST, /* struct in6_addr dst IPv6 address. */ OVS_TUNNEL_KEY_ATTR_PAD, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, /* struct erspan_metadata */ + OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE, /* No argument. IPV4_INFO_BRIDGE mode.*/ __OVS_TUNNEL_KEY_ATTR_MAX }; @@ -734,6 +735,7 @@ struct ovs_action_hash { * be received on NFNLGRP_CONNTRACK_NEW and NFNLGRP_CONNTRACK_DESTROY groups, * respectively. Remaining bits control the changes for which an event is * delivered on the NFNLGRP_CONNTRACK_UPDATE group. + * @OVS_CT_ATTR_TIMEOUT: Variable length string defining conntrack timeout. */ enum ovs_ct_attr { OVS_CT_ATTR_UNSPEC, @@ -746,6 +748,8 @@ enum ovs_ct_attr { OVS_CT_ATTR_NAT, /* Nested OVS_NAT_ATTR_* */ OVS_CT_ATTR_FORCE_COMMIT, /* No argument */ OVS_CT_ATTR_EVENTMASK, /* u32 mask of IPCT_* events. */ + OVS_CT_ATTR_TIMEOUT, /* Associate timeout with this connection for + * fine-grain timeout tuning. */ __OVS_CT_ATTR_MAX }; @@ -798,6 +802,44 @@ struct ovs_action_push_eth { struct ovs_key_ethernet addresses; }; +/* + * enum ovs_check_pkt_len_attr - Attributes for %OVS_ACTION_ATTR_CHECK_PKT_LEN. + * + * @OVS_CHECK_PKT_LEN_ATTR_PKT_LEN: u16 Packet length to check for. + * @OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER: Nested OVS_ACTION_ATTR_* + * actions to apply if the packer length is greater than the specified + * length in the attr - OVS_CHECK_PKT_LEN_ATTR_PKT_LEN. + * @OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL - Nested OVS_ACTION_ATTR_* + * actions to apply if the packer length is lesser or equal to the specified + * length in the attr - OVS_CHECK_PKT_LEN_ATTR_PKT_LEN. + */ +enum ovs_check_pkt_len_attr { + OVS_CHECK_PKT_LEN_ATTR_UNSPEC, + OVS_CHECK_PKT_LEN_ATTR_PKT_LEN, + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER, + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL, + __OVS_CHECK_PKT_LEN_ATTR_MAX, + +#ifdef __KERNEL__ + OVS_CHECK_PKT_LEN_ATTR_ARG /* struct check_pkt_len_arg */ +#endif +}; + +#define OVS_CHECK_PKT_LEN_ATTR_MAX (__OVS_CHECK_PKT_LEN_ATTR_MAX - 1) + +#ifdef __KERNEL__ +struct check_pkt_len_arg { + u16 pkt_len; /* Same value as OVS_CHECK_PKT_LEN_ATTR_PKT_LEN'. */ + bool exec_for_greater; /* When true, actions in IF_GREATER will + * not change flow keys. False otherwise. + */ + bool exec_for_lesser_equal; /* When true, actions in IF_LESS_EQUAL + * will not change flow keys. False + * otherwise. + */ +}; +#endif + /** * enum ovs_action_attr - Action types. * @@ -842,6 +884,9 @@ struct ovs_action_push_eth { * packet, or modify the packet (e.g., change the DSCP field). * @OVS_ACTION_ATTR_CLONE: make a copy of the packet and execute a list of * actions without affecting the original packet and key. + * @OVS_ACTION_ATTR_CHECK_PKT_LEN: Check the packet length and execute a set + * of actions if greater than the specified packet length, else execute + * another set of actions. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. 
Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -876,6 +921,7 @@ enum ovs_action_attr { OVS_ACTION_ATTR_POP_NSH, /* No argument. */ OVS_ACTION_ATTR_METER, /* u32 meter ID. */ OVS_ACTION_ATTR_CLONE, /* Nested OVS_CLONE_ATTR_*. */ + OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 7ee74c3474bf..8b2f993cbb77 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -1148,6 +1148,16 @@ enum { #define TCA_TAPRIO_SCHED_MAX (__TCA_TAPRIO_SCHED_MAX - 1) +/* The format for the admin sched (dump only): + * [TCA_TAPRIO_SCHED_ADMIN_SCHED] + * [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] + * [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] + * [TCA_TAPRIO_ATTR_SCHED_ENTRY] + * [TCA_TAPRIO_ATTR_SCHED_ENTRY_CMD] + * [TCA_TAPRIO_ATTR_SCHED_ENTRY_GATES] + * [TCA_TAPRIO_ATTR_SCHED_ENTRY_INTERVAL] + */ + enum { TCA_TAPRIO_ATTR_UNSPEC, TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */ @@ -1156,6 +1166,9 @@ enum { TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY, /* single entry */ TCA_TAPRIO_ATTR_SCHED_CLOCKID, /* s32 */ TCA_TAPRIO_PAD, + TCA_TAPRIO_ATTR_ADMIN_SCHED, /* The admin sched, only used in dump */ + TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME, /* s64 */ + TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, /* s64 */ __TCA_TAPRIO_ATTR_MAX, }; diff --git a/include/uapi/linux/sockios.h b/include/uapi/linux/sockios.h index d393e9ed3964..7d1bccbbef78 100644 --- a/include/uapi/linux/sockios.h +++ b/include/uapi/linux/sockios.h @@ -19,6 +19,7 @@ #ifndef _LINUX_SOCKIOS_H #define _LINUX_SOCKIOS_H +#include <asm/bitsperlong.h> #include <asm/sockios.h> /* Linux-specific socket ioctls */ @@ -27,6 +28,26 @@ #define SOCK_IOC_TYPE 0x89 +/* + * the timeval/timespec data structure layout is defined by libc, + * so we need to cover both possible versions on 32-bit. + */ +/* Get stamp (timeval) */ +#define SIOCGSTAMP_NEW _IOR(SOCK_IOC_TYPE, 0x06, long long[2]) +/* Get stamp (timespec) */ +#define SIOCGSTAMPNS_NEW _IOR(SOCK_IOC_TYPE, 0x07, long long[2]) + +#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) +/* on 64-bit and x32, avoid the ?: operator */ +#define SIOCGSTAMP SIOCGSTAMP_OLD +#define SIOCGSTAMPNS SIOCGSTAMPNS_OLD +#else +#define SIOCGSTAMP ((sizeof(struct timeval)) == 8 ? \ + SIOCGSTAMP_OLD : SIOCGSTAMP_NEW) +#define SIOCGSTAMPNS ((sizeof(struct timespec)) == 8 ? \ + SIOCGSTAMPNS_OLD : SIOCGSTAMPNS_NEW) +#endif + /* Routing table calls. */ #define SIOCADDRT 0x890B /* add routing table entry */ #define SIOCDELRT 0x890C /* delete routing table entry */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 8bb6cc5f3235..b521464ea962 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -160,15 +160,42 @@ enum { #define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */ #define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */ +/* + * Sender's congestion state indicating normal or abnormal situations + * in the last round of packets sent. The state is driven by the ACK + * information and timer events. + */ enum tcp_ca_state { + /* + * Nothing bad has been observed recently. + * No apparent reordering, packet loss, or ECN marks. + */ TCP_CA_Open = 0, #define TCPF_CA_Open (1<<TCP_CA_Open) + /* + * The sender enters disordered state when it has received DUPACKs or + * SACKs in the last round of packets sent. 
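On the uapi/linux/sockios.h hunk a little further up in this chunk: existing callers keep using SIOCGSTAMP unchanged, and with updated sanitized headers the macro resolves to SIOCGSTAMP_OLD or SIOCGSTAMP_NEW based on the width of the libc's struct timeval, which is what keeps 32-bit builds with 64-bit time_t working. A minimal sketch of such an unchanged call site (assumed usage, error handling elided; whether SIOCGSTAMP comes from the libc's own ioctl headers or from linux/sockios.h depends on the toolchain):

#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/time.h>

static void print_last_rx_stamp(int sock)
{
	struct timeval tv;

	/* with the reworked headers this picks the _OLD or _NEW ioctl for us */
	if (ioctl(sock, SIOCGSTAMP, &tv) == 0)
		printf("last rx: %lld.%06ld\n",
		       (long long)tv.tv_sec, (long)tv.tv_usec);
}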
This could be due to packet + * loss or reordering but needs further information to confirm packets + * have been lost. + */ TCP_CA_Disorder = 1, #define TCPF_CA_Disorder (1<<TCP_CA_Disorder) + /* + * The sender enters Congestion Window Reduction (CWR) state when it + * has received ACKs with ECN-ECE marks, or has experienced congestion + * or packet discard on the sender host (e.g. qdisc). + */ TCP_CA_CWR = 2, #define TCPF_CA_CWR (1<<TCP_CA_CWR) + /* + * The sender is in fast recovery and retransmitting lost packets, + * typically triggered by ACK events. + */ TCP_CA_Recovery = 3, #define TCPF_CA_Recovery (1<<TCP_CA_Recovery) + /* + * The sender is in loss recovery triggered by retransmission timeout. + */ TCP_CA_Loss = 4 #define TCPF_CA_Loss (1<<TCP_CA_Loss) }; diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index 6b2fd4d9655f..7df026ea6aff 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -190,6 +190,7 @@ struct sockaddr_tipc { #define TIPC_MCAST_REPLICAST 134 /* Default: TIPC selects. No arg */ #define TIPC_GROUP_JOIN 135 /* Takes struct tipc_group_req* */ #define TIPC_GROUP_LEAVE 136 /* No argument */ +#define TIPC_SOCK_RECVQ_USED 137 /* Default: none (read only) */ /* * Flag values diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 0ebe02ef1a86..efb958fd167d 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -281,6 +281,8 @@ enum { TIPC_NLA_PROP_TOL, /* u32 */ TIPC_NLA_PROP_WIN, /* u32 */ TIPC_NLA_PROP_MTU, /* u32 */ + TIPC_NLA_PROP_BROADCAST, /* u32 */ + TIPC_NLA_PROP_BROADCAST_RATIO, /* u32 */ __TIPC_NLA_PROP_MAX, TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1 diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index 401d6f01de6a..5b9c26753e46 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -70,6 +70,13 @@ #define TLS_CIPHER_AES_GCM_256_TAG_SIZE 16 #define TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE 8 +#define TLS_CIPHER_AES_CCM_128 53 +#define TLS_CIPHER_AES_CCM_128_IV_SIZE 8 +#define TLS_CIPHER_AES_CCM_128_KEY_SIZE 16 +#define TLS_CIPHER_AES_CCM_128_SALT_SIZE 4 +#define TLS_CIPHER_AES_CCM_128_TAG_SIZE 16 +#define TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE 8 + #define TLS_SET_RECORD_TYPE 1 #define TLS_GET_RECORD_TYPE 2 @@ -94,4 +101,12 @@ struct tls12_crypto_info_aes_gcm_256 { unsigned char rec_seq[TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE]; }; +struct tls12_crypto_info_aes_ccm_128 { + struct tls_crypto_info info; + unsigned char iv[TLS_CIPHER_AES_CCM_128_IV_SIZE]; + unsigned char key[TLS_CIPHER_AES_CCM_128_KEY_SIZE]; + unsigned char salt[TLS_CIPHER_AES_CCM_128_SALT_SIZE]; + unsigned char rec_seq[TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE]; +}; + #endif /* _UAPI_LINUX_TLS_H */ |
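The new TLS_CIPHER_AES_CCM_128 definitions slot into the existing kTLS setsockopt flow: attach the "tls" ULP, then install a struct tls12_crypto_info_aes_ccm_128 for TLS_TX (and/or TLS_RX). A user-space sketch, assuming the key material has already been derived from the handshake; the SOL_TLS/TCP_ULP fallback defines are only there for libcs that do not expose them and use the values from the kernel headers:

/* Sketch (assumed usage, not from this diff): enable kTLS TX with AES-CCM-128.
 * Key handling and error reporting are elided.
 */
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <linux/tls.h>

#ifndef SOL_TLS
#define SOL_TLS 282	/* from linux/socket.h */
#endif
#ifndef TCP_ULP
#define TCP_ULP 31
#endif

static int ktls_enable_ccm128(int sock, const unsigned char *key,
			      const unsigned char *iv,
			      const unsigned char *salt,
			      const unsigned char *rec_seq)
{
	struct tls12_crypto_info_aes_ccm_128 ci = {
		.info.version     = TLS_1_2_VERSION,
		.info.cipher_type = TLS_CIPHER_AES_CCM_128,
	};

	memcpy(ci.key, key, TLS_CIPHER_AES_CCM_128_KEY_SIZE);
	memcpy(ci.iv, iv, TLS_CIPHER_AES_CCM_128_IV_SIZE);
	memcpy(ci.salt, salt, TLS_CIPHER_AES_CCM_128_SALT_SIZE);
	memcpy(ci.rec_seq, rec_seq, TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE);

	/* the "tls" ULP must be attached before TLS_TX/TLS_RX state is set */
	if (setsockopt(sock, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")) < 0)
		return -1;

	return setsockopt(sock, SOL_TLS, TLS_TX, &ci, sizeof(ci));
}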