diff options
Diffstat (limited to 'net')
121 files changed, 2018 insertions, 1191 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 296d0145932f..5920544e93e8 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -365,7 +365,7 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCSHWTSTAMP: - if (!net_eq(dev_net(dev), &init_net)) + if (!net_eq(dev_net(dev), dev_net(real_dev))) break; fallthrough; case SIOCGMIIPHY: diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 5de06ab8ed75..e70ae2c113f9 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -125,7 +125,7 @@ as_indicate_complete: break; case as_addparty: case as_dropparty: - sk->sk_err_soft = -msg->reply; + WRITE_ONCE(sk->sk_err_soft, -msg->reply); /* < 0 failure, otherwise ep_ref */ clear_bit(ATM_VF_WAITING, &vcc->flags); break; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index f81b24320a36..d350f31c7a3d 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -744,6 +744,7 @@ __bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) __bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) { + /* p != NULL, but p->cnt could be 0 */ } __bpf_kfunc void bpf_kfunc_call_test_destructive(void) @@ -791,7 +792,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3) BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1) BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU) BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) BTF_SET8_END(test_sk_check_kfunc_ids) diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index e5e48c6e35d7..b45c00c01dea 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -192,7 +192,7 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, if (n) { struct net_bridge_fdb_entry *f; - if (!(n->nud_state & NUD_VALID)) { + if (!(READ_ONCE(n->nud_state) & NUD_VALID)) { neigh_release(n); return; } @@ -452,7 +452,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, if (n) { struct net_bridge_fdb_entry *f; - if (!(n->nud_state & NUD_VALID)) { + if (!(READ_ONCE(n->nud_state) & NUD_VALID)) { neigh_release(n); return; } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index b82906fc999a..df47c876230e 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -468,6 +468,9 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fdb_del_bulk = br_fdb_delete_bulk, .ndo_fdb_dump = br_fdb_dump, .ndo_fdb_get = br_fdb_get, + .ndo_mdb_add = br_mdb_add, + .ndo_mdb_del = br_mdb_del, + .ndo_mdb_dump = br_mdb_dump, .ndo_bridge_getlink = br_getlink, .ndo_bridge_setlink = br_setlink, .ndo_bridge_dellink = br_dellink, diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 25c48d81a597..7305f5f8215c 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -380,82 +380,37 @@ out: return err; } -static int br_mdb_valid_dump_req(const struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +int br_mdb_dump(struct net_device *dev, struct sk_buff *skb, + struct netlink_callback *cb) { + struct net_bridge *br = netdev_priv(dev); struct br_port_msg *bpm; + struct nlmsghdr *nlh; + int err; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*bpm))) { - NL_SET_ERR_MSG_MOD(extack, "Invalid header for mdb dump request"); - return -EINVAL; - } + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_GETMDB, sizeof(*bpm), + NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; bpm = nlmsg_data(nlh); - if (bpm->ifindex) { - NL_SET_ERR_MSG_MOD(extack, "Filtering by device index is not supported for mdb dump request"); - return -EINVAL; - } - if (nlmsg_attrlen(nlh, sizeof(*bpm))) { - NL_SET_ERR_MSG(extack, "Invalid data after header in mdb dump request"); - return -EINVAL; - } - - return 0; -} - -static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct net_device *dev; - struct net *net = sock_net(skb->sk); - struct nlmsghdr *nlh = NULL; - int idx = 0, s_idx; - - if (cb->strict_check) { - int err = br_mdb_valid_dump_req(cb->nlh, cb->extack); - - if (err < 0) - return err; - } - - s_idx = cb->args[0]; + memset(bpm, 0, sizeof(*bpm)); + bpm->ifindex = dev->ifindex; rcu_read_lock(); - for_each_netdev_rcu(net, dev) { - if (netif_is_bridge_master(dev)) { - struct net_bridge *br = netdev_priv(dev); - struct br_port_msg *bpm; - - if (idx < s_idx) - goto skip; - - nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_GETMDB, - sizeof(*bpm), NLM_F_MULTI); - if (nlh == NULL) - break; - - bpm = nlmsg_data(nlh); - memset(bpm, 0, sizeof(*bpm)); - bpm->ifindex = dev->ifindex; - if (br_mdb_fill_info(skb, cb, dev) < 0) - goto out; - if (br_rports_fill_info(skb, &br->multicast_ctx) < 0) - goto out; - - cb->args[1] = 0; - nlmsg_end(skb, nlh); - skip: - idx++; - } - } + err = br_mdb_fill_info(skb, cb, dev); + if (err) + goto out; + err = br_rports_fill_info(skb, &br->multicast_ctx); + if (err) + goto out; out: - if (nlh) - nlmsg_end(skb, nlh); rcu_read_unlock(); - cb->args[0] = idx; - return skb->len; + nlmsg_end(skb, nlh); + return err; } static int nlmsg_populate_mdb_fill(struct sk_buff *skb, @@ -683,60 +638,6 @@ static const struct nla_policy br_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = { [MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC), }; -static int validate_mdb_entry(const struct nlattr *attr, - struct netlink_ext_ack *extack) -{ - struct br_mdb_entry *entry = nla_data(attr); - - if (nla_len(attr) != sizeof(struct br_mdb_entry)) { - NL_SET_ERR_MSG_MOD(extack, "Invalid MDBA_SET_ENTRY attribute length"); - return -EINVAL; - } - - if (entry->ifindex == 0) { - NL_SET_ERR_MSG_MOD(extack, "Zero entry ifindex is not allowed"); - return -EINVAL; - } - - if (entry->addr.proto == htons(ETH_P_IP)) { - if (!ipv4_is_multicast(entry->addr.u.ip4)) { - NL_SET_ERR_MSG_MOD(extack, "IPv4 entry group address is not multicast"); - return -EINVAL; - } - if (ipv4_is_local_multicast(entry->addr.u.ip4)) { - NL_SET_ERR_MSG_MOD(extack, "IPv4 entry group address is local multicast"); - return -EINVAL; - } -#if IS_ENABLED(CONFIG_IPV6) - } else if (entry->addr.proto == htons(ETH_P_IPV6)) { - if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6)) { - NL_SET_ERR_MSG_MOD(extack, "IPv6 entry group address is link-local all nodes"); - return -EINVAL; - } -#endif - } else if (entry->addr.proto == 0) { - /* L2 mdb */ - if (!is_multicast_ether_addr(entry->addr.u.mac_addr)) { - NL_SET_ERR_MSG_MOD(extack, "L2 entry group is not multicast"); - return -EINVAL; - } - } else { - NL_SET_ERR_MSG_MOD(extack, "Unknown entry protocol"); - return -EINVAL; - } - - if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) { - NL_SET_ERR_MSG_MOD(extack, "Unknown entry state"); - return -EINVAL; - } - if (entry->vid >= VLAN_VID_MASK) { - NL_SET_ERR_MSG_MOD(extack, "Invalid entry VLAN id"); - return -EINVAL; - } - - return 0; -} - static bool is_valid_mdb_source(struct nlattr *attr, __be16 proto, struct netlink_ext_ack *extack) { @@ -1299,49 +1200,16 @@ static int br_mdb_config_attrs_init(struct nlattr *set_attrs, return 0; } -static const struct nla_policy mdba_policy[MDBA_SET_ENTRY_MAX + 1] = { - [MDBA_SET_ENTRY_UNSPEC] = { .strict_start_type = MDBA_SET_ENTRY_ATTRS + 1 }, - [MDBA_SET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, - validate_mdb_entry, - sizeof(struct br_mdb_entry)), - [MDBA_SET_ENTRY_ATTRS] = { .type = NLA_NESTED }, -}; - -static int br_mdb_config_init(struct net *net, const struct nlmsghdr *nlh, - struct br_mdb_config *cfg, +static int br_mdb_config_init(struct br_mdb_config *cfg, struct net_device *dev, + struct nlattr *tb[], u16 nlmsg_flags, struct netlink_ext_ack *extack) { - struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1]; - struct br_port_msg *bpm; - struct net_device *dev; - int err; - - err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, - MDBA_SET_ENTRY_MAX, mdba_policy, extack); - if (err) - return err; + struct net *net = dev_net(dev); memset(cfg, 0, sizeof(*cfg)); cfg->filter_mode = MCAST_EXCLUDE; cfg->rt_protocol = RTPROT_STATIC; - cfg->nlflags = nlh->nlmsg_flags; - - bpm = nlmsg_data(nlh); - if (!bpm->ifindex) { - NL_SET_ERR_MSG_MOD(extack, "Invalid bridge ifindex"); - return -EINVAL; - } - - dev = __dev_get_by_index(net, bpm->ifindex); - if (!dev) { - NL_SET_ERR_MSG_MOD(extack, "Bridge device doesn't exist"); - return -ENODEV; - } - - if (!netif_is_bridge_master(dev)) { - NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge"); - return -EOPNOTSUPP; - } + cfg->nlflags = nlmsg_flags; cfg->br = netdev_priv(dev); @@ -1355,11 +1223,6 @@ static int br_mdb_config_init(struct net *net, const struct nlmsghdr *nlh, return -EINVAL; } - if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) { - NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY attribute"); - return -EINVAL; - } - cfg->entry = nla_data(tb[MDBA_SET_ENTRY]); if (cfg->entry->ifindex != cfg->br->dev->ifindex) { @@ -1383,6 +1246,12 @@ static int br_mdb_config_init(struct net *net, const struct nlmsghdr *nlh, } } + if (cfg->entry->addr.proto == htons(ETH_P_IP) && + ipv4_is_zeronet(cfg->entry->addr.u.ip4)) { + NL_SET_ERR_MSG_MOD(extack, "IPv4 entry group address 0.0.0.0 is not allowed"); + return -EINVAL; + } + if (tb[MDBA_SET_ENTRY_ATTRS]) return br_mdb_config_attrs_init(tb[MDBA_SET_ENTRY_ATTRS], cfg, extack); @@ -1397,16 +1266,15 @@ static void br_mdb_config_fini(struct br_mdb_config *cfg) br_mdb_config_src_list_fini(cfg); } -static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +int br_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, + struct netlink_ext_ack *extack) { - struct net *net = sock_net(skb->sk); struct net_bridge_vlan_group *vg; struct net_bridge_vlan *v; struct br_mdb_config cfg; int err; - err = br_mdb_config_init(net, nlh, &cfg, extack); + err = br_mdb_config_init(&cfg, dev, tb, nlmsg_flags, extack); if (err) return err; @@ -1500,16 +1368,15 @@ unlock: return err; } -static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +int br_mdb_del(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) { - struct net *net = sock_net(skb->sk); struct net_bridge_vlan_group *vg; struct net_bridge_vlan *v; struct br_mdb_config cfg; int err; - err = br_mdb_config_init(net, nlh, &cfg, extack); + err = br_mdb_config_init(&cfg, dev, tb, 0, extack); if (err) return err; @@ -1534,17 +1401,3 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, br_mdb_config_fini(&cfg); return err; } - -void br_mdb_init(void) -{ - rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0); - rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0); -} - -void br_mdb_uninit(void) -{ - rtnl_unregister(PF_BRIDGE, RTM_GETMDB); - rtnl_unregister(PF_BRIDGE, RTM_NEWMDB); - rtnl_unregister(PF_BRIDGE, RTM_DELMDB); -} diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 638a4d5359db..3e3065bc0465 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -277,7 +277,8 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); int ret; - if ((neigh->nud_state & NUD_CONNECTED) && neigh->hh.hh_len) { + if ((READ_ONCE(neigh->nud_state) & NUD_CONNECTED) && + READ_ONCE(neigh->hh.hh_len)) { neigh_hh_bridge(&neigh->hh, skb); skb->dev = nf_bridge->physindev; ret = br_handle_frame_finish(net, sk, skb); diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 6b07f30675bb..550039dfc31a 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -40,62 +40,6 @@ #include <linux/sysctl.h> #endif -/* We only check the length. A bridge shouldn't do any hop-by-hop stuff - * anyway - */ -static int br_nf_check_hbh_len(struct sk_buff *skb) -{ - unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1); - u32 pkt_len; - const unsigned char *nh = skb_network_header(skb); - int off = raw - nh; - int len = (raw[1] + 1) << 3; - - if ((raw + len) - skb->data > skb_headlen(skb)) - goto bad; - - off += 2; - len -= 2; - - while (len > 0) { - int optlen = nh[off + 1] + 2; - - switch (nh[off]) { - case IPV6_TLV_PAD1: - optlen = 1; - break; - - case IPV6_TLV_PADN: - break; - - case IPV6_TLV_JUMBO: - if (nh[off + 1] != 4 || (off & 3) != 2) - goto bad; - pkt_len = ntohl(*(__be32 *)(nh + off + 2)); - if (pkt_len <= IPV6_MAXPLEN || - ipv6_hdr(skb)->payload_len) - goto bad; - if (pkt_len > skb->len - sizeof(struct ipv6hdr)) - goto bad; - if (pskb_trim_rcsum(skb, - pkt_len + sizeof(struct ipv6hdr))) - goto bad; - nh = skb_network_header(skb); - break; - default: - if (optlen > len) - goto bad; - break; - } - off += optlen; - len -= optlen; - } - if (len == 0) - return 0; -bad: - return -1; -} - int br_validate_ipv6(struct net *net, struct sk_buff *skb) { const struct ipv6hdr *hdr; @@ -115,22 +59,19 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb) goto inhdr_error; pkt_len = ntohs(hdr->payload_len); + if (hdr->nexthdr == NEXTHDR_HOP && nf_ip6_check_hbh_len(skb, &pkt_len)) + goto drop; - if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { - if (pkt_len + ip6h_len > skb->len) { - __IP6_INC_STATS(net, idev, - IPSTATS_MIB_INTRUNCATEDPKTS); - goto drop; - } - if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { - __IP6_INC_STATS(net, idev, - IPSTATS_MIB_INDISCARDS); - goto drop; - } - hdr = ipv6_hdr(skb); + if (pkt_len + ip6h_len > skb->len) { + __IP6_INC_STATS(net, idev, + IPSTATS_MIB_INTRUNCATEDPKTS); + goto drop; } - if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb)) + if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { + __IP6_INC_STATS(net, idev, + IPSTATS_MIB_INDISCARDS); goto drop; + } memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); /* No IP options in IPv6 header; however it should be diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 9173e52b89e2..fefb1c0e248b 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1886,7 +1886,6 @@ int __init br_netlink_init(void) { int err; - br_mdb_init(); br_vlan_rtnl_init(); rtnl_af_register(&br_af_ops); @@ -1898,13 +1897,11 @@ int __init br_netlink_init(void) out_af: rtnl_af_unregister(&br_af_ops); - br_mdb_uninit(); return err; } void br_netlink_fini(void) { - br_mdb_uninit(); br_vlan_rtnl_uninit(); rtnl_af_unregister(&br_af_ops); rtnl_link_unregister(&br_link_ops); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index cef5f6ea850c..7264fd40f82f 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -981,8 +981,12 @@ void br_multicast_get_stats(const struct net_bridge *br, u32 br_multicast_ngroups_get(const struct net_bridge_mcast_port *pmctx); void br_multicast_ngroups_set_max(struct net_bridge_mcast_port *pmctx, u32 max); u32 br_multicast_ngroups_get_max(const struct net_bridge_mcast_port *pmctx); -void br_mdb_init(void); -void br_mdb_uninit(void); +int br_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, + struct netlink_ext_ack *extack); +int br_mdb_del(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack); +int br_mdb_dump(struct net_device *dev, struct sk_buff *skb, + struct netlink_callback *cb); void br_multicast_host_join(const struct net_bridge_mcast *brmctx, struct net_bridge_mdb_entry *mp, bool notify); void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify); @@ -1374,12 +1378,22 @@ static inline bool br_multicast_querier_exists(struct net_bridge_mcast *brmctx, return false; } -static inline void br_mdb_init(void) +static inline int br_mdb_add(struct net_device *dev, struct nlattr *tb[], + u16 nlmsg_flags, struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static inline int br_mdb_del(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) { + return -EOPNOTSUPP; } -static inline void br_mdb_uninit(void) +static inline int br_mdb_dump(struct net_device *dev, struct sk_buff *skb, + struct netlink_callback *cb) { + return 0; } static inline int br_mdb_hash_init(struct net_bridge *br) diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index c3ecd77e25cb..bd4d1b4d745f 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -8,6 +8,9 @@ #include <net/netfilter/nf_tables.h> #include <net/netfilter/nft_meta.h> #include <linux/if_bridge.h> +#include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */ + +#include "../br_private.h" static const struct net_device * nft_meta_get_bridge(const struct net_device *dev) @@ -102,6 +105,50 @@ static const struct nft_expr_ops nft_meta_bridge_get_ops = { .reduce = nft_meta_get_reduce, }; +static void nft_meta_bridge_set_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *meta = nft_expr_priv(expr); + u32 *sreg = ®s->data[meta->sreg]; + struct sk_buff *skb = pkt->skb; + u8 value8; + + switch (meta->key) { + case NFT_META_BRI_BROUTE: + value8 = nft_reg_load8(sreg); + BR_INPUT_SKB_CB(skb)->br_netfilter_broute = !!value8; + break; + default: + nft_meta_set_eval(expr, regs, pkt); + } +} + +static int nft_meta_bridge_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_meta *priv = nft_expr_priv(expr); + unsigned int len; + int err; + + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (priv->key) { + case NFT_META_BRI_BROUTE: + len = sizeof(u8); + break; + default: + return nft_meta_set_init(ctx, expr, tb); + } + + priv->len = len; + err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len); + if (err < 0) + return err; + + return 0; +} + static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { @@ -120,15 +167,33 @@ static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track, return false; } +static int nft_meta_bridge_set_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + struct nft_meta *priv = nft_expr_priv(expr); + unsigned int hooks; + + switch (priv->key) { + case NFT_META_BRI_BROUTE: + hooks = 1 << NF_BR_PRE_ROUTING; + break; + default: + return nft_meta_set_validate(ctx, expr, data); + } + + return nft_chain_validate_hooks(ctx->chain, hooks); +} + static const struct nft_expr_ops nft_meta_bridge_set_ops = { .type = &nft_meta_bridge_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), - .eval = nft_meta_set_eval, - .init = nft_meta_set_init, + .eval = nft_meta_bridge_set_eval, + .init = nft_meta_bridge_set_init, .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, .reduce = nft_meta_bridge_set_reduce, - .validate = nft_meta_set_validate, + .validate = nft_meta_bridge_set_validate, }; static const struct nft_expr_ops * diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index bb378c33f542..7a36353dbc22 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -324,6 +324,7 @@ const struct bpf_map_ops sk_storage_map_ops = { .map_local_storage_charge = bpf_sk_storage_charge, .map_local_storage_uncharge = bpf_sk_storage_uncharge, .map_owner_storage_ptr = bpf_sk_storage_ptr, + .map_mem_usage = bpf_local_storage_map_mem_usage, }; const struct bpf_func_proto bpf_sk_storage_get_proto = { diff --git a/net/core/datagram.c b/net/core/datagram.c index e4ff2db40c98..5662dff3d381 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -622,12 +622,12 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, frag = skb_shinfo(skb)->nr_frags; while (length && iov_iter_count(from)) { + struct page *head, *last_head = NULL; struct page *pages[MAX_SKB_FRAGS]; - struct page *last_head = NULL; + int refs, order, n = 0; size_t start; ssize_t copied; unsigned long truesize; - int refs, n = 0; if (frag == MAX_SKB_FRAGS) return -EMSGSIZE; @@ -650,9 +650,17 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, } else { refcount_add(truesize, &skb->sk->sk_wmem_alloc); } + + head = compound_head(pages[n]); + order = compound_order(head); + for (refs = 0; copied != 0; start = 0) { int size = min_t(int, copied, PAGE_SIZE - start); - struct page *head = compound_head(pages[n]); + + if (pages[n] - head > (1UL << order) - 1) { + head = compound_head(pages[n]); + order = compound_order(head); + } start += (pages[n] - head) << PAGE_SHIFT; copied -= size; diff --git a/net/core/dev.c b/net/core/dev.c index 253584777101..7172334a418f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2535,6 +2535,8 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, struct xps_map *map, *new_map; unsigned int nr_ids; + WARN_ON_ONCE(index >= dev->num_tx_queues); + if (dev->num_tc) { /* Do not allow XPS on subordinate device directly */ num_tc = dev->num_tc; @@ -3075,7 +3077,7 @@ void __netif_schedule(struct Qdisc *q) EXPORT_SYMBOL(__netif_schedule); struct dev_kfree_skb_cb { - enum skb_free_reason reason; + enum skb_drop_reason reason; }; static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb) @@ -3108,7 +3110,7 @@ void netif_tx_wake_queue(struct netdev_queue *dev_queue) } EXPORT_SYMBOL(netif_tx_wake_queue); -void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) +void dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason) { unsigned long flags; @@ -3128,18 +3130,16 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } -EXPORT_SYMBOL(__dev_kfree_skb_irq); +EXPORT_SYMBOL(dev_kfree_skb_irq_reason); -void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason) +void dev_kfree_skb_any_reason(struct sk_buff *skb, enum skb_drop_reason reason) { if (in_hardirq() || irqs_disabled()) - __dev_kfree_skb_irq(skb, reason); - else if (unlikely(reason == SKB_REASON_DROPPED)) - kfree_skb(skb); + dev_kfree_skb_irq_reason(skb, reason); else - consume_skb(skb); + kfree_skb_reason(skb, reason); } -EXPORT_SYMBOL(__dev_kfree_skb_any); +EXPORT_SYMBOL(dev_kfree_skb_any_reason); /** @@ -3735,25 +3735,25 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) * we add to pkt_len the headers size of all segments */ if (shinfo->gso_size && skb_transport_header_was_set(skb)) { - unsigned int hdr_len; u16 gso_segs = shinfo->gso_segs; + unsigned int hdr_len; /* mac layer + network layer */ - hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + hdr_len = skb_transport_offset(skb); /* + transport layer */ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { const struct tcphdr *th; struct tcphdr _tcphdr; - th = skb_header_pointer(skb, skb_transport_offset(skb), + th = skb_header_pointer(skb, hdr_len, sizeof(_tcphdr), &_tcphdr); if (likely(th)) hdr_len += __tcp_hdrlen(th); } else { struct udphdr _udphdr; - if (skb_header_pointer(skb, skb_transport_offset(skb), + if (skb_header_pointer(skb, hdr_len, sizeof(_udphdr), &_udphdr)) hdr_len += sizeof(struct udphdr); } @@ -5020,11 +5020,11 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) clist = clist->next; WARN_ON(refcount_read(&skb->users)); - if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED)) + if (likely(get_kfree_skb_cb(skb)->reason == SKB_CONSUMED)) trace_consume_skb(skb, net_tx_action); else trace_kfree_skb(skb, net_tx_action, - SKB_DROP_REASON_NOT_SPECIFIED); + get_kfree_skb_cb(skb)->reason); if (skb->fclone != SKB_FCLONE_UNAVAILABLE) __kfree_skb(skb); diff --git a/net/core/filter.c b/net/core/filter.c index 1d6f165923bf..a8c8fd96c822 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1721,6 +1721,12 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = { .arg5_type = ARG_ANYTHING, }; +int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, + u32 len, u64 flags) +{ + return ____bpf_skb_store_bytes(skb, offset, from, len, flags); +} + BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset, void *, to, u32, len) { @@ -1751,6 +1757,11 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { .arg4_type = ARG_CONST_SIZE, }; +int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) +{ + return ____bpf_skb_load_bytes(skb, offset, to, len); +} + BPF_CALL_4(bpf_flow_dissector_load_bytes, const struct bpf_flow_dissector *, ctx, u32, offset, void *, to, u32, len) @@ -2193,7 +2204,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, return -ENOMEM; } - rcu_read_lock_bh(); + rcu_read_lock(); if (!nh) { dst = skb_dst(skb); nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst), @@ -2206,10 +2217,12 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, int ret; sock_confirm_neigh(skb, neigh); + local_bh_disable(); dev_xmit_recursion_inc(); ret = neigh_output(neigh, skb, false); dev_xmit_recursion_dec(); - rcu_read_unlock_bh(); + local_bh_enable(); + rcu_read_unlock(); return ret; } rcu_read_unlock_bh(); @@ -2291,7 +2304,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, return -ENOMEM; } - rcu_read_lock_bh(); + rcu_read_lock(); if (!nh) { struct dst_entry *dst = skb_dst(skb); struct rtable *rt = container_of(dst, struct rtable, dst); @@ -2303,7 +2316,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, } else if (nh->nh_family == AF_INET) { neigh = ip_neigh_gw4(dev, nh->ipv4_nh); } else { - rcu_read_unlock_bh(); + rcu_read_unlock(); goto out_drop; } @@ -2311,13 +2324,15 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, int ret; sock_confirm_neigh(skb, neigh); + local_bh_disable(); dev_xmit_recursion_inc(); ret = neigh_output(neigh, skb, is_v6gw); dev_xmit_recursion_dec(); - rcu_read_unlock_bh(); + local_bh_enable(); + rcu_read_unlock(); return ret; } - rcu_read_unlock_bh(); + rcu_read_unlock(); out_drop: kfree_skb(skb); return -ENETDOWN; @@ -3828,7 +3843,7 @@ static const struct bpf_func_proto sk_skb_change_head_proto = { .arg3_type = ARG_ANYTHING, }; -BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp) +BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp) { return xdp_get_buff_len(xdp); } @@ -3883,8 +3898,8 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { .arg2_type = ARG_ANYTHING, }; -static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, - void *buf, unsigned long len, bool flush) +void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, + void *buf, unsigned long len, bool flush) { unsigned long ptr_len, ptr_off = 0; skb_frag_t *next_frag, *end_frag; @@ -3930,7 +3945,7 @@ static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, } } -static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) +void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); u32 size = xdp->data_end - xdp->data; @@ -3988,6 +4003,11 @@ static const struct bpf_func_proto bpf_xdp_load_bytes_proto = { .arg4_type = ARG_CONST_SIZE, }; +int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len) +{ + return ____bpf_xdp_load_bytes(xdp, offset, buf, len); +} + BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset, void *, buf, u32, len) { @@ -4015,6 +4035,11 @@ static const struct bpf_func_proto bpf_xdp_store_bytes_proto = { .arg4_type = ARG_CONST_SIZE, }; +int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len) +{ + return ____bpf_xdp_store_bytes(xdp, offset, buf, len); +} + static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); @@ -5850,7 +5875,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, else neigh = __ipv6_neigh_lookup_noref_stub(dev, params->ipv6_dst); - if (!neigh || !(neigh->nud_state & NUD_VALID)) + if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID)) return BPF_FIB_LKUP_RET_NO_NEIGH; memcpy(params->dmac, neigh->ha, ETH_ALEN); memcpy(params->smac, dev->dev_addr, ETH_ALEN); @@ -5971,7 +5996,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, * not needed here. */ neigh = __ipv6_neigh_lookup_noref_stub(dev, dst); - if (!neigh || !(neigh->nud_state & NUD_VALID)) + if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID)) return BPF_FIB_LKUP_RET_NO_NEIGH; memcpy(params->dmac, neigh->ha, ETH_ALEN); memcpy(params->smac, dev->dev_addr, ETH_ALEN); @@ -8144,12 +8169,6 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_storage_delete_proto; case BPF_FUNC_get_netns_cookie: return &bpf_get_netns_cookie_sk_msg_proto; -#ifdef CONFIG_CGROUPS - case BPF_FUNC_get_current_cgroup_id: - return &bpf_get_current_cgroup_id_proto; - case BPF_FUNC_get_current_ancestor_cgroup_id: - return &bpf_get_current_ancestor_cgroup_id_proto; -#endif #ifdef CONFIG_CGROUP_NET_CLASSID case BPF_FUNC_get_cgroup_classid: return &bpf_get_cgroup_classid_curr_proto; @@ -9264,11 +9283,15 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, #endif /* <store>: skb->tstamp = tstamp */ - *insn++ = BPF_STX_MEM(BPF_DW, skb_reg, value_reg, - offsetof(struct sk_buff, tstamp)); + *insn++ = BPF_RAW_INSN(BPF_CLASS(si->code) | BPF_DW | BPF_MEM, + skb_reg, value_reg, offsetof(struct sk_buff, tstamp), si->imm); return insn; } +#define BPF_EMIT_STORE(size, si, off) \ + BPF_RAW_INSN(BPF_CLASS((si)->code) | (size) | BPF_MEM, \ + (si)->dst_reg, (si)->src_reg, (off), (si)->imm) + static u32 bpf_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, @@ -9298,9 +9321,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, priority): if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - bpf_target_off(struct sk_buff, priority, 4, - target_size)); + *insn++ = BPF_EMIT_STORE(BPF_W, si, + bpf_target_off(struct sk_buff, priority, 4, + target_size)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, priority, 4, @@ -9331,9 +9354,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, mark): if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - bpf_target_off(struct sk_buff, mark, 4, - target_size)); + *insn++ = BPF_EMIT_STORE(BPF_W, si, + bpf_target_off(struct sk_buff, mark, 4, + target_size)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, mark, 4, @@ -9352,11 +9375,16 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, queue_mapping): if (type == BPF_WRITE) { - *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1); - *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, - bpf_target_off(struct sk_buff, - queue_mapping, - 2, target_size)); + u32 off = bpf_target_off(struct sk_buff, queue_mapping, 2, target_size); + + if (BPF_CLASS(si->code) == BPF_ST && si->imm >= NO_QUEUE_MAPPING) { + *insn++ = BPF_JMP_A(0); /* noop */ + break; + } + + if (BPF_CLASS(si->code) == BPF_STX) + *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1); + *insn++ = BPF_EMIT_STORE(BPF_H, si, off); } else { *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, @@ -9392,8 +9420,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, off += offsetof(struct sk_buff, cb); off += offsetof(struct qdisc_skb_cb, data); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg, - si->src_reg, off); + *insn++ = BPF_EMIT_STORE(BPF_SIZE(si->code), si, off); else *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg, si->src_reg, off); @@ -9408,8 +9435,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, off += offsetof(struct qdisc_skb_cb, tc_classid); *target_size = 2; if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, - si->src_reg, off); + *insn++ = BPF_EMIT_STORE(BPF_H, si, off); else *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, off); @@ -9442,9 +9468,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, tc_index): #ifdef CONFIG_NET_SCHED if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, - bpf_target_off(struct sk_buff, tc_index, 2, - target_size)); + *insn++ = BPF_EMIT_STORE(BPF_H, si, + bpf_target_off(struct sk_buff, tc_index, 2, + target_size)); else *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, tc_index, 2, @@ -9645,8 +9671,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, BUILD_BUG_ON(sizeof_field(struct sock, sk_bound_dev_if) != 4); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sock, sk_bound_dev_if)); + *insn++ = BPF_EMIT_STORE(BPF_W, si, + offsetof(struct sock, sk_bound_dev_if)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sock, sk_bound_dev_if)); @@ -9656,8 +9682,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, BUILD_BUG_ON(sizeof_field(struct sock, sk_mark) != 4); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sock, sk_mark)); + *insn++ = BPF_EMIT_STORE(BPF_W, si, + offsetof(struct sock, sk_mark)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sock, sk_mark)); @@ -9667,8 +9693,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, BUILD_BUG_ON(sizeof_field(struct sock, sk_priority) != 4); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sock, sk_priority)); + *insn++ = BPF_EMIT_STORE(BPF_W, si, + offsetof(struct sock, sk_priority)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sock, sk_priority)); @@ -9933,10 +9959,12 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, offsetof(S, TF)); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \ si->dst_reg, offsetof(S, F)); \ - *insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \ + *insn++ = BPF_RAW_INSN(SIZE | BPF_MEM | BPF_CLASS(si->code), \ + tmp_reg, si->src_reg, \ bpf_target_off(NS, NF, sizeof_field(NS, NF), \ target_size) \ - + OFF); \ + + OFF, \ + si->imm); \ *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \ offsetof(S, TF)); \ } while (0) @@ -10171,9 +10199,11 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, struct bpf_sock_ops_kern, sk),\ reg, si->dst_reg, \ offsetof(struct bpf_sock_ops_kern, sk));\ - *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \ - reg, si->src_reg, \ - offsetof(OBJ, OBJ_FIELD)); \ + *insn++ = BPF_RAW_INSN(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD) | \ + BPF_MEM | BPF_CLASS(si->code), \ + reg, si->src_reg, \ + offsetof(OBJ, OBJ_FIELD), \ + si->imm); \ *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \ offsetof(struct bpf_sock_ops_kern, \ temp)); \ @@ -10205,8 +10235,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, off -= offsetof(struct bpf_sock_ops, replylong[0]); off += offsetof(struct bpf_sock_ops_kern, replylong[0]); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - off); + *insn++ = BPF_EMIT_STORE(BPF_W, si, off); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off); @@ -10563,8 +10592,7 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, off += offsetof(struct sk_buff, cb); off += offsetof(struct sk_skb_cb, data); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg, - si->src_reg, off); + *insn++ = BPF_EMIT_STORE(BPF_SIZE(si->code), si, off); else *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg, si->src_reg, off); @@ -11621,3 +11649,82 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id) return func; } + +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc int bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags, + struct bpf_dynptr_kern *ptr__uninit) +{ + if (flags) { + bpf_dynptr_set_null(ptr__uninit); + return -EINVAL; + } + + bpf_dynptr_init(ptr__uninit, skb, BPF_DYNPTR_TYPE_SKB, 0, skb->len); + + return 0; +} + +__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags, + struct bpf_dynptr_kern *ptr__uninit) +{ + if (flags) { + bpf_dynptr_set_null(ptr__uninit); + return -EINVAL; + } + + bpf_dynptr_init(ptr__uninit, xdp, BPF_DYNPTR_TYPE_XDP, 0, xdp_get_buff_len(xdp)); + + return 0; +} +__diag_pop(); + +int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, + struct bpf_dynptr_kern *ptr__uninit) +{ + int err; + + err = bpf_dynptr_from_skb(skb, flags, ptr__uninit); + if (err) + return err; + + bpf_dynptr_set_rdonly(ptr__uninit); + + return 0; +} + +BTF_SET8_START(bpf_kfunc_check_set_skb) +BTF_ID_FLAGS(func, bpf_dynptr_from_skb) +BTF_SET8_END(bpf_kfunc_check_set_skb) + +BTF_SET8_START(bpf_kfunc_check_set_xdp) +BTF_ID_FLAGS(func, bpf_dynptr_from_xdp) +BTF_SET8_END(bpf_kfunc_check_set_xdp) + +static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { + .owner = THIS_MODULE, + .set = &bpf_kfunc_check_set_skb, +}; + +static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = { + .owner = THIS_MODULE, + .set = &bpf_kfunc_check_set_xdp, +}; + +static int __init bpf_kfunc_init(void) +{ + int ret; + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_skb); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_ACT, &bpf_kfunc_set_skb); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SK_SKB, &bpf_kfunc_set_skb); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SOCKET_FILTER, &bpf_kfunc_set_skb); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &bpf_kfunc_set_skb); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_OUT, &bpf_kfunc_set_skb); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_IN, &bpf_kfunc_set_skb); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb); + return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); +} +late_initcall(bpf_kfunc_init); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6798f6d2423b..ddd0f32de20e 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -614,7 +614,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, NEIGH_CACHE_STAT_INC(tbl, lookups); - rcu_read_lock_bh(); + rcu_read_lock(); n = __neigh_lookup_noref(tbl, pkey, dev); if (n) { if (!refcount_inc_not_zero(&n->refcnt)) @@ -622,42 +622,11 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, NEIGH_CACHE_STAT_INC(tbl, hits); } - rcu_read_unlock_bh(); + rcu_read_unlock(); return n; } EXPORT_SYMBOL(neigh_lookup); -struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, - const void *pkey) -{ - struct neighbour *n; - unsigned int key_len = tbl->key_len; - u32 hash_val; - struct neigh_hash_table *nht; - - NEIGH_CACHE_STAT_INC(tbl, lookups); - - rcu_read_lock_bh(); - nht = rcu_dereference_bh(tbl->nht); - hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift); - - for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); - n != NULL; - n = rcu_dereference_bh(n->next)) { - if (!memcmp(n->primary_key, pkey, key_len) && - net_eq(dev_net(n->dev), net)) { - if (!refcount_inc_not_zero(&n->refcnt)) - n = NULL; - NEIGH_CACHE_STAT_INC(tbl, hits); - break; - } - } - - rcu_read_unlock_bh(); - return n; -} -EXPORT_SYMBOL(neigh_lookup_nodev); - static struct neighbour * ___neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, u32 flags, @@ -1124,13 +1093,13 @@ static void neigh_timer_handler(struct timer_list *t) neigh->used + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { neigh_dbg(2, "neigh %p is delayed\n", neigh); - neigh->nud_state = NUD_DELAY; + WRITE_ONCE(neigh->nud_state, NUD_DELAY); neigh->updated = jiffies; neigh_suspect(neigh); next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME); } else { neigh_dbg(2, "neigh %p is suspected\n", neigh); - neigh->nud_state = NUD_STALE; + WRITE_ONCE(neigh->nud_state, NUD_STALE); neigh->updated = jiffies; neigh_suspect(neigh); notify = 1; @@ -1140,14 +1109,14 @@ static void neigh_timer_handler(struct timer_list *t) neigh->confirmed + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { neigh_dbg(2, "neigh %p is now reachable\n", neigh); - neigh->nud_state = NUD_REACHABLE; + WRITE_ONCE(neigh->nud_state, NUD_REACHABLE); neigh->updated = jiffies; neigh_connect(neigh); notify = 1; next = neigh->confirmed + neigh->parms->reachable_time; } else { neigh_dbg(2, "neigh %p is probed\n", neigh); - neigh->nud_state = NUD_PROBE; + WRITE_ONCE(neigh->nud_state, NUD_PROBE); neigh->updated = jiffies; atomic_set(&neigh->probes, 0); notify = 1; @@ -1161,7 +1130,7 @@ static void neigh_timer_handler(struct timer_list *t) if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) { - neigh->nud_state = NUD_FAILED; + WRITE_ONCE(neigh->nud_state, NUD_FAILED); notify = 1; neigh_invalidate(neigh); goto out; @@ -1210,7 +1179,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES)); neigh_del_timer(neigh); - neigh->nud_state = NUD_INCOMPLETE; + WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE); neigh->updated = now; if (!immediate_ok) { next = now + 1; @@ -1222,7 +1191,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, } neigh_add_timer(neigh, next); } else { - neigh->nud_state = NUD_FAILED; + WRITE_ONCE(neigh->nud_state, NUD_FAILED); neigh->updated = jiffies; write_unlock_bh(&neigh->lock); @@ -1232,7 +1201,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, } else if (neigh->nud_state & NUD_STALE) { neigh_dbg(2, "neigh %p is delayed\n", neigh); neigh_del_timer(neigh); - neigh->nud_state = NUD_DELAY; + WRITE_ONCE(neigh->nud_state, NUD_DELAY); neigh->updated = jiffies; neigh_add_timer(neigh, jiffies + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME)); @@ -1344,7 +1313,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, neigh_update_flags(neigh, flags, ¬ify, &gc_update, &managed_update); if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) { new = old & ~NUD_PERMANENT; - neigh->nud_state = new; + WRITE_ONCE(neigh->nud_state, new); err = 0; goto out; } @@ -1353,7 +1322,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, neigh_del_timer(neigh); if (old & NUD_CONNECTED) neigh_suspect(neigh); - neigh->nud_state = new; + WRITE_ONCE(neigh->nud_state, new); err = 0; notify = old & NUD_VALID; if ((old & (NUD_INCOMPLETE | NUD_PROBE)) && @@ -1432,7 +1401,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, ((new & NUD_REACHABLE) ? neigh->parms->reachable_time : 0))); - neigh->nud_state = new; + WRITE_ONCE(neigh->nud_state, new); notify = 1; } @@ -1519,7 +1488,7 @@ void __neigh_set_probe_once(struct neighbour *neigh) neigh->updated = jiffies; if (!(neigh->nud_state & NUD_FAILED)) return; - neigh->nud_state = NUD_INCOMPLETE; + WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE); atomic_set(&neigh->probes, neigh_max_probes(neigh)); neigh_add_timer(neigh, jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), @@ -2215,11 +2184,11 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, .ndtc_proxy_qlen = tbl->proxy_queue.qlen, }; - rcu_read_lock_bh(); - nht = rcu_dereference_bh(tbl->nht); + rcu_read_lock(); + nht = rcu_dereference(tbl->nht); ndc.ndtc_hash_rnd = nht->hash_rnd[0]; ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1); - rcu_read_unlock_bh(); + rcu_read_unlock(); if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc)) goto nla_put_failure; @@ -2734,15 +2703,15 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (filter->dev_idx || filter->master_idx) flags |= NLM_F_DUMP_FILTERED; - rcu_read_lock_bh(); - nht = rcu_dereference_bh(tbl->nht); + rcu_read_lock(); + nht = rcu_dereference(tbl->nht); for (h = s_h; h < (1 << nht->hash_shift); h++) { if (h > s_h) s_idx = 0; - for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0; + for (n = rcu_dereference(nht->hash_buckets[h]), idx = 0; n != NULL; - n = rcu_dereference_bh(n->next)) { + n = rcu_dereference(n->next)) { if (idx < s_idx || !net_eq(dev_net(n->dev), net)) goto next; if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || @@ -2761,7 +2730,7 @@ next: } rc = skb->len; out: - rcu_read_unlock_bh(); + rcu_read_unlock(); cb->args[1] = h; cb->args[2] = idx; return rc; @@ -3106,20 +3075,20 @@ void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void int chain; struct neigh_hash_table *nht; - rcu_read_lock_bh(); - nht = rcu_dereference_bh(tbl->nht); + rcu_read_lock(); + nht = rcu_dereference(tbl->nht); - read_lock(&tbl->lock); /* avoid resizes */ + read_lock_bh(&tbl->lock); /* avoid resizes */ for (chain = 0; chain < (1 << nht->hash_shift); chain++) { struct neighbour *n; - for (n = rcu_dereference_bh(nht->hash_buckets[chain]); + for (n = rcu_dereference(nht->hash_buckets[chain]); n != NULL; - n = rcu_dereference_bh(n->next)) + n = rcu_dereference(n->next)) cb(n, cookie); } - read_unlock(&tbl->lock); - rcu_read_unlock_bh(); + read_unlock_bh(&tbl->lock); + rcu_read_unlock(); } EXPORT_SYMBOL(neigh_for_each); @@ -3169,7 +3138,7 @@ int neigh_xmit(int index, struct net_device *dev, tbl = neigh_tables[index]; if (!tbl) goto out; - rcu_read_lock_bh(); + rcu_read_lock(); if (index == NEIGH_ARP_TABLE) { u32 key = *((u32 *)addr); @@ -3181,11 +3150,11 @@ int neigh_xmit(int index, struct net_device *dev, neigh = __neigh_create(tbl, addr, dev, false); err = PTR_ERR(neigh); if (IS_ERR(neigh)) { - rcu_read_unlock_bh(); + rcu_read_unlock(); goto out_kfree_skb; } err = neigh->output(neigh, skb); - rcu_read_unlock_bh(); + rcu_read_unlock(); } else if (index == NEIGH_LINK_TABLE) { err = dev_hard_header(skb, dev, ntohs(skb->protocol), @@ -3214,7 +3183,7 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) state->flags &= ~NEIGH_SEQ_IS_PNEIGH; for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) { - n = rcu_dereference_bh(nht->hash_buckets[bucket]); + n = rcu_dereference(nht->hash_buckets[bucket]); while (n) { if (!net_eq(dev_net(n->dev), net)) @@ -3229,10 +3198,10 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) } if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) break; - if (n->nud_state & ~NUD_NOARP) + if (READ_ONCE(n->nud_state) & ~NUD_NOARP) break; next: - n = rcu_dereference_bh(n->next); + n = rcu_dereference(n->next); } if (n) @@ -3256,7 +3225,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, if (v) return n; } - n = rcu_dereference_bh(n->next); + n = rcu_dereference(n->next); while (1) { while (n) { @@ -3271,10 +3240,10 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) break; - if (n->nud_state & ~NUD_NOARP) + if (READ_ONCE(n->nud_state) & ~NUD_NOARP) break; next: - n = rcu_dereference_bh(n->next); + n = rcu_dereference(n->next); } if (n) @@ -3283,7 +3252,7 @@ next: if (++state->bucket >= (1 << nht->hash_shift)) break; - n = rcu_dereference_bh(nht->hash_buckets[state->bucket]); + n = rcu_dereference(nht->hash_buckets[state->bucket]); } if (n && pos) @@ -3385,7 +3354,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos) void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) __acquires(tbl->lock) - __acquires(rcu_bh) + __acquires(rcu) { struct neigh_seq_state *state = seq->private; @@ -3393,9 +3362,9 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl state->bucket = 0; state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); - rcu_read_lock_bh(); - state->nht = rcu_dereference_bh(tbl->nht); - read_lock(&tbl->lock); + rcu_read_lock(); + state->nht = rcu_dereference(tbl->nht); + read_lock_bh(&tbl->lock); return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN; } @@ -3430,13 +3399,13 @@ EXPORT_SYMBOL(neigh_seq_next); void neigh_seq_stop(struct seq_file *seq, void *v) __releases(tbl->lock) - __releases(rcu_bh) + __releases(rcu) { struct neigh_seq_state *state = seq->private; struct neigh_table *tbl = state->tbl; - read_unlock(&tbl->lock); - rcu_read_unlock_bh(); + read_unlock_bh(&tbl->lock); + rcu_read_unlock(); } EXPORT_SYMBOL(neigh_seq_stop); diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 1ec23bf8b05c..09f7ed1a04e8 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -115,10 +115,14 @@ static int dev_seq_show(struct seq_file *seq, void *v) return 0; } -static u32 softnet_backlog_len(struct softnet_data *sd) +static u32 softnet_input_pkt_queue_len(struct softnet_data *sd) { - return skb_queue_len_lockless(&sd->input_pkt_queue) + - skb_queue_len_lockless(&sd->process_queue); + return skb_queue_len_lockless(&sd->input_pkt_queue); +} + +static u32 softnet_process_queue_len(struct softnet_data *sd) +{ + return skb_queue_len_lockless(&sd->process_queue); } static struct softnet_data *softnet_get_online(loff_t *pos) @@ -152,6 +156,8 @@ static void softnet_seq_stop(struct seq_file *seq, void *v) static int softnet_seq_show(struct seq_file *seq, void *v) { struct softnet_data *sd = v; + u32 input_qlen = softnet_input_pkt_queue_len(sd); + u32 process_qlen = softnet_process_queue_len(sd); unsigned int flow_limit_count = 0; #ifdef CONFIG_NET_FLOW_LIMIT @@ -169,12 +175,14 @@ static int softnet_seq_show(struct seq_file *seq, void *v) * mapping the data a specific CPU */ seq_printf(seq, - "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", + "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x " + "%08x %08x\n", sd->processed, sd->dropped, sd->time_squeeze, 0, 0, 0, 0, 0, /* was fastroute */ 0, /* was cpu_collision */ sd->received_rps, flow_limit_count, - softnet_backlog_len(sd), (int)seq->index); + input_qlen + process_qlen, (int)seq->index, + input_qlen, process_qlen); return 0; } diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 3abab70d66dd..de17ca2f7dbf 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -16,7 +16,7 @@ static const struct nla_policy netdev_dev_get_nl_policy[NETDEV_A_DEV_IFINDEX + 1 }; /* Ops table for netdev */ -static const struct genl_split_ops netdev_nl_ops[2] = { +static const struct genl_split_ops netdev_nl_ops[] = { { .cmd = NETDEV_CMD_DEV_GET, .doit = netdev_nl_dev_get_doit, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5d8eb57867a9..b7b1661d0d56 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -54,6 +54,9 @@ #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/devlink.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/addrconf.h> +#endif #include "dev.h" @@ -6063,6 +6066,217 @@ static int rtnl_stats_set(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } +static int rtnl_mdb_valid_dump_req(const struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct br_port_msg *bpm; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*bpm))) { + NL_SET_ERR_MSG(extack, "Invalid header for mdb dump request"); + return -EINVAL; + } + + bpm = nlmsg_data(nlh); + if (bpm->ifindex) { + NL_SET_ERR_MSG(extack, "Filtering by device index is not supported for mdb dump request"); + return -EINVAL; + } + if (nlmsg_attrlen(nlh, sizeof(*bpm))) { + NL_SET_ERR_MSG(extack, "Invalid data after header in mdb dump request"); + return -EINVAL; + } + + return 0; +} + +struct rtnl_mdb_dump_ctx { + long idx; +}; + +static int rtnl_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct rtnl_mdb_dump_ctx *ctx = (void *)cb->ctx; + struct net *net = sock_net(skb->sk); + struct net_device *dev; + int idx, s_idx; + int err; + + NL_ASSERT_DUMP_CTX_FITS(struct rtnl_mdb_dump_ctx); + + if (cb->strict_check) { + err = rtnl_mdb_valid_dump_req(cb->nlh, cb->extack); + if (err) + return err; + } + + s_idx = ctx->idx; + idx = 0; + + for_each_netdev(net, dev) { + if (idx < s_idx) + goto skip; + if (!dev->netdev_ops->ndo_mdb_dump) + goto skip; + + err = dev->netdev_ops->ndo_mdb_dump(dev, skb, cb); + if (err == -EMSGSIZE) + goto out; + /* Moving on to next device, reset markers and sequence + * counters since they are all maintained per-device. + */ + memset(cb->ctx, 0, sizeof(cb->ctx)); + cb->prev_seq = 0; + cb->seq = 0; +skip: + idx++; + } + +out: + ctx->idx = idx; + return skb->len; +} + +static int rtnl_validate_mdb_entry(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct br_mdb_entry *entry = nla_data(attr); + + if (nla_len(attr) != sizeof(struct br_mdb_entry)) { + NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length"); + return -EINVAL; + } + + if (entry->ifindex == 0) { + NL_SET_ERR_MSG(extack, "Zero entry ifindex is not allowed"); + return -EINVAL; + } + + if (entry->addr.proto == htons(ETH_P_IP)) { + if (!ipv4_is_multicast(entry->addr.u.ip4) && + !ipv4_is_zeronet(entry->addr.u.ip4)) { + NL_SET_ERR_MSG(extack, "IPv4 entry group address is not multicast or 0.0.0.0"); + return -EINVAL; + } + if (ipv4_is_local_multicast(entry->addr.u.ip4)) { + NL_SET_ERR_MSG(extack, "IPv4 entry group address is local multicast"); + return -EINVAL; + } +#if IS_ENABLED(CONFIG_IPV6) + } else if (entry->addr.proto == htons(ETH_P_IPV6)) { + if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6)) { + NL_SET_ERR_MSG(extack, "IPv6 entry group address is link-local all nodes"); + return -EINVAL; + } +#endif + } else if (entry->addr.proto == 0) { + /* L2 mdb */ + if (!is_multicast_ether_addr(entry->addr.u.mac_addr)) { + NL_SET_ERR_MSG(extack, "L2 entry group is not multicast"); + return -EINVAL; + } + } else { + NL_SET_ERR_MSG(extack, "Unknown entry protocol"); + return -EINVAL; + } + + if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) { + NL_SET_ERR_MSG(extack, "Unknown entry state"); + return -EINVAL; + } + if (entry->vid >= VLAN_VID_MASK) { + NL_SET_ERR_MSG(extack, "Invalid entry VLAN id"); + return -EINVAL; + } + + return 0; +} + +static const struct nla_policy mdba_policy[MDBA_SET_ENTRY_MAX + 1] = { + [MDBA_SET_ENTRY_UNSPEC] = { .strict_start_type = MDBA_SET_ENTRY_ATTRS + 1 }, + [MDBA_SET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, + rtnl_validate_mdb_entry, + sizeof(struct br_mdb_entry)), + [MDBA_SET_ENTRY_ATTRS] = { .type = NLA_NESTED }, +}; + +static int rtnl_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct br_port_msg *bpm; + struct net_device *dev; + int err; + + err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, + MDBA_SET_ENTRY_MAX, mdba_policy, extack); + if (err) + return err; + + bpm = nlmsg_data(nlh); + if (!bpm->ifindex) { + NL_SET_ERR_MSG(extack, "Invalid ifindex"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, bpm->ifindex); + if (!dev) { + NL_SET_ERR_MSG(extack, "Device doesn't exist"); + return -ENODEV; + } + + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) { + NL_SET_ERR_MSG(extack, "Missing MDBA_SET_ENTRY attribute"); + return -EINVAL; + } + + if (!dev->netdev_ops->ndo_mdb_add) { + NL_SET_ERR_MSG(extack, "Device does not support MDB operations"); + return -EOPNOTSUPP; + } + + return dev->netdev_ops->ndo_mdb_add(dev, tb, nlh->nlmsg_flags, extack); +} + +static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct br_port_msg *bpm; + struct net_device *dev; + int err; + + err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, + MDBA_SET_ENTRY_MAX, mdba_policy, extack); + if (err) + return err; + + bpm = nlmsg_data(nlh); + if (!bpm->ifindex) { + NL_SET_ERR_MSG(extack, "Invalid ifindex"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, bpm->ifindex); + if (!dev) { + NL_SET_ERR_MSG(extack, "Device doesn't exist"); + return -ENODEV; + } + + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) { + NL_SET_ERR_MSG(extack, "Missing MDBA_SET_ENTRY attribute"); + return -EINVAL; + } + + if (!dev->netdev_ops->ndo_mdb_del) { + NL_SET_ERR_MSG(extack, "Device does not support MDB operations"); + return -EOPNOTSUPP; + } + + return dev->netdev_ops->ndo_mdb_del(dev, tb, extack); +} + /* Process one rtnetlink message. */ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -6297,4 +6511,8 @@ void __init rtnetlink_init(void) rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump, 0); rtnl_register(PF_UNSPEC, RTM_SETSTATS, rtnl_stats_set, NULL, 0); + + rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, rtnl_mdb_dump, 0); + rtnl_register(PF_BRIDGE, RTM_NEWMDB, rtnl_mdb_add, NULL, 0); + rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL, 0); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1a31815104d6..050a875d09c5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -420,10 +420,9 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb = __build_skb(data, frag_size); - if (skb && frag_size) { + if (likely(skb && frag_size)) { skb->head_frag = 1; - if (page_is_pfmemalloc(virt_to_head_page(data))) - skb->pfmemalloc = 1; + skb_propagate_pfmemalloc(virt_to_head_page(data), skb); } return skb; } @@ -445,8 +444,7 @@ struct sk_buff *build_skb_around(struct sk_buff *skb, if (frag_size) { skb->head_frag = 1; - if (page_is_pfmemalloc(virt_to_head_page(data))) - skb->pfmemalloc = 1; + skb_propagate_pfmemalloc(virt_to_head_page(data), skb); } return skb; } diff --git a/net/core/sock_map.c b/net/core/sock_map.c index a68a7290a3b2..9b854e236d23 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -797,6 +797,14 @@ static void sock_map_fini_seq_private(void *priv_data) bpf_map_put_with_uref(info->map); } +static u64 sock_map_mem_usage(const struct bpf_map *map) +{ + u64 usage = sizeof(struct bpf_stab); + + usage += (u64)map->max_entries * sizeof(struct sock *); + return usage; +} + static const struct bpf_iter_seq_info sock_map_iter_seq_info = { .seq_ops = &sock_map_seq_ops, .init_seq_private = sock_map_init_seq_private, @@ -816,6 +824,7 @@ const struct bpf_map_ops sock_map_ops = { .map_lookup_elem = sock_map_lookup, .map_release_uref = sock_map_release_progs, .map_check_btf = map_check_no_btf, + .map_mem_usage = sock_map_mem_usage, .map_btf_id = &sock_map_btf_ids[0], .iter_seq_info = &sock_map_iter_seq_info, }; @@ -1397,6 +1406,16 @@ static void sock_hash_fini_seq_private(void *priv_data) bpf_map_put_with_uref(info->map); } +static u64 sock_hash_mem_usage(const struct bpf_map *map) +{ + struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); + u64 usage = sizeof(*htab); + + usage += htab->buckets_num * sizeof(struct bpf_shtab_bucket); + usage += atomic_read(&htab->count) * (u64)htab->elem_size; + return usage; +} + static const struct bpf_iter_seq_info sock_hash_iter_seq_info = { .seq_ops = &sock_hash_seq_ops, .init_seq_private = sock_hash_init_seq_private, @@ -1416,6 +1435,7 @@ const struct bpf_map_ops sock_hash_ops = { .map_lookup_elem_sys_only = sock_hash_lookup_sys, .map_release_uref = sock_hash_release_progs, .map_check_btf = map_check_no_btf, + .map_mem_usage = sock_hash_mem_usage, .map_btf_id = &sock_hash_map_btf_ids[0], .iter_seq_info = &sock_hash_iter_seq_info, }; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b780827f5e0a..3ab68415d121 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -177,7 +177,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, * for the case, if this connection will not able to recover. */ if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) - sk->sk_err_soft = EMSGSIZE; + WRITE_ONCE(sk->sk_err_soft, EMSGSIZE); mtu = dst_mtu(dst); @@ -339,8 +339,9 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info) sk_error_report(sk); dccp_done(sk); - } else - sk->sk_err_soft = err; + } else { + WRITE_ONCE(sk->sk_err_soft, err); + } goto out; } @@ -364,8 +365,9 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info) if (!sock_owned_by_user(sk) && inet->recverr) { sk->sk_err = err; sk_error_report(sk); - } else /* Only an error on timeout */ - sk->sk_err_soft = err; + } else { /* Only an error on timeout */ + WRITE_ONCE(sk->sk_err_soft, err); + } out: bh_unlock_sock(sk); sock_put(sk); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index b9d7c3dd1cb3..93c98990d726 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -174,17 +174,18 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, */ sk_error_report(sk); dccp_done(sk); - } else - sk->sk_err_soft = err; + } else { + WRITE_ONCE(sk->sk_err_soft, err); + } goto out; } if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; sk_error_report(sk); - } else - sk->sk_err_soft = err; - + } else { + WRITE_ONCE(sk->sk_err_soft, err); + } out: bh_unlock_sock(sk); sock_put(sk); @@ -783,6 +784,7 @@ lookup: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; + nf_reset_ct(skb); return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted) ? -1 : 0; diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 27a3b37acd2e..b3255e87cc7e 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -19,7 +19,7 @@ int sysctl_dccp_retries2 __read_mostly = TCP_RETR2; static void dccp_write_err(struct sock *sk) { - sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; + sk->sk_err = READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT; sk_error_report(sk); dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8db6747f892f..940062e08f57 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1322,7 +1322,7 @@ int inet_sk_rebuild_header(struct sock *sk) sk->sk_state != TCP_SYN_SENT || (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || (err = inet_sk_reselect_saddr(sk)) != 0) - sk->sk_err_soft = -err; + WRITE_ONCE(sk->sk_err_soft, -err); } return err; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 4f7237661afb..9456f5bb35e5 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -375,7 +375,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { - if (!(neigh->nud_state & NUD_VALID)) + if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) pr_debug("trying to ucast probe in NUD_INVALID\n"); neigh_ha_snapshot(dst_ha, neigh, dev); dst_hw = dst_ha; @@ -1123,7 +1123,7 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) neigh = neigh_lookup(&arp_tbl, &ip, dev); if (neigh) { - if (!(neigh->nud_state & NUD_NOARP)) { + if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) { read_lock_bh(&neigh->lock); memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); r->arp_flags = arp_state_to_flags(neigh); @@ -1144,12 +1144,12 @@ int arp_invalidate(struct net_device *dev, __be32 ip, bool force) struct neigh_table *tbl = &arp_tbl; if (neigh) { - if ((neigh->nud_state & NUD_VALID) && !force) { + if ((READ_ONCE(neigh->nud_state) & NUD_VALID) && !force) { neigh_release(neigh); return 0; } - if (neigh->nud_state & ~NUD_NOARP) + if (READ_ONCE(neigh->nud_state) & ~NUD_NOARP) err = neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE| NEIGH_UPDATE_F_ADMIN, 0); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index b0acf6e19aed..5deac0517ef7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -962,6 +962,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, extack); } else { u32 new_metric = ifa->ifa_rt_priority; + u8 new_proto = ifa->ifa_proto; inet_free_ifa(ifa); @@ -975,6 +976,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, ifa->ifa_rt_priority = new_metric; } + ifa->ifa_proto = new_proto; + set_ifa_lifetime(ifa, valid_lft, prefered_lft); cancel_delayed_work(&check_lifetime_work); queue_delayed_work(system_power_efficient_wq, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 3bb890a40ed7..65ba18a91865 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -563,7 +563,7 @@ static int fib_detect_death(struct fib_info *fi, int order, n = NULL; if (n) { - state = n->nud_state; + state = READ_ONCE(n->nud_state); neigh_release(n); } else { return 0; @@ -2191,7 +2191,7 @@ static bool fib_good_nh(const struct fib_nh *nh) if (nh->fib_nh_scope == RT_SCOPE_LINK) { struct neighbour *n; - rcu_read_lock_bh(); + rcu_read_lock(); if (likely(nh->fib_nh_gw_family == AF_INET)) n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, @@ -2202,9 +2202,9 @@ static bool fib_good_nh(const struct fib_nh *nh) else n = NULL; if (n) - state = n->nud_state; + state = READ_ONCE(n->nud_state); - rcu_read_unlock_bh(); + rcu_read_unlock(); } return !!(state & NUD_VALID); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index c920aa9a62a9..48ff5f13e797 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2638,10 +2638,10 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, /* * check if a multicast source filter allows delivery for a given <src,dst,intf> */ -int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, +int ip_mc_sf_allow(const struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif, int sdif) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *pmc; struct ip_sf_socklist *psl; int i; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4e4e308c3230..22a90a9392eb 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -129,7 +129,8 @@ int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(ip_local_out); -static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) +static inline int ip_select_ttl(const struct inet_sock *inet, + const struct dst_entry *dst) { int ttl = inet->uc_ttl; @@ -146,7 +147,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, __be32 saddr, __be32 daddr, struct ip_options_rcu *opt, u8 tos) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); struct rtable *rt = skb_rtable(skb); struct net *net = sock_net(sk); struct iphdr *iph; @@ -218,7 +219,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s return res; } - rcu_read_lock_bh(); + rcu_read_lock(); neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); if (!IS_ERR(neigh)) { int res; @@ -226,10 +227,10 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s sock_confirm_neigh(skb, neigh); /* if crossing protocols, can not use the cached header */ res = neigh_output(neigh, skb, is_v6gw); - rcu_read_unlock_bh(); + rcu_read_unlock(); return res; } - rcu_read_unlock_bh(); + rcu_read_unlock(); net_dbg_ratelimited("%s: No header cache and no neighbour!\n", __func__); @@ -990,7 +991,7 @@ static int __ip_append_data(struct sock *sk, mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize; paged = !!cork->gso_size; - if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP && + if (cork->tx_flags & SKBTX_ANY_TSTAMP && sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) tskey = atomic_inc_return(&sk->sk_tskey) - 1; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index da5998011ab9..7da1df4997d0 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -14,7 +14,6 @@ #include <linux/vmalloc.h> #include <linux/netdevice.h> #include <linux/module.h> -#include <linux/icmp.h> #include <net/ip.h> #include <net/compat.h> #include <linux/uaccess.h> @@ -31,7 +30,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("IPv4 packet filter"); -MODULE_ALIAS("ipt_icmp"); void *ipt_alloc_initial_table(const struct xt_table *info) { @@ -1799,52 +1797,6 @@ void ipt_unregister_table_exit(struct net *net, const char *name) __ipt_unregister_table(net, table); } -/* Returns 1 if the type and code is matched by the range, 0 otherwise */ -static inline bool -icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, - u_int8_t type, u_int8_t code, - bool invert) -{ - return ((test_type == 0xFF) || - (type == test_type && code >= min_code && code <= max_code)) - ^ invert; -} - -static bool -icmp_match(const struct sk_buff *skb, struct xt_action_param *par) -{ - const struct icmphdr *ic; - struct icmphdr _icmph; - const struct ipt_icmp *icmpinfo = par->matchinfo; - - /* Must not be a fragment. */ - if (par->fragoff != 0) - return false; - - ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); - if (ic == NULL) { - /* We've been asked to examine this packet, and we - * can't. Hence, no choice but to drop. - */ - par->hotdrop = true; - return false; - } - - return icmp_type_code_match(icmpinfo->type, - icmpinfo->code[0], - icmpinfo->code[1], - ic->type, ic->code, - !!(icmpinfo->invflags&IPT_ICMP_INV)); -} - -static int icmp_checkentry(const struct xt_mtchk_param *par) -{ - const struct ipt_icmp *icmpinfo = par->matchinfo; - - /* Must specify no unknown invflags */ - return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0; -} - static struct xt_target ipt_builtin_tg[] __read_mostly = { { .name = XT_STANDARD_TARGET, @@ -1875,18 +1827,6 @@ static struct nf_sockopt_ops ipt_sockopts = { .owner = THIS_MODULE, }; -static struct xt_match ipt_builtin_mt[] __read_mostly = { - { - .name = "icmp", - .match = icmp_match, - .matchsize = sizeof(struct ipt_icmp), - .checkentry = icmp_checkentry, - .proto = IPPROTO_ICMP, - .family = NFPROTO_IPV4, - .me = THIS_MODULE, - }, -}; - static int __net_init ip_tables_net_init(struct net *net) { return xt_proto_init(net, NFPROTO_IPV4); @@ -1914,19 +1854,14 @@ static int __init ip_tables_init(void) ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); if (ret < 0) goto err2; - ret = xt_register_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt)); - if (ret < 0) - goto err4; /* Register setsockopt */ ret = nf_register_sockopt(&ipt_sockopts); if (ret < 0) - goto err5; + goto err4; return 0; -err5: - xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt)); err4: xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); err2: @@ -1939,7 +1874,6 @@ static void __exit ip_tables_fini(void) { nf_unregister_sockopt(&ipt_sockopts); - xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt)); xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); unregister_pernet_subsys(&ip_tables_net_ops); } diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index d8ef05347fd9..f95142e56da0 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1124,13 +1124,13 @@ static bool ipv6_good_nh(const struct fib6_nh *nh) int state = NUD_REACHABLE; struct neighbour *n; - rcu_read_lock_bh(); + rcu_read_lock(); n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6); if (n) - state = n->nud_state; + state = READ_ONCE(n->nud_state); - rcu_read_unlock_bh(); + rcu_read_unlock(); return !!(state & NUD_VALID); } @@ -1140,14 +1140,14 @@ static bool ipv4_good_nh(const struct fib_nh *nh) int state = NUD_REACHABLE; struct neighbour *n; - rcu_read_lock_bh(); + rcu_read_lock(); n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, (__force u32)nh->fib_nh_gw4); if (n) - state = n->nud_state; + state = READ_ONCE(n->nud_state); - rcu_read_unlock_bh(); + rcu_read_unlock(); return !!(state & NUD_VALID); } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 94df935ee0c5..3cf68695b40d 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -116,10 +116,10 @@ void raw_unhash_sk(struct sock *sk) } EXPORT_SYMBOL_GPL(raw_unhash_sk); -bool raw_v4_match(struct net *net, struct sock *sk, unsigned short num, +bool raw_v4_match(struct net *net, const struct sock *sk, unsigned short num, __be32 raddr, __be32 laddr, int dif, int sdif) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); if (net_eq(sock_net(sk), net) && inet->inet_num == num && !(inet->inet_daddr && inet->inet_daddr != raddr) && diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index 999321834b94..bca49a844f01 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -34,7 +34,7 @@ raw_get_hashinfo(const struct inet_diag_req_v2 *r) * use helper to figure it out. */ -static bool raw_lookup(struct net *net, struct sock *sk, +static bool raw_lookup(struct net *net, const struct sock *sk, const struct inet_diag_req_v2 *req) { struct inet_diag_req_raw *r = (void *)req; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index de6e3515ab4f..6a0a0bb452e9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -408,7 +408,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, struct net_device *dev = dst->dev; struct neighbour *n; - rcu_read_lock_bh(); + rcu_read_lock(); if (likely(rt->rt_gw_family == AF_INET)) { n = ip_neigh_gw4(dev, rt->rt_gw4); @@ -424,7 +424,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, if (!IS_ERR(n) && !refcount_inc_not_zero(&n->refcnt)) n = NULL; - rcu_read_unlock_bh(); + rcu_read_unlock(); return n; } @@ -784,7 +784,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow if (!n) n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); if (!IS_ERR(n)) { - if (!(n->nud_state & NUD_VALID)) { + if (!(READ_ONCE(n->nud_state) & NUD_VALID)) { neigh_event_send(n, NULL); } else { if (fib_lookup(net, fl4, &res, 0) == 0) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 288693981b00..fd68d49490f2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -589,7 +589,8 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) } /* This barrier is coupled with smp_wmb() in tcp_reset() */ smp_rmb(); - if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) + if (READ_ONCE(sk->sk_err) || + !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR; return mask; @@ -3094,7 +3095,7 @@ int tcp_disconnect(struct sock *sk, int flags) if (old_state == TCP_LISTEN) { inet_csk_listen_stop(sk); } else if (unlikely(tp->repair)) { - sk->sk_err = ECONNABORTED; + WRITE_ONCE(sk->sk_err, ECONNABORTED); } else if (tcp_need_reset(old_state) || (tp->snd_nxt != tp->write_seq && (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { @@ -3102,9 +3103,9 @@ int tcp_disconnect(struct sock *sk, int flags) * states */ tcp_send_active_reset(sk, gfp_any()); - sk->sk_err = ECONNRESET; + WRITE_ONCE(sk->sk_err, ECONNRESET); } else if (old_state == TCP_SYN_SENT) - sk->sk_err = ECONNRESET; + WRITE_ONCE(sk->sk_err, ECONNRESET); tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); @@ -4569,7 +4570,7 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, const __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; const struct tcphdr *th = tcp_hdr(skb); - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); int genhash, l3index; u8 newhash[16]; @@ -4692,7 +4693,7 @@ int tcp_abort(struct sock *sk, int err) bh_lock_sock(sk); if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); /* This barrier is coupled with smp_rmb() in tcp_poll() */ smp_wmb(); sk_error_report(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cc072d2cfcd8..2b75cd9e2e92 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -458,7 +458,7 @@ static void tcp_sndbuf_expand(struct sock *sk) static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb, unsigned int skbtruesize) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); /* Optimize this! */ int truesize = tcp_win_from_space(sk, skbtruesize) >> 1; int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1; @@ -3874,7 +3874,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* We passed data and got it acked, remove any soft error * log. Something worked... */ - sk->sk_err_soft = 0; + WRITE_ONCE(sk->sk_err_soft, 0); icsk->icsk_probes_out = 0; tp->rcv_tstamp = tcp_jiffies32; if (!prior_packets) @@ -4322,15 +4322,15 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb) /* We want the right error as BSD sees it (and indeed as we do). */ switch (sk->sk_state) { case TCP_SYN_SENT: - sk->sk_err = ECONNREFUSED; + WRITE_ONCE(sk->sk_err, ECONNREFUSED); break; case TCP_CLOSE_WAIT: - sk->sk_err = EPIPE; + WRITE_ONCE(sk->sk_err, EPIPE); break; case TCP_CLOSE: return; default: - sk->sk_err = ECONNRESET; + WRITE_ONCE(sk->sk_err, ECONNRESET); } /* This barrier is coupled with smp_rmb() in tcp_poll() */ smp_wmb(); @@ -5693,7 +5693,7 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t */ static bool tcp_reset_check(const struct sock *sk, const struct sk_buff *skb) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); return unlikely(TCP_SKB_CB(skb)->seq == (tp->rcv_nxt - 1) && (1 << sk->sk_state) & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK | diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ea370afa70ed..89daa6b953ff 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -361,7 +361,7 @@ void tcp_v4_mtu_reduced(struct sock *sk) * for the case, if this connection will not able to recover. */ if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) - sk->sk_err_soft = EMSGSIZE; + WRITE_ONCE(sk->sk_err_soft, EMSGSIZE); mtu = dst_mtu(dst); @@ -596,13 +596,13 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th); if (!sock_owned_by_user(sk)) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); tcp_done(sk); } else { - sk->sk_err_soft = err; + WRITE_ONCE(sk->sk_err_soft, err); } goto out; } @@ -625,10 +625,10 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) inet = inet_sk(sk); if (!sock_owned_by_user(sk) && inet->recverr) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); } else { /* Only an error on timeout */ - sk->sk_err_soft = err; + WRITE_ONCE(sk->sk_err_soft, err); } out: diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 9a7ef7732c24..dac0d62120e6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -463,7 +463,7 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) } EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); -static void smc_check_reset_syn_req(struct tcp_sock *oldtp, +static void smc_check_reset_syn_req(const struct tcp_sock *oldtp, struct request_sock *req, struct tcp_sock *newtp) { @@ -492,7 +492,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, const struct inet_request_sock *ireq = inet_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req); struct inet_connection_sock *newicsk; - struct tcp_sock *oldtp, *newtp; + const struct tcp_sock *oldtp; + struct tcp_sock *newtp; u32 seq; if (!newsk) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ba839e441450..cfe128b81a01 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3699,7 +3699,7 @@ static void tcp_connect_init(struct sock *sk) tp->rx_opt.rcv_wscale = rcv_wscale; tp->rcv_ssthresh = tp->rcv_wnd; - sk->sk_err = 0; + WRITE_ONCE(sk->sk_err, 0); sock_reset_flag(sk, SOCK_DONE); tp->snd_wnd = 0; tcp_init_wl(tp, 0); @@ -4127,8 +4127,13 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) if (!res) { TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); - if (unlikely(tcp_passive_fastopen(sk))) - tcp_sk(sk)->total_retrans++; + if (unlikely(tcp_passive_fastopen(sk))) { + /* sk has const attribute because listeners are lockless. + * However in this case, we are dealing with a passive fastopen + * socket thus we can change total_retrans value. + */ + tcp_sk_rw(sk)->total_retrans++; + } trace_tcp_retransmit_synack(sk, req); } return res; diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 50abaa941387..acf4869c5d3b 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -4,7 +4,7 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); if (!tp->reord_seen) { /* If reordering has not been observed, be aggressive during diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index cb79127f45c3..b839c2f91292 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -67,7 +67,7 @@ u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when) static void tcp_write_err(struct sock *sk) { - sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; + WRITE_ONCE(sk->sk_err, READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT); sk_error_report(sk); tcp_write_queue_purge(sk); @@ -110,7 +110,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) shift++; /* If some dubious ICMP arrived, penalize even more. */ - if (sk->sk_err_soft) + if (READ_ONCE(sk->sk_err_soft)) shift++; if (tcp_check_oom(sk, shift)) { @@ -146,7 +146,7 @@ static int tcp_orphan_retries(struct sock *sk, bool alive) int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */ /* We know from an ICMP that something is wrong. */ - if (sk->sk_err_soft && !alive) + if (READ_ONCE(sk->sk_err_soft) && !alive) retries = 0; /* However, if socket sent something recently, select some safe diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c605d171eb2d..aa32afd871ee 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -578,12 +578,12 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, EXPORT_SYMBOL_GPL(udp4_lib_lookup); #endif -static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, +static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, int dif, int sdif, unsigned short hnum) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net) || udp_sk(sk)->udp_port_hash != hnum || @@ -1531,10 +1531,21 @@ static void busylock_release(spinlock_t *busy) spin_unlock(busy); } +static int udp_rmem_schedule(struct sock *sk, int size) +{ + int delta; + + delta = size - sk->sk_forward_alloc; + if (delta > 0 && !__sk_mem_schedule(sk, delta, SK_MEM_RECV)) + return -ENOBUFS; + + return 0; +} + int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) { struct sk_buff_head *list = &sk->sk_receive_queue; - int rmem, delta, amt, err = -ENOMEM; + int rmem, err = -ENOMEM; spinlock_t *busy = NULL; int size; @@ -1567,16 +1578,10 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) goto uncharge_drop; spin_lock(&list->lock); - if (size >= sk->sk_forward_alloc) { - amt = sk_mem_pages(size); - delta = amt << PAGE_SHIFT; - if (!__sk_mem_raise_allocated(sk, delta, amt, SK_MEM_RECV)) { - err = -ENOBUFS; - spin_unlock(&list->lock); - goto uncharge_drop; - } - - sk->sk_forward_alloc += delta; + err = udp_rmem_schedule(sk, size); + if (err) { + spin_unlock(&list->lock); + goto uncharge_drop; } sk->sk_forward_alloc -= size; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index faa47f9ea73a..3797917237d0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1034,7 +1034,7 @@ static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa) unsigned int hash = inet6_addr_hash(net, &ifa->addr); int err = 0; - spin_lock(&net->ipv6.addrconf_hash_lock); + spin_lock_bh(&net->ipv6.addrconf_hash_lock); /* Ignore adding duplicate addresses on an interface */ if (ipv6_chk_same_addr(net, &ifa->addr, dev, hash)) { @@ -1044,7 +1044,7 @@ static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa) hlist_add_head_rcu(&ifa->addr_lst, &net->ipv6.inet6_addr_lst[hash]); } - spin_unlock(&net->ipv6.addrconf_hash_lock); + spin_unlock_bh(&net->ipv6.addrconf_hash_lock); return err; } @@ -1139,15 +1139,15 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, /* For caller */ refcount_set(&ifa->refcnt, 1); - rcu_read_lock_bh(); + rcu_read_lock(); err = ipv6_add_addr_hash(idev->dev, ifa); if (err < 0) { - rcu_read_unlock_bh(); + rcu_read_unlock(); goto out; } - write_lock(&idev->lock); + write_lock_bh(&idev->lock); /* Add to inet6_dev unicast addr list. */ ipv6_link_dev_addr(idev, ifa); @@ -1158,9 +1158,9 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, } in6_ifa_hold(ifa); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); - rcu_read_unlock_bh(); + rcu_read_unlock(); inet6addr_notifier_call_chain(NETDEV_UP, ifa); out: @@ -4223,7 +4223,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, ipv6_accept_ra(ifp->idev) && ifp->idev->cnf.rtr_solicits != 0 && (dev->flags & IFF_LOOPBACK) == 0 && - (dev->type != ARPHRD_TUNNEL); + (dev->type != ARPHRD_TUNNEL) && + !netif_is_team_port(dev); read_unlock_bh(&ifp->idev->lock); /* While dad is in progress mld report's source address is in6_addrany. diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 38689bedfce7..e1b679a590c9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -845,7 +845,7 @@ int inet6_sk_rebuild_header(struct sock *sk) dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); if (IS_ERR(dst)) { sk->sk_route_caps = 0; - sk->sk_err_soft = -PTR_ERR(dst); + WRITE_ONCE(sk->sk_err_soft, -PTR_ERR(dst)); return PTR_ERR(dst); } diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 5a9f4d722f35..0c50dcd35fe8 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -120,7 +120,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused dst = inet6_csk_route_socket(sk, &fl6); if (IS_ERR(dst)) { - sk->sk_err_soft = -PTR_ERR(dst); + WRITE_ONCE(sk->sk_err_soft, -PTR_ERR(dst)); sk->sk_route_caps = 0; kfree_skb(skb); return PTR_ERR(dst); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 18481eb76a0a..b3ca4beb4405 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -58,18 +58,18 @@ DEFINE_STATIC_KEY_DEFERRED_FALSE(ipv6_flowlabel_exclusive, HZ); EXPORT_SYMBOL(ipv6_flowlabel_exclusive); #define for_each_fl_rcu(hash, fl) \ - for (fl = rcu_dereference_bh(fl_ht[(hash)]); \ + for (fl = rcu_dereference(fl_ht[(hash)]); \ fl != NULL; \ - fl = rcu_dereference_bh(fl->next)) + fl = rcu_dereference(fl->next)) #define for_each_fl_continue_rcu(fl) \ - for (fl = rcu_dereference_bh(fl->next); \ + for (fl = rcu_dereference(fl->next); \ fl != NULL; \ - fl = rcu_dereference_bh(fl->next)) + fl = rcu_dereference(fl->next)) #define for_each_sk_fl_rcu(np, sfl) \ - for (sfl = rcu_dereference_bh(np->ipv6_fl_list); \ + for (sfl = rcu_dereference(np->ipv6_fl_list); \ sfl != NULL; \ - sfl = rcu_dereference_bh(sfl->next)) + sfl = rcu_dereference(sfl->next)) static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label) { @@ -86,11 +86,11 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) { struct ip6_flowlabel *fl; - rcu_read_lock_bh(); + rcu_read_lock(); fl = __fl_lookup(net, label); if (fl && !atomic_inc_not_zero(&fl->users)) fl = NULL; - rcu_read_unlock_bh(); + rcu_read_unlock(); return fl; } @@ -217,6 +217,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, fl->label = label & IPV6_FLOWLABEL_MASK; + rcu_read_lock(); spin_lock_bh(&ip6_fl_lock); if (label == 0) { for (;;) { @@ -240,6 +241,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, if (lfl) { atomic_inc(&lfl->users); spin_unlock_bh(&ip6_fl_lock); + rcu_read_unlock(); return lfl; } } @@ -249,6 +251,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl); atomic_inc(&fl_size); spin_unlock_bh(&ip6_fl_lock); + rcu_read_unlock(); return NULL; } @@ -263,17 +266,17 @@ struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label) label &= IPV6_FLOWLABEL_MASK; - rcu_read_lock_bh(); + rcu_read_lock(); for_each_sk_fl_rcu(np, sfl) { struct ip6_flowlabel *fl = sfl->fl; if (fl->label == label && atomic_inc_not_zero(&fl->users)) { fl->lastuse = jiffies; - rcu_read_unlock_bh(); + rcu_read_unlock(); return fl; } } - rcu_read_unlock_bh(); + rcu_read_unlock(); return NULL; } EXPORT_SYMBOL_GPL(__fl6_sock_lookup); @@ -475,10 +478,10 @@ static int mem_check(struct sock *sk) if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) return 0; - rcu_read_lock_bh(); + rcu_read_lock(); for_each_sk_fl_rcu(np, sfl) count++; - rcu_read_unlock_bh(); + rcu_read_unlock(); if (room <= 0 || ((count >= FL_MAX_PER_SOCK || @@ -515,7 +518,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, return 0; } - rcu_read_lock_bh(); + rcu_read_lock(); for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) { @@ -527,11 +530,11 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, freq->flr_linger = sfl->fl->linger / HZ; spin_unlock_bh(&ip6_fl_lock); - rcu_read_unlock_bh(); + rcu_read_unlock(); return 0; } } - rcu_read_unlock_bh(); + rcu_read_unlock(); return -ENOENT; } @@ -581,16 +584,16 @@ static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq) struct ipv6_fl_socklist *sfl; int err; - rcu_read_lock_bh(); + rcu_read_lock(); for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == freq->flr_label) { err = fl6_renew(sfl->fl, freq->flr_linger, freq->flr_expires); - rcu_read_unlock_bh(); + rcu_read_unlock(); return err; } } - rcu_read_unlock_bh(); + rcu_read_unlock(); if (freq->flr_share == IPV6_FL_S_NONE && ns_capable(net->user_ns, CAP_NET_ADMIN)) { @@ -641,11 +644,11 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq, if (freq->flr_label) { err = -EEXIST; - rcu_read_lock_bh(); + rcu_read_lock(); for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == freq->flr_label) { if (freq->flr_flags & IPV6_FL_F_EXCL) { - rcu_read_unlock_bh(); + rcu_read_unlock(); goto done; } fl1 = sfl->fl; @@ -654,7 +657,7 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq, break; } } - rcu_read_unlock_bh(); + rcu_read_unlock(); if (!fl1) fl1 = fl_lookup(net, freq->flr_label); @@ -809,7 +812,7 @@ static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos) state->pid_ns = proc_pid_ns(file_inode(seq->file)->i_sb); - rcu_read_lock_bh(); + rcu_read_lock(); return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -828,7 +831,7 @@ static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void ip6fl_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { - rcu_read_unlock_bh(); + rcu_read_unlock(); } static int ip6fl_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index e1ebf5e42ebe..d94041bb4287 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -404,10 +404,6 @@ resubmit_final: /* Only do this once for first final protocol */ have_final = true; - /* Free reference early: we don't need it any more, - and it may hold ip_conntrack module loaded - indefinitely. */ - nf_reset_ct(skb); skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); @@ -430,10 +426,12 @@ resubmit_final: goto discard; } } - if (!(ipprot->flags & INET6_PROTO_NOPOLICY) && - !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { - SKB_DR_SET(reason, XFRM_POLICY); - goto discard; + if (!(ipprot->flags & INET6_PROTO_NOPOLICY)) { + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { + SKB_DR_SET(reason, XFRM_POLICY); + goto discard; + } + nf_reset_ct(skb); } ret = INDIRECT_CALL_2(ipprot->handler, tcp_v6_rcv, udpv6_rcv, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index c314fdde0097..0b6140f0179d 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -116,7 +116,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * return res; } - rcu_read_lock_bh(); + rcu_read_lock(); nexthop = rt6_nexthop((struct rt6_info *)dst, daddr); neigh = __ipv6_neigh_lookup_noref(dev, nexthop); @@ -124,7 +124,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dev, false); if (IS_ERR(neigh)) { - rcu_read_unlock_bh(); + rcu_read_unlock(); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); return -EINVAL; @@ -132,7 +132,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } sock_confirm_neigh(skb, neigh); ret = neigh_output(neigh, skb, false); - rcu_read_unlock_bh(); + rcu_read_unlock(); return ret; } @@ -1150,11 +1150,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, * dst entry of the nexthop router */ rt = (struct rt6_info *) *dst; - rcu_read_lock_bh(); + rcu_read_lock(); n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); - err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; - rcu_read_unlock_bh(); + err = n && !(READ_ONCE(n->nud_state) & NUD_VALID) ? -EINVAL : 0; + rcu_read_unlock(); if (err) { struct inet6_ifaddr *ifp; @@ -1500,7 +1500,7 @@ static int __ip6_append_data(struct sock *sk, mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize; orig_mtu = mtu; - if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP && + if (cork->tx_flags & SKBTX_ANY_TSTAMP && sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) tskey = atomic_inc_return(&sk->sk_tskey) - 1; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 1c02160cf7a4..714cdc9e2b8e 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -627,12 +627,12 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, return 0; } -bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, +bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr, const struct in6_addr *src_addr) { - struct ipv6_pinfo *np = inet6_sk(sk); - struct ipv6_mc_socklist *mc; - struct ip6_sf_socklist *psl; + const struct ipv6_pinfo *np = inet6_sk(sk); + const struct ipv6_mc_socklist *mc; + const struct ip6_sf_socklist *psl; bool rv = true; rcu_read_lock(); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index c4be62c99f73..18634ebd20a4 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -745,7 +745,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) saddr = &ipv6_hdr(skb)->saddr; probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { - if (!(neigh->nud_state & NUD_VALID)) { + if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) { ND_PRINTK(1, dbg, "%s: trying to ucast probe in NUD_INVALID: %pI6\n", __func__, target); @@ -1090,7 +1090,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) u8 old_flags = neigh->flags; struct net *net = dev_net(dev); - if (neigh->nud_state & NUD_FAILED) + if (READ_ONCE(neigh->nud_state) & NUD_FAILED) goto out; /* diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 0ce0ed17c758..fd9f049d6d41 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -18,7 +18,6 @@ #include <linux/netdevice.h> #include <linux/module.h> #include <linux/poison.h> -#include <linux/icmpv6.h> #include <net/ipv6.h> #include <net/compat.h> #include <linux/uaccess.h> @@ -35,7 +34,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("IPv6 packet filter"); -MODULE_ALIAS("ip6t_icmp6"); void *ip6t_alloc_initial_table(const struct xt_table *info) { @@ -1805,52 +1803,6 @@ void ip6t_unregister_table_exit(struct net *net, const char *name) __ip6t_unregister_table(net, table); } -/* Returns 1 if the type and code is matched by the range, 0 otherwise */ -static inline bool -icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, - u_int8_t type, u_int8_t code, - bool invert) -{ - return (type == test_type && code >= min_code && code <= max_code) - ^ invert; -} - -static bool -icmp6_match(const struct sk_buff *skb, struct xt_action_param *par) -{ - const struct icmp6hdr *ic; - struct icmp6hdr _icmph; - const struct ip6t_icmp *icmpinfo = par->matchinfo; - - /* Must not be a fragment. */ - if (par->fragoff != 0) - return false; - - ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); - if (ic == NULL) { - /* We've been asked to examine this packet, and we - * can't. Hence, no choice but to drop. - */ - par->hotdrop = true; - return false; - } - - return icmp6_type_code_match(icmpinfo->type, - icmpinfo->code[0], - icmpinfo->code[1], - ic->icmp6_type, ic->icmp6_code, - !!(icmpinfo->invflags&IP6T_ICMP_INV)); -} - -/* Called when user tries to insert an entry of this type. */ -static int icmp6_checkentry(const struct xt_mtchk_param *par) -{ - const struct ip6t_icmp *icmpinfo = par->matchinfo; - - /* Must specify no unknown invflags */ - return (icmpinfo->invflags & ~IP6T_ICMP_INV) ? -EINVAL : 0; -} - /* The built-in targets: standard (NULL) and error. */ static struct xt_target ip6t_builtin_tg[] __read_mostly = { { @@ -1882,18 +1834,6 @@ static struct nf_sockopt_ops ip6t_sockopts = { .owner = THIS_MODULE, }; -static struct xt_match ip6t_builtin_mt[] __read_mostly = { - { - .name = "icmp6", - .match = icmp6_match, - .matchsize = sizeof(struct ip6t_icmp), - .checkentry = icmp6_checkentry, - .proto = IPPROTO_ICMPV6, - .family = NFPROTO_IPV6, - .me = THIS_MODULE, - }, -}; - static int __net_init ip6_tables_net_init(struct net *net) { return xt_proto_init(net, NFPROTO_IPV6); @@ -1921,19 +1861,14 @@ static int __init ip6_tables_init(void) ret = xt_register_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); if (ret < 0) goto err2; - ret = xt_register_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt)); - if (ret < 0) - goto err4; /* Register setsockopt */ ret = nf_register_sockopt(&ip6t_sockopts); if (ret < 0) - goto err5; + goto err4; return 0; -err5: - xt_unregister_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt)); err4: xt_unregister_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); err2: @@ -1946,7 +1881,6 @@ static void __exit ip6_tables_fini(void) { nf_unregister_sockopt(&ip6t_sockopts); - xt_unregister_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt)); xt_unregister_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); unregister_pernet_subsys(&ip6_tables_net_ops); } diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 808983bc2ec9..c4835dbdfcff 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -237,7 +237,7 @@ static int ping_v6_seq_show(struct seq_file *seq, void *v) seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); } else { int bucket = ((struct ping_iter_state *) seq->private)->bucket; - struct inet_sock *inet = inet_sk(v); + struct inet_sock *inet = inet_sk((struct sock *)v); __u16 srcp = ntohs(inet->inet_sport); __u16 destp = ntohs(inet->inet_dport); ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index bac9ba747bde..4ab62a9c5c8e 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -64,7 +64,7 @@ struct raw_hashinfo raw_v6_hashinfo; EXPORT_SYMBOL_GPL(raw_v6_hashinfo); -bool raw_v6_match(struct net *net, struct sock *sk, unsigned short num, +bool raw_v6_match(struct net *net, const struct sock *sk, unsigned short num, const struct in6_addr *loc_addr, const struct in6_addr *rmt_addr, int dif, int sdif) { @@ -194,10 +194,8 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); /* Not releasing hash table! */ - if (clone) { - nf_reset_ct(clone); + if (clone) rawv6_rcv(sk, clone); - } } } rcu_read_unlock(); @@ -391,6 +389,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY); return NET_RX_DROP; } + nf_reset_ct(skb); if (!rp->checksum) skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0fdb03df2287..244df77fac87 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -633,15 +633,15 @@ static void rt6_probe(struct fib6_nh *fib6_nh) nh_gw = &fib6_nh->fib_nh_gw6; dev = fib6_nh->fib_nh_dev; - rcu_read_lock_bh(); + rcu_read_lock(); last_probe = READ_ONCE(fib6_nh->last_probe); idev = __in6_dev_get(dev); neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); if (neigh) { - if (neigh->nud_state & NUD_VALID) + if (READ_ONCE(neigh->nud_state) & NUD_VALID) goto out; - write_lock(&neigh->lock); + write_lock_bh(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, neigh->updated + idev->cnf.rtr_probe_interval)) { @@ -649,7 +649,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh) if (work) __neigh_set_probe_once(neigh); } - write_unlock(&neigh->lock); + write_unlock_bh(&neigh->lock); } else if (time_after(jiffies, last_probe + idev->cnf.rtr_probe_interval)) { work = kmalloc(sizeof(*work), GFP_ATOMIC); @@ -667,7 +667,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh) } out: - rcu_read_unlock_bh(); + rcu_read_unlock(); } #else static inline void rt6_probe(struct fib6_nh *fib6_nh) @@ -683,25 +683,25 @@ static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh) enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; struct neighbour *neigh; - rcu_read_lock_bh(); + rcu_read_lock(); neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev, &fib6_nh->fib_nh_gw6); if (neigh) { - read_lock(&neigh->lock); - if (neigh->nud_state & NUD_VALID) + u8 nud_state = READ_ONCE(neigh->nud_state); + + if (nud_state & NUD_VALID) ret = RT6_NUD_SUCCEED; #ifdef CONFIG_IPV6_ROUTER_PREF - else if (!(neigh->nud_state & NUD_FAILED)) + else if (!(nud_state & NUD_FAILED)) ret = RT6_NUD_SUCCEED; else ret = RT6_NUD_FAIL_PROBE; #endif - read_unlock(&neigh->lock); } else { ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ? RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR; } - rcu_read_unlock_bh(); + rcu_read_unlock(); return ret; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1bf93b61aa06..244cf86c4cbb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -493,12 +493,13 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); if (!sock_owned_by_user(sk)) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ tcp_done(sk); - } else - sk->sk_err_soft = err; + } else { + WRITE_ONCE(sk->sk_err_soft, err); + } goto out; case TCP_LISTEN: break; @@ -512,11 +513,11 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (!sock_owned_by_user(sk) && np->recverr) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); - } else - sk->sk_err_soft = err; - + } else { + WRITE_ONCE(sk->sk_err_soft, err); + } out: bh_unlock_sock(sk); sock_put(sk); @@ -1722,6 +1723,8 @@ process: if (drop_reason) goto discard_and_relse; + nf_reset_ct(skb); + if (tcp_filter(sk, skb)) { drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto discard_and_relse; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9fb2f33ee3a7..4caa70a1b871 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -704,6 +704,7 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) drop_reason = SKB_DROP_REASON_XFRM_POLICY; goto drop; } + nf_reset_ct(skb); if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) { int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); @@ -805,12 +806,12 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return 0; } -static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk, +static bool __udp_v6_is_mcast_sock(struct net *net, const struct sock *sk, __be16 loc_port, const struct in6_addr *loc_addr, __be16 rmt_port, const struct in6_addr *rmt_addr, int dif, int sdif, unsigned short hnum) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) return false; @@ -1027,6 +1028,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; + nf_reset_ct(skb); if (udp_lib_checksum_complete(skb)) goto csum_error; @@ -1708,7 +1710,7 @@ int udp6_seq_show(struct seq_file *seq, void *v) seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); } else { int bucket = ((struct udp_iter_state *)seq->private)->bucket; - struct inet_sock *inet = inet_sk(v); + const struct inet_sock *inet = inet_sk((const struct sock *)v); __u16 srcp = ntohs(inet->inet_sport); __u16 destp = ntohs(inet->inet_dport); __ip6_dgram_sock_seq_show(seq, v, srcp, destp, diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index f9514bacbd4a..3b651e7f5a73 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -554,6 +554,23 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) ieee80211_send_addba_with_timeout(sta, tid_tx); } +void ieee80211_refresh_tx_agg_session_timer(struct ieee80211_sta *pubsta, + u16 tid) +{ + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + struct tid_ampdu_tx *tid_tx; + + if (WARN_ON_ONCE(tid >= IEEE80211_NUM_TIDS)) + return; + + tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); + if (!tid_tx) + return; + + tid_tx->last_tx = jiffies; +} +EXPORT_SYMBOL(ieee80211_refresh_tx_agg_session_timer); + /* * After accepting the AddBA Response we activated a timer, * resetting it after each frame that we send. diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d3d861911ed6..bccc9627dca5 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1252,7 +1252,15 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, prev_beacon_int = link_conf->beacon_int; link_conf->beacon_int = params->beacon_interval; + if (params->ht_cap) + link_conf->ht_ldpc = + params->ht_cap->cap_info & + cpu_to_le16(IEEE80211_HT_CAP_LDPC_CODING); + if (params->vht_cap) { + link_conf->vht_ldpc = + params->vht_cap->vht_cap_info & + cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC); link_conf->vht_su_beamformer = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE); @@ -1282,6 +1290,9 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, } if (params->he_cap) { + link_conf->he_ldpc = + params->he_cap->phy_cap_info[1] & + IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD; link_conf->he_su_beamformer = params->he_cap->phy_cap_info[3] & IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER; @@ -1299,6 +1310,22 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, if (params->eht_cap) { link_conf->eht_puncturing = params->punct_bitmap; changed |= BSS_CHANGED_EHT_PUNCTURING; + + link_conf->eht_su_beamformer = + params->eht_cap->fixed.phy_cap_info[0] & + IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMER; + link_conf->eht_su_beamformee = + params->eht_cap->fixed.phy_cap_info[0] & + IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMEE; + link_conf->eht_mu_beamformer = + params->eht_cap->fixed.phy_cap_info[7] & + (IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_80MHZ | + IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_160MHZ | + IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_320MHZ); + } else { + link_conf->eht_su_beamformer = false; + link_conf->eht_su_beamformee = false; + link_conf->eht_mu_beamformer = false; } if (sdata->vif.type == NL80211_IFTYPE_AP && @@ -1788,7 +1815,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, (void *)params->he_6ghz_capa, link_sta); - if (params->eht_capa) + if (params->he_capa && params->eht_capa) ieee80211_eht_cap_ie_to_sta_eht_cap(sdata, sband, (u8 *)params->he_capa, params->he_capa_len, @@ -4905,6 +4932,22 @@ ieee80211_del_link_station(struct wiphy *wiphy, struct net_device *dev, return ret; } +static int ieee80211_set_hw_timestamp(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_set_hw_timestamp *hwts) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + + if (!local->ops->set_hw_timestamp) + return -EOPNOTSUPP; + + if (!check_sdata_in_driver(sdata)) + return -EIO; + + return local->ops->set_hw_timestamp(&local->hw, &sdata->vif, hwts); +} + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -5015,4 +5058,5 @@ const struct cfg80211_ops mac80211_config_ops = { .add_link_station = ieee80211_add_link_station, .mod_link_station = ieee80211_mod_link_station, .del_link_station = ieee80211_del_link_station, + .set_hw_timestamp = ieee80211_set_hw_timestamp, }; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 0bac9af3ca96..b0cef37eb394 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -23,16 +23,16 @@ #include "driver-ops.h" static ssize_t ieee80211_if_read( - struct ieee80211_sub_if_data *sdata, + void *data, char __user *userbuf, size_t count, loff_t *ppos, - ssize_t (*format)(const struct ieee80211_sub_if_data *, char *, int)) + ssize_t (*format)(const void *, char *, int)) { char buf[200]; ssize_t ret = -EINVAL; read_lock(&dev_base_lock); - ret = (*format)(sdata, buf, sizeof(buf)); + ret = (*format)(data, buf, sizeof(buf)); read_unlock(&dev_base_lock); if (ret >= 0) @@ -42,10 +42,10 @@ static ssize_t ieee80211_if_read( } static ssize_t ieee80211_if_write( - struct ieee80211_sub_if_data *sdata, + void *data, const char __user *userbuf, size_t count, loff_t *ppos, - ssize_t (*write)(struct ieee80211_sub_if_data *, const char *, int)) + ssize_t (*write)(void *, const char *, int)) { char buf[64]; ssize_t ret; @@ -58,64 +58,64 @@ static ssize_t ieee80211_if_write( buf[count] = '\0'; rtnl_lock(); - ret = (*write)(sdata, buf, count); + ret = (*write)(data, buf, count); rtnl_unlock(); return ret; } -#define IEEE80211_IF_FMT(name, field, format_string) \ +#define IEEE80211_IF_FMT(name, type, field, format_string) \ static ssize_t ieee80211_if_fmt_##name( \ - const struct ieee80211_sub_if_data *sdata, char *buf, \ + const type *data, char *buf, \ int buflen) \ { \ - return scnprintf(buf, buflen, format_string, sdata->field); \ + return scnprintf(buf, buflen, format_string, data->field); \ } -#define IEEE80211_IF_FMT_DEC(name, field) \ - IEEE80211_IF_FMT(name, field, "%d\n") -#define IEEE80211_IF_FMT_HEX(name, field) \ - IEEE80211_IF_FMT(name, field, "%#x\n") -#define IEEE80211_IF_FMT_LHEX(name, field) \ - IEEE80211_IF_FMT(name, field, "%#lx\n") +#define IEEE80211_IF_FMT_DEC(name, type, field) \ + IEEE80211_IF_FMT(name, type, field, "%d\n") +#define IEEE80211_IF_FMT_HEX(name, type, field) \ + IEEE80211_IF_FMT(name, type, field, "%#x\n") +#define IEEE80211_IF_FMT_LHEX(name, type, field) \ + IEEE80211_IF_FMT(name, type, field, "%#lx\n") -#define IEEE80211_IF_FMT_HEXARRAY(name, field) \ +#define IEEE80211_IF_FMT_HEXARRAY(name, type, field) \ static ssize_t ieee80211_if_fmt_##name( \ - const struct ieee80211_sub_if_data *sdata, \ + const type *data, \ char *buf, int buflen) \ { \ char *p = buf; \ int i; \ - for (i = 0; i < sizeof(sdata->field); i++) { \ + for (i = 0; i < sizeof(data->field); i++) { \ p += scnprintf(p, buflen + buf - p, "%.2x ", \ - sdata->field[i]); \ + data->field[i]); \ } \ p += scnprintf(p, buflen + buf - p, "\n"); \ return p - buf; \ } -#define IEEE80211_IF_FMT_ATOMIC(name, field) \ +#define IEEE80211_IF_FMT_ATOMIC(name, type, field) \ static ssize_t ieee80211_if_fmt_##name( \ - const struct ieee80211_sub_if_data *sdata, \ + const type *data, \ char *buf, int buflen) \ { \ - return scnprintf(buf, buflen, "%d\n", atomic_read(&sdata->field));\ + return scnprintf(buf, buflen, "%d\n", atomic_read(&data->field));\ } -#define IEEE80211_IF_FMT_MAC(name, field) \ +#define IEEE80211_IF_FMT_MAC(name, type, field) \ static ssize_t ieee80211_if_fmt_##name( \ - const struct ieee80211_sub_if_data *sdata, char *buf, \ + const type *data, char *buf, \ int buflen) \ { \ - return scnprintf(buf, buflen, "%pM\n", sdata->field); \ + return scnprintf(buf, buflen, "%pM\n", data->field); \ } -#define IEEE80211_IF_FMT_JIFFIES_TO_MS(name, field) \ +#define IEEE80211_IF_FMT_JIFFIES_TO_MS(name, type, field) \ static ssize_t ieee80211_if_fmt_##name( \ - const struct ieee80211_sub_if_data *sdata, \ + const type *data, \ char *buf, int buflen) \ { \ return scnprintf(buf, buflen, "%d\n", \ - jiffies_to_msecs(sdata->field)); \ + jiffies_to_msecs(data->field)); \ } #define _IEEE80211_IF_FILE_OPS(name, _read, _write) \ @@ -126,43 +126,67 @@ static const struct file_operations name##_ops = { \ .llseek = generic_file_llseek, \ } -#define _IEEE80211_IF_FILE_R_FN(name) \ +#define _IEEE80211_IF_FILE_R_FN(name, type) \ static ssize_t ieee80211_if_read_##name(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ + ssize_t (*fn)(const void *, char *, int) = (void *) \ + ((ssize_t (*)(const type, char *, int)) \ + ieee80211_if_fmt_##name); \ return ieee80211_if_read(file->private_data, \ - userbuf, count, ppos, \ - ieee80211_if_fmt_##name); \ + userbuf, count, ppos, fn); \ } -#define _IEEE80211_IF_FILE_W_FN(name) \ +#define _IEEE80211_IF_FILE_W_FN(name, type) \ static ssize_t ieee80211_if_write_##name(struct file *file, \ const char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ + ssize_t (*fn)(void *, const char *, int) = (void *) \ + ((ssize_t (*)(type, const char *, int)) \ + ieee80211_if_parse_##name); \ return ieee80211_if_write(file->private_data, userbuf, count, \ - ppos, ieee80211_if_parse_##name); \ + ppos, fn); \ } #define IEEE80211_IF_FILE_R(name) \ - _IEEE80211_IF_FILE_R_FN(name) \ + _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_sub_if_data *) \ _IEEE80211_IF_FILE_OPS(name, ieee80211_if_read_##name, NULL) #define IEEE80211_IF_FILE_W(name) \ - _IEEE80211_IF_FILE_W_FN(name) \ + _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_sub_if_data *) \ _IEEE80211_IF_FILE_OPS(name, NULL, ieee80211_if_write_##name) #define IEEE80211_IF_FILE_RW(name) \ - _IEEE80211_IF_FILE_R_FN(name) \ - _IEEE80211_IF_FILE_W_FN(name) \ + _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_sub_if_data *) \ + _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_sub_if_data *) \ _IEEE80211_IF_FILE_OPS(name, ieee80211_if_read_##name, \ ieee80211_if_write_##name) #define IEEE80211_IF_FILE(name, field, format) \ - IEEE80211_IF_FMT_##format(name, field) \ + IEEE80211_IF_FMT_##format(name, struct ieee80211_sub_if_data, field) \ IEEE80211_IF_FILE_R(name) +/* Same but with a link_ prefix in the ops variable name and different type */ +#define IEEE80211_IF_LINK_FILE_R(name) \ + _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_link_data *) \ + _IEEE80211_IF_FILE_OPS(link_##name, ieee80211_if_read_##name, NULL) + +#define IEEE80211_IF_LINK_FILE_W(name) \ + _IEEE80211_IF_FILE_W_FN(name) \ + _IEEE80211_IF_FILE_OPS(link_##name, NULL, ieee80211_if_write_##name) + +#define IEEE80211_IF_LINK_FILE_RW(name) \ + _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_link_data *) \ + _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_link_data *) \ + _IEEE80211_IF_FILE_OPS(link_##name, ieee80211_if_read_##name, \ + ieee80211_if_write_##name) + +#define IEEE80211_IF_LINK_FILE(name, field, format) \ + IEEE80211_IF_FMT_##format(name, struct ieee80211_link_data, field) \ + IEEE80211_IF_LINK_FILE_R(name) + /* common attributes */ IEEE80211_IF_FILE(rc_rateidx_mask_2ghz, rc_rateidx_mask[NL80211_BAND_2GHZ], HEX); @@ -207,9 +231,9 @@ IEEE80211_IF_FILE_R(rc_rateidx_vht_mcs_mask_5ghz); IEEE80211_IF_FILE(flags, flags, HEX); IEEE80211_IF_FILE(state, state, LHEX); -IEEE80211_IF_FILE(txpower, vif.bss_conf.txpower, DEC); -IEEE80211_IF_FILE(ap_power_level, deflink.ap_power_level, DEC); -IEEE80211_IF_FILE(user_power_level, deflink.user_power_level, DEC); +IEEE80211_IF_LINK_FILE(txpower, conf->txpower, DEC); +IEEE80211_IF_LINK_FILE(ap_power_level, ap_power_level, DEC); +IEEE80211_IF_LINK_FILE(user_power_level, user_power_level, DEC); static ssize_t ieee80211_if_fmt_hw_queues(const struct ieee80211_sub_if_data *sdata, @@ -236,9 +260,10 @@ IEEE80211_IF_FILE(bssid, deflink.u.mgd.bssid, MAC); IEEE80211_IF_FILE(aid, vif.cfg.aid, DEC); IEEE80211_IF_FILE(beacon_timeout, u.mgd.beacon_timeout, JIFFIES_TO_MS); -static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata, +static int ieee80211_set_smps(struct ieee80211_link_data *link, enum ieee80211_smps_mode smps_mode) { + struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; int err; @@ -256,7 +281,7 @@ static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata, return -EOPNOTSUPP; sdata_lock(sdata); - err = __ieee80211_request_smps_mgd(sdata, &sdata->deflink, smps_mode); + err = __ieee80211_request_smps_mgd(link->sdata, link, smps_mode); sdata_unlock(sdata); return err; @@ -269,24 +294,24 @@ static const char *smps_modes[IEEE80211_SMPS_NUM_MODES] = { [IEEE80211_SMPS_DYNAMIC] = "dynamic", }; -static ssize_t ieee80211_if_fmt_smps(const struct ieee80211_sub_if_data *sdata, +static ssize_t ieee80211_if_fmt_smps(const struct ieee80211_link_data *link, char *buf, int buflen) { - if (sdata->vif.type == NL80211_IFTYPE_STATION) + if (link->sdata->vif.type == NL80211_IFTYPE_STATION) return snprintf(buf, buflen, "request: %s\nused: %s\n", - smps_modes[sdata->deflink.u.mgd.req_smps], - smps_modes[sdata->deflink.smps_mode]); + smps_modes[link->u.mgd.req_smps], + smps_modes[link->smps_mode]); return -EINVAL; } -static ssize_t ieee80211_if_parse_smps(struct ieee80211_sub_if_data *sdata, +static ssize_t ieee80211_if_parse_smps(struct ieee80211_link_data *link, const char *buf, int buflen) { enum ieee80211_smps_mode mode; for (mode = 0; mode < IEEE80211_SMPS_NUM_MODES; mode++) { if (strncmp(buf, smps_modes[mode], buflen) == 0) { - int err = ieee80211_set_smps(sdata, mode); + int err = ieee80211_set_smps(link, mode); if (!err) return buflen; return err; @@ -295,7 +320,7 @@ static ssize_t ieee80211_if_parse_smps(struct ieee80211_sub_if_data *sdata, return -EINVAL; } -IEEE80211_IF_FILE_RW(smps); +IEEE80211_IF_LINK_FILE_RW(smps); static ssize_t ieee80211_if_parse_tkip_mic_test( struct ieee80211_sub_if_data *sdata, const char *buf, int buflen) @@ -595,6 +620,8 @@ static ssize_t ieee80211_if_parse_active_links(struct ieee80211_sub_if_data *sda } IEEE80211_IF_FILE_RW(active_links); +IEEE80211_IF_LINK_FILE(addr, conf->addr, MAC); + #ifdef CONFIG_MAC80211_MESH IEEE80211_IF_FILE(estab_plinks, u.mesh.estab_plinks, ATOMIC); @@ -685,7 +712,6 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(bssid); DEBUGFS_ADD(aid); DEBUGFS_ADD(beacon_timeout); - DEBUGFS_ADD_MODE(smps, 0600); DEBUGFS_ADD_MODE(tkip_mic_test, 0200); DEBUGFS_ADD_MODE(beacon_loss, 0200); DEBUGFS_ADD_MODE(uapsd_queues, 0600); @@ -698,7 +724,6 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) static void add_ap_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(num_mcast_sta); - DEBUGFS_ADD_MODE(smps, 0600); DEBUGFS_ADD(num_sta_ps); DEBUGFS_ADD(dtim_count); DEBUGFS_ADD(num_buffered_multicast); @@ -789,9 +814,6 @@ static void add_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(flags); DEBUGFS_ADD(state); - DEBUGFS_ADD(txpower); - DEBUGFS_ADD(user_power_level); - DEBUGFS_ADD(ap_power_level); if (sdata->vif.type != NL80211_IFTYPE_MONITOR) add_common_files(sdata); @@ -821,6 +843,31 @@ static void add_files(struct ieee80211_sub_if_data *sdata) } } +#undef DEBUGFS_ADD_MODE +#undef DEBUGFS_ADD + +#define DEBUGFS_ADD_MODE(dentry, name, mode) \ + debugfs_create_file(#name, mode, dentry, \ + link, &link_##name##_ops) + +#define DEBUGFS_ADD(dentry, name) DEBUGFS_ADD_MODE(dentry, name, 0400) + +static void add_link_files(struct ieee80211_link_data *link, + struct dentry *dentry) +{ + DEBUGFS_ADD(dentry, txpower); + DEBUGFS_ADD(dentry, user_power_level); + DEBUGFS_ADD(dentry, ap_power_level); + + switch (link->sdata->vif.type) { + case NL80211_IFTYPE_STATION: + DEBUGFS_ADD_MODE(dentry, smps, 0600); + break; + default: + break; + } +} + void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata) { char buf[10+IFNAMSIZ]; @@ -831,6 +878,9 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata) sdata->debugfs.subdir_stations = debugfs_create_dir("stations", sdata->vif.debugfs_dir); add_files(sdata); + + if (!(sdata->local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)) + add_link_files(&sdata->deflink, sdata->vif.debugfs_dir); } void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata) @@ -856,3 +906,66 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) sprintf(buf, "netdev:%s", sdata->name); debugfs_rename(dir->d_parent, dir, dir->d_parent, buf); } + +void ieee80211_link_debugfs_add(struct ieee80211_link_data *link) +{ + char link_dir_name[10]; + + if (WARN_ON(!link->sdata->vif.debugfs_dir)) + return; + + /* For now, this should not be called for non-MLO capable drivers */ + if (WARN_ON(!(link->sdata->local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO))) + return; + + snprintf(link_dir_name, sizeof(link_dir_name), + "link-%d", link->link_id); + + link->debugfs_dir = + debugfs_create_dir(link_dir_name, + link->sdata->vif.debugfs_dir); + + DEBUGFS_ADD(link->debugfs_dir, addr); + add_link_files(link, link->debugfs_dir); +} + +void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link) +{ + if (!link->sdata->vif.debugfs_dir || !link->debugfs_dir) { + link->debugfs_dir = NULL; + return; + } + + if (link->debugfs_dir == link->sdata->vif.debugfs_dir) { + WARN_ON(link != &link->sdata->deflink); + link->debugfs_dir = NULL; + return; + } + + debugfs_remove_recursive(link->debugfs_dir); + link->debugfs_dir = NULL; +} + +void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link) +{ + if (WARN_ON(!link->debugfs_dir)) + return; + + drv_link_add_debugfs(link->sdata->local, link->sdata, + link->conf, link->debugfs_dir); +} + +void ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link) +{ + if (!link || !link->debugfs_dir) + return; + + if (WARN_ON(link->debugfs_dir == link->sdata->vif.debugfs_dir)) + return; + + /* Recreate the directory excluding the driver data */ + debugfs_remove_recursive(link->debugfs_dir); + link->debugfs_dir = NULL; + + ieee80211_link_debugfs_add(link); +} diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h index a7e9d8d518f9..99e688dcabd6 100644 --- a/net/mac80211/debugfs_netdev.h +++ b/net/mac80211/debugfs_netdev.h @@ -10,6 +10,12 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata); + +void ieee80211_link_debugfs_add(struct ieee80211_link_data *link); +void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link); + +void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link); +void ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link); #else static inline void ieee80211_debugfs_add_netdev( struct ieee80211_sub_if_data *sdata) @@ -20,6 +26,16 @@ static inline void ieee80211_debugfs_remove_netdev( static inline void ieee80211_debugfs_rename_netdev( struct ieee80211_sub_if_data *sdata) {} + +static inline void ieee80211_link_debugfs_add(struct ieee80211_link_data *link) +{} +static inline void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link) +{} + +static inline void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link) +{} +static inline void ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link) +{} #endif #endif /* __IEEE80211_DEBUGFS_NETDEV_H */ diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index cfb09e4aed4d..30cd0c905a24 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -8,6 +8,7 @@ #include "trace.h" #include "driver-ops.h" #include "debugfs_sta.h" +#include "debugfs_netdev.h" int drv_start(struct ieee80211_local *local) { @@ -477,6 +478,10 @@ int drv_change_vif_links(struct ieee80211_local *local, u16 old_links, u16 new_links, struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS]) { + struct ieee80211_link_data *link; + unsigned long links_to_add; + unsigned long links_to_rem; + unsigned int link_id; int ret = -EOPNOTSUPP; might_sleep(); @@ -487,13 +492,31 @@ int drv_change_vif_links(struct ieee80211_local *local, if (old_links == new_links) return 0; + links_to_add = ~old_links & new_links; + links_to_rem = old_links & ~new_links; + + for_each_set_bit(link_id, &links_to_rem, IEEE80211_MLD_MAX_NUM_LINKS) { + link = rcu_access_pointer(sdata->link[link_id]); + + ieee80211_link_debugfs_drv_remove(link); + } + trace_drv_change_vif_links(local, sdata, old_links, new_links); if (local->ops->change_vif_links) ret = local->ops->change_vif_links(&local->hw, &sdata->vif, old_links, new_links, old); trace_drv_return_int(local, ret); - return ret; + if (ret) + return ret; + + for_each_set_bit(link_id, &links_to_add, IEEE80211_MLD_MAX_NUM_LINKS) { + link = rcu_access_pointer(sdata->link[link_id]); + + ieee80211_link_debugfs_drv_add(link); + } + + return 0; } int drv_change_sta_links(struct ieee80211_local *local, diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 5d13a3dfd366..a68d606e6987 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -465,6 +465,22 @@ static inline void drv_sta_remove(struct ieee80211_local *local, } #ifdef CONFIG_MAC80211_DEBUGFS +static inline void drv_link_add_debugfs(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss_conf *link_conf, + struct dentry *dir) +{ + might_sleep(); + + sdata = get_bss_sdata(sdata); + if (!check_sdata_in_driver(sdata)) + return; + + if (local->ops->link_add_debugfs) + local->ops->link_add_debugfs(&local->hw, &sdata->vif, + link_conf, dir); +} + static inline void drv_sta_add_debugfs(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e082582e0aa2..491e6fc7ade6 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -999,6 +999,10 @@ struct ieee80211_link_data { struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS]; struct ieee80211_bss_conf *conf; + +#ifdef CONFIG_MAC80211_DEBUGFS + struct dentry *debugfs_dir; +#endif }; struct ieee80211_sub_if_data { diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 8c8869cc1fb4..e82db88a47f8 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -10,6 +10,7 @@ #include "ieee80211_i.h" #include "driver-ops.h" #include "key.h" +#include "debugfs_netdev.h" void ieee80211_link_setup(struct ieee80211_link_data *link) { @@ -34,6 +35,7 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, link->link_id = link_id; link->conf = link_conf; link_conf->link_id = link_id; + link_conf->vif = &sdata->vif; INIT_WORK(&link->csa_finalize_work, ieee80211_csa_finalize_work); @@ -60,6 +62,8 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, default: WARN_ON(1); } + + ieee80211_link_debugfs_add(link); } } @@ -93,6 +97,7 @@ static void ieee80211_tear_down_links(struct ieee80211_sub_if_data *sdata, if (WARN_ON(!link)) continue; ieee80211_remove_link_keys(link, &keys); + ieee80211_link_debugfs_remove(link); ieee80211_link_stop(link); } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 60792dfabc9d..e13a0354c397 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2744,7 +2744,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_link_data *link, return changed; } -static u32 ieee80211_link_set_associated(struct ieee80211_link_data *link, +static u64 ieee80211_link_set_associated(struct ieee80211_link_data *link, struct cfg80211_bss *cbss) { struct ieee80211_sub_if_data *sdata = link->sdata; @@ -3227,7 +3227,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; bool already = false; - if (WARN_ON(sdata->vif.valid_links)) + if (WARN_ON_ONCE(sdata->vif.valid_links)) return; if (!ieee80211_sdata_running(sdata)) @@ -5893,7 +5893,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, goto free; } - if (sta && elems->opmode_notif) + if (elems->opmode_notif) ieee80211_vht_handle_opmode(sdata, link_sta, *elems->opmode_notif, rx_status->band); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index e8de500eb9f3..7a37ada70684 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -43,6 +43,7 @@ static struct sk_buff *ieee80211_clean_skb(struct sk_buff *skb, unsigned int present_fcs_len, unsigned int rtap_space) { + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr; unsigned int hdrlen; __le16 fc; @@ -51,6 +52,14 @@ static struct sk_buff *ieee80211_clean_skb(struct sk_buff *skb, __pskb_trim(skb, skb->len - present_fcs_len); pskb_pull(skb, rtap_space); + /* After pulling radiotap header, clear all flags that indicate + * info in skb->data. + */ + status->flag &= ~(RX_FLAG_RADIOTAP_TLV_AT_END | + RX_FLAG_RADIOTAP_LSIG | + RX_FLAG_RADIOTAP_HE_MU | + RX_FLAG_RADIOTAP_HE); + hdr = (void *)skb->data; fc = hdr->frame_control; @@ -117,9 +126,6 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, /* allocate extra bitmaps */ if (status->chains) len += 4 * hweight8(status->chains); - /* vendor presence bitmap */ - if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) - len += 4; if (ieee80211_have_rx_timestamp(status)) { len = ALIGN(len, 8); @@ -181,34 +187,28 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, len += 2 * hweight8(status->chains); } - if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) { - struct ieee80211_vendor_radiotap *rtap; - int vendor_data_offset = 0; + if (status->flag & RX_FLAG_RADIOTAP_TLV_AT_END) { + int tlv_offset = 0; /* * The position to look at depends on the existence (or non- * existence) of other elements, so take that into account... */ if (status->flag & RX_FLAG_RADIOTAP_HE) - vendor_data_offset += + tlv_offset += sizeof(struct ieee80211_radiotap_he); if (status->flag & RX_FLAG_RADIOTAP_HE_MU) - vendor_data_offset += + tlv_offset += sizeof(struct ieee80211_radiotap_he_mu); if (status->flag & RX_FLAG_RADIOTAP_LSIG) - vendor_data_offset += + tlv_offset += sizeof(struct ieee80211_radiotap_lsig); - rtap = (void *)&skb->data[vendor_data_offset]; + /* ensure 4 byte alignment for TLV */ + len = ALIGN(len, 4); - /* alignment for fixed 6-byte vendor data header */ - len = ALIGN(len, 2); - /* vendor data header */ - len += 6; - if (WARN_ON(rtap->align == 0)) - rtap->align = 1; - len = ALIGN(len, rtap->align); - len += rtap->len + rtap->pad; + /* TLVs until the mac header */ + len += skb_mac_header(skb) - &skb->data[tlv_offset]; } return len; @@ -304,9 +304,9 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, u32 it_present_val; u16 rx_flags = 0; u16 channel_flags = 0; + u32 tlvs_len = 0; int mpdulen, chain; unsigned long chains = status->chains; - struct ieee80211_vendor_radiotap rtap = {}; struct ieee80211_radiotap_he he = {}; struct ieee80211_radiotap_he_mu he_mu = {}; struct ieee80211_radiotap_lsig lsig = {}; @@ -327,18 +327,17 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, skb_pull(skb, sizeof(lsig)); } - if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) { - rtap = *(struct ieee80211_vendor_radiotap *)skb->data; - /* rtap.len and rtap.pad are undone immediately */ - skb_pull(skb, sizeof(rtap) + rtap.len + rtap.pad); + if (status->flag & RX_FLAG_RADIOTAP_TLV_AT_END) { + /* data is pointer at tlv all other info was pulled off */ + tlvs_len = skb_mac_header(skb) - skb->data; } mpdulen = skb->len; if (!(has_fcs && ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS))) mpdulen += FCS_LEN; - rthdr = skb_push(skb, rtap_len); - memset(rthdr, 0, rtap_len - rtap.len - rtap.pad); + rthdr = skb_push(skb, rtap_len - tlvs_len); + memset(rthdr, 0, rtap_len - tlvs_len); it_present = &rthdr->it_present; /* radiotap header, set always present flags */ @@ -360,13 +359,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, BIT(IEEE80211_RADIOTAP_DBM_ANTSIGNAL); } - if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) { - it_present_val |= BIT(IEEE80211_RADIOTAP_VENDOR_NAMESPACE) | - BIT(IEEE80211_RADIOTAP_EXT); - put_unaligned_le32(it_present_val, it_present); - it_present++; - it_present_val = rtap.present; - } + if (status->flag & RX_FLAG_RADIOTAP_TLV_AT_END) + it_present_val |= BIT(IEEE80211_RADIOTAP_TLV); put_unaligned_le32(it_present_val, it_present); @@ -697,22 +691,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, *pos++ = status->chain_signal[chain]; *pos++ = chain; } - - if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) { - /* ensure 2 byte alignment for the vendor field as required */ - if ((pos - (u8 *)rthdr) & 1) - *pos++ = 0; - *pos++ = rtap.oui[0]; - *pos++ = rtap.oui[1]; - *pos++ = rtap.oui[2]; - *pos++ = rtap.subns; - put_unaligned_le16(rtap.len, pos); - pos += 2; - /* align the actual payload as requested */ - while ((pos - (u8 *)rthdr) & (rtap.align - 1)) - *pos++ = 0; - /* data (and possible padding) already follows */ - } } static struct sk_buff * @@ -788,6 +766,13 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, bool only_monitor = false; unsigned int min_head_len; + if (WARN_ON_ONCE(status->flag & RX_FLAG_RADIOTAP_TLV_AT_END && + !skb_mac_header_was_set(origskb))) { + /* with this skb no way to know where frame payload starts */ + dev_kfree_skb(origskb); + return NULL; + } + if (status->flag & RX_FLAG_RADIOTAP_HE) rtap_space += sizeof(struct ieee80211_radiotap_he); @@ -797,12 +782,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, if (status->flag & RX_FLAG_RADIOTAP_LSIG) rtap_space += sizeof(struct ieee80211_radiotap_lsig); - if (unlikely(status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA)) { - struct ieee80211_vendor_radiotap *rtap = - (void *)(origskb->data + rtap_space); - - rtap_space += sizeof(*rtap) + rtap->len + rtap->pad; - } + if (status->flag & RX_FLAG_RADIOTAP_TLV_AT_END) + rtap_space += skb_mac_header(origskb) - &origskb->data[rtap_space]; min_head_len = rtap_space; @@ -2582,7 +2563,7 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb, struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); bool noencrypt = !(status->flag & RX_FLAG_DECRYPTED); - cfg80211_rx_control_port(dev, skb, noencrypt); + cfg80211_rx_control_port(dev, skb, noencrypt, rx->link_id); dev_kfree_skb(skb); } else { struct ethhdr *ehdr = (void *)skb_mac_header(skb); @@ -3916,8 +3897,6 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, if (!local->cooked_mntrs) goto out_free_skb; - /* vendor data is long removed here */ - status->flag &= ~RX_FLAG_RADIOTAP_VENDOR_DATA; /* room for the radiotap header based on driver features */ needed_headroom = ieee80211_rx_radiotap_hdrlen(local, status, skb); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index dc3cdee51e66..32fa8aca7005 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation */ #include <linux/if_arp.h> @@ -1246,11 +1246,11 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, return ret; } -/* - * Only call this function when a scan can't be queued -- under RTNL. - */ void ieee80211_scan_cancel(struct ieee80211_local *local) { + /* ensure a new scan cannot be queued */ + lockdep_assert_wiphy(local->hw.wiphy); + /* * We are canceling software scan, or deferred scan that was not * yet really started (see __ieee80211_start_scan ). diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7699fb410670..628d60f3db2a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5115,6 +5115,16 @@ static int ieee80211_beacon_protect(struct sk_buff *skb, tx.key = rcu_dereference(link->default_beacon_key); if (!tx.key) return 0; + + if (unlikely(tx.key->flags & KEY_FLAG_TAINTED)) { + tx.key = NULL; + return -EINVAL; + } + + if (!(tx.key->conf.flags & IEEE80211_KEY_FLAG_SW_MGMT_TX) && + tx.key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) + IEEE80211_SKB_CB(skb)->control.hw_key = &tx.key->conf; + tx.local = local; tx.sdata = sdata; __skb_queue_head_init(&tx.skbs); diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 3150f3f0c872..bb4bd0b6a4f7 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -704,7 +704,6 @@ subsys_initcall(mctp_init); module_exit(mctp_exit); MODULE_DESCRIPTION("MCTP core"); -MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Jeremy Kerr <jk@codeconstruct.com.au>"); MODULE_ALIAS_NETPROTO(PF_MCTP); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 5c8dea49626c..1c42bebca39e 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -2035,7 +2035,7 @@ static int mptcp_event_put_token_and_ssk(struct sk_buff *skb, nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if)) return -EMSGSIZE; - sk_err = ssk->sk_err; + sk_err = READ_ONCE(ssk->sk_err); if (sk_err && sk->sk_state == TCP_ESTABLISHED && nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err)) return -EMSGSIZE; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 60b23b2716c4..2d26b9114373 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -459,7 +459,7 @@ static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq) return false; } -static void mptcp_set_datafin_timeout(const struct sock *sk) +static void mptcp_set_datafin_timeout(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); u32 retransmits; @@ -2480,15 +2480,15 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) /* Mirror the tcp_reset() error propagation */ switch (sk->sk_state) { case TCP_SYN_SENT: - sk->sk_err = ECONNREFUSED; + WRITE_ONCE(sk->sk_err, ECONNREFUSED); break; case TCP_CLOSE_WAIT: - sk->sk_err = EPIPE; + WRITE_ONCE(sk->sk_err, EPIPE); break; case TCP_CLOSE: return; default: - sk->sk_err = ECONNRESET; + WRITE_ONCE(sk->sk_err, ECONNRESET); } inet_sk_state_store(sk, TCP_CLOSE); @@ -3791,7 +3791,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, /* This barrier is coupled with smp_wmb() in __mptcp_error_report() */ smp_rmb(); - if (sk->sk_err) + if (READ_ONCE(sk->sk_err)) mask |= EPOLLERR; return mask; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 339a6f072989..e1310bc113be 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -334,10 +334,7 @@ static inline void msk_owned_by_me(const struct mptcp_sock *msk) sock_owned_by_me((const struct sock *)msk); } -static inline struct mptcp_sock *mptcp_sk(const struct sock *sk) -{ - return (struct mptcp_sock *)sk; -} +#define mptcp_sk(ptr) container_of_const(ptr, struct mptcp_sock, sk.icsk_inet.sk) /* the msk socket don't use the backlog, also account for the bulk * free memory @@ -371,7 +368,7 @@ static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk) static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk) { - struct mptcp_sock *msk = mptcp_sk(sk); + const struct mptcp_sock *msk = mptcp_sk(sk); if (!msk->first_pending) return NULL; @@ -382,7 +379,7 @@ static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk) return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list); } -static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk) +static inline struct mptcp_data_frag *mptcp_rtx_head(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 8a9656248b0f..5cef4d3d21ac 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -1046,7 +1046,7 @@ static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); memset(a, 0, sizeof(*a)); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index a0041360ee9d..dadaf85db720 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1350,7 +1350,7 @@ fallback: subflow->reset_reason = MPTCP_RST_EMPTCP; reset: - ssk->sk_err = EBADMSG; + WRITE_ONCE(ssk->sk_err, EBADMSG); tcp_set_state(ssk, TCP_CLOSE); while ((skb = skb_peek(&ssk->sk_receive_queue))) sk_eat_skb(ssk, skb); @@ -1434,7 +1434,7 @@ void __mptcp_error_report(struct sock *sk) ssk_state = inet_sk_state_load(ssk); if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) inet_sk_state_store(sk, ssk_state); - sk->sk_err = -err; + WRITE_ONCE(sk->sk_err, -err); /* This barrier is coupled with smp_rmb() in mptcp_poll() */ smp_wmb(); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 4d6737160857..d0bf630482c1 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -753,7 +753,6 @@ if NETFILTER_XTABLES config NETFILTER_XTABLES_COMPAT bool "Netfilter Xtables 32bit support" depends on COMPAT - default y help This option provides a translation layer to run 32bit arp,ip(6),ebtables binaries on 64bit kernels. diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c6a6a6099b4e..db1ea361f2da 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1294,7 +1294,7 @@ dying: } EXPORT_SYMBOL_GPL(__nf_conntrack_confirm); -/* Returns true if a connection correspondings to the tuple (required +/* Returns true if a connection corresponds to the tuple (required for NAT). */ int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, diff --git a/net/netfilter/nf_conntrack_ovs.c b/net/netfilter/nf_conntrack_ovs.c index 52b776bdf526..068e9489e1c2 100644 --- a/net/netfilter/nf_conntrack_ovs.c +++ b/net/netfilter/nf_conntrack_ovs.c @@ -6,6 +6,7 @@ #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include <net/ipv6_frag.h> #include <net/ip.h> +#include <linux/netfilter_ipv6.h> /* 'skb' should already be pulled to nh_ofs. */ int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct, @@ -120,8 +121,14 @@ int nf_ct_skb_network_trim(struct sk_buff *skb, int family) len = skb_ip_totlen(skb); break; case NFPROTO_IPV6: - len = sizeof(struct ipv6hdr) - + ntohs(ipv6_hdr(skb)->payload_len); + len = ntohs(ipv6_hdr(skb)->payload_len); + if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) { + int err = nf_ip6_check_hbh_len(skb, &len); + + if (err) + return err; + } + len += sizeof(struct ipv6hdr); break; default: len = skb->len; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index e29e4ccb5c5a..ce829d434f13 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -549,8 +549,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) && l4proto_in_range(tuple, maniptype, - &range->min_proto, - &range->max_proto) && + &range->min_proto, + &range->max_proto) && (range->min_proto.all == range->max_proto.all || !nf_nat_used_tuple(tuple, ct))) return; diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c index f91579c821e9..6616ba5d0b04 100644 --- a/net/netfilter/nf_nat_redirect.c +++ b/net/netfilter/nf_nat_redirect.c @@ -10,6 +10,7 @@ #include <linux/if.h> #include <linux/inetdevice.h> +#include <linux/in.h> #include <linux/ip.h> #include <linux/kernel.h> #include <linux/netdevice.h> @@ -24,54 +25,56 @@ #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_redirect.h> +static unsigned int +nf_nat_redirect(struct sk_buff *skb, const struct nf_nat_range2 *range, + const union nf_inet_addr *newdst) +{ + struct nf_nat_range2 newrange; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + + memset(&newrange, 0, sizeof(newrange)); + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr = *newdst; + newrange.max_addr = *newdst; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); +} + unsigned int -nf_nat_redirect_ipv4(struct sk_buff *skb, - const struct nf_nat_ipv4_multi_range_compat *mr, +nf_nat_redirect_ipv4(struct sk_buff *skb, const struct nf_nat_range2 *range, unsigned int hooknum) { - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - __be32 newdst; - struct nf_nat_range2 newrange; + union nf_inet_addr newdst = {}; WARN_ON(hooknum != NF_INET_PRE_ROUTING && hooknum != NF_INET_LOCAL_OUT); - ct = nf_ct_get(skb, &ctinfo); - WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED))); - /* Local packets: make them go to loopback */ if (hooknum == NF_INET_LOCAL_OUT) { - newdst = htonl(0x7F000001); + newdst.ip = htonl(INADDR_LOOPBACK); } else { const struct in_device *indev; - newdst = 0; - indev = __in_dev_get_rcu(skb->dev); if (indev) { const struct in_ifaddr *ifa; ifa = rcu_dereference(indev->ifa_list); if (ifa) - newdst = ifa->ifa_local; + newdst.ip = ifa->ifa_local; } - if (!newdst) + if (!newdst.ip) return NF_DROP; } - /* Transfer from original range. */ - memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); - memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); - newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.ip = newdst; - newrange.max_addr.ip = newdst; - newrange.min_proto = mr->range[0].min; - newrange.max_proto = mr->range[0].max; - - /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); + return nf_nat_redirect(skb, range, &newdst); } EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv4); @@ -81,14 +84,10 @@ unsigned int nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, unsigned int hooknum) { - struct nf_nat_range2 newrange; - struct in6_addr newdst; - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; + union nf_inet_addr newdst = {}; - ct = nf_ct_get(skb, &ctinfo); if (hooknum == NF_INET_LOCAL_OUT) { - newdst = loopback_addr; + newdst.in6 = loopback_addr; } else { struct inet6_dev *idev; struct inet6_ifaddr *ifa; @@ -98,7 +97,7 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, if (idev != NULL) { read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { - newdst = ifa->addr; + newdst.in6 = ifa->addr; addr = true; break; } @@ -109,12 +108,6 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, return NF_DROP; } - newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.in6 = newdst; - newrange.max_addr.in6 = newdst; - newrange.min_proto = range->min_proto; - newrange.max_proto = range->max_proto; - - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); + return nf_nat_redirect(skb, range, &newdst); } EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv6); diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 9544c2f16998..b115d77fbbc7 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -96,23 +96,39 @@ nla_put_failure: return -1; } -static void nft_masq_ipv4_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static void nft_masq_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { - struct nft_masq *priv = nft_expr_priv(expr); + const struct nft_masq *priv = nft_expr_priv(expr); struct nf_nat_range2 range; memset(&range, 0, sizeof(range)); range.flags = priv->flags; if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_min]); - range.max_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_max]); + range.min_proto.all = (__force __be16) + nft_reg_load16(®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16) + nft_reg_load16(®s->data[priv->sreg_proto_max]); + } + + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, + nft_hook(pkt), + &range, + nft_out(pkt)); + break; +#ifdef CONFIG_NF_TABLES_IPV6 + case NFPROTO_IPV6: + regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, + nft_out(pkt)); + break; +#endif + default: + WARN_ON_ONCE(1); + break; } - regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt), - &range, nft_out(pkt)); } static void @@ -125,7 +141,7 @@ static struct nft_expr_type nft_masq_ipv4_type; static const struct nft_expr_ops nft_masq_ipv4_ops = { .type = &nft_masq_ipv4_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), - .eval = nft_masq_ipv4_eval, + .eval = nft_masq_eval, .init = nft_masq_init, .destroy = nft_masq_ipv4_destroy, .dump = nft_masq_dump, @@ -143,25 +159,6 @@ static struct nft_expr_type nft_masq_ipv4_type __read_mostly = { }; #ifdef CONFIG_NF_TABLES_IPV6 -static void nft_masq_ipv6_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_masq *priv = nft_expr_priv(expr); - struct nf_nat_range2 range; - - memset(&range, 0, sizeof(range)); - range.flags = priv->flags; - if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_min]); - range.max_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_max]); - } - regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, - nft_out(pkt)); -} - static void nft_masq_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -172,7 +169,7 @@ static struct nft_expr_type nft_masq_ipv6_type; static const struct nft_expr_ops nft_masq_ipv6_ops = { .type = &nft_masq_ipv6_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), - .eval = nft_masq_ipv6_eval, + .eval = nft_masq_eval, .init = nft_masq_init, .destroy = nft_masq_ipv6_destroy, .dump = nft_masq_dump, @@ -204,20 +201,6 @@ static inline void nft_masq_module_exit_ipv6(void) {} #endif #ifdef CONFIG_NF_TABLES_INET -static void nft_masq_inet_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - switch (nft_pf(pkt)) { - case NFPROTO_IPV4: - return nft_masq_ipv4_eval(expr, regs, pkt); - case NFPROTO_IPV6: - return nft_masq_ipv6_eval(expr, regs, pkt); - } - - WARN_ON_ONCE(1); -} - static void nft_masq_inet_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -228,7 +211,7 @@ static struct nft_expr_type nft_masq_inet_type; static const struct nft_expr_ops nft_masq_inet_ops = { .type = &nft_masq_inet_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), - .eval = nft_masq_inet_eval, + .eval = nft_masq_eval, .init = nft_masq_init, .destroy = nft_masq_inet_destroy, .dump = nft_masq_dump, diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 67cec56bc84a..a70196ffcb1e 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -64,6 +64,8 @@ static int nft_redir_init(const struct nft_ctx *ctx, } else { priv->sreg_proto_max = priv->sreg_proto_min; } + + priv->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } if (tb[NFTA_REDIR_FLAGS]) { @@ -99,25 +101,37 @@ nla_put_failure: return -1; } -static void nft_redir_ipv4_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static void nft_redir_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { - struct nft_redir *priv = nft_expr_priv(expr); - struct nf_nat_ipv4_multi_range_compat mr; + const struct nft_redir *priv = nft_expr_priv(expr); + struct nf_nat_range2 range; - memset(&mr, 0, sizeof(mr)); + memset(&range, 0, sizeof(range)); + range.flags = priv->flags; if (priv->sreg_proto_min) { - mr.range[0].min.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_min]); - mr.range[0].max.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_max]); - mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + range.min_proto.all = (__force __be16) + nft_reg_load16(®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16) + nft_reg_load16(®s->data[priv->sreg_proto_max]); } - mr.range[0].flags |= priv->flags; - - regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, nft_hook(pkt)); + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &range, + nft_hook(pkt)); + break; +#ifdef CONFIG_NF_TABLES_IPV6 + case NFPROTO_IPV6: + regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, + nft_hook(pkt)); + break; +#endif + default: + WARN_ON_ONCE(1); + break; + } } static void @@ -130,7 +144,7 @@ static struct nft_expr_type nft_redir_ipv4_type; static const struct nft_expr_ops nft_redir_ipv4_ops = { .type = &nft_redir_ipv4_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), - .eval = nft_redir_ipv4_eval, + .eval = nft_redir_eval, .init = nft_redir_init, .destroy = nft_redir_ipv4_destroy, .dump = nft_redir_dump, @@ -148,28 +162,6 @@ static struct nft_expr_type nft_redir_ipv4_type __read_mostly = { }; #ifdef CONFIG_NF_TABLES_IPV6 -static void nft_redir_ipv6_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_redir *priv = nft_expr_priv(expr); - struct nf_nat_range2 range; - - memset(&range, 0, sizeof(range)); - if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_min]); - range.max_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_max]); - range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - } - - range.flags |= priv->flags; - - regs->verdict.code = - nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt)); -} - static void nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -180,7 +172,7 @@ static struct nft_expr_type nft_redir_ipv6_type; static const struct nft_expr_ops nft_redir_ipv6_ops = { .type = &nft_redir_ipv6_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), - .eval = nft_redir_ipv6_eval, + .eval = nft_redir_eval, .init = nft_redir_init, .destroy = nft_redir_ipv6_destroy, .dump = nft_redir_dump, @@ -199,20 +191,6 @@ static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { #endif #ifdef CONFIG_NF_TABLES_INET -static void nft_redir_inet_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - switch (nft_pf(pkt)) { - case NFPROTO_IPV4: - return nft_redir_ipv4_eval(expr, regs, pkt); - case NFPROTO_IPV6: - return nft_redir_ipv6_eval(expr, regs, pkt); - } - - WARN_ON_ONCE(1); -} - static void nft_redir_inet_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -223,7 +201,7 @@ static struct nft_expr_type nft_redir_inet_type; static const struct nft_expr_ops nft_redir_inet_ops = { .type = &nft_redir_inet_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), - .eval = nft_redir_inet_eval, + .eval = nft_redir_eval, .init = nft_redir_init, .destroy = nft_redir_inet_destroy, .dump = nft_redir_dump, diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c index 2182d361e273..acef4155f0da 100644 --- a/net/netfilter/utils.c +++ b/net/netfilter/utils.c @@ -215,3 +215,55 @@ int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry) } return ret; } + +/* Only get and check the lengths, not do any hop-by-hop stuff. */ +int nf_ip6_check_hbh_len(struct sk_buff *skb, u32 *plen) +{ + int len, off = sizeof(struct ipv6hdr); + unsigned char *nh; + + if (!pskb_may_pull(skb, off + 8)) + return -ENOMEM; + nh = (unsigned char *)(ipv6_hdr(skb) + 1); + len = (nh[1] + 1) << 3; + + if (!pskb_may_pull(skb, off + len)) + return -ENOMEM; + nh = skb_network_header(skb); + + off += 2; + len -= 2; + while (len > 0) { + int optlen; + + if (nh[off] == IPV6_TLV_PAD1) { + off++; + len--; + continue; + } + if (len < 2) + return -EBADMSG; + optlen = nh[off + 1] + 2; + if (optlen > len) + return -EBADMSG; + + if (nh[off] == IPV6_TLV_JUMBO) { + u32 pkt_len; + + if (nh[off + 1] != 4 || (off & 3) != 2) + return -EBADMSG; + pkt_len = ntohl(*(__be32 *)(nh + off + 2)); + if (pkt_len <= IPV6_MAXPLEN || + ipv6_hdr(skb)->payload_len) + return -EBADMSG; + if (pkt_len > skb->len - sizeof(struct ipv6hdr)) + return -EBADMSG; + *plen = pkt_len; + } + off += optlen; + len -= optlen; + } + + return len ? -EBADMSG : 0; +} +EXPORT_SYMBOL_GPL(nf_ip6_check_hbh_len); diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c index 353ca7801251..ff66b56a3f97 100644 --- a/net/netfilter/xt_REDIRECT.c +++ b/net/netfilter/xt_REDIRECT.c @@ -46,7 +46,6 @@ static void redirect_tg_destroy(const struct xt_tgdtor_param *par) nf_ct_netns_put(par->net, par->family); } -/* FIXME: Take multiple ranges --RR */ static int redirect_tg4_check(const struct xt_tgchk_param *par) { const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; @@ -65,7 +64,14 @@ static int redirect_tg4_check(const struct xt_tgchk_param *par) static unsigned int redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par) { - return nf_nat_redirect_ipv4(skb, par->targinfo, xt_hooknum(par)); + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + struct nf_nat_range2 range = { + .flags = mr->range[0].flags, + .min_proto = mr->range[0].min, + .max_proto = mr->range[0].max, + }; + + return nf_nat_redirect_ipv4(skb, &range, xt_hooknum(par)); } static struct xt_target redirect_tg_reg[] __read_mostly = { diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index 11ec2abf0c72..e8991130a3de 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -4,6 +4,7 @@ #include <linux/module.h> #include <net/ip.h> #include <linux/ipv6.h> +#include <linux/icmp.h> #include <net/ipv6.h> #include <net/tcp.h> #include <net/udp.h> @@ -20,6 +21,8 @@ MODULE_ALIAS("ipt_udp"); MODULE_ALIAS("ipt_tcp"); MODULE_ALIAS("ip6t_udp"); MODULE_ALIAS("ip6t_tcp"); +MODULE_ALIAS("ipt_icmp"); +MODULE_ALIAS("ip6t_icmp6"); /* Returns 1 if the port is matched by the range, 0 otherwise */ static inline bool @@ -161,6 +164,95 @@ static int udp_mt_check(const struct xt_mtchk_param *par) return (udpinfo->invflags & ~XT_UDP_INV_MASK) ? -EINVAL : 0; } +/* Returns 1 if the type and code is matched by the range, 0 otherwise */ +static bool type_code_in_range(u8 test_type, u8 min_code, u8 max_code, + u8 type, u8 code) +{ + return type == test_type && code >= min_code && code <= max_code; +} + +static bool icmp_type_code_match(u8 test_type, u8 min_code, u8 max_code, + u8 type, u8 code, bool invert) +{ + return (test_type == 0xFF || + type_code_in_range(test_type, min_code, max_code, type, code)) + ^ invert; +} + +static bool icmp6_type_code_match(u8 test_type, u8 min_code, u8 max_code, + u8 type, u8 code, bool invert) +{ + return type_code_in_range(test_type, min_code, max_code, type, code) ^ invert; +} + +static bool +icmp_match(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct icmphdr *ic; + struct icmphdr _icmph; + const struct ipt_icmp *icmpinfo = par->matchinfo; + + /* Must not be a fragment. */ + if (par->fragoff != 0) + return false; + + ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); + if (!ic) { + /* We've been asked to examine this packet, and we + * can't. Hence, no choice but to drop. + */ + par->hotdrop = true; + return false; + } + + return icmp_type_code_match(icmpinfo->type, + icmpinfo->code[0], + icmpinfo->code[1], + ic->type, ic->code, + !!(icmpinfo->invflags & IPT_ICMP_INV)); +} + +static bool +icmp6_match(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct icmp6hdr *ic; + struct icmp6hdr _icmph; + const struct ip6t_icmp *icmpinfo = par->matchinfo; + + /* Must not be a fragment. */ + if (par->fragoff != 0) + return false; + + ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); + if (!ic) { + /* We've been asked to examine this packet, and we + * can't. Hence, no choice but to drop. + */ + par->hotdrop = true; + return false; + } + + return icmp6_type_code_match(icmpinfo->type, + icmpinfo->code[0], + icmpinfo->code[1], + ic->icmp6_type, ic->icmp6_code, + !!(icmpinfo->invflags & IP6T_ICMP_INV)); +} + +static int icmp_checkentry(const struct xt_mtchk_param *par) +{ + const struct ipt_icmp *icmpinfo = par->matchinfo; + + return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0; +} + +static int icmp6_checkentry(const struct xt_mtchk_param *par) +{ + const struct ip6t_icmp *icmpinfo = par->matchinfo; + + return (icmpinfo->invflags & ~IP6T_ICMP_INV) ? -EINVAL : 0; +} + static struct xt_match tcpudp_mt_reg[] __read_mostly = { { .name = "tcp", @@ -216,6 +308,24 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = { .proto = IPPROTO_UDPLITE, .me = THIS_MODULE, }, + { + .name = "icmp", + .match = icmp_match, + .matchsize = sizeof(struct ipt_icmp), + .checkentry = icmp_checkentry, + .proto = IPPROTO_ICMP, + .family = NFPROTO_IPV4, + .me = THIS_MODULE, + }, + { + .name = "icmp6", + .match = icmp6_match, + .matchsize = sizeof(struct ip6t_icmp), + .checkentry = icmp6_checkentry, + .proto = IPPROTO_ICMPV6, + .family = NFPROTO_IPV6, + .me = THIS_MODULE, + }, }; static int __init tcpudp_mt_init(void) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c64277659753..877f1da1a8ac 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2097,8 +2097,6 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, nl_table[unit].bind = cfg->bind; nl_table[unit].unbind = cfg->unbind; nl_table[unit].flags = cfg->flags; - if (cfg->compare) - nl_table[unit].compare = cfg->compare; } nl_table[unit].registered = 1; } else { diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 5f454c8de6a4..90a3198a9b7f 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -64,7 +64,6 @@ struct netlink_table { struct module *module; int (*bind)(struct net *net, int group); void (*unbind)(struct net *net, int group); - bool (*compare)(struct net *net, struct sock *sock); int registered; }; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d4e76e2ae153..497193f73030 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -270,8 +270,11 @@ static noinline struct sk_buff *nf_hook_direct_egress(struct sk_buff *skb) } #endif -static int packet_direct_xmit(struct sk_buff *skb) +static int packet_xmit(const struct packet_sock *po, struct sk_buff *skb) { + if (!packet_sock_flag(po, PACKET_SOCK_QDISC_BYPASS)) + return dev_queue_xmit(skb); + #ifdef CONFIG_NETFILTER_EGRESS if (nf_hook_egress_active()) { skb = nf_hook_direct_egress(skb); @@ -305,11 +308,6 @@ static void packet_cached_dev_reset(struct packet_sock *po) RCU_INIT_POINTER(po->cached_dev, NULL); } -static bool packet_use_direct_xmit(const struct packet_sock *po) -{ - return po->xmit == packet_direct_xmit; -} - static u16 packet_pick_tx_queue(struct sk_buff *skb) { struct net_device *dev = skb->dev; @@ -339,14 +337,14 @@ static void __register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); - if (!po->running) { + if (!packet_sock_flag(po, PACKET_SOCK_RUNNING)) { if (po->fanout) __fanout_link(sk, po); else dev_add_pack(&po->prot_hook); sock_hold(sk); - po->running = 1; + packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 1); } } @@ -368,7 +366,7 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) lockdep_assert_held_once(&po->bind_lock); - po->running = 0; + packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 0); if (po->fanout) __fanout_unlink(sk, po); @@ -388,7 +386,7 @@ static void unregister_prot_hook(struct sock *sk, bool sync) { struct packet_sock *po = pkt_sk(sk); - if (po->running) + if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) __unregister_prot_hook(sk, sync); } @@ -473,7 +471,7 @@ static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame, struct timespec64 ts; __u32 ts_status; - if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) + if (!(ts_status = tpacket_get_timestamp(skb, &ts, READ_ONCE(po->tp_tstamp)))) return 0; h.raw = frame; @@ -1306,22 +1304,23 @@ static int __packet_rcv_has_room(const struct packet_sock *po, static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) { - int pressure, ret; + bool pressure; + int ret; ret = __packet_rcv_has_room(po, skb); pressure = ret != ROOM_NORMAL; - if (READ_ONCE(po->pressure) != pressure) - WRITE_ONCE(po->pressure, pressure); + if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) != pressure) + packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, pressure); return ret; } static void packet_rcv_try_clear_pressure(struct packet_sock *po) { - if (READ_ONCE(po->pressure) && + if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL) - WRITE_ONCE(po->pressure, 0); + packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, false); } static void packet_sock_destruct(struct sock *sk) @@ -1408,7 +1407,8 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f, i = j = min_t(int, po->rollover->sock, num - 1); do { po_next = pkt_sk(rcu_dereference(f->arr[i])); - if (po_next != po_skip && !READ_ONCE(po_next->pressure) && + if (po_next != po_skip && + !packet_sock_flag(po_next, PACKET_SOCK_PRESSURE) && packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) { if (i != j) po->rollover->sock = i; @@ -1781,7 +1781,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args) err = -EINVAL; spin_lock(&po->bind_lock); - if (po->running && + if (packet_sock_flag(po, PACKET_SOCK_RUNNING) && match->type == type && match->prot_hook.type == po->prot_hook.type && match->prot_hook.dev == po->prot_hook.dev) { @@ -2183,7 +2183,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, sll = &PACKET_SKB_CB(skb)->sa.ll; sll->sll_hatype = dev->type; sll->sll_pkttype = skb->pkt_type; - if (unlikely(po->origdev)) + if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; @@ -2308,7 +2308,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, netoff = TPACKET_ALIGN(po->tp_hdrlen + (maclen < 16 ? 16 : maclen)) + po->tp_reserve; - if (po->has_vnet_hdr) { + if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) { netoff += sizeof(struct virtio_net_hdr); do_vnet = true; } @@ -2402,7 +2402,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, * closer to the time of capture. */ ts_status = tpacket_get_timestamp(skb, &ts, - po->tp_tstamp | SOF_TIMESTAMPING_SOFTWARE); + READ_ONCE(po->tp_tstamp) | + SOF_TIMESTAMPING_SOFTWARE); if (!ts_status) ktime_get_real_ts64(&ts); @@ -2460,7 +2461,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, sll->sll_hatype = dev->type; sll->sll_protocol = skb->protocol; sll->sll_pkttype = skb->pkt_type; - if (unlikely(po->origdev)) + if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; @@ -2670,7 +2671,7 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame, return -EMSGSIZE; } - if (unlikely(po->tp_tx_has_off)) { + if (unlikely(packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF))) { int off_min, off_max; off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); @@ -2778,7 +2779,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); - if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr) + if ((size_max > dev->mtu + reserve + VLAN_HLEN) && + !packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) size_max = dev->mtu + reserve + VLAN_HLEN; reinit_completion(&po->skb_completion); @@ -2807,7 +2809,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) status = TP_STATUS_SEND_REQUEST; hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; - if (po->has_vnet_hdr) { + if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) { vnet_hdr = data; data += sizeof(*vnet_hdr); tp_len -= sizeof(*vnet_hdr); @@ -2835,13 +2837,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) addr, hlen, copylen, &sockc); if (likely(tp_len >= 0) && tp_len > dev->mtu + reserve && - !po->has_vnet_hdr && + !packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR) && !packet_extra_vlan_len_allowed(dev, skb)) tp_len = -EMSGSIZE; if (unlikely(tp_len < 0)) { tpacket_error: - if (po->tp_loss) { + if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS)) { __packet_set_status(po, ph, TP_STATUS_AVAILABLE); packet_increment_head(&po->tx_ring); @@ -2854,7 +2856,7 @@ tpacket_error: } } - if (po->has_vnet_hdr) { + if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) { if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) { tp_len = -EINVAL; goto tpacket_error; @@ -2867,7 +2869,7 @@ tpacket_error: packet_inc_pending(&po->tx_ring); status = TP_STATUS_SEND_REQUEST; - err = po->xmit(skb); + err = packet_xmit(po, skb); if (unlikely(err != 0)) { if (err > 0) err = net_xmit_errno(err); @@ -2988,7 +2990,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (sock->type == SOCK_RAW) reserve = dev->hard_header_len; - if (po->has_vnet_hdr) { + if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) { err = packet_snd_vnet_parse(msg, &len, &vnet_hdr); if (err) goto out_unlock; @@ -3070,7 +3072,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) virtio_net_hdr_set_proto(skb, &vnet_hdr); } - err = po->xmit(skb); + err = packet_xmit(po, skb); + if (unlikely(err != 0)) { if (err > 0) err = net_xmit_errno(err); @@ -3217,7 +3220,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, if (need_rehook) { dev_hold(dev); - if (po->running) { + if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { rcu_read_unlock(); /* prevents packet_notifier() from calling * register_prot_hook() @@ -3230,7 +3233,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, dev->ifindex); } - BUG_ON(po->running); + BUG_ON(packet_sock_flag(po, PACKET_SOCK_RUNNING)); WRITE_ONCE(po->num, proto); po->prot_hook.type = proto; @@ -3352,7 +3355,6 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, init_completion(&po->skb_completion); sk->sk_family = PF_PACKET; po->num = proto; - po->xmit = dev_queue_xmit; err = packet_alloc_pending(po); if (err) @@ -3447,7 +3449,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, packet_rcv_try_clear_pressure(pkt_sk(sk)); - if (pkt_sk(sk)->has_vnet_hdr) { + if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_HAS_VNET_HDR)) { err = packet_rcv_vnet(msg, skb, &len); if (err) goto out_free; @@ -3511,7 +3513,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } - if (pkt_sk(sk)->auxdata) { + if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_AUXDATA)) { struct tpacket_auxdata aux; aux.tp_status = TP_STATUS_USER; @@ -3882,7 +3884,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { - po->tp_loss = !!val; + packet_sock_flag_set(po, PACKET_SOCK_TP_LOSS, val); ret = 0; } release_sock(sk); @@ -3897,9 +3899,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; - lock_sock(sk); - po->auxdata = !!val; - release_sock(sk); + packet_sock_flag_set(po, PACKET_SOCK_AUXDATA, val); return 0; } case PACKET_ORIGDEV: @@ -3911,9 +3911,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; - lock_sock(sk); - po->origdev = !!val; - release_sock(sk); + packet_sock_flag_set(po, PACKET_SOCK_ORIGDEV, val); return 0; } case PACKET_VNET_HDR: @@ -3931,7 +3929,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { - po->has_vnet_hdr = !!val; + packet_sock_flag_set(po, PACKET_SOCK_HAS_VNET_HDR, val); ret = 0; } release_sock(sk); @@ -3946,7 +3944,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; - po->tp_tstamp = val; + WRITE_ONCE(po->tp_tstamp, val); return 0; } case PACKET_FANOUT: @@ -3993,7 +3991,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, lock_sock(sk); if (!po->rx_ring.pg_vec && !po->tx_ring.pg_vec) - po->tp_tx_has_off = !!val; + packet_sock_flag_set(po, PACKET_SOCK_TX_HAS_OFF, val); release_sock(sk); return 0; @@ -4007,7 +4005,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; - po->xmit = val ? packet_direct_xmit : dev_queue_xmit; + packet_sock_flag_set(po, PACKET_SOCK_QDISC_BYPASS, val); return 0; } default: @@ -4058,13 +4056,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, break; case PACKET_AUXDATA: - val = po->auxdata; + val = packet_sock_flag(po, PACKET_SOCK_AUXDATA); break; case PACKET_ORIGDEV: - val = po->origdev; + val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV); break; case PACKET_VNET_HDR: - val = po->has_vnet_hdr; + val = packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR); break; case PACKET_VERSION: val = po->tp_version; @@ -4094,10 +4092,10 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, val = po->tp_reserve; break; case PACKET_LOSS: - val = po->tp_loss; + val = packet_sock_flag(po, PACKET_SOCK_TP_LOSS); break; case PACKET_TIMESTAMP: - val = po->tp_tstamp; + val = READ_ONCE(po->tp_tstamp); break; case PACKET_FANOUT: val = (po->fanout ? @@ -4119,10 +4117,10 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, lv = sizeof(rstats); break; case PACKET_TX_HAS_OFF: - val = po->tp_tx_has_off; + val = packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF); break; case PACKET_QDISC_BYPASS: - val = packet_use_direct_xmit(po); + val = packet_sock_flag(po, PACKET_SOCK_QDISC_BYPASS); break; default: return -ENOPROTOOPT; @@ -4157,7 +4155,7 @@ static int packet_notifier(struct notifier_block *this, case NETDEV_DOWN: if (dev->ifindex == po->ifindex) { spin_lock(&po->bind_lock); - if (po->running) { + if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { __unregister_prot_hook(sk, false); sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) @@ -4468,7 +4466,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, /* Detach socket from network */ spin_lock(&po->bind_lock); - was_running = po->running; + was_running = packet_sock_flag(po, PACKET_SOCK_RUNNING); num = po->num; if (was_running) { WRITE_ONCE(po->num, 0); @@ -4679,7 +4677,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) s->sk_type, ntohs(READ_ONCE(po->num)), READ_ONCE(po->ifindex), - po->running, + packet_sock_flag(po, PACKET_SOCK_RUNNING), atomic_read(&s->sk_rmem_alloc), from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)), sock_i_ino(s)); diff --git a/net/packet/diag.c b/net/packet/diag.c index 07812ae5ca07..de4ced5cf3e8 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -18,18 +18,18 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) pinfo.pdi_version = po->tp_version; pinfo.pdi_reserve = po->tp_reserve; pinfo.pdi_copy_thresh = po->copy_thresh; - pinfo.pdi_tstamp = po->tp_tstamp; + pinfo.pdi_tstamp = READ_ONCE(po->tp_tstamp); pinfo.pdi_flags = 0; - if (po->running) + if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) pinfo.pdi_flags |= PDI_RUNNING; - if (po->auxdata) + if (packet_sock_flag(po, PACKET_SOCK_AUXDATA)) pinfo.pdi_flags |= PDI_AUXDATA; - if (po->origdev) + if (packet_sock_flag(po, PACKET_SOCK_ORIGDEV)) pinfo.pdi_flags |= PDI_ORIGDEV; - if (po->has_vnet_hdr) + if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) pinfo.pdi_flags |= PDI_VNETHDR; - if (po->tp_loss) + if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS)) pinfo.pdi_flags |= PDI_LOSS; return nla_put(nlskb, PACKET_DIAG_INFO, sizeof(pinfo), &pinfo); diff --git a/net/packet/internal.h b/net/packet/internal.h index 48af35b1aed2..27930f69f368 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -116,13 +116,7 @@ struct packet_sock { int copy_thresh; spinlock_t bind_lock; struct mutex pg_vec_lock; - unsigned int running; /* bind_lock must be held */ - unsigned int auxdata:1, /* writer must hold sock lock */ - origdev:1, - has_vnet_hdr:1, - tp_loss:1, - tp_tx_has_off:1; - int pressure; + unsigned long flags; int ifindex; /* bound device */ __be16 num; struct packet_rollover *rollover; @@ -134,14 +128,37 @@ struct packet_sock { unsigned int tp_tstamp; struct completion skb_completion; struct net_device __rcu *cached_dev; - int (*xmit)(struct sk_buff *skb); struct packet_type prot_hook ____cacheline_aligned_in_smp; atomic_t tp_drops ____cacheline_aligned_in_smp; }; -static inline struct packet_sock *pkt_sk(struct sock *sk) +#define pkt_sk(ptr) container_of_const(ptr, struct packet_sock, sk) + +enum packet_sock_flags { + PACKET_SOCK_ORIGDEV, + PACKET_SOCK_AUXDATA, + PACKET_SOCK_TX_HAS_OFF, + PACKET_SOCK_TP_LOSS, + PACKET_SOCK_HAS_VNET_HDR, + PACKET_SOCK_RUNNING, + PACKET_SOCK_PRESSURE, + PACKET_SOCK_QDISC_BYPASS, +}; + +static inline void packet_sock_flag_set(struct packet_sock *po, + enum packet_sock_flags flag, + bool val) +{ + if (val) + set_bit(flag, &po->flags); + else + clear_bit(flag, &po->flags); +} + +static inline bool packet_sock_flag(const struct packet_sock *po, + enum packet_sock_flags flag) { - return (struct packet_sock *)sk; + return test_bit(flag, &po->flags); } #endif diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 296fc1afedd8..f7887f42d542 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -453,7 +453,7 @@ static size_t tcf_action_shared_attrs_size(const struct tc_action *act) + nla_total_size_64bit(sizeof(u64)) /* TCA_STATS_QUEUE */ + nla_total_size_64bit(sizeof(struct gnet_stats_queue)) - + nla_total_size(0) /* TCA_OPTIONS nested */ + + nla_total_size(0) /* TCA_ACT_OPTIONS nested */ + nla_total_size(sizeof(struct tcf_t)); /* TCA_GACT_TM */ } @@ -480,7 +480,7 @@ tcf_action_dump_terse(struct sk_buff *skb, struct tc_action *a, bool from_act) unsigned char *b = skb_tail_pointer(skb); struct tc_cookie *cookie; - if (nla_put_string(skb, TCA_KIND, a->ops->kind)) + if (nla_put_string(skb, TCA_ACT_KIND, a->ops->kind)) goto nla_put_failure; if (tcf_action_copy_stats(skb, a, 0)) goto nla_put_failure; @@ -598,7 +598,7 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, nest = nla_nest_start_noflag(skb, 0); if (nest == NULL) goto nla_put_failure; - if (nla_put_string(skb, TCA_KIND, ops->kind)) + if (nla_put_string(skb, TCA_ACT_KIND, ops->kind)) goto nla_put_failure; ret = 0; @@ -1189,7 +1189,7 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) if (nla_put_u32(skb, TCA_ACT_IN_HW_COUNT, a->in_hw_count)) goto nla_put_failure; - nest = nla_nest_start_noflag(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_ACT_OPTIONS); if (nest == NULL) goto nla_put_failure; err = tcf_action_dump_old(skb, a, bind, ref); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 8037ec9b1d31..ec43764e92e7 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -295,7 +295,7 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, at_nh = skb->data == skb_network_header(skb); if (at_nh != expects_nh) { mac_len = skb_at_tc_ingress(skb) ? skb->mac_len : - skb_network_header(skb) - skb_mac_header(skb); + skb_network_offset(skb); if (expects_nh) { /* target device/action expect data at nh */ skb_pull_rcsum(skb2, mac_len); diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 809f7928a1be..1010dc632874 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -69,7 +69,7 @@ TC_INDIRECT_SCOPE int tcf_mpls_act(struct sk_buff *skb, skb_push_rcsum(skb, skb->mac_len); mac_len = skb->mac_len; } else { - mac_len = skb_network_header(skb) - skb_mac_header(skb); + mac_len = skb_network_offset(skb); } ret = READ_ONCE(m->tcf_action); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 475fe222a855..cc49256d5318 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1057,7 +1057,7 @@ static void fl_set_key_pppoe(struct nlattr **tb, * because ETH_P_PPP_SES was stored in basic.n_proto * which might get overwritten by ppp_proto * or might be set to 0, the role of key_val::type - * is simmilar to vlan_key::tpid + * is similar to vlan_key::tpid */ key_val->type = htons(ETH_P_PPP_SES); key_mask->type = cpu_to_be16(~0); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 49bae3d5006b..af85a73c4c54 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -44,7 +44,7 @@ * be provided for non-numeric types. * * Additionally, type dependent modifiers such as shift operators - * or mask may be applied to extend the functionaliy. As of now, + * or mask may be applied to extend the functionality. As of now, * the variable length type supports shifting the byte string to * the right, eating up any number of octets and thus supporting * wildcard interface name comparisons such as "ppp%" matching diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index aba789c30a2e..fdb8f429333d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -639,14 +639,16 @@ void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires, return; if (hrtimer_is_queued(&wd->timer)) { + u64 softexpires; + + softexpires = ktime_to_ns(hrtimer_get_softexpires(&wd->timer)); /* If timer is already set in [expires, expires + delta_ns], * do not reprogram it. */ - if (wd->last_expires - expires <= delta_ns) + if (softexpires - expires <= delta_ns) return; } - wd->last_expires = expires; hrtimer_start_range_ns(&wd->timer, ns_to_ktime(expires), delta_ns, diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 7970217b565a..891e007d5c0b 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1360,7 +1360,7 @@ static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb) return cake_calc_overhead(q, len, off); /* borrowed from qdisc_pkt_len_init() */ - hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + hdr_len = skb_transport_offset(skb); /* + transport layer */ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | @@ -1368,14 +1368,14 @@ static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb) const struct tcphdr *th; struct tcphdr _tcphdr; - th = skb_header_pointer(skb, skb_transport_offset(skb), + th = skb_header_pointer(skb, hdr_len, sizeof(_tcphdr), &_tcphdr); if (likely(th)) hdr_len += __tcp_hdrlen(th); } else { struct udphdr _udphdr; - if (skb_header_pointer(skb, skb_transport_offset(skb), + if (skb_header_pointer(skb, hdr_len, sizeof(_udphdr), &_udphdr)) hdr_len += sizeof(struct udphdr); } diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 265c238047a4..2152a56d73f8 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -319,7 +319,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars, } /* If qdelay is zero and backlog is not, it means backlog is very small, - * so we do not update probabilty in this round. + * so we do not update probability in this round. */ if (qdelay == 0 && backlog != 0) update_prob = false; diff --git a/net/sctp/Makefile b/net/sctp/Makefile index e845e4588535..0448398408d8 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -13,7 +13,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ tsnmap.o bind_addr.o socket.o primitive.o \ output.o input.o debug.o stream.o auth.o \ offload.o stream_sched.o stream_sched_prio.o \ - stream_sched_rr.o stream_interleave.o + stream_sched_rr.o stream_sched_fc.o \ + stream_interleave.o sctp_diag-y := diag.o diff --git a/net/sctp/input.c b/net/sctp/input.c index bf70371301ff..127bf28a6033 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -585,7 +585,7 @@ static void sctp_v4_err_handle(struct sctp_transport *t, struct sk_buff *skb, sk->sk_err = err; sk_error_report(sk); } else { /* Only an error on timeout */ - sk->sk_err_soft = err; + WRITE_ONCE(sk->sk_err_soft, err); } } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 62b436a2c8fe..43f2731bf590 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -155,7 +155,7 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb, sk->sk_err = err; sk_error_report(sk); } else { - sk->sk_err_soft = err; + WRITE_ONCE(sk->sk_err_soft, err); } } diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 330067002deb..e843760e9aaa 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -124,6 +124,8 @@ void sctp_sched_ops_init(void) sctp_sched_ops_fcfs_init(); sctp_sched_ops_prio_init(); sctp_sched_ops_rr_init(); + sctp_sched_ops_fc_init(); + sctp_sched_ops_wfq_init(); } static void sctp_sched_free_sched(struct sctp_stream *stream) diff --git a/net/sctp/stream_sched_fc.c b/net/sctp/stream_sched_fc.c new file mode 100644 index 000000000000..4bd18a497a6d --- /dev/null +++ b/net/sctp/stream_sched_fc.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2022 + * + * This file is part of the SCTP kernel implementation + * + * These functions manipulate sctp stream queue/scheduling. + * + * Please send any bug reports or fixes you make to the + * email addresched(es): + * lksctp developers <linux-sctp@vger.kernel.org> + * + * Written or modified by: + * Xin Long <lucien.xin@gmail.com> + */ + +#include <linux/list.h> +#include <net/sctp/sctp.h> +#include <net/sctp/sm.h> +#include <net/sctp/stream_sched.h> + +/* Fair Capacity and Weighted Fair Queueing handling + * RFC 8260 section 3.5 and 3.6 + */ +static void sctp_sched_fc_unsched_all(struct sctp_stream *stream); + +static int sctp_sched_wfq_set(struct sctp_stream *stream, __u16 sid, + __u16 weight, gfp_t gfp) +{ + struct sctp_stream_out_ext *soute = SCTP_SO(stream, sid)->ext; + + if (!weight) + return -EINVAL; + + soute->fc_weight = weight; + return 0; +} + +static int sctp_sched_wfq_get(struct sctp_stream *stream, __u16 sid, + __u16 *value) +{ + struct sctp_stream_out_ext *soute = SCTP_SO(stream, sid)->ext; + + *value = soute->fc_weight; + return 0; +} + +static int sctp_sched_fc_set(struct sctp_stream *stream, __u16 sid, + __u16 weight, gfp_t gfp) +{ + return 0; +} + +static int sctp_sched_fc_get(struct sctp_stream *stream, __u16 sid, + __u16 *value) +{ + return 0; +} + +static int sctp_sched_fc_init(struct sctp_stream *stream) +{ + INIT_LIST_HEAD(&stream->fc_list); + + return 0; +} + +static int sctp_sched_fc_init_sid(struct sctp_stream *stream, __u16 sid, + gfp_t gfp) +{ + struct sctp_stream_out_ext *soute = SCTP_SO(stream, sid)->ext; + + INIT_LIST_HEAD(&soute->fc_list); + soute->fc_length = 0; + soute->fc_weight = 1; + + return 0; +} + +static void sctp_sched_fc_free_sid(struct sctp_stream *stream, __u16 sid) +{ +} + +static void sctp_sched_fc_sched(struct sctp_stream *stream, + struct sctp_stream_out_ext *soute) +{ + struct sctp_stream_out_ext *pos; + + if (!list_empty(&soute->fc_list)) + return; + + list_for_each_entry(pos, &stream->fc_list, fc_list) + if ((__u64)pos->fc_length * soute->fc_weight >= + (__u64)soute->fc_length * pos->fc_weight) + break; + list_add_tail(&soute->fc_list, &pos->fc_list); +} + +static void sctp_sched_fc_enqueue(struct sctp_outq *q, + struct sctp_datamsg *msg) +{ + struct sctp_stream *stream; + struct sctp_chunk *ch; + __u16 sid; + + ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list); + sid = sctp_chunk_stream_no(ch); + stream = &q->asoc->stream; + sctp_sched_fc_sched(stream, SCTP_SO(stream, sid)->ext); +} + +static struct sctp_chunk *sctp_sched_fc_dequeue(struct sctp_outq *q) +{ + struct sctp_stream *stream = &q->asoc->stream; + struct sctp_stream_out_ext *soute; + struct sctp_chunk *ch; + + /* Bail out quickly if queue is empty */ + if (list_empty(&q->out_chunk_list)) + return NULL; + + /* Find which chunk is next */ + if (stream->out_curr) + soute = stream->out_curr->ext; + else + soute = list_entry(stream->fc_list.next, struct sctp_stream_out_ext, fc_list); + ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list); + + sctp_sched_dequeue_common(q, ch); + return ch; +} + +static void sctp_sched_fc_dequeue_done(struct sctp_outq *q, + struct sctp_chunk *ch) +{ + struct sctp_stream *stream = &q->asoc->stream; + struct sctp_stream_out_ext *soute, *pos; + __u16 sid, i; + + sid = sctp_chunk_stream_no(ch); + soute = SCTP_SO(stream, sid)->ext; + /* reduce all fc_lengths by U32_MAX / 4 if the current fc_length overflows. */ + if (soute->fc_length > U32_MAX - ch->skb->len) { + for (i = 0; i < stream->outcnt; i++) { + pos = SCTP_SO(stream, i)->ext; + if (!pos) + continue; + if (pos->fc_length <= (U32_MAX >> 2)) { + pos->fc_length = 0; + continue; + } + pos->fc_length -= (U32_MAX >> 2); + } + } + soute->fc_length += ch->skb->len; + + if (list_empty(&soute->outq)) { + list_del_init(&soute->fc_list); + return; + } + + pos = soute; + list_for_each_entry_continue(pos, &stream->fc_list, fc_list) + if ((__u64)pos->fc_length * soute->fc_weight >= + (__u64)soute->fc_length * pos->fc_weight) + break; + list_move_tail(&soute->fc_list, &pos->fc_list); +} + +static void sctp_sched_fc_sched_all(struct sctp_stream *stream) +{ + struct sctp_association *asoc; + struct sctp_chunk *ch; + + asoc = container_of(stream, struct sctp_association, stream); + list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) { + __u16 sid = sctp_chunk_stream_no(ch); + + if (SCTP_SO(stream, sid)->ext) + sctp_sched_fc_sched(stream, SCTP_SO(stream, sid)->ext); + } +} + +static void sctp_sched_fc_unsched_all(struct sctp_stream *stream) +{ + struct sctp_stream_out_ext *soute, *tmp; + + list_for_each_entry_safe(soute, tmp, &stream->fc_list, fc_list) + list_del_init(&soute->fc_list); +} + +static struct sctp_sched_ops sctp_sched_fc = { + .set = sctp_sched_fc_set, + .get = sctp_sched_fc_get, + .init = sctp_sched_fc_init, + .init_sid = sctp_sched_fc_init_sid, + .free_sid = sctp_sched_fc_free_sid, + .enqueue = sctp_sched_fc_enqueue, + .dequeue = sctp_sched_fc_dequeue, + .dequeue_done = sctp_sched_fc_dequeue_done, + .sched_all = sctp_sched_fc_sched_all, + .unsched_all = sctp_sched_fc_unsched_all, +}; + +void sctp_sched_ops_fc_init(void) +{ + sctp_sched_ops_register(SCTP_SS_FC, &sctp_sched_fc); +} + +static struct sctp_sched_ops sctp_sched_wfq = { + .set = sctp_sched_wfq_set, + .get = sctp_sched_wfq_get, + .init = sctp_sched_fc_init, + .init_sid = sctp_sched_fc_init_sid, + .free_sid = sctp_sched_fc_free_sid, + .enqueue = sctp_sched_fc_enqueue, + .dequeue = sctp_sched_fc_dequeue, + .dequeue_done = sctp_sched_fc_dequeue_done, + .sched_all = sctp_sched_fc_sched_all, + .unsched_all = sctp_sched_fc_unsched_all, +}; + +void sctp_sched_ops_wfq_init(void) +{ + sctp_sched_ops_register(SCTP_SS_WFQ, &sctp_sched_wfq); +} diff --git a/net/smc/smc.h b/net/smc/smc.h index 5ed765ea0c73..2eeea4cdc718 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -283,10 +283,7 @@ struct smc_sock { /* smc sock container */ * */ }; -static inline struct smc_sock *smc_sk(const struct sock *sk) -{ - return (struct smc_sock *)sk; -} +#define smc_sk(ptr) container_of_const(ptr, struct smc_sock, sk) static inline void smc_init_saved_callbacks(struct smc_sock *smc) { diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 08b457c2d294..1645fba0d2d3 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -106,7 +106,10 @@ struct smc_link { unsigned long *wr_tx_mask; /* bit mask of used indexes */ u32 wr_tx_cnt; /* number of WR send buffers */ wait_queue_head_t wr_tx_wait; /* wait for free WR send buf */ - atomic_t wr_tx_refcnt; /* tx refs to link */ + struct { + struct percpu_ref wr_tx_refs; + } ____cacheline_aligned_in_smp; + struct completion tx_ref_comp; struct smc_wr_buf *wr_rx_bufs; /* WR recv payload buffers */ struct ib_recv_wr *wr_rx_ibs; /* WR recv meta data */ @@ -122,7 +125,10 @@ struct smc_link { struct ib_reg_wr wr_reg; /* WR register memory region */ wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */ - atomic_t wr_reg_refcnt; /* reg refs to link */ + struct { + struct percpu_ref wr_reg_refs; + } ____cacheline_aligned_in_smp; + struct completion reg_ref_comp; enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */ u8 gid[SMC_GID_SIZE];/* gid matching used vlan id*/ diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 3b0b7710c6b0..fbee2493091f 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -429,7 +429,7 @@ static void smcd_register_dev(struct ism_dev *ism) u8 *system_eid = NULL; system_eid = smcd->ops->get_system_eid(); - if (system_eid[24] != '0' || system_eid[28] != '0') { + if (smcd->ops->supports_v2()) { smc_ism_v2_capable = true; memcpy(smc_ism_v2_system_eid, system_eid, SMC_MAX_EID_LEN); diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index b0678a417e09..0021065a600a 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -377,12 +377,11 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) if (rc) return rc; - atomic_inc(&link->wr_reg_refcnt); + percpu_ref_get(&link->wr_reg_refs); rc = wait_event_interruptible_timeout(link->wr_reg_wait, (link->wr_reg_state != POSTED), SMC_WR_REG_MR_WAIT_TIME); - if (atomic_dec_and_test(&link->wr_reg_refcnt)) - wake_up_all(&link->wr_reg_wait); + percpu_ref_put(&link->wr_reg_refs); if (!rc) { /* timeout - terminate link */ smcr_link_down_cond_sched(link); @@ -647,8 +646,10 @@ void smc_wr_free_link(struct smc_link *lnk) smc_wr_wakeup_tx_wait(lnk); smc_wr_tx_wait_no_pending_sends(lnk); - wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt))); - wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt))); + percpu_ref_kill(&lnk->wr_reg_refs); + wait_for_completion(&lnk->reg_ref_comp); + percpu_ref_kill(&lnk->wr_tx_refs); + wait_for_completion(&lnk->tx_ref_comp); if (lnk->wr_rx_dma_addr) { ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr, @@ -847,6 +848,20 @@ void smc_wr_add_dev(struct smc_ib_device *smcibdev) tasklet_setup(&smcibdev->send_tasklet, smc_wr_tx_tasklet_fn); } +static void smcr_wr_tx_refs_free(struct percpu_ref *ref) +{ + struct smc_link *lnk = container_of(ref, struct smc_link, wr_tx_refs); + + complete(&lnk->tx_ref_comp); +} + +static void smcr_wr_reg_refs_free(struct percpu_ref *ref) +{ + struct smc_link *lnk = container_of(ref, struct smc_link, wr_reg_refs); + + complete(&lnk->reg_ref_comp); +} + int smc_wr_create_link(struct smc_link *lnk) { struct ib_device *ibdev = lnk->smcibdev->ibdev; @@ -890,9 +905,15 @@ int smc_wr_create_link(struct smc_link *lnk) smc_wr_init_sge(lnk); bitmap_zero(lnk->wr_tx_mask, SMC_WR_BUF_CNT); init_waitqueue_head(&lnk->wr_tx_wait); - atomic_set(&lnk->wr_tx_refcnt, 0); + rc = percpu_ref_init(&lnk->wr_tx_refs, smcr_wr_tx_refs_free, 0, GFP_KERNEL); + if (rc) + goto dma_unmap; + init_completion(&lnk->tx_ref_comp); init_waitqueue_head(&lnk->wr_reg_wait); - atomic_set(&lnk->wr_reg_refcnt, 0); + rc = percpu_ref_init(&lnk->wr_reg_refs, smcr_wr_reg_refs_free, 0, GFP_KERNEL); + if (rc) + goto dma_unmap; + init_completion(&lnk->reg_ref_comp); init_waitqueue_head(&lnk->wr_rx_empty_wait); return rc; diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 45e9b894d3f8..f3008dda222a 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -63,14 +63,13 @@ static inline bool smc_wr_tx_link_hold(struct smc_link *link) { if (!smc_link_sendable(link)) return false; - atomic_inc(&link->wr_tx_refcnt); + percpu_ref_get(&link->wr_tx_refs); return true; } static inline void smc_wr_tx_link_put(struct smc_link *link) { - if (atomic_dec_and_test(&link->wr_tx_refcnt)) - wake_up_all(&link->wr_tx_wait); + percpu_ref_put(&link->wr_tx_refs); } static inline void smc_wr_drain_cq(struct smc_link *lnk) diff --git a/net/socket.c b/net/socket.c index 9c92c0e6c4da..73e493da4589 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2292,9 +2292,9 @@ INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level, int __sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen) { + int max_optlen __maybe_unused; int err, fput_needed; struct socket *sock; - int max_optlen; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0b0f18ecce44..fb31e8a4409e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -557,7 +557,7 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other) * when peer was not connected to us. */ if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) { - other->sk_err = ECONNRESET; + WRITE_ONCE(other->sk_err, ECONNRESET); sk_error_report(other); } } @@ -630,7 +630,7 @@ static void unix_release_sock(struct sock *sk, int embrion) /* No more writes */ skpair->sk_shutdown = SHUTDOWN_MASK; if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) - skpair->sk_err = ECONNRESET; + WRITE_ONCE(skpair->sk_err, ECONNRESET); unix_state_unlock(skpair); skpair->sk_state_change(skpair); sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); @@ -3165,7 +3165,7 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa mask = 0; /* exceptional events? */ - if (sk->sk_err) + if (READ_ONCE(sk->sk_err)) mask |= EPOLLERR; if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= EPOLLHUP; @@ -3208,7 +3208,8 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, mask = 0; /* exceptional events? */ - if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) + if (READ_ONCE(sk->sk_err) || + !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index dc2763540393..2405f0f9af31 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -305,7 +305,7 @@ void unix_gc(void) * release.path eventually putting registered files. */ skb_queue_walk_safe(&hitlist, skb, next_skb) { - if (skb->scm_io_uring) { + if (skb->destructor == io_uring_destruct_scm) { __skb_unlink(skb, &hitlist); skb_queue_tail(&skb->sk->sk_receive_queue, skb); } diff --git a/net/unix/scm.c b/net/unix/scm.c index aa27a02478dc..f9152881d77f 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -152,3 +152,9 @@ void unix_destruct_scm(struct sk_buff *skb) sock_wfree(skb); } EXPORT_SYMBOL(unix_destruct_scm); + +void io_uring_destruct_scm(struct sk_buff *skb) +{ + unix_destruct_scm(skb); +} +EXPORT_SYMBOL(io_uring_destruct_scm); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 6564192e7f20..957cdc01c8e8 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -850,6 +850,9 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t, if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST) return 0; + if (!t) + return -ENOTCONN; + reply = virtio_transport_alloc_skb(&info, 0, le64_to_cpu(hdr->dst_cid), le32_to_cpu(hdr->dst_port), @@ -858,11 +861,6 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t, if (!reply) return -ENOMEM; - if (!t) { - kfree_skb(reply); - return -ENOTCONN; - } - return t->send_pkt(reply); } diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 81d3f40d6235..ac059cefbeb3 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -673,6 +673,39 @@ static bool cfg80211_allowed_address(struct wireless_dev *wdev, const u8 *addr) return ether_addr_equal(addr, wdev_address(wdev)); } +static bool cfg80211_allowed_random_address(struct wireless_dev *wdev, + const struct ieee80211_mgmt *mgmt) +{ + if (ieee80211_is_auth(mgmt->frame_control) || + ieee80211_is_deauth(mgmt->frame_control)) { + /* Allow random TA to be used with authentication and + * deauthentication frames if the driver has indicated support. + */ + if (wiphy_ext_feature_isset( + wdev->wiphy, + NL80211_EXT_FEATURE_AUTH_AND_DEAUTH_RANDOM_TA)) + return true; + } else if (ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) { + /* Allow random TA to be used with Public Action frames if the + * driver has indicated support. + */ + if (!wdev->connected && + wiphy_ext_feature_isset( + wdev->wiphy, + NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA)) + return true; + + if (wdev->connected && + wiphy_ext_feature_isset( + wdev->wiphy, + NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED)) + return true; + } + + return false; +} + int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_mgmt_tx_params *params, u64 *cookie) @@ -774,25 +807,9 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, return err; } - if (!cfg80211_allowed_address(wdev, mgmt->sa)) { - /* Allow random TA to be used with Public Action frames if the - * driver has indicated support for this. Otherwise, only allow - * the local address to be used. - */ - if (!ieee80211_is_action(mgmt->frame_control) || - mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) - return -EINVAL; - if (!wdev->connected && - !wiphy_ext_feature_isset( - &rdev->wiphy, - NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA)) - return -EINVAL; - if (wdev->connected && - !wiphy_ext_feature_isset( - &rdev->wiphy, - NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED)) - return -EINVAL; - } + if (!cfg80211_allowed_address(wdev, mgmt->sa) && + !cfg80211_allowed_random_address(wdev, mgmt)) + return -EINVAL; /* Transmit the management frame as requested by user space */ return rdev_mgmt_tx(rdev, wdev, params, cookie); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4f63059efd81..bfa15defc04e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -812,6 +812,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MAX_NUM_AKM_SUITES] = { .type = NLA_REJECT }, [NL80211_ATTR_PUNCT_BITMAP] = NLA_POLICY_FULL_RANGE(NLA_U32, &nl80211_punct_bitmap_range), + + [NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS] = { .type = NLA_U16 }, + [NL80211_ATTR_HW_TIMESTAMP_ENABLED] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -2970,6 +2973,11 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_MLO) nla_put_flag(msg, NL80211_ATTR_MLO_SUPPORT); + if (rdev->wiphy.hw_timestamp_max_peers && + nla_put_u16(msg, NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS, + rdev->wiphy.hw_timestamp_max_peers)) + goto nla_put_failure; + /* done */ state->split_start = 0; break; @@ -9025,7 +9033,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) struct nlattr *attr; struct wiphy *wiphy; int err, tmp, n_ssids = 0, n_channels, i; - size_t ie_len; + size_t ie_len, size; wiphy = &rdev->wiphy; @@ -9070,10 +9078,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (ie_len > wiphy->max_scan_ie_len) return -EINVAL; - request = kzalloc(sizeof(*request) - + sizeof(*request->ssids) * n_ssids - + sizeof(*request->channels) * n_channels - + ie_len, GFP_KERNEL); + size = struct_size(request, channels, n_channels); + size = size_add(size, array_size(sizeof(*request->ssids), n_ssids)); + size = size_add(size, ie_len); + request = kzalloc(size, GFP_KERNEL); if (!request) return -ENOMEM; @@ -9406,7 +9414,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, struct nlattr *attr; int err, tmp, n_ssids = 0, n_match_sets = 0, n_channels, i, n_plans = 0; enum nl80211_band band; - size_t ie_len; + size_t ie_len, size; struct nlattr *tb[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1]; s32 default_match_rssi = NL80211_SCAN_RSSI_THOLD_OFF; @@ -9515,12 +9523,14 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, attrs[NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST])) return ERR_PTR(-EINVAL); - request = kzalloc(sizeof(*request) - + sizeof(*request->ssids) * n_ssids - + sizeof(*request->match_sets) * n_match_sets - + sizeof(*request->scan_plans) * n_plans - + sizeof(*request->channels) * n_channels - + ie_len, GFP_KERNEL); + size = struct_size(request, channels, n_channels); + size = size_add(size, array_size(sizeof(*request->ssids), n_ssids)); + size = size_add(size, array_size(sizeof(*request->match_sets), + n_match_sets)); + size = size_add(size, array_size(sizeof(*request->scan_plans), + n_plans)); + size = size_add(size, ie_len); + request = kzalloc(size, GFP_KERNEL); if (!request) return ERR_PTR(-ENOMEM); @@ -16166,6 +16176,29 @@ nl80211_remove_link_station(struct sk_buff *skb, struct genl_info *info) return ret; } +static int nl80211_set_hw_timestamp(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct cfg80211_set_hw_timestamp hwts = {}; + + if (!rdev->wiphy.hw_timestamp_max_peers) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_MAC] && + rdev->wiphy.hw_timestamp_max_peers != CFG80211_HW_TIMESTAMP_ALL_PEERS) + return -EOPNOTSUPP; + + if (info->attrs[NL80211_ATTR_MAC]) + hwts.macaddr = nla_data(info->attrs[NL80211_ATTR_MAC]); + + hwts.enable = + nla_get_flag(info->attrs[NL80211_ATTR_HW_TIMESTAMP_ENABLED]); + + return rdev_set_hw_timestamp(rdev, dev, &hwts); +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -17340,6 +17373,12 @@ static const struct genl_small_ops nl80211_small_ops[] = { .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_MLO_VALID_LINK_ID), }, + { + .cmd = NL80211_CMD_SET_HW_TIMESTAMP, + .doit = nl80211_set_hw_timestamp, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + }, }; static struct genl_family nl80211_fam __ro_after_init = { @@ -18721,7 +18760,9 @@ EXPORT_SYMBOL(cfg80211_mgmt_tx_status_ext); static int __nl80211_rx_control_port(struct net_device *dev, struct sk_buff *skb, - bool unencrypted, gfp_t gfp) + bool unencrypted, + int link_id, + gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); @@ -18753,6 +18794,8 @@ static int __nl80211_rx_control_port(struct net_device *dev, NL80211_ATTR_PAD) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) || nla_put_u16(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE, proto) || + (link_id >= 0 && + nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id)) || (unencrypted && nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT))) goto nla_put_failure; @@ -18771,13 +18814,14 @@ static int __nl80211_rx_control_port(struct net_device *dev, return -ENOBUFS; } -bool cfg80211_rx_control_port(struct net_device *dev, - struct sk_buff *skb, bool unencrypted) +bool cfg80211_rx_control_port(struct net_device *dev, struct sk_buff *skb, + bool unencrypted, int link_id) { int ret; - trace_cfg80211_rx_control_port(dev, skb, unencrypted); - ret = __nl80211_rx_control_port(dev, skb, unencrypted, GFP_ATOMIC); + trace_cfg80211_rx_control_port(dev, skb, unencrypted, link_id); + ret = __nl80211_rx_control_port(dev, skb, unencrypted, link_id, + GFP_ATOMIC); trace_cfg80211_return_bool(ret == 0); return ret == 0; } diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 13b209a8db28..2e497cf26ef2 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1494,4 +1494,21 @@ rdev_del_link_station(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_set_hw_timestamp(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_set_hw_timestamp *hwts) +{ + struct wiphy *wiphy = &rdev->wiphy; + int ret; + + if (!rdev->ops->set_hw_timestamp) + return -EOPNOTSUPP; + + trace_rdev_set_hw_timestamp(wiphy, dev, hwts); + ret = rdev->ops->set_hw_timestamp(wiphy, dev, hwts); + trace_rdev_return_int(wiphy, ret); + + return ret; +} #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 790bc31cf82e..a1382255fab3 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1810,8 +1810,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, } int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, - enum nl80211_band band, - enum cfg80211_bss_frame_type ftype) + enum nl80211_band band) { const struct element *tmp; @@ -1830,9 +1829,7 @@ int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, if (!he_6ghz_oper) return -1; - if (ftype != CFG80211_BSS_FTYPE_BEACON || - he_6ghz_oper->control & IEEE80211_HE_6GHZ_OPER_CTRL_DUP_BEACON) - return he_6ghz_oper->primary; + return he_6ghz_oper->primary; } } else if (band == NL80211_BAND_S1GHZ) { tmp = cfg80211_find_elem(WLAN_EID_S1G_OPERATION, ie, ielen); @@ -1870,15 +1867,14 @@ EXPORT_SYMBOL(cfg80211_get_ies_channel_number); static struct ieee80211_channel * cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, struct ieee80211_channel *channel, - enum nl80211_bss_scan_width scan_width, - enum cfg80211_bss_frame_type ftype) + enum nl80211_bss_scan_width scan_width) { u32 freq; int channel_number; struct ieee80211_channel *alt_channel; channel_number = cfg80211_get_ies_channel_number(ie, ielen, - channel->band, ftype); + channel->band); if (channel_number < 0) { /* No channel information in frame payload */ @@ -1888,22 +1884,21 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, freq = ieee80211_channel_to_freq_khz(channel_number, channel->band); /* - * In 6GHz, duplicated beacon indication is relevant for - * beacons only. + * Frame info (beacon/prob res) is the same as received channel, + * no need for further processing. */ - if (channel->band == NL80211_BAND_6GHZ && - (freq == channel->center_freq || - abs(freq - channel->center_freq) > 80)) + if (freq == ieee80211_channel_to_khz(channel)) return channel; alt_channel = ieee80211_get_channel_khz(wiphy, freq); if (!alt_channel) { - if (channel->band == NL80211_BAND_2GHZ) { + if (channel->band == NL80211_BAND_2GHZ || + channel->band == NL80211_BAND_6GHZ) { /* * Better not allow unexpected channels when that could * be going beyond the 1-11 range (e.g., discovering * BSS on channel 12 when radio is configured for - * channel 11. + * channel 11) or beyond the 6 GHz channel range. */ return NULL; } @@ -1957,7 +1952,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, return NULL; channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan, - data->scan_width, ftype); + data->scan_width); if (!channel) return NULL; @@ -2391,7 +2386,6 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, size_t ielen, min_hdr_len = offsetof(struct ieee80211_mgmt, u.probe_resp.variable); int bss_type; - enum cfg80211_bss_frame_type ftype; BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) != offsetof(struct ieee80211_mgmt, u.beacon.variable)); @@ -2428,16 +2422,8 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, variable = ext->u.s1g_beacon.variable; } - if (ieee80211_is_beacon(mgmt->frame_control)) - ftype = CFG80211_BSS_FTYPE_BEACON; - else if (ieee80211_is_probe_resp(mgmt->frame_control)) - ftype = CFG80211_BSS_FTYPE_PRESP; - else - ftype = CFG80211_BSS_FTYPE_UNKNOWN; - channel = cfg80211_get_bss_channel(wiphy, variable, - ielen, data->chan, data->scan_width, - ftype); + ielen, data->chan, data->scan_width); if (!channel) return NULL; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index ca7474eec723..716a1fa70069 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3165,14 +3165,15 @@ TRACE_EVENT(cfg80211_control_port_tx_status, TRACE_EVENT(cfg80211_rx_control_port, TP_PROTO(struct net_device *netdev, struct sk_buff *skb, - bool unencrypted), - TP_ARGS(netdev, skb, unencrypted), + bool unencrypted, int link_id), + TP_ARGS(netdev, skb, unencrypted, link_id), TP_STRUCT__entry( NETDEV_ENTRY __field(int, len) MAC_ENTRY(from) __field(u16, proto) __field(bool, unencrypted) + __field(int, link_id) ), TP_fast_assign( NETDEV_ASSIGN; @@ -3180,10 +3181,12 @@ TRACE_EVENT(cfg80211_rx_control_port, MAC_ASSIGN(from, eth_hdr(skb)->h_source); __entry->proto = be16_to_cpu(skb->protocol); __entry->unencrypted = unencrypted; + __entry->link_id = link_id; ), - TP_printk(NETDEV_PR_FMT ", len=%d, %pM, proto: 0x%x, unencrypted: %s", + TP_printk(NETDEV_PR_FMT ", len=%d, %pM, proto: 0x%x, unencrypted: %s, link: %d", NETDEV_PR_ARG, __entry->len, __entry->from, - __entry->proto, BOOL_TO_STR(__entry->unencrypted)) + __entry->proto, BOOL_TO_STR(__entry->unencrypted), + __entry->link_id) ); TRACE_EVENT(cfg80211_cqm_rssi_notify, @@ -3918,6 +3921,31 @@ TRACE_EVENT(rdev_del_link_station, __entry->link_id) ); +TRACE_EVENT(rdev_set_hw_timestamp, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_set_hw_timestamp *hwts), + + TP_ARGS(wiphy, netdev, hwts), + + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(macaddr) + __field(bool, enable) + ), + + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(macaddr, hwts->macaddr); + __entry->enable = hwts->enable; + ), + + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", mac %pM, enable: %u", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->macaddr, + __entry->enable) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index 771d0fa90ef5..0c38d7175922 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -24,6 +24,7 @@ static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map, return ERR_PTR(-ENOMEM); bpf_map_inc(&map->map); + atomic_inc(&map->count); node->map = map; node->map_entry = map_entry; @@ -32,8 +33,11 @@ static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map, static void xsk_map_node_free(struct xsk_map_node *node) { + struct xsk_map *map = node->map; + bpf_map_put(&node->map->map); kfree(node); + atomic_dec(&map->count); } static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node) @@ -85,6 +89,14 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) return &m->map; } +static u64 xsk_map_mem_usage(const struct bpf_map *map) +{ + struct xsk_map *m = container_of(map, struct xsk_map, map); + + return struct_size(m, xsk_map, map->max_entries) + + (u64)atomic_read(&m->count) * sizeof(struct xsk_map_node); +} + static void xsk_map_free(struct bpf_map *map) { struct xsk_map *m = container_of(map, struct xsk_map, map); @@ -267,6 +279,7 @@ const struct bpf_map_ops xsk_map_ops = { .map_update_elem = xsk_map_update_elem, .map_delete_elem = xsk_map_delete_elem, .map_check_btf = map_check_no_btf, + .map_mem_usage = xsk_map_mem_usage, .map_btf_id = &xsk_map_btf_ids[0], .map_redirect = xsk_map_redirect, }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 50baf50dc513..49e63eea841d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1272,6 +1272,7 @@ found: xso->dir = xdo->dir; xso->dev = xdo->dev; xso->real_dev = xdo->real_dev; + xso->flags = XFRM_DEV_OFFLOAD_FLAG_ACQ; netdev_tracker_alloc(xso->dev, &xso->dev_tracker, GFP_ATOMIC); error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x, NULL); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 103af2b3e986..d720e163ae6e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -901,6 +901,8 @@ static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) memcpy(&p->id, &x->id, sizeof(p->id)); memcpy(&p->sel, &x->sel, sizeof(p->sel)); memcpy(&p->lft, &x->lft, sizeof(p->lft)); + if (x->xso.dev) + xfrm_dev_state_update_curlft(x); memcpy(&p->curlft, &x->curlft, sizeof(p->curlft)); put_unaligned(x->stats.replay_window, &p->stats.replay_window); put_unaligned(x->stats.replay, &p->stats.replay); |