diff options
Diffstat (limited to 'net')
133 files changed, 1319 insertions, 786 deletions
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index c46daf09a501..bb7ec1a3915d 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -126,6 +126,7 @@ int vlan_check_real_dev(struct net_device *real_dev, void vlan_setup(struct net_device *dev); int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack); void unregister_vlan_dev(struct net_device *dev, struct list_head *head); +void vlan_dev_uninit(struct net_device *dev); bool vlan_dev_inherit_address(struct net_device *dev, struct net_device *real_dev); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index e5bff5cc6f97..2a78da4072de 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -586,7 +586,8 @@ static int vlan_dev_init(struct net_device *dev) return 0; } -static void vlan_dev_uninit(struct net_device *dev) +/* Note: this function might be called multiple times for the same device. */ +void vlan_dev_uninit(struct net_device *dev) { struct vlan_priority_tci_mapping *pm; struct vlan_dev_priv *vlan = vlan_dev_priv(dev); diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index c482a6fe9393..0db85aeb119b 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -108,11 +108,13 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], struct ifla_vlan_flags *flags; struct ifla_vlan_qos_mapping *m; struct nlattr *attr; - int rem; + int rem, err; if (data[IFLA_VLAN_FLAGS]) { flags = nla_data(data[IFLA_VLAN_FLAGS]); - vlan_dev_change_flags(dev, flags->flags, flags->mask); + err = vlan_dev_change_flags(dev, flags->flags, flags->mask); + if (err) + return err; } if (data[IFLA_VLAN_INGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) { @@ -123,7 +125,9 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], if (data[IFLA_VLAN_EGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) { m = nla_data(attr); - vlan_dev_set_egress_priority(dev, m->from, m->to); + err = vlan_dev_set_egress_priority(dev, m->from, m->to); + if (err) + return err; } } return 0; @@ -179,10 +183,11 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, return -EINVAL; err = vlan_changelink(dev, tb, data, extack); - if (err < 0) - return err; - - return register_vlan_dev(dev, extack); + if (!err) + err = register_vlan_dev(dev, extack); + if (err) + vlan_dev_uninit(dev); + return err; } static inline size_t vlan_qos_map_size(unsigned int n) diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index b0af3a11d406..ec7bf5a4a9fc 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -285,6 +285,7 @@ static u32 batadv_hash_dat(const void *data, u32 size) u32 hash = 0; const struct batadv_dat_entry *dat = data; const unsigned char *key; + __be16 vid; u32 i; key = (const unsigned char *)&dat->ip; @@ -294,7 +295,8 @@ static u32 batadv_hash_dat(const void *data, u32 size) hash ^= (hash >> 6); } - key = (const unsigned char *)&dat->vid; + vid = htons(dat->vid); + key = (__force const unsigned char *)&vid; for (i = 0; i < sizeof(dat->vid); i++) { hash += key[i]; hash += (hash << 10); diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index af7800103e51..59980ecfc962 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -662,6 +662,9 @@ static unsigned int br_nf_forward_arp(void *priv, nf_bridge_pull_encap_header(skb); } + if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr)))) + return NF_DROP; + if (arp_hdr(skb)->ar_pln != 4) { if (is_vlan_arp(skb, state->net)) nf_bridge_push_encap_header(skb); diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c index 2cdfc5d6c25d..8c69f0c95a8e 100644 --- a/net/bridge/br_nf_core.c +++ b/net/bridge/br_nf_core.c @@ -22,7 +22,8 @@ #endif static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { } diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 4096d8a74a2b..e1256e03a9a8 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1867,7 +1867,7 @@ static int ebt_buf_count(struct ebt_entries_buf_state *state, unsigned int sz) } static int ebt_buf_add(struct ebt_entries_buf_state *state, - void *data, unsigned int sz) + const void *data, unsigned int sz) { if (state->buf_kern_start == NULL) goto count_only; @@ -1901,7 +1901,7 @@ enum compat_mwt { EBT_COMPAT_TARGET, }; -static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, +static int compat_mtw_from_user(const struct compat_ebt_entry_mwt *mwt, enum compat_mwt compat_mwt, struct ebt_entries_buf_state *state, const unsigned char *base) @@ -1979,22 +1979,23 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, /* return size of all matches, watchers or target, including necessary * alignment and padding. */ -static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, +static int ebt_size_mwt(const struct compat_ebt_entry_mwt *match32, unsigned int size_left, enum compat_mwt type, struct ebt_entries_buf_state *state, const void *base) { + const char *buf = (const char *)match32; int growth = 0; - char *buf; if (size_left == 0) return 0; - buf = (char *) match32; - - while (size_left >= sizeof(*match32)) { + do { struct ebt_entry_match *match_kern; int ret; + if (size_left < sizeof(*match32)) + return -EINVAL; + match_kern = (struct ebt_entry_match *) state->buf_kern_start; if (match_kern) { char *tmp; @@ -2031,22 +2032,18 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, if (match_kern) match_kern->match_size = ret; - /* rule should have no remaining data after target */ - if (type == EBT_COMPAT_TARGET && size_left) - return -EINVAL; - match32 = (struct compat_ebt_entry_mwt *) buf; - } + } while (size_left); return growth; } /* called for all ebt_entry structures. */ -static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, +static int size_entry_mwt(const struct ebt_entry *entry, const unsigned char *base, unsigned int *total, struct ebt_entries_buf_state *state) { - unsigned int i, j, startoff, new_offset = 0; + unsigned int i, j, startoff, next_expected_off, new_offset = 0; /* stores match/watchers/targets & offset of next struct ebt_entry: */ unsigned int offsets[4]; unsigned int *offsets_update = NULL; @@ -2132,11 +2129,13 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, return ret; } - startoff = state->buf_user_offset - startoff; + next_expected_off = state->buf_user_offset - startoff; + if (next_expected_off != entry->next_offset) + return -EINVAL; - if (WARN_ON(*total < startoff)) + if (*total < entry->next_offset) return -EINVAL; - *total -= startoff; + *total -= entry->next_offset; return 0; } diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index de09b0a65791..f7587428febd 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -423,9 +423,9 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len) { struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct j1939_sock *jsk = j1939_sk(sock->sk); - struct j1939_priv *priv = jsk->priv; - struct sock *sk = sock->sk; - struct net *net = sock_net(sk); + struct j1939_priv *priv; + struct sock *sk; + struct net *net; int ret = 0; ret = j1939_sk_sanity_check(addr, len); @@ -434,6 +434,10 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len) lock_sock(sock->sk); + priv = jsk->priv; + sk = sock->sk; + net = sock_net(sk); + /* Already bound to an interface? */ if (jsk->state & J1939_SOCK_BOUND) { /* A re-bind() to a different interface is not diff --git a/net/core/dev.c b/net/core/dev.c index 0ad39c87b7fd..7e885d069707 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9177,22 +9177,10 @@ static void netdev_unregister_lockdep_key(struct net_device *dev) void netdev_update_lockdep_key(struct net_device *dev) { - struct netdev_queue *queue; - int i; - - lockdep_unregister_key(&dev->qdisc_xmit_lock_key); lockdep_unregister_key(&dev->addr_list_lock_key); - - lockdep_register_key(&dev->qdisc_xmit_lock_key); lockdep_register_key(&dev->addr_list_lock_key); lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key); - for (i = 0; i < dev->num_tx_queues; i++) { - queue = netdev_get_tx_queue(dev, i); - - lockdep_set_class(&queue->_xmit_lock, - &dev->qdisc_xmit_lock_key); - } } EXPORT_SYMBOL(netdev_update_lockdep_key); diff --git a/net/core/devlink.c b/net/core/devlink.c index 4c63c9a4c09e..f76219bf0c21 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -6406,7 +6406,7 @@ static bool devlink_port_type_should_warn(struct devlink_port *devlink_port) devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA; } -#define DEVLINK_PORT_TYPE_WARN_TIMEOUT (HZ * 30) +#define DEVLINK_PORT_TYPE_WARN_TIMEOUT (HZ * 3600) static void devlink_port_type_warn_schedule(struct devlink_port *devlink_port) { @@ -7563,7 +7563,7 @@ void devlink_region_destroy(struct devlink_region *region) EXPORT_SYMBOL_GPL(devlink_region_destroy); /** - * devlink_region_shapshot_id_get - get snapshot ID + * devlink_region_snapshot_id_get - get snapshot ID * * This callback should be called when adding a new snapshot, * Driver should use the same id for multiple snapshots taken @@ -7571,7 +7571,7 @@ EXPORT_SYMBOL_GPL(devlink_region_destroy); * * @devlink: devlink */ -u32 devlink_region_shapshot_id_get(struct devlink *devlink) +u32 devlink_region_snapshot_id_get(struct devlink *devlink) { u32 id; @@ -7581,7 +7581,7 @@ u32 devlink_region_shapshot_id_get(struct devlink *devlink) return id; } -EXPORT_SYMBOL_GPL(devlink_region_shapshot_id_get); +EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get); /** * devlink_region_snapshot_create - create a new snapshot diff --git a/net/core/filter.c b/net/core/filter.c index c19dd0973e0c..538f6a735a19 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2055,6 +2055,7 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) } skb->dev = dev; + skb->tstamp = 0; dev_xmit_recursion_inc(); ret = dev_queue_xmit(skb); @@ -2230,10 +2231,10 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, /* First find the starting scatterlist element */ i = msg->sg.start; do { + offset += len; len = sk_msg_elem(msg, i)->length; if (start < offset + len) break; - offset += len; sk_msg_iter_var_next(i); } while (i != msg->sg.end); @@ -2345,7 +2346,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, u32, len, u64, flags) { struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge; - u32 new, i = 0, l, space, copy = 0, offset = 0; + u32 new, i = 0, l = 0, space, copy = 0, offset = 0; u8 *raw, *to, *from; struct page *page; @@ -2355,11 +2356,11 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, /* First find the starting scatterlist element */ i = msg->sg.start; do { + offset += l; l = sk_msg_elem(msg, i)->length; if (start < offset + l) break; - offset += l; sk_msg_iter_var_next(i); } while (i != msg->sg.end); @@ -2414,6 +2415,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, sk_msg_iter_var_next(i); sg_unmark_end(psge); + sg_unmark_end(&rsge); sk_msg_iter_next(msg, end); } @@ -2505,7 +2507,7 @@ static void sk_msg_shift_right(struct sk_msg *msg, int i) BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, u32, len, u64, flags) { - u32 i = 0, l, space, offset = 0; + u32 i = 0, l = 0, space, offset = 0; u64 last = start + len; int pop; @@ -2515,11 +2517,11 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, /* First find the starting scatterlist element */ i = msg->sg.start; do { + offset += l; l = sk_msg_elem(msg, i)->length; if (start < offset + l) break; - offset += l; sk_msg_iter_var_next(i); } while (i != msg->sg.end); @@ -5317,8 +5319,7 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, if (sk) { sk = sk_to_full_sk(sk); if (!sk_fullsock(sk)) { - if (!sock_flag(sk, SOCK_RCU_FREE)) - sock_gen_put(sk); + sock_gen_put(sk); return NULL; } } @@ -5355,8 +5356,7 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, if (sk) { sk = sk_to_full_sk(sk); if (!sk_fullsock(sk)) { - if (!sock_flag(sk, SOCK_RCU_FREE)) - sock_gen_put(sk); + sock_gen_put(sk); return NULL; } } @@ -5423,7 +5423,8 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = { BPF_CALL_1(bpf_sk_release, struct sock *, sk) { - if (!sock_flag(sk, SOCK_RCU_FREE)) + /* Only full sockets have sk->sk_flags. */ + if (!sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE)) sock_gen_put(sk); return 0; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 652da6369037..920784a9b7ff 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -98,9 +98,6 @@ static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb) static void neigh_cleanup_and_release(struct neighbour *neigh) { - if (neigh->parms->neigh_cleanup) - neigh->parms->neigh_cleanup(neigh); - trace_neigh_cleanup_and_release(neigh, 0); __neigh_notify(neigh, RTM_DELNEIGH, 0, 0); call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 5c4624298996..4c826b8bf9b1 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -919,14 +919,17 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) struct kobject *kobj = &queue->kobj; int error = 0; + /* Kobject_put later will trigger rx_queue_release call which + * decreases dev refcount: Take that reference here + */ + dev_hold(queue->dev); + kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, "rx-%u", index); if (error) goto err; - dev_hold(queue->dev); - if (dev->sysfs_rx_queue_group) { error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group); if (error) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index ded2d5227678..3866d7e20c07 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -594,6 +594,8 @@ EXPORT_SYMBOL_GPL(sk_psock_destroy); void sk_psock_drop(struct sock *sk, struct sk_psock *psock) { + sock_owned_by_me(sk); + sk_psock_cork_free(psock); sk_psock_zap_ingress(psock); diff --git a/net/core/sock.c b/net/core/sock.c index 043db3ce023e..8459ad579f73 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2916,7 +2916,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_max_pacing_rate = ~0UL; sk->sk_pacing_rate = ~0UL; - sk->sk_pacing_shift = 10; + WRITE_ONCE(sk->sk_pacing_shift, 10); sk->sk_incoming_cpu = -1; sk_rx_queue_clear(sk); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index eb114ee419b6..8998e356f423 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -241,8 +241,11 @@ static void sock_map_free(struct bpf_map *map) struct sock *sk; sk = xchg(psk, NULL); - if (sk) + if (sk) { + lock_sock(sk); sock_map_unref(sk, psk); + release_sock(sk); + } } raw_spin_unlock_bh(&stab->lock); rcu_read_unlock(); @@ -862,7 +865,9 @@ static void sock_hash_free(struct bpf_map *map) raw_spin_lock_bh(&bucket->lock); hlist_for_each_entry_safe(elem, node, &bucket->head, node) { hlist_del_rcu(&elem->node); + lock_sock(elem->sk); sock_map_unref(elem->sk, elem); + release_sock(elem->sk); } raw_spin_unlock_bh(&bucket->lock); } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index eb29e5adc84d..9f9e00ba3ad7 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -288,6 +288,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, return ret; } +# ifdef CONFIG_HAVE_EBPF_JIT static int proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, @@ -298,6 +299,7 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } +# endif /* CONFIG_HAVE_EBPF_JIT */ static int proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write, diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index aea918135ec3..08c3dc45f1a4 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -110,7 +110,8 @@ static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb , u32 mtu); + struct sk_buff *skb , u32 mtu, + bool confirm_neigh); static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, @@ -251,7 +252,8 @@ static int dn_dst_gc(struct dst_ops *ops) * advertise to the other end). */ static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { struct dn_route *rt = (struct dn_route *) dst; struct neighbour *n = rt->n; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 9ef2caa13f27..c66abbed4daf 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -124,7 +124,8 @@ static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst, return NULL; } -struct dsa_link *dsa_link_touch(struct dsa_port *dp, struct dsa_port *link_dp) +static struct dsa_link *dsa_link_touch(struct dsa_port *dp, + struct dsa_port *link_dp) { struct dsa_switch *ds = dp->ds; struct dsa_switch_tree *dst; diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c index b678160bbd66..408d4af390a0 100644 --- a/net/dsa/tag_gswip.c +++ b/net/dsa/tag_gswip.c @@ -104,7 +104,7 @@ static struct sk_buff *gswip_tag_rcv(struct sk_buff *skb, } static const struct dsa_device_ops gswip_netdev_ops = { - .name = "gwsip", + .name = "gswip", .proto = DSA_TAG_PROTO_GSWIP, .xmit = gswip_tag_xmit, .rcv = gswip_tag_rcv, diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 73605bcbb385..90d055c4df9e 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -84,8 +84,6 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, * (eg, 0x00=port1, 0x02=port3, 0x06=port7) */ -#define KSZ8795_INGRESS_TAG_LEN 1 - #define KSZ8795_TAIL_TAG_OVERRIDE BIT(6) #define KSZ8795_TAIL_TAG_LOOKUP BIT(7) @@ -96,12 +94,12 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev) u8 *tag; u8 *addr; - nskb = ksz_common_xmit(skb, dev, KSZ8795_INGRESS_TAG_LEN); + nskb = ksz_common_xmit(skb, dev, KSZ_INGRESS_TAG_LEN); if (!nskb) return NULL; /* Tag encoding */ - tag = skb_put(nskb, KSZ8795_INGRESS_TAG_LEN); + tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN); addr = skb_mac_header(nskb); *tag = 1 << dp->index; @@ -124,7 +122,7 @@ static const struct dsa_device_ops ksz8795_netdev_ops = { .proto = DSA_TAG_PROTO_KSZ8795, .xmit = ksz8795_xmit, .rcv = ksz8795_rcv, - .overhead = KSZ8795_INGRESS_TAG_LEN, + .overhead = KSZ_INGRESS_TAG_LEN, }; DSA_TAG_DRIVER(ksz8795_netdev_ops); diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index c95885215525..c8a128c9e5e0 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -33,9 +33,6 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) struct dsa_port *dp = dsa_slave_to_port(dev); u16 *phdr, hdr; - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; - if (skb_cow_head(skb, 0) < 0) return NULL; diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c index 94447974a3c0..d5f709b940ff 100644 --- a/net/hsr/hsr_debugfs.c +++ b/net/hsr/hsr_debugfs.c @@ -20,6 +20,8 @@ #include "hsr_main.h" #include "hsr_framereg.h" +static struct dentry *hsr_debugfs_root_dir; + static void print_mac_address(struct seq_file *sfp, unsigned char *mac) { seq_printf(sfp, "%02x:%02x:%02x:%02x:%02x:%02x:", @@ -63,8 +65,20 @@ hsr_node_table_open(struct inode *inode, struct file *filp) return single_open(filp, hsr_node_table_show, inode->i_private); } +void hsr_debugfs_rename(struct net_device *dev) +{ + struct hsr_priv *priv = netdev_priv(dev); + struct dentry *d; + + d = debugfs_rename(hsr_debugfs_root_dir, priv->node_tbl_root, + hsr_debugfs_root_dir, dev->name); + if (IS_ERR(d)) + netdev_warn(dev, "failed to rename\n"); + else + priv->node_tbl_root = d; +} + static const struct file_operations hsr_fops = { - .owner = THIS_MODULE, .open = hsr_node_table_open, .read = seq_read, .llseek = seq_lseek, @@ -78,15 +92,14 @@ static const struct file_operations hsr_fops = { * When debugfs is configured this routine sets up the node_table file per * hsr device for dumping the node_table entries */ -int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev) +void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev) { - int rc = -1; struct dentry *de = NULL; - de = debugfs_create_dir(hsr_dev->name, NULL); - if (!de) { - pr_err("Cannot create hsr debugfs root\n"); - return rc; + de = debugfs_create_dir(hsr_dev->name, hsr_debugfs_root_dir); + if (IS_ERR(de)) { + pr_err("Cannot create hsr debugfs directory\n"); + return; } priv->node_tbl_root = de; @@ -94,13 +107,13 @@ int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev) de = debugfs_create_file("node_table", S_IFREG | 0444, priv->node_tbl_root, priv, &hsr_fops); - if (!de) { - pr_err("Cannot create hsr node_table directory\n"); - return rc; + if (IS_ERR(de)) { + pr_err("Cannot create hsr node_table file\n"); + debugfs_remove(priv->node_tbl_root); + priv->node_tbl_root = NULL; + return; } priv->node_tbl_file = de; - - return 0; } /* hsr_debugfs_term - Tear down debugfs intrastructure @@ -117,3 +130,18 @@ hsr_debugfs_term(struct hsr_priv *priv) debugfs_remove(priv->node_tbl_root); priv->node_tbl_root = NULL; } + +void hsr_debugfs_create_root(void) +{ + hsr_debugfs_root_dir = debugfs_create_dir("hsr", NULL); + if (IS_ERR(hsr_debugfs_root_dir)) { + pr_err("Cannot create hsr debugfs root directory\n"); + hsr_debugfs_root_dir = NULL; + } +} + +void hsr_debugfs_remove_root(void) +{ + /* debugfs_remove() internally checks NULL and ERROR */ + debugfs_remove(hsr_debugfs_root_dir); +} diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index b01e1bae4ddc..c7bd6c49fadf 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -272,6 +272,8 @@ static void send_hsr_supervision_frame(struct hsr_port *master, skb->dev->dev_addr, skb->len) <= 0) goto out; skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); if (hsr_ver > 0) { hsr_tag = skb_put(skb, sizeof(struct hsr_tag)); @@ -368,7 +370,7 @@ static void hsr_dev_destroy(struct net_device *hsr_dev) del_timer_sync(&hsr->prune_timer); del_timer_sync(&hsr->announce_timer); - hsr_del_self_node(&hsr->self_node_db); + hsr_del_self_node(hsr); hsr_del_nodes(&hsr->node_db); } @@ -440,11 +442,12 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], INIT_LIST_HEAD(&hsr->ports); INIT_LIST_HEAD(&hsr->node_db); INIT_LIST_HEAD(&hsr->self_node_db); + spin_lock_init(&hsr->list_lock); ether_addr_copy(hsr_dev->dev_addr, slave[0]->dev_addr); /* Make sure we recognize frames from ourselves in hsr_rcv() */ - res = hsr_create_self_node(&hsr->self_node_db, hsr_dev->dev_addr, + res = hsr_create_self_node(hsr, hsr_dev->dev_addr, slave[1]->dev_addr); if (res < 0) return res; @@ -477,31 +480,32 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER); if (res) - goto err_add_port; + goto err_add_master; res = register_netdevice(hsr_dev); if (res) - goto fail; + goto err_unregister; res = hsr_add_port(hsr, slave[0], HSR_PT_SLAVE_A); if (res) - goto fail; + goto err_add_slaves; + res = hsr_add_port(hsr, slave[1], HSR_PT_SLAVE_B); if (res) - goto fail; + goto err_add_slaves; + hsr_debugfs_init(hsr, hsr_dev); mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD)); - res = hsr_debugfs_init(hsr, hsr_dev); - if (res) - goto fail; return 0; -fail: +err_add_slaves: + unregister_netdevice(hsr_dev); +err_unregister: list_for_each_entry_safe(port, tmp, &hsr->ports, port_list) hsr_del_port(port); -err_add_port: - hsr_del_self_node(&hsr->self_node_db); +err_add_master: + hsr_del_self_node(hsr); return res; } diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 292be446007b..27dc65d7de67 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -75,10 +75,11 @@ static struct hsr_node *find_node_by_addr_A(struct list_head *node_db, /* Helper for device init; the self_node_db is used in hsr_rcv() to recognize * frames from self that's been looped over the HSR ring. */ -int hsr_create_self_node(struct list_head *self_node_db, +int hsr_create_self_node(struct hsr_priv *hsr, unsigned char addr_a[ETH_ALEN], unsigned char addr_b[ETH_ALEN]) { + struct list_head *self_node_db = &hsr->self_node_db; struct hsr_node *node, *oldnode; node = kmalloc(sizeof(*node), GFP_KERNEL); @@ -88,33 +89,33 @@ int hsr_create_self_node(struct list_head *self_node_db, ether_addr_copy(node->macaddress_A, addr_a); ether_addr_copy(node->macaddress_B, addr_b); - rcu_read_lock(); + spin_lock_bh(&hsr->list_lock); oldnode = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list); if (oldnode) { list_replace_rcu(&oldnode->mac_list, &node->mac_list); - rcu_read_unlock(); - synchronize_rcu(); - kfree(oldnode); + spin_unlock_bh(&hsr->list_lock); + kfree_rcu(oldnode, rcu_head); } else { - rcu_read_unlock(); list_add_tail_rcu(&node->mac_list, self_node_db); + spin_unlock_bh(&hsr->list_lock); } return 0; } -void hsr_del_self_node(struct list_head *self_node_db) +void hsr_del_self_node(struct hsr_priv *hsr) { + struct list_head *self_node_db = &hsr->self_node_db; struct hsr_node *node; - rcu_read_lock(); + spin_lock_bh(&hsr->list_lock); node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list); - rcu_read_unlock(); if (node) { list_del_rcu(&node->mac_list); - kfree(node); + kfree_rcu(node, rcu_head); } + spin_unlock_bh(&hsr->list_lock); } void hsr_del_nodes(struct list_head *node_db) @@ -130,30 +131,43 @@ void hsr_del_nodes(struct list_head *node_db) * seq_out is used to initialize filtering of outgoing duplicate frames * originating from the newly added node. */ -struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[], - u16 seq_out) +static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, + struct list_head *node_db, + unsigned char addr[], + u16 seq_out) { - struct hsr_node *node; + struct hsr_node *new_node, *node; unsigned long now; int i; - node = kzalloc(sizeof(*node), GFP_ATOMIC); - if (!node) + new_node = kzalloc(sizeof(*new_node), GFP_ATOMIC); + if (!new_node) return NULL; - ether_addr_copy(node->macaddress_A, addr); + ether_addr_copy(new_node->macaddress_A, addr); /* We are only interested in time diffs here, so use current jiffies * as initialization. (0 could trigger an spurious ring error warning). */ now = jiffies; for (i = 0; i < HSR_PT_PORTS; i++) - node->time_in[i] = now; + new_node->time_in[i] = now; for (i = 0; i < HSR_PT_PORTS; i++) - node->seq_out[i] = seq_out; - - list_add_tail_rcu(&node->mac_list, node_db); + new_node->seq_out[i] = seq_out; + spin_lock_bh(&hsr->list_lock); + list_for_each_entry_rcu(node, node_db, mac_list) { + if (ether_addr_equal(node->macaddress_A, addr)) + goto out; + if (ether_addr_equal(node->macaddress_B, addr)) + goto out; + } + list_add_tail_rcu(&new_node->mac_list, node_db); + spin_unlock_bh(&hsr->list_lock); + return new_node; +out: + spin_unlock_bh(&hsr->list_lock); + kfree(new_node); return node; } @@ -163,6 +177,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb, bool is_sup) { struct list_head *node_db = &port->hsr->node_db; + struct hsr_priv *hsr = port->hsr; struct hsr_node *node; struct ethhdr *ethhdr; u16 seq_out; @@ -196,7 +211,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb, seq_out = HSR_SEQNR_START; } - return hsr_add_node(node_db, ethhdr->h_source, seq_out); + return hsr_add_node(hsr, node_db, ethhdr->h_source, seq_out); } /* Use the Supervision frame's info about an eventual macaddress_B for merging @@ -206,10 +221,11 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb, void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, struct hsr_port *port_rcv) { - struct ethhdr *ethhdr; - struct hsr_node *node_real; + struct hsr_priv *hsr = port_rcv->hsr; struct hsr_sup_payload *hsr_sp; + struct hsr_node *node_real; struct list_head *node_db; + struct ethhdr *ethhdr; int i; ethhdr = (struct ethhdr *)skb_mac_header(skb); @@ -231,7 +247,7 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, node_real = find_node_by_addr_A(node_db, hsr_sp->macaddress_A); if (!node_real) /* No frame received from AddrA of this node yet */ - node_real = hsr_add_node(node_db, hsr_sp->macaddress_A, + node_real = hsr_add_node(hsr, node_db, hsr_sp->macaddress_A, HSR_SEQNR_START - 1); if (!node_real) goto done; /* No mem */ @@ -252,7 +268,9 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, } node_real->addr_B_port = port_rcv->type; + spin_lock_bh(&hsr->list_lock); list_del_rcu(&node_curr->mac_list); + spin_unlock_bh(&hsr->list_lock); kfree_rcu(node_curr, rcu_head); done: @@ -368,12 +386,13 @@ void hsr_prune_nodes(struct timer_list *t) { struct hsr_priv *hsr = from_timer(hsr, t, prune_timer); struct hsr_node *node; + struct hsr_node *tmp; struct hsr_port *port; unsigned long timestamp; unsigned long time_a, time_b; - rcu_read_lock(); - list_for_each_entry_rcu(node, &hsr->node_db, mac_list) { + spin_lock_bh(&hsr->list_lock); + list_for_each_entry_safe(node, tmp, &hsr->node_db, mac_list) { /* Don't prune own node. Neither time_in[HSR_PT_SLAVE_A] * nor time_in[HSR_PT_SLAVE_B], will ever be updated for * the master port. Thus the master node will be repeatedly @@ -421,7 +440,7 @@ void hsr_prune_nodes(struct timer_list *t) kfree_rcu(node, rcu_head); } } - rcu_read_unlock(); + spin_unlock_bh(&hsr->list_lock); /* Restart timer */ mod_timer(&hsr->prune_timer, diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index 89a3ce38151d..0f0fa12b4329 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -12,10 +12,8 @@ struct hsr_node; -void hsr_del_self_node(struct list_head *self_node_db); +void hsr_del_self_node(struct hsr_priv *hsr); void hsr_del_nodes(struct list_head *node_db); -struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[], - u16 seq_out); struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb, bool is_sup); void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, @@ -33,7 +31,7 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, void hsr_prune_nodes(struct timer_list *t); -int hsr_create_self_node(struct list_head *self_node_db, +int hsr_create_self_node(struct hsr_priv *hsr, unsigned char addr_a[ETH_ALEN], unsigned char addr_b[ETH_ALEN]); diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index b9988a662ee1..9e389accbfc7 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -45,6 +45,10 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, case NETDEV_CHANGE: /* Link (carrier) state changes */ hsr_check_carrier_and_operstate(hsr); break; + case NETDEV_CHANGENAME: + if (is_hsr_master(dev)) + hsr_debugfs_rename(dev); + break; case NETDEV_CHANGEADDR: if (port->type == HSR_PT_MASTER) { /* This should not happen since there's no @@ -64,7 +68,7 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, /* Make sure we recognize frames from ourselves in hsr_rcv() */ port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); - res = hsr_create_self_node(&hsr->self_node_db, + res = hsr_create_self_node(hsr, master->dev->dev_addr, port ? port->dev->dev_addr : @@ -123,6 +127,7 @@ static void __exit hsr_exit(void) { unregister_netdevice_notifier(&hsr_nb); hsr_netlink_exit(); + hsr_debugfs_remove_root(); } module_init(hsr_init); diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 96fac696a1e1..d40de84a637f 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -160,8 +160,9 @@ struct hsr_priv { int announce_count; u16 sequence_nr; u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */ - u8 prot_version; /* Indicate if HSRv0 or HSRv1. */ - spinlock_t seqnr_lock; /* locking for sequence_nr */ + u8 prot_version; /* Indicate if HSRv0 or HSRv1. */ + spinlock_t seqnr_lock; /* locking for sequence_nr */ + spinlock_t list_lock; /* locking for node list */ unsigned char sup_multicast_addr[ETH_ALEN]; #ifdef CONFIG_DEBUG_FS struct dentry *node_tbl_root; @@ -184,17 +185,24 @@ static inline u16 hsr_get_skb_sequence_nr(struct sk_buff *skb) } #if IS_ENABLED(CONFIG_DEBUG_FS) -int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev); +void hsr_debugfs_rename(struct net_device *dev); +void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev); void hsr_debugfs_term(struct hsr_priv *priv); +void hsr_debugfs_create_root(void); +void hsr_debugfs_remove_root(void); #else -static inline int hsr_debugfs_init(struct hsr_priv *priv, - struct net_device *hsr_dev) +static inline void void hsr_debugfs_rename(struct net_device *dev) { - return 0; } - +static inline void hsr_debugfs_init(struct hsr_priv *priv, + struct net_device *hsr_dev) +{} static inline void hsr_debugfs_term(struct hsr_priv *priv) {} +static inline void hsr_debugfs_create_root(void) +{} +static inline void hsr_debugfs_remove_root(void) +{} #endif #endif /* __HSR_PRIVATE_H */ diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 8f8337f893ba..8dc0547f01d0 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -476,6 +476,7 @@ int __init hsr_netlink_init(void) if (rc) goto fail_genl_register_family; + hsr_debugfs_create_root(); return 0; fail_genl_register_family: diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index b9df9c09b84e..195469a13371 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2193,6 +2193,12 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, int count = cb->args[2]; t_key key = cb->args[3]; + /* First time here, count and key are both always 0. Count > 0 + * and key == 0 means the dump has wrapped around and we are done. + */ + if (count && !key) + return skb->len; + while ((l = leaf_walk_rcu(&tp, key)) != NULL) { int err; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index e4c6e8b40490..18c0d5bffe12 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1086,7 +1086,7 @@ struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu) if (!dst) goto out; } - dst->ops->update_pmtu(dst, sk, NULL, mtu); + dst->ops->update_pmtu(dst, sk, NULL, mtu, true); dst = __sk_dst_check(sk, 0); if (!dst) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index af154977904c..f11e997e517b 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -911,11 +911,12 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct inet_listen_hashbucket *ilb; + struct hlist_nulls_node *node; num = 0; ilb = &hashinfo->listening_hash[i]; spin_lock(&ilb->lock); - sk_for_each(sk, &ilb->head) { + sk_nulls_for_each(sk, node, &ilb->nulls_head) { struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 83fb00153018..2bbaaf0c7176 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -516,10 +516,11 @@ static int inet_reuseport_add_sock(struct sock *sk, struct inet_listen_hashbucket *ilb) { struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash; + const struct hlist_nulls_node *node; struct sock *sk2; kuid_t uid = sock_i_uid(sk); - sk_for_each_rcu(sk2, &ilb->head) { + sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) { if (sk2 != sk && sk2->sk_family == sk->sk_family && ipv6_only_sock(sk2) == ipv6_only_sock(sk) && @@ -555,9 +556,9 @@ int __inet_hash(struct sock *sk, struct sock *osk) } if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && sk->sk_family == AF_INET6) - hlist_add_tail_rcu(&sk->sk_node, &ilb->head); + __sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head); else - hlist_add_head_rcu(&sk->sk_node, &ilb->head); + __sk_nulls_add_node_rcu(sk, &ilb->nulls_head); inet_hash2(hashinfo, sk); ilb->count++; sock_set_flag(sk, SOCK_RCU_FREE); @@ -606,11 +607,9 @@ void inet_unhash(struct sock *sk) reuseport_detach_sock(sk); if (ilb) { inet_unhash2(hashinfo, sk); - __sk_del_node_init(sk); - ilb->count--; - } else { - __sk_nulls_del_node_init_rcu(sk); + ilb->count--; } + __sk_nulls_del_node_init_rcu(sk); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); unlock: spin_unlock_bh(lock); @@ -750,7 +749,8 @@ void inet_hashinfo_init(struct inet_hashinfo *h) for (i = 0; i < INET_LHTABLE_SIZE; i++) { spin_lock_init(&h->listening_hash[i].lock); - INIT_HLIST_HEAD(&h->listening_hash[i].head); + INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head, + i + LISTENING_NULLS_BASE); h->listening_hash[i].count = 0; } diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 38c02bb62e2c..0fe2a5d3e258 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -505,7 +505,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; if (skb_valid_dst(skb)) - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { if (!skb_is_gso(skb) && diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 9b153c7fcbb4..e90b600c7a25 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -214,7 +214,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, mtu = dst_mtu(dst); if (skb->len > mtu) { - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 214154b47d56..f1f78a742b36 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -384,10 +384,11 @@ next: ; return 1; } -static inline int check_target(struct arpt_entry *e, const char *name) +static int check_target(struct arpt_entry *e, struct net *net, const char *name) { struct xt_entry_target *t = arpt_get_target(e); struct xt_tgchk_param par = { + .net = net, .table = name, .entryinfo = e, .target = t->u.kernel.target, @@ -399,8 +400,9 @@ static inline int check_target(struct arpt_entry *e, const char *name) return xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); } -static inline int -find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, +static int +find_check_entry(struct arpt_entry *e, struct net *net, const char *name, + unsigned int size, struct xt_percpu_counter_alloc_state *alloc_state) { struct xt_entry_target *t; @@ -419,7 +421,7 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, } t->u.kernel.target = target; - ret = check_target(e, name); + ret = check_target(e, net, name); if (ret) goto err; return 0; @@ -494,12 +496,13 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, return 0; } -static inline void cleanup_entry(struct arpt_entry *e) +static void cleanup_entry(struct arpt_entry *e, struct net *net) { struct xt_tgdtor_param par; struct xt_entry_target *t; t = arpt_get_target(e); + par.net = net; par.target = t->u.kernel.target; par.targinfo = t->data; par.family = NFPROTO_ARP; @@ -512,7 +515,9 @@ static inline void cleanup_entry(struct arpt_entry *e) /* Checks and translates the user-supplied table segment (held in * newinfo). */ -static int translate_table(struct xt_table_info *newinfo, void *entry0, +static int translate_table(struct net *net, + struct xt_table_info *newinfo, + void *entry0, const struct arpt_replace *repl) { struct xt_percpu_counter_alloc_state alloc_state = { 0 }; @@ -569,7 +574,7 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { - ret = find_check_entry(iter, repl->name, repl->size, + ret = find_check_entry(iter, net, repl->name, repl->size, &alloc_state); if (ret != 0) break; @@ -580,7 +585,7 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, xt_entry_foreach(iter, entry0, newinfo->size) { if (i-- == 0) break; - cleanup_entry(iter); + cleanup_entry(iter, net); } return ret; } @@ -923,7 +928,7 @@ static int __do_replace(struct net *net, const char *name, /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries; xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) - cleanup_entry(iter); + cleanup_entry(iter, net); xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, @@ -974,7 +979,7 @@ static int do_replace(struct net *net, const void __user *user, goto free_newinfo; } - ret = translate_table(newinfo, loc_cpu_entry, &tmp); + ret = translate_table(net, newinfo, loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; @@ -986,7 +991,7 @@ static int do_replace(struct net *net, const void __user *user, free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) - cleanup_entry(iter); + cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1149,7 +1154,8 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, } } -static int translate_compat_table(struct xt_table_info **pinfo, +static int translate_compat_table(struct net *net, + struct xt_table_info **pinfo, void **pentry0, const struct compat_arpt_replace *compatr) { @@ -1217,7 +1223,7 @@ static int translate_compat_table(struct xt_table_info **pinfo, repl.num_counters = 0; repl.counters = NULL; repl.size = newinfo->size; - ret = translate_table(newinfo, entry1, &repl); + ret = translate_table(net, newinfo, entry1, &repl); if (ret) goto free_newinfo; @@ -1270,7 +1276,7 @@ static int compat_do_replace(struct net *net, void __user *user, goto free_newinfo; } - ret = translate_compat_table(&newinfo, &loc_cpu_entry, &tmp); + ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; @@ -1282,7 +1288,7 @@ static int compat_do_replace(struct net *net, void __user *user, free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) - cleanup_entry(iter); + cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1509,7 +1515,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len return ret; } -static void __arpt_unregister_table(struct xt_table *table) +static void __arpt_unregister_table(struct net *net, struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; @@ -1521,7 +1527,7 @@ static void __arpt_unregister_table(struct xt_table *table) /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) - cleanup_entry(iter); + cleanup_entry(iter, net); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); @@ -1546,7 +1552,7 @@ int arpt_register_table(struct net *net, loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); - ret = translate_table(newinfo, loc_cpu_entry, repl); + ret = translate_table(net, newinfo, loc_cpu_entry, repl); if (ret != 0) goto out_free; @@ -1561,7 +1567,7 @@ int arpt_register_table(struct net *net, ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); if (ret != 0) { - __arpt_unregister_table(new_table); + __arpt_unregister_table(net, new_table); *res = NULL; } @@ -1576,7 +1582,7 @@ void arpt_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops) { nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); - __arpt_unregister_table(table); + __arpt_unregister_table(net, table); } /* The built-in targets: standard (NULL) and error. */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f88c93c38f11..87e979f2b74a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -139,7 +139,8 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu); + struct sk_buff *skb, u32 mtu, + bool confirm_neigh); static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static void ipv4_dst_destroy(struct dst_entry *dst); @@ -1043,7 +1044,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) } static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { struct rtable *rt = (struct rtable *) dst; struct flowi4 fl4; @@ -2687,7 +2689,8 @@ static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) } static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3e50ac24fe41..d885ba868822 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1087,8 +1087,7 @@ do_error: goto out; out_err: /* make sure we wake any epoll edge trigger waiter */ - if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && - err == -EAGAIN)) { + if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) { sk->sk_write_space(sk); tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED); } @@ -1419,8 +1418,7 @@ out_err: sock_zerocopy_put_abort(uarg, true); err = sk_stream_error(sk, flags, err); /* make sure we wake any epoll edge trigger waiter */ - if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && - err == -EAGAIN)) { + if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) { sk->sk_write_space(sk); tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED); } diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 32772d6ded4e..a6545ef0d27b 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -306,7 +306,8 @@ static u32 bbr_tso_segs_goal(struct sock *sk) /* Sort of tcp_tso_autosize() but ignoring * driver provided sk_gso_max_size. */ - bytes = min_t(unsigned long, sk->sk_pacing_rate >> sk->sk_pacing_shift, + bytes = min_t(unsigned long, + sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift), GSO_MAX_SIZE - 1 - MAX_TCP_HEADER); segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index e38705165ac9..8a01428f80c1 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -121,14 +121,14 @@ int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, struct sk_psock *psock; int copied, ret; - if (unlikely(flags & MSG_ERRQUEUE)) - return inet_recv_error(sk, msg, len, addr_len); - if (!skb_queue_empty(&sk->sk_receive_queue)) - return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); - psock = sk_psock_get(sk); if (unlikely(!psock)) return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + if (unlikely(flags & MSG_ERRQUEUE)) + return inet_recv_error(sk, msg, len, addr_len); + if (!skb_queue_empty(&sk->sk_receive_queue) && + sk_psock_queue_empty(psock)) + return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); lock_sock(sk); msg_bytes_ready: copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags); @@ -139,7 +139,7 @@ msg_bytes_ready: timeo = sock_rcvtimeo(sk, nonblock); data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err); if (data) { - if (skb_queue_empty(&sk->sk_receive_queue)) + if (!sk_psock_queue_empty(psock)) goto msg_bytes_ready; release_sock(sk); sk_psock_put(sk, psock); @@ -315,10 +315,7 @@ more_data: */ delta = msg->sg.size; psock->eval = sk_psock_msg_verdict(sk, psock, msg); - if (msg->sg.size < delta) - delta -= msg->sg.size; - else - delta = 0; + delta -= msg->sg.size; } if (msg->cork_bytes && diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 88b987ca9ebb..5347ab2c9c58 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -915,9 +915,10 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq, /* This must be called before lost_out is incremented */ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) { - if (!tp->retransmit_skb_hint || - before(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) + if ((!tp->retransmit_skb_hint && tp->retrans_out >= tp->lost_out) || + (tp->retransmit_skb_hint && + before(TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(tp->retransmit_skb_hint)->seq))) tp->retransmit_skb_hint = skb; } @@ -1727,8 +1728,11 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, } /* Ignore very old stuff early */ - if (!after(sp[used_sacks].end_seq, prior_snd_una)) + if (!after(sp[used_sacks].end_seq, prior_snd_una)) { + if (i == 0) + first_sack_index = -1; continue; + } used_sacks++; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 92282f98dc82..1c7326e04f9b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2147,13 +2147,14 @@ static void *listening_get_next(struct seq_file *seq, void *cur) struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); struct inet_listen_hashbucket *ilb; + struct hlist_nulls_node *node; struct sock *sk = cur; if (!sk) { get_head: ilb = &tcp_hashinfo.listening_hash[st->bucket]; spin_lock(&ilb->lock); - sk = sk_head(&ilb->head); + sk = sk_nulls_head(&ilb->nulls_head); st->offset = 0; goto get_sk; } @@ -2161,9 +2162,9 @@ get_head: ++st->num; ++st->offset; - sk = sk_next(sk); + sk = sk_nulls_next(sk); get_sk: - sk_for_each_from(sk) { + sk_nulls_for_each_from(sk, node) { if (!net_eq(sock_net(sk), net)) continue; if (sk->sk_family == afinfo->family) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b184f03d7437..58c92a7d671c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -72,6 +72,9 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) __skb_unlink(skb, &sk->sk_write_queue); tcp_rbtree_insert(&sk->tcp_rtx_queue, skb); + if (tp->highest_sack == NULL) + tp->highest_sack = skb; + tp->packets_out += tcp_skb_pcount(skb); if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); @@ -1725,7 +1728,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, u32 bytes, segs; bytes = min_t(unsigned long, - sk->sk_pacing_rate >> sk->sk_pacing_shift, + sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift), sk->sk_gso_max_size - 1 - MAX_TCP_HEADER); /* Goal is to send at least one packet per ms, @@ -2260,7 +2263,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, limit = max_t(unsigned long, 2 * skb->truesize, - sk->sk_pacing_rate >> sk->sk_pacing_shift); + sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift)); if (sk->sk_pacing_status == SK_PACING_NONE) limit = min_t(unsigned long, limit, sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes); @@ -2438,6 +2441,14 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (tcp_small_queue_check(sk, skb, 0)) break; + /* Argh, we hit an empty skb(), presumably a thread + * is sleeping in sendmsg()/sk_stream_wait_memory(). + * We do not want to send a pure-ack packet and have + * a strange looking rtx queue with empty packet(s). + */ + if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) + break; + if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) break; @@ -3121,7 +3132,7 @@ void sk_forced_mem_schedule(struct sock *sk, int size) */ void tcp_send_fin(struct sock *sk) { - struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk); + struct sk_buff *skb, *tskb, *tail = tcp_write_queue_tail(sk); struct tcp_sock *tp = tcp_sk(sk); /* Optimization, tack on the FIN if we have one skb in write queue and @@ -3129,6 +3140,7 @@ void tcp_send_fin(struct sock *sk) * Note: in the latter case, FIN packet will be sent after a timeout, * as TCP stack thinks it has already been transmitted. */ + tskb = tail; if (!tskb && tcp_under_memory_pressure(sk)) tskb = skb_rb_last(&sk->tcp_rtx_queue); @@ -3136,7 +3148,7 @@ void tcp_send_fin(struct sock *sk) TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; TCP_SKB_CB(tskb)->end_seq++; tp->write_seq++; - if (tcp_write_queue_empty(sk)) { + if (!tail) { /* This means tskb was already sent. * Pretend we included the FIN on previous transmit. * We need to set tp->snd_nxt to the value it would have diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 12ab5db2b71c..38d3ad141161 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -99,17 +99,19 @@ void tcp_get_available_ulp(char *buf, size_t maxlen) rcu_read_unlock(); } -void tcp_update_ulp(struct sock *sk, struct proto *proto) +void tcp_update_ulp(struct sock *sk, struct proto *proto, + void (*write_space)(struct sock *sk)) { struct inet_connection_sock *icsk = inet_csk(sk); if (!icsk->icsk_ulp_ops) { + sk->sk_write_space = write_space; sk->sk_prot = proto; return; } if (icsk->icsk_ulp_ops->update) - icsk->icsk_ulp_ops->update(sk, proto); + icsk->icsk_ulp_ops->update(sk, proto, write_space); } void tcp_cleanup_ulp(struct sock *sk) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4da5758cc718..93a355b6b092 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1475,7 +1475,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) * queue contains some other skb */ rmem = atomic_add_return(size, &sk->sk_rmem_alloc); - if (rmem > (size + sk->sk_rcvbuf)) + if (rmem > (size + (unsigned int)sk->sk_rcvbuf)) goto uncharge_drop; spin_lock(&list->lock); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 35b84b52b702..9ebd54752e03 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -100,12 +100,13 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, } static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->update_pmtu(path, sk, skb, mtu); + path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh); } static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 98d82305d6de..39d861d00377 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5231,16 +5231,16 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb, return -EINVAL; } + if (!netlink_strict_get_check(skb)) + return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); + ifm = nlmsg_data(nlh); if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) { NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get address request"); return -EINVAL; } - if (!netlink_strict_get_check(skb)) - return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, - ifa_ipv6_policy, extack); - err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, extack); if (err) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index fe9cb8d1adca..e315526fa244 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -146,7 +146,7 @@ struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) if (IS_ERR(dst)) return NULL; - dst->ops->update_pmtu(dst, sk, NULL, mtu); + dst->ops->update_pmtu(dst, sk, NULL, mtu, true); dst = inet6_csk_route_socket(sk, &fl6); return IS_ERR(dst) ? NULL : dst; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 9d0965252ddf..ee968d980746 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1040,7 +1040,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, /* TooBig packet may have updated dst->dev's mtu */ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) - dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu); + dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, NEXTHDR_GRE); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 754a484d35df..2f376dbc37d5 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -640,7 +640,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rel_info > dst_mtu(skb_dst(skb2))) goto out; - skb_dst_update_pmtu(skb2, rel_info); + skb_dst_update_pmtu_no_confirm(skb2, rel_info); } icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); @@ -1132,7 +1132,7 @@ route_lookup: mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ? IPV6_MIN_MTU : IPV4_MIN_MTU); - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { *pmtu = mtu; err = -EMSGSIZE; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 024db17386d2..6f08b760c2a7 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -479,7 +479,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) mtu = dst_mtu(dst); if (skb->len > mtu) { - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IPV6)) { if (mtu < IPV6_MIN_MTU) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b59940416cb5..affb51c11a25 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -95,7 +95,8 @@ static int ip6_pkt_prohibit(struct sk_buff *skb); static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu); + struct sk_buff *skb, u32 mtu, + bool confirm_neigh); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif, @@ -264,7 +265,8 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) } static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { } @@ -2692,7 +2694,8 @@ static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, - const struct ipv6hdr *iph, u32 mtu) + const struct ipv6hdr *iph, u32 mtu, + bool confirm_neigh) { const struct in6_addr *daddr, *saddr; struct rt6_info *rt6 = (struct rt6_info *)dst; @@ -2710,7 +2713,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, daddr = NULL; saddr = NULL; } - dst_confirm_neigh(dst, daddr); + + if (confirm_neigh) + dst_confirm_neigh(dst, daddr); + mtu = max_t(u32, mtu, IPV6_MIN_MTU); if (mtu >= dst_mtu(dst)) return; @@ -2764,9 +2770,11 @@ out_unlock: } static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { - __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu); + __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu, + confirm_neigh); } void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, @@ -2785,7 +2793,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) - __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu)); + __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_update_pmtu); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index b2ccbc473127..98954830c40b 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -944,7 +944,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (tunnel->parms.iph.daddr) - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->len > mtu && !skb_is_gso(skb)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 699e0730ce8e..af7a4b8b1e9c 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -98,12 +98,13 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, } static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->update_pmtu(path, sk, skb, mtu); + path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh); } static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk, diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 204a8351efff..c29170e767a8 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -32,7 +32,7 @@ static int llc_stat_ev_rx_null_dsap_xid_c(struct sk_buff *skb) return LLC_PDU_IS_CMD(pdu) && /* command PDU */ LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */ LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_XID && - !pdu->dsap ? 0 : 1; /* NULL DSAP value */ + !pdu->dsap; /* NULL DSAP value */ } static int llc_stat_ev_rx_null_dsap_test_c(struct sk_buff *skb) @@ -42,7 +42,7 @@ static int llc_stat_ev_rx_null_dsap_test_c(struct sk_buff *skb) return LLC_PDU_IS_CMD(pdu) && /* command PDU */ LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */ LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_TEST && - !pdu->dsap ? 0 : 1; /* NULL DSAP */ + !pdu->dsap; /* NULL DSAP */ } static int llc_station_ac_send_xid_r(struct sk_buff *skb) diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c index 63cb0028b02d..9fc2968856c0 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -442,7 +442,7 @@ u32 ieee80211_calc_rx_airtime(struct ieee80211_hw *hw, return 0; sband = hw->wiphy->bands[status->band]; - if (!sband || status->rate_idx > sband->n_bitrates) + if (!sband || status->rate_idx >= sband->n_bitrates) return 0; rate = &sband->bitrates[status->rate_idx]; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4fb7f1f12109..000c742d0527 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2954,6 +2954,28 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy, return err; } +static void ieee80211_end_cac(struct wiphy *wiphy, + struct net_device *dev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + + mutex_lock(&local->mtx); + list_for_each_entry(sdata, &local->interfaces, list) { + /* it might be waiting for the local->mtx, but then + * by the time it gets it, sdata->wdev.cac_started + * will no longer be true + */ + cancel_delayed_work(&sdata->dfs_cac_timer_work); + + if (sdata->wdev.cac_started) { + ieee80211_vif_release_channel(sdata); + sdata->wdev.cac_started = false; + } + } + mutex_unlock(&local->mtx); +} + static struct cfg80211_beacon_data * cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon) { @@ -4023,6 +4045,7 @@ const struct cfg80211_ops mac80211_config_ops = { #endif .get_channel = ieee80211_cfg_get_channel, .start_radar_detection = ieee80211_start_radar_detection, + .end_cac = ieee80211_end_cac, .channel_switch = ieee80211_channel_switch, .set_qos_map = ieee80211_set_qos_map, .set_ap_chanwidth = ieee80211_set_ap_chanwidth, diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index b3c9001d1f43..c80b1e163ea4 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -201,8 +201,6 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf, char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf; u64 rx_airtime = 0, tx_airtime = 0; s64 deficit[IEEE80211_NUM_ACS]; - u32 q_depth[IEEE80211_NUM_ACS]; - u32 q_limit_l[IEEE80211_NUM_ACS], q_limit_h[IEEE80211_NUM_ACS]; ssize_t rv; int ac; @@ -214,6 +212,56 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf, rx_airtime += sta->airtime[ac].rx_airtime; tx_airtime += sta->airtime[ac].tx_airtime; deficit[ac] = sta->airtime[ac].deficit; + spin_unlock_bh(&local->active_txq_lock[ac]); + } + + p += scnprintf(p, bufsz + buf - p, + "RX: %llu us\nTX: %llu us\nWeight: %u\n" + "Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n", + rx_airtime, tx_airtime, sta->airtime_weight, + deficit[0], deficit[1], deficit[2], deficit[3]); + + rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); + kfree(buf); + return rv; +} + +static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct sta_info *sta = file->private_data; + struct ieee80211_local *local = sta->sdata->local; + int ac; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + spin_lock_bh(&local->active_txq_lock[ac]); + sta->airtime[ac].rx_airtime = 0; + sta->airtime[ac].tx_airtime = 0; + sta->airtime[ac].deficit = sta->airtime_weight; + spin_unlock_bh(&local->active_txq_lock[ac]); + } + + return count; +} +STA_OPS_RW(airtime); + +static ssize_t sta_aql_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct sta_info *sta = file->private_data; + struct ieee80211_local *local = sta->sdata->local; + size_t bufsz = 400; + char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf; + u32 q_depth[IEEE80211_NUM_ACS]; + u32 q_limit_l[IEEE80211_NUM_ACS], q_limit_h[IEEE80211_NUM_ACS]; + ssize_t rv; + int ac; + + if (!buf) + return -ENOMEM; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + spin_lock_bh(&local->active_txq_lock[ac]); q_limit_l[ac] = sta->airtime[ac].aql_limit_low; q_limit_h[ac] = sta->airtime[ac].aql_limit_high; spin_unlock_bh(&local->active_txq_lock[ac]); @@ -221,12 +269,8 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf, } p += scnprintf(p, bufsz + buf - p, - "RX: %llu us\nTX: %llu us\nWeight: %u\n" - "Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n" "Q depth: VO: %u us VI: %u us BE: %u us BK: %u us\n" "Q limit[low/high]: VO: %u/%u VI: %u/%u BE: %u/%u BK: %u/%u\n", - rx_airtime, tx_airtime, sta->airtime_weight, - deficit[0], deficit[1], deficit[2], deficit[3], q_depth[0], q_depth[1], q_depth[2], q_depth[3], q_limit_l[0], q_limit_h[0], q_limit_l[1], q_limit_h[1], q_limit_l[2], q_limit_h[2], q_limit_l[3], q_limit_h[3]), @@ -236,11 +280,10 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf, return rv; } -static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf, +static ssize_t sta_aql_write(struct file *file, const char __user *userbuf, size_t count, loff_t *ppos) { struct sta_info *sta = file->private_data; - struct ieee80211_local *local = sta->sdata->local; u32 ac, q_limit_l, q_limit_h; char _buf[100] = {}, *buf = _buf; @@ -251,7 +294,7 @@ static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf, return -EFAULT; buf[sizeof(_buf) - 1] = '\0'; - if (sscanf(buf, "queue limit %u %u %u", &ac, &q_limit_l, &q_limit_h) + if (sscanf(buf, "limit %u %u %u", &ac, &q_limit_l, &q_limit_h) != 3) return -EINVAL; @@ -261,17 +304,10 @@ static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf, sta->airtime[ac].aql_limit_low = q_limit_l; sta->airtime[ac].aql_limit_high = q_limit_h; - for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { - spin_lock_bh(&local->active_txq_lock[ac]); - sta->airtime[ac].rx_airtime = 0; - sta->airtime[ac].tx_airtime = 0; - sta->airtime[ac].deficit = sta->airtime_weight; - spin_unlock_bh(&local->active_txq_lock[ac]); - } - return count; } -STA_OPS_RW(airtime); +STA_OPS_RW(aql); + static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) @@ -996,6 +1032,10 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) DEBUGFS_ADD(airtime); + if (wiphy_ext_feature_isset(local->hw.wiphy, + NL80211_EXT_FEATURE_AQL)) + DEBUGFS_ADD(aql); + debugfs_create_xul("driver_buffered_tids", 0400, sta->debugfs_dir, &sta->driver_buffered_tids); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 6cca0853f183..4c2b5ba3ac09 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -672,9 +672,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H; } - local->airtime_flags = AIRTIME_USE_TX | - AIRTIME_USE_RX | - AIRTIME_USE_AQL; + local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX; local->aql_threshold = IEEE80211_AQL_THRESHOLD; atomic_set(&local->aql_total_pending_airtime, 0); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 68af62306385..d69983370381 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -328,6 +328,9 @@ u32 airtime_link_metric_get(struct ieee80211_local *local, unsigned long fail_avg = ewma_mesh_fail_avg_read(&sta->mesh->fail_avg); + if (sta->mesh->plink_state != NL80211_PLINK_ESTAB) + return MAX_METRIC; + /* Try to get rate based on HW/SW RC algorithm. * Rate is returned in units of Kbps, correct this * to comply with airtime calculation units diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 8eafd81e97b4..0f5f40678885 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1916,6 +1916,9 @@ void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local, { int tx_pending; + if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) + return; + if (!tx_completed) { if (sta) atomic_add(tx_airtime, diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index ad5d8a4ae56d..c00e28585f9d 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -127,7 +127,6 @@ enum ieee80211_agg_stop_reason { /* Debugfs flags to enable/disable use of RX/TX airtime in scheduler */ #define AIRTIME_USE_TX BIT(0) #define AIRTIME_USE_RX BIT(1) -#define AIRTIME_USE_AQL BIT(2) struct airtime_info { u64 rx_airtime; diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 727dc9f3f3b3..e7f57bb18f6e 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -263,9 +263,21 @@ int ieee80211_tkip_decrypt_data(struct arc4_ctx *ctx, if ((keyid >> 6) != key->conf.keyidx) return TKIP_DECRYPT_INVALID_KEYIDX; - if (rx_ctx->ctx.state != TKIP_STATE_NOT_INIT && - (iv32 < rx_ctx->iv32 || - (iv32 == rx_ctx->iv32 && iv16 <= rx_ctx->iv16))) + /* Reject replays if the received TSC is smaller than or equal to the + * last received value in a valid message, but with an exception for + * the case where a new key has been set and no valid frame using that + * key has yet received and the local RSC was initialized to 0. This + * exception allows the very first frame sent by the transmitter to be + * accepted even if that transmitter were to use TSC 0 (IEEE 802.11 + * described TSC to be initialized to 1 whenever a new key is taken into + * use). + */ + if (iv32 < rx_ctx->iv32 || + (iv32 == rx_ctx->iv32 && + (iv16 < rx_ctx->iv16 || + (iv16 == rx_ctx->iv16 && + (rx_ctx->iv32 || rx_ctx->iv16 || + rx_ctx->ctx.state != TKIP_STATE_NOT_INIT))))) return TKIP_DECRYPT_REPLAY; if (only_iv) { diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index b696b9136f4c..a8a7306a1f56 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2256,6 +2256,15 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, payload[7]); } + /* + * Initialize skb->priority for QoS frames. This is put in the TID field + * of the frame before passing it to the driver. + */ + if (ieee80211_is_data_qos(hdr->frame_control)) { + u8 *p = ieee80211_get_qos_ctl(hdr); + skb->priority = *p & IEEE80211_QOS_CTL_TAG1D_MASK; + } + memset(info, 0, sizeof(*info)); info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS | @@ -3668,7 +3677,7 @@ begin: IEEE80211_SKB_CB(skb)->control.vif = vif; - if (local->airtime_flags & AIRTIME_USE_AQL) { + if (wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) { u32 airtime; airtime = ieee80211_calc_expected_tx_airtime(hw, vif, txq->sta, @@ -3790,7 +3799,7 @@ bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw, struct sta_info *sta; struct ieee80211_local *local = hw_to_local(hw); - if (!(local->airtime_flags & AIRTIME_USE_AQL)) + if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) return true; if (!txq->sta) diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 1abd6f0dc227..077a2cb65fcb 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -60,9 +60,9 @@ mtype_destroy(struct ip_set *set) if (SET_WITH_TIMEOUT(set)) del_timer_sync(&map->gc); - ip_set_free(map->members); if (set->dsize && set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); + ip_set_free(map->members); ip_set_free(map); set->data = NULL; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 169e0a04f814..cf895bc80871 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1848,6 +1848,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; int ret = 0; + u32 lineno; if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || @@ -1864,7 +1865,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, return -IPSET_ERR_PROTOCOL; rcu_read_lock_bh(); - ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0); + ret = set->variant->uadt(set, tb, IPSET_TEST, &lineno, 0, 0); rcu_read_unlock_bh(); /* Userspace can't trigger element to be re-added */ if (ret == -EAGAIN) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index b1e300f8881b..b00866d777fe 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -208,7 +208,7 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu) struct rtable *ort = skb_rtable(skb); if (!skb->dev && sk && sk_fullsock(sk)) - ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); + ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu, true); } static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0af1898af2b8..f475fec84536 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -895,9 +895,10 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo, } /* Resolve race on insertion if this protocol allows this. */ -static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - struct nf_conntrack_tuple_hash *h) +static __cold noinline int +nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + struct nf_conntrack_tuple_hash *h) { /* This is the conntrack entry already in hashes that won race. */ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d8d33ef52ce0..6a1c8f1f6171 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3626,6 +3626,9 @@ static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list) list_for_each_entry(net, net_exit_list, exit_list) ctnetlink_net_exit(net); + + /* wait for other cpus until they are done with ctnl_notifiers */ + synchronize_rcu(); } static struct pernet_operations ctnetlink_net_ops = { diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index b6b14db3955b..b3f4a334f9d7 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -677,6 +677,9 @@ static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[], unsigned int *timeouts = data; int i; + if (!timeouts) + timeouts = dn->dccp_timeout; + /* set default DCCP timeouts. */ for (i=0; i<CT_DCCP_MAX; i++) timeouts[i] = dn->dccp_timeout[i]; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index fce3d93f1541..0399ae8f1188 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -594,6 +594,9 @@ static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[], struct nf_sctp_net *sn = nf_sctp_pernet(net); int i; + if (!timeouts) + timeouts = sn->timeouts; + /* set default SCTP timeouts. */ for (i=0; i<SCTP_CONNTRACK_MAX; i++) timeouts[i] = sn->timeouts[i]; diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 9889d52eda82..e33a73cb1f42 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -134,11 +134,6 @@ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp) #define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ) #define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ) -static inline __s32 nf_flow_timeout_delta(unsigned int timeout) -{ - return (__s32)(timeout - (u32)jiffies); -} - static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) { const struct nf_conntrack_l4proto *l4proto; @@ -232,7 +227,7 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) { int err; - flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; + flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT; err = rhashtable_insert_fast(&flow_table->rhashtable, &flow->tuplehash[0].node, diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index b9e7dd6e60ce..7ea2ddc2aa93 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -280,7 +280,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0) return NF_DROP; - flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; + flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT; iph = ip_hdr(skb); ip_decrease_ttl(iph); skb->tstamp = 0; @@ -509,7 +509,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, if (nf_flow_nat_ipv6(flow, skb, dir) < 0) return NF_DROP; - flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; + flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT; ip6h = ipv6_hdr(skb); ip6h->hop_limit--; skb->tstamp = 0; diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index c54c9a6cc981..d06969af1085 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -28,6 +28,7 @@ struct nf_flow_key { struct flow_dissector_key_basic basic; union { struct flow_dissector_key_ipv4_addrs ipv4; + struct flow_dissector_key_ipv6_addrs ipv6; }; struct flow_dissector_key_tcp tcp; struct flow_dissector_key_ports tp; @@ -57,6 +58,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match, NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp); @@ -69,15 +71,24 @@ static int nf_flow_rule_match(struct nf_flow_match *match, key->ipv4.dst = tuple->dst_v4.s_addr; mask->ipv4.dst = 0xffffffff; break; + case AF_INET6: + key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + key->basic.n_proto = htons(ETH_P_IPV6); + key->ipv6.src = tuple->src_v6; + memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src)); + key->ipv6.dst = tuple->dst_v6; + memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst)); + break; default: return -EOPNOTSUPP; } + match->dissector.used_keys |= BIT(key->control.addr_type); mask->basic.n_proto = 0xffff; switch (tuple->l4proto) { case IPPROTO_TCP: key->tcp.flags = 0; - mask->tcp.flags = TCP_FLAG_RST | TCP_FLAG_FIN; + mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16); match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP); break; case IPPROTO_UDP: @@ -96,14 +107,13 @@ static int nf_flow_rule_match(struct nf_flow_match *match, match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | - BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | BIT(FLOW_DISSECTOR_KEY_PORTS); return 0; } static void flow_offload_mangle(struct flow_action_entry *entry, - enum flow_action_mangle_base htype, - u32 offset, u8 *value, u8 *mask) + enum flow_action_mangle_base htype, u32 offset, + const __be32 *value, const __be32 *mask) { entry->id = FLOW_ACTION_MANGLE; entry->mangle.htype = htype; @@ -140,12 +150,12 @@ static int flow_offload_eth_src(struct net *net, memcpy(&val16, dev->dev_addr, 2); val = val16 << 16; flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4, - (u8 *)&val, (u8 *)&mask); + &val, &mask); mask = ~0xffffffff; memcpy(&val, dev->dev_addr + 2, 4); flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8, - (u8 *)&val, (u8 *)&mask); + &val, &mask); dev_put(dev); return 0; @@ -156,27 +166,41 @@ static int flow_offload_eth_dst(struct net *net, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { - const struct flow_offload_tuple *tuple = &flow->tuplehash[dir].tuple; struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule); struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule); + const void *daddr = &flow->tuplehash[!dir].tuple.src_v4; + const struct dst_entry *dst_cache; + unsigned char ha[ETH_ALEN]; struct neighbour *n; u32 mask, val; + u8 nud_state; u16 val16; - n = dst_neigh_lookup(tuple->dst_cache, &tuple->dst_v4); + dst_cache = flow->tuplehash[dir].tuple.dst_cache; + n = dst_neigh_lookup(dst_cache, daddr); if (!n) return -ENOENT; + read_lock_bh(&n->lock); + nud_state = n->nud_state; + ether_addr_copy(ha, n->ha); + read_unlock_bh(&n->lock); + + if (!(nud_state & NUD_VALID)) { + neigh_release(n); + return -ENOENT; + } + mask = ~0xffffffff; - memcpy(&val, n->ha, 4); + memcpy(&val, ha, 4); flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0, - (u8 *)&val, (u8 *)&mask); + &val, &mask); mask = ~0x0000ffff; - memcpy(&val16, n->ha + 4, 2); + memcpy(&val16, ha + 4, 2); val = val16; flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4, - (u8 *)&val, (u8 *)&mask); + &val, &mask); neigh_release(n); return 0; @@ -206,7 +230,7 @@ static void flow_offload_ipv4_snat(struct net *net, } flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset, - (u8 *)&addr, (u8 *)&mask); + &addr, &mask); } static void flow_offload_ipv4_dnat(struct net *net, @@ -233,12 +257,12 @@ static void flow_offload_ipv4_dnat(struct net *net, } flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset, - (u8 *)&addr, (u8 *)&mask); + &addr, &mask); } static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, unsigned int offset, - u8 *addr, u8 *mask) + const __be32 *addr, const __be32 *mask) { struct flow_action_entry *entry; int i; @@ -246,8 +270,7 @@ static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32)) { entry = flow_action_entry_next(flow_rule); flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6, - offset + i, - &addr[i], mask); + offset + i, &addr[i], mask); } } @@ -257,23 +280,23 @@ static void flow_offload_ipv6_snat(struct net *net, struct nf_flow_rule *flow_rule) { u32 mask = ~htonl(0xffffffff); - const u8 *addr; + const __be32 *addr; u32 offset; switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: - addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr; + addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32; offset = offsetof(struct ipv6hdr, saddr); break; case FLOW_OFFLOAD_DIR_REPLY: - addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr; + addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32; offset = offsetof(struct ipv6hdr, daddr); break; default: return; } - flow_offload_ipv6_mangle(flow_rule, offset, (u8 *)addr, (u8 *)&mask); + flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); } static void flow_offload_ipv6_dnat(struct net *net, @@ -282,23 +305,23 @@ static void flow_offload_ipv6_dnat(struct net *net, struct nf_flow_rule *flow_rule) { u32 mask = ~htonl(0xffffffff); - const u8 *addr; + const __be32 *addr; u32 offset; switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: - addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr; + addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32; offset = offsetof(struct ipv6hdr, daddr); break; case FLOW_OFFLOAD_DIR_REPLY: - addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr; + addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32; offset = offsetof(struct ipv6hdr, saddr); break; default: return; } - flow_offload_ipv6_mangle(flow_rule, offset, (u8 *)addr, (u8 *)&mask); + flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); } static int flow_offload_l4proto(const struct flow_offload *flow) @@ -326,25 +349,28 @@ static void flow_offload_port_snat(struct net *net, struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); - u32 mask = ~htonl(0xffff0000); - __be16 port; + u32 mask, port; u32 offset; switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: - port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port; + port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port); offset = 0; /* offsetof(struct tcphdr, source); */ + port = htonl(port << 16); + mask = ~htonl(0xffff0000); break; case FLOW_OFFLOAD_DIR_REPLY: - port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port; + port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port); offset = 0; /* offsetof(struct tcphdr, dest); */ + port = htonl(port); + mask = ~htonl(0xffff); break; default: - break; + return; } flow_offload_mangle(entry, flow_offload_l4proto(flow), offset, - (u8 *)&port, (u8 *)&mask); + &port, &mask); } static void flow_offload_port_dnat(struct net *net, @@ -353,25 +379,28 @@ static void flow_offload_port_dnat(struct net *net, struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); - u32 mask = ~htonl(0xffff); - __be16 port; + u32 mask, port; u32 offset; switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: - port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port; - offset = 0; /* offsetof(struct tcphdr, source); */ + port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port); + offset = 0; /* offsetof(struct tcphdr, dest); */ + port = htonl(port); + mask = ~htonl(0xffff); break; case FLOW_OFFLOAD_DIR_REPLY: - port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port; - offset = 0; /* offsetof(struct tcphdr, dest); */ + port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port); + offset = 0; /* offsetof(struct tcphdr, source); */ + port = htonl(port << 16); + mask = ~htonl(0xffff0000); break; default: - break; + return; } flow_offload_mangle(entry, flow_offload_l4proto(flow), offset, - (u8 *)&port, (u8 *)&mask); + &port, &mask); } static void flow_offload_ipv4_checksum(struct net *net, @@ -574,7 +603,7 @@ static int flow_offload_tuple_add(struct flow_offload_work *offload, cls_flow.rule = flow_rule->rule; list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list) { - err = block_cb->cb(TC_SETUP_FT, &cls_flow, + err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv); if (err < 0) continue; @@ -599,7 +628,7 @@ static void flow_offload_tuple_del(struct flow_offload_work *offload, &offload->flow->tuplehash[dir].tuple, &extack); list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list) - block_cb->cb(TC_SETUP_FT, &cls_flow, block_cb->cb_priv); + block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv); offload->flow->flags |= FLOW_OFFLOAD_HW_DEAD; } @@ -656,7 +685,7 @@ static void flow_offload_tuple_stats(struct flow_offload_work *offload, &offload->flow->tuplehash[dir].tuple, &extack); list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list) - block_cb->cb(TC_SETUP_FT, &cls_flow, block_cb->cb_priv); + block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv); memcpy(stats, &cls_flow.stats, sizeof(*stats)); } @@ -752,9 +781,9 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable, struct flow_offload *flow) { struct flow_offload_work *offload; - s64 delta; + __s32 delta; - delta = flow->timeout - jiffies; + delta = nf_flow_timeout_delta(flow->timeout); if ((delta >= (9 * NF_FLOW_TIMEOUT) / 10) || flow->flags & FLOW_OFFLOAD_HW_DYING) return; @@ -822,7 +851,7 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, bo.extack = &extack; INIT_LIST_HEAD(&bo.cb_list); - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, &bo); if (err < 0) return err; diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 0a59c14b5177..64eedc17037a 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -233,6 +233,19 @@ icmp_manip_pkt(struct sk_buff *skb, return false; hdr = (struct icmphdr *)(skb->data + hdroff); + switch (hdr->type) { + case ICMP_ECHO: + case ICMP_ECHOREPLY: + case ICMP_TIMESTAMP: + case ICMP_TIMESTAMPREPLY: + case ICMP_INFO_REQUEST: + case ICMP_INFO_REPLY: + case ICMP_ADDRESS: + case ICMP_ADDRESSREPLY: + break; + default: + return true; + } inet_proto_csum_replace2(&hdr->checksum, skb, hdr->un.echo.id, tuple->src.u.icmp.id, false); hdr->un.echo.id = tuple->src.u.icmp.id; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index a2b58de82600..f8f52ff99cfb 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -189,7 +189,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, goto err; } - if (!skb_dst_force(skb) && state->hook != NF_INET_PRE_ROUTING) { + if (skb_dst(skb) && !skb_dst_force(skb)) { status = -ENETDOWN; goto err; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c26a5663795e..65f51a2e9c2a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -22,6 +22,8 @@ #include <net/net_namespace.h> #include <net/sock.h> +#define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-")) + static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); @@ -564,33 +566,34 @@ __nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family) } /* - * Loading a module requires dropping mutex that guards the - * transaction. - * We first need to abort any pending transactions as once - * mutex is unlocked a different client could start a new - * transaction. It must not see any 'future generation' - * changes * as these changes will never happen. + * Loading a module requires dropping mutex that guards the transaction. + * A different client might race to start a new transaction meanwhile. Zap the + * list of pending transaction and then restore it once the mutex is grabbed + * again. Users of this function return EAGAIN which implicitly triggers the + * transaction abort path to clean up the list of pending transactions. */ #ifdef CONFIG_MODULES -static int __nf_tables_abort(struct net *net); - static void nft_request_module(struct net *net, const char *fmt, ...) { char module_name[MODULE_NAME_LEN]; + LIST_HEAD(commit_list); va_list args; int ret; - __nf_tables_abort(net); + list_splice_init(&net->nft.commit_list, &commit_list); va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); va_end(args); - if (WARN(ret >= MODULE_NAME_LEN, "truncated: '%s' (len %d)", module_name, ret)) + if (ret >= MODULE_NAME_LEN) return; mutex_unlock(&net->nft.commit_mutex); request_module("%s", module_name); mutex_lock(&net->nft.commit_mutex); + + WARN_ON_ONCE(!list_empty(&net->nft.commit_list)); + list_splice(&commit_list, &net->nft.commit_list); } #endif @@ -1045,12 +1048,18 @@ static int nft_flush_table(struct nft_ctx *ctx) } list_for_each_entry_safe(flowtable, nft, &ctx->table->flowtables, list) { + if (!nft_is_active_next(ctx->net, flowtable)) + continue; + err = nft_delflowtable(ctx, flowtable); if (err < 0) goto out; } list_for_each_entry_safe(obj, ne, &ctx->table->objects, list) { + if (!nft_is_active_next(ctx->net, obj)) + continue; + err = nft_delobj(ctx, obj); if (err < 0) goto out; @@ -1241,7 +1250,8 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED }, [NFTA_CHAIN_POLICY] = { .type = NLA_U32 }, - [NFTA_CHAIN_TYPE] = { .type = NLA_STRING }, + [NFTA_CHAIN_TYPE] = { .type = NLA_STRING, + .len = NFT_MODULE_AUTOLOAD_LIMIT }, [NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED }, [NFTA_CHAIN_FLAGS] = { .type = NLA_U32 }, }; @@ -1676,6 +1686,7 @@ static int nf_tables_parse_netdev_hooks(struct net *net, goto err_hook; } if (nft_hook_list_find(hook_list, hook)) { + kfree(hook); err = -EEXIST; goto err_hook; } @@ -2355,7 +2366,8 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net, } static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = { - [NFTA_EXPR_NAME] = { .type = NLA_STRING }, + [NFTA_EXPR_NAME] = { .type = NLA_STRING, + .len = NFT_MODULE_AUTOLOAD_LIMIT }, [NFTA_EXPR_DATA] = { .type = NLA_NESTED }, }; @@ -4198,7 +4210,8 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = { [NFTA_SET_ELEM_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN }, [NFTA_SET_ELEM_EXPR] = { .type = NLA_NESTED }, - [NFTA_SET_ELEM_OBJREF] = { .type = NLA_STRING }, + [NFTA_SET_ELEM_OBJREF] = { .type = NLA_STRING, + .len = NFT_OBJ_MAXNAMELEN - 1 }, }; static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = { @@ -4519,8 +4532,10 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, return err; err = -EINVAL; - if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) + if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) { + nft_data_release(&elem.key.val, desc.type); return err; + } priv = set->ops->get(ctx->net, set, &elem, flags); if (IS_ERR(priv)) @@ -4756,14 +4771,20 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_DATA] == NULL && !(flags & NFT_SET_ELEM_INTERVAL_END)) return -EINVAL; - if (nla[NFTA_SET_ELEM_DATA] != NULL && - flags & NFT_SET_ELEM_INTERVAL_END) - return -EINVAL; } else { if (nla[NFTA_SET_ELEM_DATA] != NULL) return -EINVAL; } + if ((flags & NFT_SET_ELEM_INTERVAL_END) && + (nla[NFTA_SET_ELEM_DATA] || + nla[NFTA_SET_ELEM_OBJREF] || + nla[NFTA_SET_ELEM_TIMEOUT] || + nla[NFTA_SET_ELEM_EXPIRATION] || + nla[NFTA_SET_ELEM_USERDATA] || + nla[NFTA_SET_ELEM_EXPR])) + return -EINVAL; + timeout = 0; if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) @@ -5476,7 +5497,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - type = nft_obj_type_get(net, objtype); + type = __nft_obj_type_get(objtype); nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj); @@ -5976,6 +5997,7 @@ nft_flowtable_type_get(struct net *net, u8 family) return ERR_PTR(-ENOENT); } +/* Only called from error and netdev event paths. */ static void nft_unregister_flowtable_hook(struct net *net, struct nft_flowtable *flowtable, struct nft_hook *hook) @@ -5991,7 +6013,7 @@ static void nft_unregister_flowtable_net_hooks(struct net *net, struct nft_hook *hook; list_for_each_entry(hook, &flowtable->hook_list, list) - nft_unregister_flowtable_hook(net, flowtable, hook); + nf_unregister_net_hook(net, &hook->ops); } static int nft_register_flowtable_net_hooks(struct net *net, @@ -6440,12 +6462,14 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) { struct nft_hook *hook, *next; + flowtable->data.type->free(&flowtable->data); list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { + flowtable->data.type->setup(&flowtable->data, hook->ops.dev, + FLOW_BLOCK_UNBIND); list_del_rcu(&hook->list); kfree(hook); } kfree(flowtable->name); - flowtable->data.type->free(&flowtable->data); module_put(flowtable->data.type->owner); kfree(flowtable); } @@ -6489,6 +6513,7 @@ static void nft_flowtable_event(unsigned long event, struct net_device *dev, if (hook->ops.dev != dev) continue; + /* flow_offload_netdev_event() cleans up entries for us. */ nft_unregister_flowtable_hook(dev_net(dev), flowtable, hook); list_del_rcu(&hook->list); kfree_rcu(hook, rcu); diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 431f3b803bfb..a9ea29afb09f 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -44,6 +44,9 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net, expr = nft_expr_next(expr); } + if (num_actions == 0) + return ERR_PTR(-EOPNOTSUPP); + flow = nft_flow_rule_alloc(num_actions); if (!flow) return ERR_PTR(-ENOMEM); @@ -577,6 +580,9 @@ static int nft_offload_netdev_event(struct notifier_block *this, struct net *net = dev_net(dev); struct nft_chain *chain; + if (event != NETDEV_UNREGISTER) + return NOTIFY_DONE; + mutex_lock(&net->nft.commit_mutex); chain = __nft_offload_get_chain(dev); if (chain) diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 02afa752dd2e..10e9d50e4e19 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -80,7 +80,7 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, tb[NFTA_BITWISE_MASK]); if (err < 0) return err; - if (d1.len != priv->len) { + if (d1.type != NFT_DATA_VALUE || d1.len != priv->len) { err = -EINVAL; goto err1; } @@ -89,7 +89,7 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, tb[NFTA_BITWISE_XOR]); if (err < 0) goto err1; - if (d2.len != priv->len) { + if (d2.type != NFT_DATA_VALUE || d2.len != priv->len) { err = -EINVAL; goto err2; } diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index b8092069f868..8a28c127effc 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -81,6 +81,12 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (err < 0) return err; + if (desc.type != NFT_DATA_VALUE) { + err = -EINVAL; + nft_data_release(&priv->data, desc.type); + return err; + } + priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]); err = nft_validate_register_load(priv->sreg, desc.len); if (err < 0) diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index dd82ff2ee19f..b70b48996801 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -200,9 +200,6 @@ static void nft_flow_offload_activate(const struct nft_ctx *ctx, static void nft_flow_offload_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { - struct nft_flow_offload *priv = nft_expr_priv(expr); - - priv->flowtable->use--; nf_ct_netns_put(ctx->net, ctx->family); } diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index 4701fa8a45e7..89efcc5a533d 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -66,11 +66,21 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr if (err < 0) return err; + if (desc_from.type != NFT_DATA_VALUE) { + err = -EINVAL; + goto err1; + } + err = nft_data_init(NULL, &priv->data_to, sizeof(priv->data_to), &desc_to, tb[NFTA_RANGE_TO_DATA]); if (err < 0) goto err1; + if (desc_to.type != NFT_DATA_VALUE) { + err = -EINVAL; + goto err2; + } + if (desc_from.len != desc_to.len) { err = -EINVAL; goto err2; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 57123259452f..a9f804f7a04a 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -74,8 +74,13 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set parent = rcu_dereference_raw(parent->rb_left); continue; } - if (nft_rbtree_interval_end(rbe)) - goto out; + if (nft_rbtree_interval_end(rbe)) { + if (nft_set_is_anonymous(set)) + return false; + parent = rcu_dereference_raw(parent->rb_left); + interval = NULL; + continue; + } *ext = &rbe->ext; return true; @@ -88,7 +93,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set *ext = &interval->ext; return true; } -out: + return false; } @@ -139,8 +144,10 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set, if (flags & NFT_SET_ELEM_INTERVAL_END) interval = rbe; } else { - if (!nft_set_elem_active(&rbe->ext, genmask)) + if (!nft_set_elem_active(&rbe->ext, genmask)) { parent = rcu_dereference_raw(parent->rb_left); + continue; + } if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) || (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) == @@ -148,7 +155,11 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set, *elem = rbe; return true; } - return false; + + if (nft_rbtree_interval_end(rbe)) + interval = NULL; + + parent = rcu_dereference_raw(parent->rb_left); } } diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index 4c33dfc9dab5..d67f83a0958d 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -50,7 +50,7 @@ static void nft_tproxy_eval_v4(const struct nft_expr *expr, taddr = nf_tproxy_laddr4(skb, taddr, iph->daddr); if (priv->sreg_port) - tport = regs->data[priv->sreg_port]; + tport = nft_reg_load16(®s->data[priv->sreg_port]); if (!tport) tport = hp->dest; @@ -117,7 +117,7 @@ static void nft_tproxy_eval_v6(const struct nft_expr *expr, taddr = *nf_tproxy_laddr6(skb, &taddr, &iph->daddr); if (priv->sreg_port) - tport = regs->data[priv->sreg_port]; + tport = nft_reg_load16(®s->data[priv->sreg_port]); if (!tport) tport = hp->dest; diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 3d4c2ae605a8..5284fcf16be7 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -76,7 +76,7 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx, struct nft_tunnel *priv = nft_expr_priv(expr); u32 len; - if (!tb[NFTA_TUNNEL_KEY] && + if (!tb[NFTA_TUNNEL_KEY] || !tb[NFTA_TUNNEL_DREG]) return -EINVAL; @@ -266,6 +266,9 @@ static int nft_tunnel_obj_erspan_init(const struct nlattr *attr, if (err < 0) return err; + if (!tb[NFTA_TUNNEL_KEY_ERSPAN_VERSION]) + return -EINVAL; + version = ntohl(nla_get_be32(tb[NFTA_TUNNEL_KEY_ERSPAN_VERSION])); switch (version) { case ERSPAN_VERSION: diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c index 78fe622eba65..11b554ce07ff 100644 --- a/net/nfc/nci/uart.c +++ b/net/nfc/nci/uart.c @@ -346,7 +346,7 @@ static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data, nu->rx_packet_len = -1; nu->rx_skb = nci_skb_alloc(nu->ndev, NCI_MAX_PACKET_SIZE, - GFP_KERNEL); + GFP_ATOMIC); if (!nu->rx_skb) return -ENOMEM; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 53c1d41fb1c9..118cd66b7516 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -544,7 +544,8 @@ static int prb_calc_retire_blk_tmo(struct packet_sock *po, msec = 1; div = ecmd.base.speed / 1000; } - } + } else + return DEFAULT_PRB_RETIRE_TOV; mbits = (blk_size_in_bytes * 8) / (1024 * 1024); diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 88f98f27ad88..3d24d45be5f4 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -196,7 +196,7 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, hdr->size = cpu_to_le32(len); hdr->confirm_rx = 0; - skb_put_padto(skb, ALIGN(len, 4)); + skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr)); mutex_lock(&node->ep_lock); if (node->ep) diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 461d75274fb3..971c73c7d34c 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -1002,10 +1002,13 @@ static void rfkill_sync_work(struct work_struct *work) int __must_check rfkill_register(struct rfkill *rfkill) { static unsigned long rfkill_no; - struct device *dev = &rfkill->dev; + struct device *dev; int error; - BUG_ON(!rfkill); + if (!rfkill) + return -EINVAL; + + dev = &rfkill->dev; mutex_lock(&rfkill_global_mutex); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 7c7d10f2e0c1..5e99df80e80a 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -209,6 +209,7 @@ struct rxrpc_skb_priv { struct rxrpc_security { const char *name; /* name of this service */ u8 security_index; /* security type provided */ + u32 no_key_abort; /* Abort code indicating no key */ /* Initialise a security service */ int (*init)(void); @@ -977,8 +978,9 @@ static inline void rxrpc_reduce_conn_timer(struct rxrpc_connection *conn, struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *, struct sk_buff *); struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *, gfp_t); -void rxrpc_new_incoming_connection(struct rxrpc_sock *, - struct rxrpc_connection *, struct sk_buff *); +void rxrpc_new_incoming_connection(struct rxrpc_sock *, struct rxrpc_connection *, + const struct rxrpc_security *, struct key *, + struct sk_buff *); void rxrpc_unpublish_service_conn(struct rxrpc_connection *); /* @@ -1103,7 +1105,9 @@ extern const struct rxrpc_security rxkad; int __init rxrpc_init_security(void); void rxrpc_exit_security(void); int rxrpc_init_client_conn_security(struct rxrpc_connection *); -int rxrpc_init_server_conn_security(struct rxrpc_connection *); +bool rxrpc_look_up_server_security(struct rxrpc_local *, struct rxrpc_sock *, + const struct rxrpc_security **, struct key **, + struct sk_buff *); /* * sendmsg.c diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 135bf5cd8dd5..70e44abf106c 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -240,6 +240,22 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) } /* + * Ping the other end to fill our RTT cache and to retrieve the rwind + * and MTU parameters. + */ +static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + ktime_t now = skb->tstamp; + + if (call->peer->rtt_usage < 3 || + ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now)) + rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial, + true, true, + rxrpc_propose_ack_ping_for_params); +} + +/* * Allocate a new incoming call from the prealloc pool, along with a connection * and a peer as necessary. */ @@ -247,6 +263,8 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, struct rxrpc_local *local, struct rxrpc_peer *peer, struct rxrpc_connection *conn, + const struct rxrpc_security *sec, + struct key *key, struct sk_buff *skb) { struct rxrpc_backlog *b = rx->backlog; @@ -294,7 +312,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, conn->params.local = rxrpc_get_local(local); conn->params.peer = peer; rxrpc_see_connection(conn); - rxrpc_new_incoming_connection(rx, conn, skb); + rxrpc_new_incoming_connection(rx, conn, sec, key, skb); } else { rxrpc_get_connection(conn); } @@ -333,9 +351,11 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + const struct rxrpc_security *sec = NULL; struct rxrpc_connection *conn; struct rxrpc_peer *peer = NULL; - struct rxrpc_call *call; + struct rxrpc_call *call = NULL; + struct key *key = NULL; _enter(""); @@ -346,9 +366,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN); skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; skb->priority = RX_INVALID_OPERATION; - _leave(" = NULL [close]"); - call = NULL; - goto out; + goto no_call; } /* The peer, connection and call may all have sprung into existence due @@ -358,29 +376,19 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, */ conn = rxrpc_find_connection_rcu(local, skb, &peer); - call = rxrpc_alloc_incoming_call(rx, local, peer, conn, skb); + if (!conn && !rxrpc_look_up_server_security(local, rx, &sec, &key, skb)) + goto no_call; + + call = rxrpc_alloc_incoming_call(rx, local, peer, conn, sec, key, skb); + key_put(key); if (!call) { skb->mark = RXRPC_SKB_MARK_REJECT_BUSY; - _leave(" = NULL [busy]"); - call = NULL; - goto out; + goto no_call; } trace_rxrpc_receive(call, rxrpc_receive_incoming, sp->hdr.serial, sp->hdr.seq); - /* Lock the call to prevent rxrpc_kernel_send/recv_data() and - * sendmsg()/recvmsg() inconveniently stealing the mutex once the - * notification is generated. - * - * The BUG should never happen because the kernel should be well - * behaved enough not to access the call before the first notification - * event and userspace is prevented from doing so until the state is - * appropriate. - */ - if (!mutex_trylock(&call->user_mutex)) - BUG(); - /* Make the call live. */ rxrpc_incoming_call(rx, call, skb); conn = call->conn; @@ -421,6 +429,9 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, BUG(); } spin_unlock(&conn->state_lock); + spin_unlock(&rx->incoming_lock); + + rxrpc_send_ping(call, skb); if (call->state == RXRPC_CALL_SERVER_ACCEPTING) rxrpc_notify_socket(call); @@ -433,9 +444,12 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, rxrpc_put_call(call, rxrpc_call_put); _leave(" = %p{%d}", call, call->debug_id); -out: - spin_unlock(&rx->incoming_lock); return call; + +no_call: + spin_unlock(&rx->incoming_lock); + _leave(" = NULL [%u]", skb->mark); + return NULL; } /* diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index a1ceef4f5cd0..808a4723f868 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -376,21 +376,7 @@ static void rxrpc_secure_connection(struct rxrpc_connection *conn) _enter("{%d}", conn->debug_id); ASSERT(conn->security_ix != 0); - - if (!conn->params.key) { - _debug("set up security"); - ret = rxrpc_init_server_conn_security(conn); - switch (ret) { - case 0: - break; - case -ENOENT: - abort_code = RX_CALL_DEAD; - goto abort; - default: - abort_code = RXKADNOAUTH; - goto abort; - } - } + ASSERT(conn->server_key); if (conn->security->issue_challenge(conn) < 0) { abort_code = RX_CALL_DEAD; diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 123d6ceab15c..21da48e3d2e5 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -148,6 +148,8 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn */ void rxrpc_new_incoming_connection(struct rxrpc_sock *rx, struct rxrpc_connection *conn, + const struct rxrpc_security *sec, + struct key *key, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); @@ -160,6 +162,8 @@ void rxrpc_new_incoming_connection(struct rxrpc_sock *rx, conn->service_id = sp->hdr.serviceId; conn->security_ix = sp->hdr.securityIndex; conn->out_clientflag = 0; + conn->security = sec; + conn->server_key = key_get(key); if (conn->security_ix) conn->state = RXRPC_CONN_SERVICE_UNSECURED; else diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 157be1ff8697..86bd133b4fa0 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -193,22 +193,6 @@ send_extra_data: } /* - * Ping the other end to fill our RTT cache and to retrieve the rwind - * and MTU parameters. - */ -static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - ktime_t now = skb->tstamp; - - if (call->peer->rtt_usage < 3 || - ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now)) - rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial, - true, true, - rxrpc_propose_ack_ping_for_params); -} - -/* * Apply a hard ACK by advancing the Tx window. */ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, @@ -1396,8 +1380,6 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) call = rxrpc_new_incoming_call(local, rx, skb); if (!call) goto reject_packet; - rxrpc_send_ping(call, skb); - mutex_unlock(&call->user_mutex); } /* Process a call packet; this either discards or passes on the ref diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 8d8aa3c230b5..098f1f9ec53b 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -648,9 +648,9 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn) u32 serial; int ret; - _enter("{%d,%x}", conn->debug_id, key_serial(conn->params.key)); + _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); - ret = key_validate(conn->params.key); + ret = key_validate(conn->server_key); if (ret < 0) return ret; @@ -1293,6 +1293,7 @@ static void rxkad_exit(void) const struct rxrpc_security rxkad = { .name = "rxkad", .security_index = RXRPC_SECURITY_RXKAD, + .no_key_abort = RXKADUNKNOWNKEY, .init = rxkad_init, .exit = rxkad_exit, .init_connection_security = rxkad_init_connection_security, diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index a4c47d2b7054..9b1fb9ed0717 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -101,62 +101,58 @@ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) } /* - * initialise the security on a server connection + * Find the security key for a server connection. */ -int rxrpc_init_server_conn_security(struct rxrpc_connection *conn) +bool rxrpc_look_up_server_security(struct rxrpc_local *local, struct rxrpc_sock *rx, + const struct rxrpc_security **_sec, + struct key **_key, + struct sk_buff *skb) { const struct rxrpc_security *sec; - struct rxrpc_local *local = conn->params.local; - struct rxrpc_sock *rx; - struct key *key; - key_ref_t kref; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + key_ref_t kref = NULL; char kdesc[5 + 1 + 3 + 1]; _enter(""); - sprintf(kdesc, "%u:%u", conn->service_id, conn->security_ix); + sprintf(kdesc, "%u:%u", sp->hdr.serviceId, sp->hdr.securityIndex); - sec = rxrpc_security_lookup(conn->security_ix); + sec = rxrpc_security_lookup(sp->hdr.securityIndex); if (!sec) { - _leave(" = -ENOKEY [lookup]"); - return -ENOKEY; + trace_rxrpc_abort(0, "SVS", + sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_INVALID_OPERATION, EKEYREJECTED); + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; + skb->priority = RX_INVALID_OPERATION; + return false; } - /* find the service */ - read_lock(&local->services_lock); - rx = rcu_dereference_protected(local->service, - lockdep_is_held(&local->services_lock)); - if (rx && (rx->srx.srx_service == conn->service_id || - rx->second_service == conn->service_id)) - goto found_service; + if (sp->hdr.securityIndex == RXRPC_SECURITY_NONE) + goto out; - /* the service appears to have died */ - read_unlock(&local->services_lock); - _leave(" = -ENOENT"); - return -ENOENT; - -found_service: if (!rx->securities) { - read_unlock(&local->services_lock); - _leave(" = -ENOKEY"); - return -ENOKEY; + trace_rxrpc_abort(0, "SVR", + sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_INVALID_OPERATION, EKEYREJECTED); + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; + skb->priority = RX_INVALID_OPERATION; + return false; } /* look through the service's keyring */ kref = keyring_search(make_key_ref(rx->securities, 1UL), &key_type_rxrpc_s, kdesc, true); if (IS_ERR(kref)) { - read_unlock(&local->services_lock); - _leave(" = %ld [search]", PTR_ERR(kref)); - return PTR_ERR(kref); + trace_rxrpc_abort(0, "SVK", + sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + sec->no_key_abort, EKEYREJECTED); + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; + skb->priority = sec->no_key_abort; + return false; } - key = key_ref_to_ptr(kref); - read_unlock(&local->services_lock); - - conn->server_key = key; - conn->security = sec; - - _leave(" = 0"); - return 0; +out: + *_sec = sec; + *_key = key_ref_to_ptr(kref); + return true; } diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 40038c321b4a..19649623493b 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -360,6 +360,16 @@ static int tcf_ctinfo_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } +static void tcf_ctinfo_cleanup(struct tc_action *a) +{ + struct tcf_ctinfo *ci = to_ctinfo(a); + struct tcf_ctinfo_params *cp; + + cp = rcu_dereference_protected(ci->params, 1); + if (cp) + kfree_rcu(cp, rcu); +} + static struct tc_action_ops act_ctinfo_ops = { .kind = "ctinfo", .id = TCA_ID_CTINFO, @@ -367,6 +377,7 @@ static struct tc_action_ops act_ctinfo_ops = { .act = tcf_ctinfo_act, .dump = tcf_ctinfo_dump, .init = tcf_ctinfo_init, + .cleanup= tcf_ctinfo_cleanup, .walk = tcf_ctinfo_walker, .lookup = tcf_ctinfo_search, .size = sizeof(struct tcf_ctinfo), diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 5e6379028fc3..c1fcd85719d6 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -537,6 +537,9 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, } ife = to_ife(*a); + if (ret == ACT_P_CREATED) + INIT_LIST_HEAD(&ife->metalist); + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; @@ -566,10 +569,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, p->eth_type = ife_type; } - - if (ret == ACT_P_CREATED) - INIT_LIST_HEAD(&ife->metalist); - if (tb[TCA_IFE_METALST]) { err = nla_parse_nested_deprecated(tb2, IFE_META_MAX, tb[TCA_IFE_METALST], NULL, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 1e3eb3a97532..1ad300e6dbc0 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -219,8 +219,10 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, bool use_reinsert; bool want_ingress; bool is_redirect; + bool expects_nh; int m_eaction; int mac_len; + bool at_nh; rec_level = __this_cpu_inc_return(mirred_rec_level); if (unlikely(rec_level > MIRRED_RECURSION_LIMIT)) { @@ -261,19 +263,19 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, goto out; } - /* If action's target direction differs than filter's direction, - * and devices expect a mac header on xmit, then mac push/pull is - * needed. - */ want_ingress = tcf_mirred_act_wants_ingress(m_eaction); - if (skb_at_tc_ingress(skb) != want_ingress && m_mac_header_xmit) { - if (!skb_at_tc_ingress(skb)) { - /* caught at egress, act ingress: pull mac */ - mac_len = skb_network_header(skb) - skb_mac_header(skb); + + expects_nh = want_ingress || !m_mac_header_xmit; + at_nh = skb->data == skb_network_header(skb); + if (at_nh != expects_nh) { + mac_len = skb_at_tc_ingress(skb) ? skb->mac_len : + skb_network_header(skb) - skb_mac_header(skb); + if (expects_nh) { + /* target device/action expect data at nh */ skb_pull_rcsum(skb2, mac_len); } else { - /* caught at ingress, act egress: push mac */ - skb_push_rcsum(skb2, skb->mac_len); + /* target device/action expect data at mac */ + skb_push_rcsum(skb2, mac_len); } } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 6a0eacafdb19..76e0d122616a 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -308,33 +308,12 @@ static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held, tcf_proto_destroy(tp, rtnl_held, true, extack); } -static int walker_check_empty(struct tcf_proto *tp, void *fh, - struct tcf_walker *arg) +static bool tcf_proto_check_delete(struct tcf_proto *tp) { - if (fh) { - arg->nonempty = true; - return -1; - } - return 0; -} - -static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held) -{ - struct tcf_walker walker = { .fn = walker_check_empty, }; - - if (tp->ops->walk) { - tp->ops->walk(tp, &walker, rtnl_held); - return !walker.nonempty; - } - return true; -} + if (tp->ops->delete_empty) + return tp->ops->delete_empty(tp); -static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held) -{ - spin_lock(&tp->lock); - if (tcf_proto_is_empty(tp, rtnl_held)) - tp->deleting = true; - spin_unlock(&tp->lock); + tp->deleting = true; return tp->deleting; } @@ -1751,7 +1730,7 @@ static void tcf_chain_tp_delete_empty(struct tcf_chain *chain, * concurrently. * Mark tp for deletion if it is empty. */ - if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) { + if (!tp_iter || !tcf_proto_check_delete(tp)) { mutex_unlock(&chain->filter_chain_lock); return; } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 0d125de54285..b0f42e62dd76 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2773,6 +2773,17 @@ static void fl_bind_class(void *fh, u32 classid, unsigned long cl) f->res.class = cl; } +static bool fl_delete_empty(struct tcf_proto *tp) +{ + struct cls_fl_head *head = fl_head_dereference(tp); + + spin_lock(&tp->lock); + tp->deleting = idr_is_empty(&head->handle_idr); + spin_unlock(&tp->lock); + + return tp->deleting; +} + static struct tcf_proto_ops cls_fl_ops __read_mostly = { .kind = "flower", .classify = fl_classify, @@ -2782,6 +2793,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .put = fl_put, .change = fl_change, .delete = fl_delete, + .delete_empty = fl_delete_empty, .walk = fl_walk, .reoffload = fl_reoffload, .hw_add = fl_hw_add, diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index e0f40400f679..2277369feae5 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1769,7 +1769,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, q->avg_window_begin)); u64 b = q->avg_window_bytes * (u64)NSEC_PER_SEC; - do_div(b, window_interval); + b = div64_u64(b, window_interval); q->avg_peak_bandwidth = cake_ewma(q->avg_peak_bandwidth, b, b > q->avg_peak_bandwidth ? 2 : 8); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index b1c7e726ce5d..a5a295477ecc 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -301,6 +301,9 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) f->socket_hash != sk->sk_hash)) { f->credit = q->initial_quantum; f->socket_hash = sk->sk_hash; + if (q->rate_enable) + smp_store_release(&sk->sk_pacing_status, + SK_PACING_FQ); if (fq_flow_is_throttled(f)) fq_flow_unset_throttled(q, f); f->time_next_packet = 0ULL; @@ -322,8 +325,12 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) fq_flow_set_detached(f); f->sk = sk; - if (skb->sk == sk) + if (skb->sk == sk) { f->socket_hash = sk->sk_hash; + if (q->rate_enable) + smp_store_release(&sk->sk_pacing_status, + SK_PACING_FQ); + } f->credit = q->initial_quantum; rb_link_node(&f->fq_node, parent, p); @@ -428,17 +435,9 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, f->qlen++; qdisc_qstats_backlog_inc(sch, skb); if (fq_flow_is_detached(f)) { - struct sock *sk = skb->sk; - fq_flow_add_tail(&q->new_flows, f); if (time_after(jiffies, f->age + q->flow_refill_delay)) f->credit = max_t(u32, f->credit, q->quantum); - if (sk && q->rate_enable) { - if (unlikely(smp_load_acquire(&sk->sk_pacing_status) != - SK_PACING_FQ)) - smp_store_release(&sk->sk_pacing_status, - SK_PACING_FQ); - } q->inactive_flows--; } @@ -787,10 +786,12 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_FQ_QUANTUM]) { u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]); - if (quantum > 0) + if (quantum > 0 && quantum <= (1 << 20)) { q->quantum = quantum; - else + } else { + NL_SET_ERR_MSG_MOD(extack, "invalid quantum"); err = -EINVAL; + } } if (tb[TCA_FQ_INITIAL_QUANTUM]) diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 18b884cfdfe8..647941702f9f 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -292,8 +292,14 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct tc_prio_qopt_offload graft_offload; unsigned long band = arg - 1; - if (new == NULL) - new = &noop_qdisc; + if (!new) { + new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, arg), extack); + if (!new) + new = &noop_qdisc; + else + qdisc_hash_add(new, true); + } *old = qdisc_replace(sch, new, &q->queues[band]); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index fbbf19128c2d..78af2fcf90cc 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -227,6 +227,7 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb, sa->sin_port = sh->dest; sa->sin_addr.s_addr = ip_hdr(skb)->daddr; } + memset(sa->sin_zero, 0, sizeof(sa->sin_zero)); } /* Initialize an sctp_addr from a socket. */ @@ -235,6 +236,7 @@ static void sctp_v4_from_sk(union sctp_addr *addr, struct sock *sk) addr->v4.sin_family = AF_INET; addr->v4.sin_port = 0; addr->v4.sin_addr.s_addr = inet_sk(sk)->inet_rcv_saddr; + memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); } /* Initialize sk->sk_rcv_saddr from sctp_addr. */ @@ -257,6 +259,7 @@ static void sctp_v4_from_addr_param(union sctp_addr *addr, addr->v4.sin_family = AF_INET; addr->v4.sin_port = port; addr->v4.sin_addr.s_addr = param->v4.addr.s_addr; + memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); } /* Initialize an address parameter from a sctp_addr and return the length @@ -281,6 +284,7 @@ static void sctp_v4_dst_saddr(union sctp_addr *saddr, struct flowi4 *fl4, saddr->v4.sin_family = AF_INET; saddr->v4.sin_port = port; saddr->v4.sin_addr.s_addr = fl4->saddr; + memset(saddr->v4.sin_zero, 0, sizeof(saddr->v4.sin_zero)); } /* Compare two addresses exactly. */ @@ -303,6 +307,7 @@ static void sctp_v4_inaddr_any(union sctp_addr *addr, __be16 port) addr->v4.sin_family = AF_INET; addr->v4.sin_addr.s_addr = htonl(INADDR_ANY); addr->v4.sin_port = port; + memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); } /* Is this a wildcard address? */ diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index acd737d4c0e0..834e9f82afed 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1363,8 +1363,10 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type, /* Generate an INIT ACK chunk. */ new_obj = sctp_make_init_ack(asoc, chunk, GFP_ATOMIC, 0); - if (!new_obj) - goto nomem; + if (!new_obj) { + error = -ENOMEM; + break; + } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(new_obj)); @@ -1386,7 +1388,8 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type, if (!new_obj) { if (cmd->obj.chunk) sctp_chunk_free(cmd->obj.chunk); - goto nomem; + error = -ENOMEM; + break; } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(new_obj)); @@ -1433,8 +1436,10 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type, /* Generate a SHUTDOWN chunk. */ new_obj = sctp_make_shutdown(asoc, chunk); - if (!new_obj) - goto nomem; + if (!new_obj) { + error = -ENOMEM; + break; + } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(new_obj)); break; @@ -1770,11 +1775,17 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type, break; } - if (error) + if (error) { + cmd = sctp_next_cmd(commands); + while (cmd) { + if (cmd->verb == SCTP_CMD_REPLY) + sctp_chunk_free(cmd->obj.chunk); + cmd = sctp_next_cmd(commands); + } break; + } } -out: /* If this is in response to a received chunk, wait until * we are done with the packet to open the queue so that we don't * send multiple packets in response to a single request. @@ -1789,7 +1800,4 @@ out: sp->data_ready_signalled = 0; return error; -nomem: - error = -ENOMEM; - goto out; } diff --git a/net/sctp/stream.c b/net/sctp/stream.c index e83cdaa2ab76..c1a100d2fed3 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -119,7 +119,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, * a new one with new outcnt to save memory if needed. */ if (outcnt == stream->outcnt) - goto in; + goto handle_in; /* Filter out chunks queued on streams that won't exist anymore */ sched->unsched_all(stream); @@ -128,24 +128,28 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, ret = sctp_stream_alloc_out(stream, outcnt, gfp); if (ret) - goto out; + goto out_err; for (i = 0; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; -in: +handle_in: sctp_stream_interleave_init(stream); if (!incnt) goto out; ret = sctp_stream_alloc_in(stream, incnt, gfp); - if (ret) { - sched->free(stream); - genradix_free(&stream->out); - stream->outcnt = 0; - goto out; - } + if (ret) + goto in_err; + goto out; + +in_err: + sched->free(stream); + genradix_free(&stream->in); +out_err: + genradix_free(&stream->out); + stream->outcnt = 0; out: return ret; } diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 7235a6032671..3bbe1a58ec87 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -263,7 +263,7 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) pf->af->from_sk(&addr, sk); pf->to_sk_daddr(&t->ipaddr, sk); - dst->ops->update_pmtu(dst, sk, NULL, pmtu); + dst->ops->update_pmtu(dst, sk, NULL, pmtu, true); pf->to_sk_daddr(&addr, sk); dst = sctp_transport_dst_check(t); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index b997072c72e5..cee5bf4a9bb9 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -857,6 +857,8 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr, goto out; sock_hold(&smc->sk); /* sock put in passive closing */ + if (smc->use_fallback) + goto out; if (flags & O_NONBLOCK) { if (schedule_work(&smc->connect_work)) smc->connect_nonblock = 1; @@ -1721,8 +1723,6 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, sk->sk_err = smc->clcsock->sk->sk_err; sk->sk_error_report(sk); } - if (rc) - return rc; if (optlen < sizeof(int)) return -EINVAL; @@ -1730,6 +1730,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, return -EFAULT; lock_sock(sk); + if (rc || smc->use_fallback) + goto out; switch (optname) { case TCP_ULP: case TCP_FASTOPEN: @@ -1741,15 +1743,14 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, smc_switch_to_fallback(smc); smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; } else { - if (!smc->use_fallback) - rc = -EINVAL; + rc = -EINVAL; } break; case TCP_NODELAY: if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_CLOSED) { - if (val && !smc->use_fallback) + if (val) mod_delayed_work(system_wq, &smc->conn.tx_work, 0); } @@ -1758,7 +1759,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_CLOSED) { - if (!val && !smc->use_fallback) + if (!val) mod_delayed_work(system_wq, &smc->conn.tx_work, 0); } @@ -1769,6 +1770,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, default: break; } +out: release_sock(sk); return rc; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index bb92c7c6214c..e419ff277e55 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1287,7 +1287,7 @@ static int smc_core_reboot_event(struct notifier_block *this, unsigned long event, void *ptr) { smc_lgrs_shutdown(); - + smc_ib_unregister_client(); return 0; } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 77c7dd7f05e8..fda3889993cb 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -77,7 +77,7 @@ static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, struct rpcrdma_sendctx *sc); static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt); -static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf); +static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); static struct rpcrdma_regbuf * @@ -244,6 +244,7 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) ia->ri_id->device->name, rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt)); #endif + init_completion(&ia->ri_remove_done); set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags); ep->rep_connected = -ENODEV; xprt_force_disconnect(xprt); @@ -297,7 +298,6 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) int rc; init_completion(&ia->ri_done); - init_completion(&ia->ri_remove_done); id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_cm_event_handler, xprt, RDMA_PS_TCP, IB_QPT_RC); @@ -421,7 +421,7 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) /* The ULP is responsible for ensuring all DMA * mappings and MRs are gone. */ - rpcrdma_reps_destroy(buf); + rpcrdma_reps_unmap(r_xprt); list_for_each_entry(req, &buf->rb_allreqs, rl_all) { rpcrdma_regbuf_dma_unmap(req->rl_rdmabuf); rpcrdma_regbuf_dma_unmap(req->rl_sendbuf); @@ -599,6 +599,7 @@ static int rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, struct ib_qp_init_attr *qp_init_attr) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_ep *ep = &r_xprt->rx_ep; int rc, err; trace_xprtrdma_reinsert(r_xprt); @@ -613,6 +614,7 @@ static int rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, pr_err("rpcrdma: rpcrdma_ep_create returned %d\n", err); goto out2; } + memcpy(qp_init_attr, &ep->rep_attr, sizeof(*qp_init_attr)); rc = -ENETUNREACH; err = rdma_create_qp(ia->ri_id, ia->ri_pd, qp_init_attr); @@ -1090,6 +1092,7 @@ static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; rep->rr_recv_wr.num_sge = 1; rep->rr_temp = temp; + list_add(&rep->rr_all, &r_xprt->rx_buf.rb_all_reps); return rep; out_free: @@ -1100,6 +1103,7 @@ out: static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep) { + list_del(&rep->rr_all); rpcrdma_regbuf_free(rep->rr_rdmabuf); kfree(rep); } @@ -1118,10 +1122,16 @@ static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf) static void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep) { - if (!rep->rr_temp) - llist_add(&rep->rr_node, &buf->rb_free_reps); - else - rpcrdma_rep_destroy(rep); + llist_add(&rep->rr_node, &buf->rb_free_reps); +} + +static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt) +{ + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct rpcrdma_rep *rep; + + list_for_each_entry(rep, &buf->rb_all_reps, rr_all) + rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf); } static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf) @@ -1152,6 +1162,7 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) INIT_LIST_HEAD(&buf->rb_send_bufs); INIT_LIST_HEAD(&buf->rb_allreqs); + INIT_LIST_HEAD(&buf->rb_all_reps); rc = -ENOMEM; for (i = 0; i < buf->rb_max_requests; i++) { @@ -1504,6 +1515,10 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) wr = NULL; while (needed) { rep = rpcrdma_rep_get_locked(buf); + if (rep && rep->rr_temp) { + rpcrdma_rep_destroy(rep); + continue; + } if (!rep) rep = rpcrdma_rep_create(r_xprt, temp); if (!rep) diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 5d15140a0266..d796d68609ed 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -203,6 +203,7 @@ struct rpcrdma_rep { struct xdr_stream rr_stream; struct llist_node rr_node; struct ib_recv_wr rr_recv_wr; + struct list_head rr_all; }; /* To reduce the rate at which a transport invokes ib_post_recv @@ -368,6 +369,7 @@ struct rpcrdma_buffer { struct list_head rb_allreqs; struct list_head rb_all_mrs; + struct list_head rb_all_reps; struct llist_head rb_free_reps; diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 11255e970dd4..ee49a9f1dd4f 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -9,7 +9,7 @@ tipc-y += addr.o bcast.o bearer.o \ core.o link.o discover.o msg.o \ name_distr.o subscr.o monitor.o name_table.o net.o \ netlink.o netlink_compat.o node.o socket.o eth_media.o \ - topsrv.o socket.o group.o trace.o + topsrv.o group.o trace.o CFLAGS_trace.o += -I$(src) @@ -20,5 +20,3 @@ tipc-$(CONFIG_TIPC_CRYPTO) += crypto.o obj-$(CONFIG_TIPC_DIAG) += diag.o - -tipc_diag-y := diag.o diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 55aeba681cf4..656ebc79c64e 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -305,17 +305,17 @@ static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, * @skb: socket buffer to copy * @method: send method to be used * @dests: destination nodes for message. - * @cong_link_cnt: returns number of encountered congested destination links * Returns 0 if success, otherwise errno */ static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb, struct tipc_mc_method *method, - struct tipc_nlist *dests, - u16 *cong_link_cnt) + struct tipc_nlist *dests) { struct tipc_msg *hdr, *_hdr; struct sk_buff_head tmpq; struct sk_buff *_skb; + u16 cong_link_cnt; + int rc = 0; /* Is a cluster supporting with new capabilities ? */ if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL)) @@ -343,18 +343,19 @@ static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb, _hdr = buf_msg(_skb); msg_set_size(_hdr, MCAST_H_SIZE); msg_set_is_rcast(_hdr, !msg_is_rcast(hdr)); + msg_set_errcode(_hdr, TIPC_ERR_NO_PORT); __skb_queue_head_init(&tmpq); __skb_queue_tail(&tmpq, _skb); if (method->rcast) - tipc_bcast_xmit(net, &tmpq, cong_link_cnt); + rc = tipc_bcast_xmit(net, &tmpq, &cong_link_cnt); else - tipc_rcast_xmit(net, &tmpq, dests, cong_link_cnt); + rc = tipc_rcast_xmit(net, &tmpq, dests, &cong_link_cnt); /* This queue should normally be empty by now */ __skb_queue_purge(&tmpq); - return 0; + return rc; } /* tipc_mcast_xmit - deliver message to indicated destination nodes @@ -396,9 +397,14 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, msg_set_is_rcast(hdr, method->rcast); /* Switch method ? */ - if (rcast != method->rcast) - tipc_mcast_send_sync(net, skb, method, - dests, cong_link_cnt); + if (rcast != method->rcast) { + rc = tipc_mcast_send_sync(net, skb, method, dests); + if (unlikely(rc)) { + pr_err("Unable to send SYN: method %d, rc %d\n", + rcast, rc); + goto exit; + } + } if (method->rcast) rc = tipc_rcast_xmit(net, pkts, dests, cong_link_cnt); diff --git a/net/tipc/discover.c b/net/tipc/discover.c index b043e8c6397a..bfe43da127c0 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -194,6 +194,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, { struct tipc_net *tn = tipc_net(net); struct tipc_msg *hdr = buf_msg(skb); + u32 pnet_hash = msg_peer_net_hash(hdr); u16 caps = msg_node_capabilities(hdr); bool legacy = tn->legacy_addr_format; u32 sugg = msg_sugg_node_addr(hdr); @@ -242,9 +243,8 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, return; if (!tipc_in_scope(legacy, b->domain, src)) return; - tipc_node_check_dest(net, src, peer_id, b, caps, signature, - msg_peer_net_hash(hdr), &maddr, &respond, - &dupl_addr); + tipc_node_check_dest(net, src, peer_id, b, caps, signature, pnet_hash, + &maddr, &respond, &dupl_addr); if (dupl_addr) disc_dupl_alert(b, src, &maddr); if (!respond) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 92d04dc2a44b..359b2bc888cf 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -36,6 +36,7 @@ #include <net/sock.h> #include <linux/list_sort.h> +#include <linux/rbtree_augmented.h> #include "core.h" #include "netlink.h" #include "name_table.h" @@ -51,6 +52,7 @@ * @lower: service range lower bound * @upper: service range upper bound * @tree_node: member of service range RB tree + * @max: largest 'upper' in this node subtree * @local_publ: list of identical publications made from this node * Used by closest_first lookup and multicast lookup algorithm * @all_publ: all publications identical to this one, whatever node and scope @@ -60,6 +62,7 @@ struct service_range { u32 lower; u32 upper; struct rb_node tree_node; + u32 max; struct list_head local_publ; struct list_head all_publ; }; @@ -84,6 +87,130 @@ struct tipc_service { struct rcu_head rcu; }; +#define service_range_upper(sr) ((sr)->upper) +RB_DECLARE_CALLBACKS_MAX(static, sr_callbacks, + struct service_range, tree_node, u32, max, + service_range_upper) + +#define service_range_entry(rbtree_node) \ + (container_of(rbtree_node, struct service_range, tree_node)) + +#define service_range_overlap(sr, start, end) \ + ((sr)->lower <= (end) && (sr)->upper >= (start)) + +/** + * service_range_foreach_match - iterate over tipc service rbtree for each + * range match + * @sr: the service range pointer as a loop cursor + * @sc: the pointer to tipc service which holds the service range rbtree + * @start, end: the range (end >= start) for matching + */ +#define service_range_foreach_match(sr, sc, start, end) \ + for (sr = service_range_match_first((sc)->ranges.rb_node, \ + start, \ + end); \ + sr; \ + sr = service_range_match_next(&(sr)->tree_node, \ + start, \ + end)) + +/** + * service_range_match_first - find first service range matching a range + * @n: the root node of service range rbtree for searching + * @start, end: the range (end >= start) for matching + * + * Return: the leftmost service range node in the rbtree that overlaps the + * specific range if any. Otherwise, returns NULL. + */ +static struct service_range *service_range_match_first(struct rb_node *n, + u32 start, u32 end) +{ + struct service_range *sr; + struct rb_node *l, *r; + + /* Non overlaps in tree at all? */ + if (!n || service_range_entry(n)->max < start) + return NULL; + + while (n) { + l = n->rb_left; + if (l && service_range_entry(l)->max >= start) { + /* A leftmost overlap range node must be one in the left + * subtree. If not, it has lower > end, then nodes on + * the right side cannot satisfy the condition either. + */ + n = l; + continue; + } + + /* No one in the left subtree can match, return if this node is + * an overlap i.e. leftmost. + */ + sr = service_range_entry(n); + if (service_range_overlap(sr, start, end)) + return sr; + + /* Ok, try to lookup on the right side */ + r = n->rb_right; + if (sr->lower <= end && + r && service_range_entry(r)->max >= start) { + n = r; + continue; + } + break; + } + + return NULL; +} + +/** + * service_range_match_next - find next service range matching a range + * @n: a node in service range rbtree from which the searching starts + * @start, end: the range (end >= start) for matching + * + * Return: the next service range node to the given node in the rbtree that + * overlaps the specific range if any. Otherwise, returns NULL. + */ +static struct service_range *service_range_match_next(struct rb_node *n, + u32 start, u32 end) +{ + struct service_range *sr; + struct rb_node *p, *r; + + while (n) { + r = n->rb_right; + if (r && service_range_entry(r)->max >= start) + /* A next overlap range node must be one in the right + * subtree. If not, it has lower > end, then any next + * successor (- an ancestor) of this node cannot + * satisfy the condition either. + */ + return service_range_match_first(r, start, end); + + /* No one in the right subtree can match, go up to find an + * ancestor of this node which is parent of a left-hand child. + */ + while ((p = rb_parent(n)) && n == p->rb_right) + n = p; + if (!p) + break; + + /* Return if this ancestor is an overlap */ + sr = service_range_entry(p); + if (service_range_overlap(sr, start, end)) + return sr; + + /* Ok, try to lookup more from this ancestor */ + if (sr->lower <= end) { + n = p; + continue; + } + break; + } + + return NULL; +} + static int hash(int x) { return x & (TIPC_NAMETBL_SIZE - 1); @@ -139,84 +266,51 @@ static struct tipc_service *tipc_service_create(u32 type, struct hlist_head *hd) return service; } -/** - * tipc_service_first_range - find first service range in tree matching instance - * - * Very time-critical, so binary search through range rb tree - */ -static struct service_range *tipc_service_first_range(struct tipc_service *sc, - u32 instance) -{ - struct rb_node *n = sc->ranges.rb_node; - struct service_range *sr; - - while (n) { - sr = container_of(n, struct service_range, tree_node); - if (sr->lower > instance) - n = n->rb_left; - else if (sr->upper < instance) - n = n->rb_right; - else - return sr; - } - return NULL; -} - /* tipc_service_find_range - find service range matching publication parameters */ static struct service_range *tipc_service_find_range(struct tipc_service *sc, u32 lower, u32 upper) { - struct rb_node *n = sc->ranges.rb_node; struct service_range *sr; - sr = tipc_service_first_range(sc, lower); - if (!sr) - return NULL; - - /* Look for exact match */ - for (n = &sr->tree_node; n; n = rb_next(n)) { - sr = container_of(n, struct service_range, tree_node); - if (sr->upper == upper) - break; + service_range_foreach_match(sr, sc, lower, upper) { + /* Look for exact match */ + if (sr->lower == lower && sr->upper == upper) + return sr; } - if (!n || sr->lower != lower || sr->upper != upper) - return NULL; - return sr; + return NULL; } static struct service_range *tipc_service_create_range(struct tipc_service *sc, u32 lower, u32 upper) { struct rb_node **n, *parent = NULL; - struct service_range *sr, *tmp; + struct service_range *sr; n = &sc->ranges.rb_node; while (*n) { - tmp = container_of(*n, struct service_range, tree_node); parent = *n; - tmp = container_of(parent, struct service_range, tree_node); - if (lower < tmp->lower) - n = &(*n)->rb_left; - else if (lower > tmp->lower) - n = &(*n)->rb_right; - else if (upper < tmp->upper) - n = &(*n)->rb_left; - else if (upper > tmp->upper) - n = &(*n)->rb_right; + sr = service_range_entry(parent); + if (lower == sr->lower && upper == sr->upper) + return sr; + if (sr->max < upper) + sr->max = upper; + if (lower <= sr->lower) + n = &parent->rb_left; else - return tmp; + n = &parent->rb_right; } sr = kzalloc(sizeof(*sr), GFP_ATOMIC); if (!sr) return NULL; sr->lower = lower; sr->upper = upper; + sr->max = upper; INIT_LIST_HEAD(&sr->local_publ); INIT_LIST_HEAD(&sr->all_publ); rb_link_node(&sr->tree_node, parent, n); - rb_insert_color(&sr->tree_node, &sc->ranges); + rb_insert_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks); return sr; } @@ -310,7 +404,6 @@ static void tipc_service_subscribe(struct tipc_service *service, struct list_head publ_list; struct service_range *sr; struct tipc_name_seq ns; - struct rb_node *n; u32 filter; ns.type = tipc_sub_read(sb, seq.type); @@ -325,13 +418,7 @@ static void tipc_service_subscribe(struct tipc_service *service, return; INIT_LIST_HEAD(&publ_list); - for (n = rb_first(&service->ranges); n; n = rb_next(n)) { - sr = container_of(n, struct service_range, tree_node); - if (sr->lower > ns.upper) - break; - if (!tipc_sub_check_overlap(&ns, sr->lower, sr->upper)) - continue; - + service_range_foreach_match(sr, service, ns.lower, ns.upper) { first = NULL; list_for_each_entry(p, &sr->all_publ, all_publ) { if (filter & TIPC_SUB_PORTS) @@ -425,7 +512,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, /* Remove service range item if this was its last publication */ if (list_empty(&sr->all_publ)) { - rb_erase(&sr->tree_node, &sc->ranges); + rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks); kfree(sr); } @@ -473,34 +560,39 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *dnode) rcu_read_lock(); sc = tipc_service_find(net, type); if (unlikely(!sc)) - goto not_found; + goto exit; spin_lock_bh(&sc->lock); - sr = tipc_service_first_range(sc, instance); - if (unlikely(!sr)) - goto no_match; - - /* Select lookup algorithm: local, closest-first or round-robin */ - if (*dnode == self) { - list = &sr->local_publ; - if (list_empty(list)) - goto no_match; - p = list_first_entry(list, struct publication, local_publ); - list_move_tail(&p->local_publ, &sr->local_publ); - } else if (legacy && !*dnode && !list_empty(&sr->local_publ)) { - list = &sr->local_publ; - p = list_first_entry(list, struct publication, local_publ); - list_move_tail(&p->local_publ, &sr->local_publ); - } else { - list = &sr->all_publ; - p = list_first_entry(list, struct publication, all_publ); - list_move_tail(&p->all_publ, &sr->all_publ); + service_range_foreach_match(sr, sc, instance, instance) { + /* Select lookup algo: local, closest-first or round-robin */ + if (*dnode == self) { + list = &sr->local_publ; + if (list_empty(list)) + continue; + p = list_first_entry(list, struct publication, + local_publ); + list_move_tail(&p->local_publ, &sr->local_publ); + } else if (legacy && !*dnode && !list_empty(&sr->local_publ)) { + list = &sr->local_publ; + p = list_first_entry(list, struct publication, + local_publ); + list_move_tail(&p->local_publ, &sr->local_publ); + } else { + list = &sr->all_publ; + p = list_first_entry(list, struct publication, + all_publ); + list_move_tail(&p->all_publ, &sr->all_publ); + } + port = p->port; + node = p->node; + /* Todo: as for legacy, pick the first matching range only, a + * "true" round-robin will be performed as needed. + */ + break; } - port = p->port; - node = p->node; -no_match: spin_unlock_bh(&sc->lock); -not_found: + +exit: rcu_read_unlock(); *dnode = node; return port; @@ -523,7 +615,8 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope, spin_lock_bh(&sc->lock); - sr = tipc_service_first_range(sc, instance); + /* Todo: a full search i.e. service_range_foreach_match() instead? */ + sr = service_range_match_first(sc->ranges.rb_node, instance, instance); if (!sr) goto no_match; @@ -552,7 +645,6 @@ void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, struct service_range *sr; struct tipc_service *sc; struct publication *p; - struct rb_node *n; rcu_read_lock(); sc = tipc_service_find(net, type); @@ -560,13 +652,7 @@ void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, goto exit; spin_lock_bh(&sc->lock); - - for (n = rb_first(&sc->ranges); n; n = rb_next(n)) { - sr = container_of(n, struct service_range, tree_node); - if (sr->upper < lower) - continue; - if (sr->lower > upper) - break; + service_range_foreach_match(sr, sc, lower, upper) { list_for_each_entry(p, &sr->local_publ, local_publ) { if (p->scope == scope || (!exact && p->scope < scope)) tipc_dest_push(dports, 0, p->port); @@ -587,7 +673,6 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, struct service_range *sr; struct tipc_service *sc; struct publication *p; - struct rb_node *n; rcu_read_lock(); sc = tipc_service_find(net, type); @@ -595,13 +680,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, goto exit; spin_lock_bh(&sc->lock); - - for (n = rb_first(&sc->ranges); n; n = rb_next(n)) { - sr = container_of(n, struct service_range, tree_node); - if (sr->upper < lower) - continue; - if (sr->lower > upper) - break; + service_range_foreach_match(sr, sc, lower, upper) { list_for_each_entry(p, &sr->all_publ, all_publ) { tipc_nlist_add(nodes, p->node); } @@ -799,7 +878,7 @@ static void tipc_service_delete(struct net *net, struct tipc_service *sc) tipc_service_remove_publ(sr, p->node, p->key); kfree_rcu(p, rcu); } - rb_erase(&sr->tree_node, &sc->ranges); + rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks); kfree(sr); } hlist_del_init_rcu(&sc->service_list); diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 0254bb7e418b..217516357ef2 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -204,8 +204,8 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, return -ENOMEM; } - attrbuf = kmalloc_array(tipc_genl_family.maxattr + 1, - sizeof(struct nlattr *), GFP_KERNEL); + attrbuf = kcalloc(tipc_genl_family.maxattr + 1, + sizeof(struct nlattr *), GFP_KERNEL); if (!attrbuf) { err = -ENOMEM; goto err_out; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 41688da233ab..f9b4fb92c0b1 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -287,12 +287,12 @@ static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err) * * Caller must hold socket lock */ -static void tsk_rej_rx_queue(struct sock *sk) +static void tsk_rej_rx_queue(struct sock *sk, int error) { struct sk_buff *skb; while ((skb = __skb_dequeue(&sk->sk_receive_queue))) - tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT); + tipc_sk_respond(sk, skb, error); } static bool tipc_sk_connected(struct sock *sk) @@ -545,34 +545,45 @@ static void __tipc_shutdown(struct socket *sock, int error) /* Remove pending SYN */ __skb_queue_purge(&sk->sk_write_queue); - /* Reject all unreceived messages, except on an active connection - * (which disconnects locally & sends a 'FIN+' to peer). - */ - while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { - if (TIPC_SKB_CB(skb)->bytes_read) { - kfree_skb(skb); - continue; - } - if (!tipc_sk_type_connectionless(sk) && - sk->sk_state != TIPC_DISCONNECTING) { - tipc_set_sk_state(sk, TIPC_DISCONNECTING); - tipc_node_remove_conn(net, dnode, tsk->portid); - } - tipc_sk_respond(sk, skb, error); + /* Remove partially received buffer if any */ + skb = skb_peek(&sk->sk_receive_queue); + if (skb && TIPC_SKB_CB(skb)->bytes_read) { + __skb_unlink(skb, &sk->sk_receive_queue); + kfree_skb(skb); } - if (tipc_sk_type_connectionless(sk)) + /* Reject all unreceived messages if connectionless */ + if (tipc_sk_type_connectionless(sk)) { + tsk_rej_rx_queue(sk, error); return; + } - if (sk->sk_state != TIPC_DISCONNECTING) { + switch (sk->sk_state) { + case TIPC_CONNECTING: + case TIPC_ESTABLISHED: + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(net, dnode, tsk->portid); + /* Send a FIN+/- to its peer */ + skb = __skb_dequeue(&sk->sk_receive_queue); + if (skb) { + __skb_queue_purge(&sk->sk_receive_queue); + tipc_sk_respond(sk, skb, error); + break; + } skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, tsk_own_node(tsk), tsk_peer_port(tsk), tsk->portid, error); if (skb) tipc_node_xmit_skb(net, skb, dnode, tsk->portid); - tipc_node_remove_conn(net, dnode, tsk->portid); - tipc_set_sk_state(sk, TIPC_DISCONNECTING); + break; + case TIPC_LISTEN: + /* Reject all SYN messages */ + tsk_rej_rx_queue(sk, error); + break; + default: + __skb_queue_purge(&sk->sk_receive_queue); + break; } } @@ -1364,8 +1375,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) struct tipc_msg *hdr = &tsk->phdr; struct tipc_name_seq *seq; struct sk_buff_head pkts; - u32 dport, dnode = 0; - u32 type, inst; + u32 dport = 0, dnode = 0; + u32 type = 0, inst = 0; int mtu, rc; if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE)) @@ -1418,23 +1429,11 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) type = dest->addr.name.name.type; inst = dest->addr.name.name.instance; dnode = dest->addr.name.domain; - msg_set_type(hdr, TIPC_NAMED_MSG); - msg_set_hdr_sz(hdr, NAMED_H_SIZE); - msg_set_nametype(hdr, type); - msg_set_nameinst(hdr, inst); - msg_set_lookup_scope(hdr, tipc_node2scope(dnode)); dport = tipc_nametbl_translate(net, type, inst, &dnode); - msg_set_destnode(hdr, dnode); - msg_set_destport(hdr, dport); if (unlikely(!dport && !dnode)) return -EHOSTUNREACH; } else if (dest->addrtype == TIPC_ADDR_ID) { dnode = dest->addr.id.node; - msg_set_type(hdr, TIPC_DIRECT_MSG); - msg_set_lookup_scope(hdr, 0); - msg_set_destnode(hdr, dnode); - msg_set_destport(hdr, dest->addr.id.ref); - msg_set_hdr_sz(hdr, BASIC_H_SIZE); } else { return -EINVAL; } @@ -1445,6 +1444,22 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (unlikely(rc)) return rc; + if (dest->addrtype == TIPC_ADDR_NAME) { + msg_set_type(hdr, TIPC_NAMED_MSG); + msg_set_hdr_sz(hdr, NAMED_H_SIZE); + msg_set_nametype(hdr, type); + msg_set_nameinst(hdr, inst); + msg_set_lookup_scope(hdr, tipc_node2scope(dnode)); + msg_set_destnode(hdr, dnode); + msg_set_destport(hdr, dport); + } else { /* TIPC_ADDR_ID */ + msg_set_type(hdr, TIPC_DIRECT_MSG); + msg_set_lookup_scope(hdr, 0); + msg_set_destnode(hdr, dnode); + msg_set_destport(hdr, dest->addr.id.ref); + msg_set_hdr_sz(hdr, BASIC_H_SIZE); + } + __skb_queue_head_init(&pkts); mtu = tipc_node_get_mtu(net, dnode, tsk->portid, false); rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); @@ -2428,8 +2443,8 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) return sock_intr_errno(*timeo_p); add_wait_queue(sk_sleep(sk), &wait); - done = sk_wait_event(sk, timeo_p, - sk->sk_state != TIPC_CONNECTING, &wait); + done = sk_wait_event(sk, timeo_p, tipc_sk_connected(sk), + &wait); remove_wait_queue(sk_sleep(sk), &wait); } while (!done); return 0; @@ -2639,7 +2654,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, * Reject any stray messages received by new socket * before the socket lock was taken (very, very unlikely) */ - tsk_rej_rx_queue(new_sk); + tsk_rej_rx_queue(new_sk, TIPC_ERR_NO_PORT); /* Connect new socket to it's peer */ tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg)); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index dac24c7aa7d4..94774c0e5ff3 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -732,15 +732,19 @@ out: return rc; } -static void tls_update(struct sock *sk, struct proto *p) +static void tls_update(struct sock *sk, struct proto *p, + void (*write_space)(struct sock *sk)) { struct tls_context *ctx; ctx = tls_get_ctx(sk); - if (likely(ctx)) + if (likely(ctx)) { + ctx->sk_write_space = write_space; ctx->sk_proto = p; - else + } else { sk->sk_prot = p; + sk->sk_write_space = write_space; + } } static int tls_get_info(const struct sock *sk, struct sk_buff *skb) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index c6803a82b769..c98e602a1a2d 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -256,8 +256,6 @@ static int tls_do_decryption(struct sock *sk, return ret; ret = crypto_wait_req(ret, &ctx->async_wait); - } else if (ret == -EBADMSG) { - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR); } if (async) @@ -682,12 +680,32 @@ static int tls_push_record(struct sock *sk, int flags, split_point = msg_pl->apply_bytes; split = split_point && split_point < msg_pl->sg.size; + if (unlikely((!split && + msg_pl->sg.size + + prot->overhead_size > msg_en->sg.size) || + (split && + split_point + + prot->overhead_size > msg_en->sg.size))) { + split = true; + split_point = msg_en->sg.size; + } if (split) { rc = tls_split_open_record(sk, rec, &tmp, msg_pl, msg_en, split_point, prot->overhead_size, &orig_end); if (rc < 0) return rc; + /* This can happen if above tls_split_open_record allocates + * a single large encryption buffer instead of two smaller + * ones. In this case adjust pointers and continue without + * split. + */ + if (!msg_pl->sg.size) { + tls_merge_open_record(sk, rec, tmp, orig_end); + msg_pl = &rec->msg_plaintext; + msg_en = &rec->msg_encrypted; + split = false; + } sk_msg_trim(sk, msg_en, msg_pl->sg.size + prot->overhead_size); } @@ -709,6 +727,12 @@ static int tls_push_record(struct sock *sk, int flags, sg_mark_end(sk_msg_elem(msg_pl, i)); } + if (msg_pl->sg.end < msg_pl->sg.start) { + sg_chain(&msg_pl->sg.data[msg_pl->sg.start], + MAX_SKB_FRAGS - msg_pl->sg.start + 1, + msg_pl->sg.data); + } + i = msg_pl->sg.start; sg_chain(rec->sg_aead_in, 2, &msg_pl->sg.data[i]); @@ -772,7 +796,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, psock = sk_psock_get(sk); if (!psock || !policy) { err = tls_push_record(sk, flags, record_type); - if (err) { + if (err && err != -EINPROGRESS) { *copied -= sk_msg_free(sk, msg); tls_free_open_rec(sk); } @@ -783,10 +807,7 @@ more_data: if (psock->eval == __SK_NONE) { delta = msg->sg.size; psock->eval = sk_psock_msg_verdict(sk, psock, msg); - if (delta < msg->sg.size) - delta -= msg->sg.size; - else - delta = 0; + delta -= msg->sg.size; } if (msg->cork_bytes && msg->cork_bytes > msg->sg.size && !enospc && !full_record) { @@ -801,7 +822,7 @@ more_data: switch (psock->eval) { case __SK_PASS: err = tls_push_record(sk, flags, record_type); - if (err < 0) { + if (err && err != -EINPROGRESS) { *copied -= sk_msg_free(sk, msg); tls_free_open_rec(sk); goto out_err; @@ -1515,7 +1536,9 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, if (err == -EINPROGRESS) tls_advance_record_sn(sk, prot, &tls_ctx->rx); - + else if (err == -EBADMSG) + TLS_INC_STATS(sock_net(sk), + LINUX_MIB_TLSDECRYPTERROR); return err; } } else { diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index b3bdae74c243..3492c021925f 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -138,28 +138,15 @@ struct hvsock { **************************************************************************** * The only valid Service GUIDs, from the perspectives of both the host and * * Linux VM, that can be connected by the other end, must conform to this * - * format: <port>-facb-11e6-bd58-64006a7986d3, and the "port" must be in * - * this range [0, 0x7FFFFFFF]. * + * format: <port>-facb-11e6-bd58-64006a7986d3. * **************************************************************************** * * When we write apps on the host to connect(), the GUID ServiceID is used. * When we write apps in Linux VM to connect(), we only need to specify the * port and the driver will form the GUID and use that to request the host. * - * From the perspective of Linux VM: - * 1. the local ephemeral port (i.e. the local auto-bound port when we call - * connect() without explicit bind()) is generated by __vsock_bind_stream(), - * and the range is [1024, 0xFFFFFFFF). - * 2. the remote ephemeral port (i.e. the auto-generated remote port for - * a connect request initiated by the host's connect()) is generated by - * hvs_remote_addr_init() and the range is [0x80000000, 0xFFFFFFFF). */ -#define MAX_LISTEN_PORT ((u32)0x7FFFFFFF) -#define MAX_VM_LISTEN_PORT MAX_LISTEN_PORT -#define MAX_HOST_LISTEN_PORT MAX_LISTEN_PORT -#define MIN_HOST_EPHEMERAL_PORT (MAX_HOST_LISTEN_PORT + 1) - /* 00000000-facb-11e6-bd58-64006a7986d3 */ static const guid_t srv_id_template = GUID_INIT(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58, @@ -184,34 +171,6 @@ static void hvs_addr_init(struct sockaddr_vm *addr, const guid_t *svr_id) vsock_addr_init(addr, VMADDR_CID_ANY, port); } -static void hvs_remote_addr_init(struct sockaddr_vm *remote, - struct sockaddr_vm *local) -{ - static u32 host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT; - struct sock *sk; - - /* Remote peer is always the host */ - vsock_addr_init(remote, VMADDR_CID_HOST, VMADDR_PORT_ANY); - - while (1) { - /* Wrap around ? */ - if (host_ephemeral_port < MIN_HOST_EPHEMERAL_PORT || - host_ephemeral_port == VMADDR_PORT_ANY) - host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT; - - remote->svm_port = host_ephemeral_port++; - - sk = vsock_find_connected_socket(remote, local); - if (!sk) { - /* Found an available ephemeral port */ - return; - } - - /* Release refcnt got in vsock_find_connected_socket */ - sock_put(sk); - } -} - static void hvs_set_channel_pending_send_size(struct vmbus_channel *chan) { set_channel_pending_send_size(chan, @@ -341,12 +300,7 @@ static void hvs_open_connection(struct vmbus_channel *chan) if_type = &chan->offermsg.offer.if_type; if_instance = &chan->offermsg.offer.if_instance; conn_from_host = chan->offermsg.offer.u.pipe.user_def[0]; - - /* The host or the VM should only listen on a port in - * [0, MAX_LISTEN_PORT] - */ - if (!is_valid_srv_id(if_type) || - get_port_by_srv_id(if_type) > MAX_LISTEN_PORT) + if (!is_valid_srv_id(if_type)) return; hvs_addr_init(&addr, conn_from_host ? if_type : if_instance); @@ -371,8 +325,11 @@ static void hvs_open_connection(struct vmbus_channel *chan) vnew = vsock_sk(new); hvs_addr_init(&vnew->local_addr, if_type); - hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr); + /* Remote peer is always the host */ + vsock_addr_init(&vnew->remote_addr, + VMADDR_CID_HOST, VMADDR_PORT_ANY); + vnew->remote_addr.svm_port = get_port_by_srv_id(if_instance); ret = vsock_assign_transport(vnew, vsock_sk(sk)); /* Transport assigned (looking at remote_addr) must be the * same where we received the request. @@ -766,16 +723,6 @@ static bool hvs_stream_is_active(struct vsock_sock *vsk) static bool hvs_stream_allow(u32 cid, u32 port) { - /* The host's port range [MIN_HOST_EPHEMERAL_PORT, 0xFFFFFFFF) is - * reserved as ephemeral ports, which are used as the host's ports - * when the host initiates connections. - * - * Perform this check in the guest so an immediate error is produced - * instead of a timeout. - */ - if (port > MAX_HOST_LISTEN_PORT) - return false; - if (cid == VMADDR_CID_HOST) return true; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index e5ea29c6bca7..6abec3fc81d1 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -34,6 +34,9 @@ virtio_transport_get_ops(struct vsock_sock *vsk) { const struct vsock_transport *t = vsock_core_get_transport(vsk); + if (WARN_ON(!t)) + return NULL; + return container_of(t, struct virtio_transport, transport); } @@ -161,15 +164,25 @@ void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt) } EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); +/* This function can only be used on connecting/connected sockets, + * since a socket assigned to a transport is required. + * + * Do not use on listener sockets! + */ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, struct virtio_vsock_pkt_info *info) { u32 src_cid, src_port, dst_cid, dst_port; + const struct virtio_transport *t_ops; struct virtio_vsock_sock *vvs; struct virtio_vsock_pkt *pkt; u32 pkt_len = info->pkt_len; - src_cid = virtio_transport_get_ops(vsk)->transport.get_local_cid(); + t_ops = virtio_transport_get_ops(vsk); + if (unlikely(!t_ops)) + return -EFAULT; + + src_cid = t_ops->transport.get_local_cid(); src_port = vsk->local_addr.svm_port; if (!info->remote_cid) { dst_cid = vsk->remote_addr.svm_cid; @@ -202,7 +215,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, virtio_transport_inc_tx_pkt(vvs, pkt); - return virtio_transport_get_ops(vsk)->send_pkt(pkt); + return t_ops->send_pkt(pkt); } static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, @@ -1021,18 +1034,18 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt, int ret; if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) { - virtio_transport_reset(vsk, pkt); + virtio_transport_reset_no_sock(t, pkt); return -EINVAL; } if (sk_acceptq_is_full(sk)) { - virtio_transport_reset(vsk, pkt); + virtio_transport_reset_no_sock(t, pkt); return -ENOMEM; } child = vsock_create_connected(sk); if (!child) { - virtio_transport_reset(vsk, pkt); + virtio_transport_reset_no_sock(t, pkt); return -ENOMEM; } @@ -1054,7 +1067,7 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt, */ if (ret || vchild->transport != &t->transport) { release_sock(child); - virtio_transport_reset(vsk, pkt); + virtio_transport_reset_no_sock(t, pkt); sock_put(child); return ret; } diff --git a/net/wireless/core.c b/net/wireless/core.c index 350513744575..3e25229a059d 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1102,6 +1102,7 @@ static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync) #ifdef CONFIG_CFG80211_WEXT kzfree(wdev->wext.keys); + wdev->wext.keys = NULL; #endif /* only initialized if we have a netdev */ if (wdev->netdev) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index da5262b2298b..1e97ac5435b2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10843,6 +10843,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, if (err) return err; + cfg80211_sinfo_release_content(&sinfo); if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG)) wdev->cqm_config->last_rssi_event_value = (s8) sinfo.rx_beacon_signal_avg; @@ -13796,6 +13797,8 @@ static int nl80211_probe_mesh_link(struct sk_buff *skb, struct genl_info *info) if (err) return err; + cfg80211_sinfo_release_content(&sinfo); + return rdev_probe_mesh_link(rdev, dev, dest, buf, len); } diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index e853a4fe6f97..e0d34f796d0b 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -538,6 +538,10 @@ static inline int rdev_set_wiphy_params(struct cfg80211_registered_device *rdev, u32 changed) { int ret; + + if (!rdev->ops->set_wiphy_params) + return -EOPNOTSUPP; + trace_rdev_set_wiphy_params(&rdev->wiphy, changed); ret = rdev->ops->set_wiphy_params(&rdev->wiphy, changed); trace_rdev_return_int(&rdev->wiphy, ret); @@ -1167,6 +1171,16 @@ rdev_start_radar_detection(struct cfg80211_registered_device *rdev, return ret; } +static inline void +rdev_end_cac(struct cfg80211_registered_device *rdev, + struct net_device *dev) +{ + trace_rdev_end_cac(&rdev->wiphy, dev); + if (rdev->ops->end_cac) + rdev->ops->end_cac(&rdev->wiphy, dev); + trace_rdev_return_void(&rdev->wiphy); +} + static inline int rdev_set_mcast_rate(struct cfg80211_registered_device *rdev, struct net_device *dev, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 446c76d44e65..fff9a74891fc 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2261,14 +2261,15 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) static void handle_channel_custom(struct wiphy *wiphy, struct ieee80211_channel *chan, - const struct ieee80211_regdomain *regd) + const struct ieee80211_regdomain *regd, + u32 min_bw) { u32 bw_flags = 0; const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_power_rule *power_rule = NULL; u32 bw; - for (bw = MHZ_TO_KHZ(20); bw >= MHZ_TO_KHZ(5); bw = bw / 2) { + for (bw = MHZ_TO_KHZ(20); bw >= min_bw; bw = bw / 2) { reg_rule = freq_reg_info_regd(MHZ_TO_KHZ(chan->center_freq), regd, bw); if (!IS_ERR(reg_rule)) @@ -2324,8 +2325,14 @@ static void handle_band_custom(struct wiphy *wiphy, if (!sband) return; + /* + * We currently assume that you always want at least 20 MHz, + * otherwise channel 12 might get enabled if this rule is + * compatible to US, which permits 2402 - 2472 MHz. + */ for (i = 0; i < sband->n_channels; i++) - handle_channel_custom(wiphy, &sband->channels[i], regd); + handle_channel_custom(wiphy, &sband->channels[i], regd, + MHZ_TO_KHZ(20)); } /* Used by drivers prior to wiphy registration */ @@ -3885,6 +3892,25 @@ bool regulatory_pre_cac_allowed(struct wiphy *wiphy) } EXPORT_SYMBOL(regulatory_pre_cac_allowed); +static void cfg80211_check_and_end_cac(struct cfg80211_registered_device *rdev) +{ + struct wireless_dev *wdev; + /* If we finished CAC or received radar, we should end any + * CAC running on the same channels. + * the check !cfg80211_chandef_dfs_usable contain 2 options: + * either all channels are available - those the CAC_FINISHED + * event has effected another wdev state, or there is a channel + * in unavailable state in wdev chandef - those the RADAR_DETECTED + * event has effected another wdev state. + * In both cases we should end the CAC on the wdev. + */ + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { + if (wdev->cac_started && + !cfg80211_chandef_dfs_usable(&rdev->wiphy, &wdev->chandef)) + rdev_end_cac(rdev, wdev->netdev); + } +} + void regulatory_propagate_dfs_state(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, enum nl80211_dfs_state dfs_state, @@ -3911,8 +3937,10 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy, cfg80211_set_dfs_state(&rdev->wiphy, chandef, dfs_state); if (event == NL80211_RADAR_DETECTED || - event == NL80211_RADAR_CAC_FINISHED) + event == NL80211_RADAR_CAC_FINISHED) { cfg80211_sched_dfs_chan_update(rdev); + cfg80211_check_and_end_cac(rdev); + } nl80211_radar_notify(rdev, chandef, event, NULL, GFP_KERNEL); } diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 7a6c38ddc65a..d32a2ec4d96a 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1307,14 +1307,14 @@ void cfg80211_autodisconnect_wk(struct work_struct *work) if (wdev->conn_owner_nlportid) { switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - cfg80211_leave_ibss(rdev, wdev->netdev, false); + __cfg80211_leave_ibss(rdev, wdev->netdev, false); break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - cfg80211_stop_ap(rdev, wdev->netdev, false); + __cfg80211_stop_ap(rdev, wdev->netdev, false); break; case NL80211_IFTYPE_MESH_POINT: - cfg80211_leave_mesh(rdev, wdev->netdev); + __cfg80211_leave_mesh(rdev, wdev->netdev); break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: diff --git a/net/wireless/trace.h b/net/wireless/trace.h index d98ad2b3143b..8677d7ab7d69 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -646,6 +646,11 @@ DEFINE_EVENT(wiphy_netdev_evt, rdev_flush_pmksa, TP_ARGS(wiphy, netdev) ); +DEFINE_EVENT(wiphy_netdev_evt, rdev_end_cac, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), + TP_ARGS(wiphy, netdev) +); + DECLARE_EVENT_CLASS(station_add_change, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *mac, struct station_parameters *params), diff --git a/net/wireless/util.c b/net/wireless/util.c index 5b4ed5bbc542..8481e9ac33da 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -564,7 +564,7 @@ __frame_add_frag(struct sk_buff *skb, struct page *page, struct skb_shared_info *sh = skb_shinfo(skb); int page_offset; - page_ref_inc(page); + get_page(page); page_offset = ptr - page_address(page); skb_add_rx_frag(skb, sh->nr_frags, page, page_offset, len, size); } diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 5e677dac2a0c..69102fda9ebd 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -657,7 +657,8 @@ struct iw_statistics *get_wireless_stats(struct net_device *dev) return NULL; } -static int iw_handler_get_iwstats(struct net_device * dev, +/* noinline to avoid a bogus warning with -O3 */ +static noinline int iw_handler_get_iwstats(struct net_device * dev, struct iw_request_info * info, union iwreq_data * wrqu, char * extra) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index c34f7d077604..d5b09bbff375 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -659,6 +659,12 @@ static int x25_release(struct socket *sock) sock_set_flag(sk, SOCK_DEAD); sock_set_flag(sk, SOCK_DESTROY); break; + + case X25_STATE_5: + x25_write_internal(sk, X25_CLEAR_REQUEST); + x25_disconnect(sk, 0, 0, 0); + __x25_destroy_socket(sk); + goto out; } sock_orphan(sk); @@ -760,6 +766,10 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, if (sk->sk_state == TCP_ESTABLISHED) goto out; + rc = -EALREADY; /* Do nothing if call is already in progress */ + if (sk->sk_state == TCP_SYN_SENT) + goto out; + sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; @@ -806,7 +816,7 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, /* Now the loop */ rc = -EINPROGRESS; if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) - goto out_put_neigh; + goto out; rc = x25_wait_for_connection_establishment(sk); if (rc) @@ -1054,6 +1064,8 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, if (test_bit(X25_ACCPT_APPRV_FLAG, &makex25->flags)) { x25_write_internal(make, X25_CALL_ACCEPTED); makex25->state = X25_STATE_3; + } else { + makex25->state = X25_STATE_5; } /* diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index f97c43344e95..4d3bb46aaae0 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -382,6 +382,35 @@ out_clear: return 0; } +/* + * State machine for state 5, Call Accepted / Call Connected pending (X25_ACCPT_APPRV_FLAG). + * The handling of the timer(s) is in file x25_timer.c + * Handling of state 0 and connection release is in af_x25.c. + */ +static int x25_state5_machine(struct sock *sk, struct sk_buff *skb, int frametype) +{ + struct x25_sock *x25 = x25_sk(sk); + + switch (frametype) { + case X25_CLEAR_REQUEST: + if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2)) { + x25_write_internal(sk, X25_CLEAR_REQUEST); + x25->state = X25_STATE_2; + x25_start_t23timer(sk); + return 0; + } + + x25_write_internal(sk, X25_CLEAR_CONFIRMATION); + x25_disconnect(sk, 0, skb->data[3], skb->data[4]); + break; + + default: + break; + } + + return 0; +} + /* Higher level upcall for a LAPB frame */ int x25_process_rx_frame(struct sock *sk, struct sk_buff *skb) { @@ -406,6 +435,9 @@ int x25_process_rx_frame(struct sock *sk, struct sk_buff *skb) case X25_STATE_4: queued = x25_state4_machine(sk, skb, frametype); break; + case X25_STATE_5: + queued = x25_state5_machine(sk, skb, frametype); + break; } x25_kick(sk); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 956793893c9d..328f661b83b2 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -334,12 +334,21 @@ out: } EXPORT_SYMBOL(xsk_umem_consume_tx); -static int xsk_zc_xmit(struct xdp_sock *xs) +static int xsk_wakeup(struct xdp_sock *xs, u8 flags) { struct net_device *dev = xs->dev; + int err; + + rcu_read_lock(); + err = dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags); + rcu_read_unlock(); + + return err; +} - return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, - XDP_WAKEUP_TX); +static int xsk_zc_xmit(struct xdp_sock *xs) +{ + return xsk_wakeup(xs, XDP_WAKEUP_TX); } static void xsk_destruct_skb(struct sk_buff *skb) @@ -453,19 +462,16 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, __poll_t mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); - struct net_device *dev; struct xdp_umem *umem; if (unlikely(!xsk_is_bound(xs))) return mask; - dev = xs->dev; umem = xs->umem; if (umem->need_wakeup) { - if (dev->netdev_ops->ndo_xsk_wakeup) - dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, - umem->need_wakeup); + if (xs->zc) + xsk_wakeup(xs, umem->need_wakeup); else /* Poll needs to drive Tx also in copy mode */ __xsk_sendmsg(sk); |