summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/atm/common.c22
-rw-r--r--net/atm/lec.c6
-rw-r--r--net/ax25/af_ax25.c6
-rw-r--r--net/batman-adv/bat_v_ogm.c2
-rw-r--r--net/batman-adv/network-coding.c9
-rw-r--r--net/batman-adv/sysfs.c3
-rw-r--r--net/bridge/br_netlink.c1
-rw-r--r--net/core/dev.c30
-rw-r--r--net/core/devlink.c12
-rw-r--r--net/core/drop_monitor.c11
-rw-r--r--net/core/filter.c4
-rw-r--r--net/core/flow_dissector.c26
-rw-r--r--net/core/neighbour.c6
-rw-r--r--net/core/net-sysfs.c2
-rw-r--r--net/core/netclassid_cgroup.c4
-rw-r--r--net/core/netprio_cgroup.c2
-rw-r--r--net/core/sock.c3
-rw-r--r--net/dsa/dsa2.c2
-rw-r--r--net/dsa/master.c3
-rw-r--r--net/dsa/port.c7
-rw-r--r--net/dsa/slave.c16
-rw-r--r--net/dsa/tag_mtk.c15
-rw-r--r--net/ethtool/netlink.c4
-rw-r--r--net/ethtool/strset.c1
-rw-r--r--net/hsr/hsr_netlink.c10
-rw-r--r--net/hsr/hsr_slave.c2
-rw-r--r--net/ipv4/cipso_ipv4.c6
-rw-r--r--net/ipv4/devinet.c13
-rw-r--r--net/ipv4/fib_frontend.c3
-rw-r--r--net/ipv4/fib_semantics.c6
-rw-r--r--net/ipv4/inet_connection_sock.c43
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/ipmr.c8
-rw-r--r--net/ipv4/nexthop.c3
-rw-r--r--net/ipv4/route.c16
-rw-r--r--net/ipv4/tcp.c27
-rw-r--r--net/ipv4/tcp_bpf.c10
-rw-r--r--net/ipv4/tcp_input.c10
-rw-r--r--net/ipv4/xfrm4_output.c2
-rw-r--r--net/ipv6/calipso.c3
-rw-r--r--net/ipv6/icmp.c21
-rw-r--r--net/ipv6/ip6_fib.c2
-rw-r--r--net/ipv6/ip6mr.c5
-rw-r--r--net/ipv6/ipv6_sockglue.c13
-rw-r--r--net/ipv6/route.c31
-rw-r--r--net/ipv6/rpl.c7
-rw-r--r--net/ipv6/seg6.c12
-rw-r--r--net/ipv6/xfrm6_output.c2
-rw-r--r--net/l2tp/l2tp_netlink.c16
-rw-r--r--net/mac80211/main.c29
-rw-r--r--net/mac80211/mesh.c11
-rw-r--r--net/mac80211/rate.c15
-rw-r--r--net/mac80211/rate.h23
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c19
-rw-r--r--net/mac80211/sta_info.c3
-rw-r--r--net/mptcp/crypto.c24
-rw-r--r--net/mptcp/options.c105
-rw-r--r--net/mptcp/pm_netlink.c12
-rw-r--r--net/mptcp/protocol.c57
-rw-r--r--net/mptcp/protocol.h50
-rw-r--r--net/mptcp/subflow.c194
-rw-r--r--net/netfilter/ipset/ip_set_core.c3
-rw-r--r--net/netfilter/nf_conntrack_core.c17
-rw-r--r--net/netfilter/nf_flow_table_core.c14
-rw-r--r--net/netfilter/nf_flow_table_offload.c10
-rw-r--r--net/netfilter/nf_nat_proto.c8
-rw-r--r--net/netfilter/nf_tables_api.c7
-rw-r--r--net/netfilter/nfnetlink_osf.c12
-rw-r--r--net/netfilter/nft_lookup.c12
-rw-r--r--net/netfilter/nft_set_bitmap.c1
-rw-r--r--net/netfilter/nft_set_rbtree.c34
-rw-r--r--net/netfilter/xt_IDLETIMER.c3
-rw-r--r--net/netlabel/Kconfig2
-rw-r--r--net/netlabel/netlabel_kapi.c6
-rw-r--r--net/netrom/nr_route.c1
-rw-r--r--net/openvswitch/conntrack.c3
-rw-r--r--net/openvswitch/datapath.c4
-rw-r--r--net/qrtr/qrtr.c9
-rw-r--r--net/rds/message.c25
-rw-r--r--net/rds/rdma.c65
-rw-r--r--net/rds/rds.h20
-rw-r--r--net/rds/send.c6
-rw-r--r--net/rxrpc/Makefile1
-rw-r--r--net/rxrpc/ar-internal.h25
-rw-r--r--net/rxrpc/call_accept.c2
-rw-r--r--net/rxrpc/call_event.c22
-rw-r--r--net/rxrpc/input.c44
-rw-r--r--net/rxrpc/local_object.c9
-rw-r--r--net/rxrpc/misc.c5
-rw-r--r--net/rxrpc/output.c53
-rw-r--r--net/rxrpc/peer_event.c46
-rw-r--r--net/rxrpc/peer_object.c12
-rw-r--r--net/rxrpc/proc.c8
-rw-r--r--net/rxrpc/rtt.c195
-rw-r--r--net/rxrpc/rxkad.c3
-rw-r--r--net/rxrpc/sendmsg.c26
-rw-r--r--net/rxrpc/sysctl.c9
-rw-r--r--net/sched/cls_api.c23
-rw-r--r--net/sched/sch_choke.c3
-rw-r--r--net/sched/sch_etf.c7
-rw-r--r--net/sched/sch_fq_codel.c2
-rw-r--r--net/sched/sch_sfq.c9
-rw-r--r--net/sched/sch_skbprio.c3
-rw-r--r--net/sctp/sm_make_chunk.c6
-rw-r--r--net/sctp/sm_sideeffect.c14
-rw-r--r--net/sctp/sm_statefuns.c15
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c12
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c8
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c44
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c3
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c10
-rw-r--r--net/sunrpc/cache.c5
-rw-r--r--net/sunrpc/clnt.c33
-rw-r--r--net/sunrpc/svc_xprt.c5
-rw-r--r--net/sunrpc/svcsock.c4
-rw-r--r--net/sunrpc/xdr.c41
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c15
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c2
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c22
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c3
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c29
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c5
-rw-r--r--net/sunrpc/xprtrdma/verbs.c3
-rw-r--r--net/sunrpc/xprtsock.c1
-rw-r--r--net/tipc/crypto.c1
-rw-r--r--net/tipc/link.c2
-rw-r--r--net/tipc/node.c4
-rw-r--r--net/tipc/socket.c42
-rw-r--r--net/tipc/subscr.h10
-rw-r--r--net/tipc/topsrv.c18
-rw-r--r--net/tipc/udp_media.c6
-rw-r--r--net/tls/tls_main.c4
-rw-r--r--net/tls/tls_sw.c24
-rw-r--r--net/vmw_vsock/virtio_transport_common.c4
-rw-r--r--net/wireless/nl80211.c6
-rw-r--r--net/x25/x25_dev.c4
-rw-r--r--net/x25/x25_subr.c6
-rw-r--r--net/xdp/xdp_umem.c5
-rw-r--r--net/xdp/xsk.c5
139 files changed, 1417 insertions, 785 deletions
diff --git a/net/atm/common.c b/net/atm/common.c
index 0ce530af534d..8575f5d52087 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -177,18 +177,18 @@ static void vcc_destroy_socket(struct sock *sk)
set_bit(ATM_VF_CLOSE, &vcc->flags);
clear_bit(ATM_VF_READY, &vcc->flags);
- if (vcc->dev) {
- if (vcc->dev->ops->close)
- vcc->dev->ops->close(vcc);
- if (vcc->push)
- vcc->push(vcc, NULL); /* atmarpd has no push */
- module_put(vcc->owner);
-
- while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
- atm_return(vcc, skb->truesize);
- kfree_skb(skb);
- }
+ if (vcc->dev && vcc->dev->ops->close)
+ vcc->dev->ops->close(vcc);
+ if (vcc->push)
+ vcc->push(vcc, NULL); /* atmarpd has no push */
+ module_put(vcc->owner);
+
+ while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+ atm_return(vcc, skb->truesize);
+ kfree_skb(skb);
+ }
+ if (vcc->dev && vcc->dev->ops->owner) {
module_put(vcc->dev->ops->owner);
atm_dev_put(vcc->dev);
}
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 25fa3a7b72bd..ca37f5a71f5e 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1264,6 +1264,12 @@ static void lec_arp_clear_vccs(struct lec_arp_table *entry)
entry->vcc = NULL;
}
if (entry->recv_vcc) {
+ struct atm_vcc *vcc = entry->recv_vcc;
+ struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc);
+
+ kfree(vpriv);
+ vcc->user_back = NULL;
+
entry->recv_vcc->push = entry->old_recv_push;
vcc_release_async(entry->recv_vcc, -EPIPE);
entry->recv_vcc = NULL;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index ff57ea89c27e..fd91cd34f25e 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -635,8 +635,10 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
break;
case SO_BINDTODEVICE:
- if (optlen > IFNAMSIZ)
- optlen = IFNAMSIZ;
+ if (optlen > IFNAMSIZ - 1)
+ optlen = IFNAMSIZ - 1;
+
+ memset(devname, 0, sizeof(devname));
if (copy_from_user(devname, optval, optlen)) {
res = -EFAULT;
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 969466218999..80b87b1f4e3a 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -893,7 +893,7 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
orig_node = batadv_v_ogm_orig_get(bat_priv, ogm_packet->orig);
if (!orig_node)
- return;
+ goto out;
neigh_node = batadv_neigh_node_get_or_create(orig_node, if_incoming,
ethhdr->h_source);
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 8f0717c3f7b5..b0469d15da0e 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1009,15 +1009,8 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
*/
static u8 batadv_nc_random_weight_tq(u8 tq)
{
- u8 rand_val, rand_tq;
-
- get_random_bytes(&rand_val, sizeof(rand_val));
-
/* randomize the estimated packet loss (max TQ - estimated TQ) */
- rand_tq = rand_val * (BATADV_TQ_MAX_VALUE - tq);
-
- /* normalize the randomized packet loss */
- rand_tq /= BATADV_TQ_MAX_VALUE;
+ u8 rand_tq = prandom_u32_max(BATADV_TQ_MAX_VALUE + 1 - tq);
/* convert to (randomized) estimated tq again */
return BATADV_TQ_MAX_VALUE - rand_tq;
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index c45962d8527b..0f962dcd239e 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1150,7 +1150,7 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj,
ret = batadv_parse_throughput(net_dev, buff, "throughput_override",
&tp_override);
if (!ret)
- return count;
+ goto out;
old_tp_override = atomic_read(&hard_iface->bat_v.throughput_override);
if (old_tp_override == tp_override)
@@ -1190,6 +1190,7 @@ static ssize_t batadv_show_throughput_override(struct kobject *kobj,
tp_override = atomic_read(&hard_iface->bat_v.throughput_override);
+ batadv_hardif_put(hard_iface);
return sprintf(buff, "%u.%u MBit\n", tp_override / 10,
tp_override % 10);
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 43dab4066f91..a0f5dbee8f9c 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -612,6 +612,7 @@ int br_process_vlan_info(struct net_bridge *br,
v - 1, rtm_cmd);
v_change_start = 0;
}
+ cond_resched();
}
/* v_change_start is set only if the last/whole range changed */
if (v_change_start)
diff --git a/net/core/dev.c b/net/core/dev.c
index 9c9e763bfe0e..2d8aceee4284 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4140,7 +4140,8 @@ EXPORT_SYMBOL(netdev_max_backlog);
int netdev_tstamp_prequeue __read_mostly = 1;
int netdev_budget __read_mostly = 300;
-unsigned int __read_mostly netdev_budget_usecs = 2000;
+/* Must be at least 2 jiffes to guarantee 1 jiffy timeout */
+unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ;
int weight_p __read_mostly = 64; /* old backlog weight */
int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */
@@ -4987,11 +4988,12 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
return 0;
}
-static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
+static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
struct packet_type **ppt_prev)
{
struct packet_type *ptype, *pt_prev;
rx_handler_func_t *rx_handler;
+ struct sk_buff *skb = *pskb;
struct net_device *orig_dev;
bool deliver_exact = false;
int ret = NET_RX_DROP;
@@ -5022,8 +5024,10 @@ another_round:
ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
preempt_enable();
- if (ret2 != XDP_PASS)
- return NET_RX_DROP;
+ if (ret2 != XDP_PASS) {
+ ret = NET_RX_DROP;
+ goto out;
+ }
skb_reset_mac_len(skb);
}
@@ -5173,6 +5177,13 @@ drop:
}
out:
+ /* The invariant here is that if *ppt_prev is not NULL
+ * then skb should also be non-NULL.
+ *
+ * Apparently *ppt_prev assignment above holds this invariant due to
+ * skb dereferencing near it.
+ */
+ *pskb = skb;
return ret;
}
@@ -5182,7 +5193,7 @@ static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
struct packet_type *pt_prev = NULL;
int ret;
- ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+ ret = __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev);
if (pt_prev)
ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb,
skb->dev, pt_prev, orig_dev);
@@ -5260,7 +5271,7 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo
struct packet_type *pt_prev = NULL;
skb_list_del_init(skb);
- __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+ __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev);
if (!pt_prev)
continue;
if (pt_curr != pt_prev || od_curr != orig_dev) {
@@ -8666,8 +8677,8 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
const struct net_device_ops *ops = dev->netdev_ops;
enum bpf_netdev_command query;
u32 prog_id, expected_id = 0;
- struct bpf_prog *prog = NULL;
bpf_op_t bpf_op, bpf_chk;
+ struct bpf_prog *prog;
bool offload;
int err;
@@ -8733,6 +8744,7 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
} else {
if (!prog_id)
return 0;
+ prog = NULL;
}
err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
@@ -8905,11 +8917,13 @@ static void netdev_sync_lower_features(struct net_device *upper,
netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
&feature, lower->name);
lower->wanted_features &= ~feature;
- netdev_update_features(lower);
+ __netdev_update_features(lower);
if (unlikely(lower->features & feature))
netdev_WARN(upper, "failed to disable %pNF on %s!\n",
&feature, lower->name);
+ else
+ netdev_features_change(lower);
}
}
}
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 80f97722f31f..899edcee7dab 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4283,6 +4283,11 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
end_offset = nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
end_offset += nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]);
dump = false;
+
+ if (start_offset == end_offset) {
+ err = 0;
+ goto nla_put_failure;
+ }
}
err = devlink_nl_region_read_snapshot_fill(skb, devlink,
@@ -5363,6 +5368,7 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
{
enum devlink_health_reporter_state prev_health_state;
struct devlink *devlink = reporter->devlink;
+ unsigned long recover_ts_threshold;
/* write a log message of the current error */
WARN_ON(!msg);
@@ -5373,10 +5379,12 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
/* abort if the previous error wasn't recovered */
+ recover_ts_threshold = reporter->last_recovery_ts +
+ msecs_to_jiffies(reporter->graceful_period);
if (reporter->auto_recover &&
(prev_health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY ||
- jiffies - reporter->last_recovery_ts <
- msecs_to_jiffies(reporter->graceful_period))) {
+ (reporter->last_recovery_ts && reporter->recovery_count &&
+ time_is_after_jiffies(recover_ts_threshold)))) {
trace_devlink_health_recover_aborted(devlink,
reporter->ops->name,
reporter->health_state,
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 8e33cec9fc4e..2ee7bc4c9e03 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -213,6 +213,7 @@ static void sched_send_work(struct timer_list *t)
static void trace_drop_common(struct sk_buff *skb, void *location)
{
struct net_dm_alert_msg *msg;
+ struct net_dm_drop_point *point;
struct nlmsghdr *nlh;
struct nlattr *nla;
int i;
@@ -231,11 +232,13 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
nlh = (struct nlmsghdr *)dskb->data;
nla = genlmsg_data(nlmsg_data(nlh));
msg = nla_data(nla);
+ point = msg->points;
for (i = 0; i < msg->entries; i++) {
- if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
- msg->points[i].count++;
+ if (!memcmp(&location, &point->pc, sizeof(void *))) {
+ point->count++;
goto out;
}
+ point++;
}
if (msg->entries == dm_hit_limit)
goto out;
@@ -244,8 +247,8 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
*/
__nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point));
nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
- memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
- msg->points[msg->entries].count = 1;
+ memcpy(point->pc, &location, sizeof(void *));
+ point->count = 1;
msg->entries++;
if (!timer_pending(&data->send_timer)) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 7628b947dbc3..5cc9276f1023 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2590,8 +2590,8 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
}
pop = 0;
} else if (pop >= sge->length - a) {
- sge->length = a;
pop -= (sge->length - a);
+ sge->length = a;
}
}
@@ -5925,7 +5925,7 @@ BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags)
return -EOPNOTSUPP;
if (unlikely(dev_net(skb->dev) != sock_net(sk)))
return -ENETUNREACH;
- if (unlikely(sk->sk_reuseport))
+ if (unlikely(sk_fullsock(sk) && sk->sk_reuseport))
return -ESOCKTNOSUPPORT;
if (sk_is_refcounted(sk) &&
unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 3eff84824c8b..5dceed467f64 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -160,12 +160,10 @@ out:
return ret;
}
-int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
+static int flow_dissector_bpf_prog_detach(struct net *net)
{
struct bpf_prog *attached;
- struct net *net;
- net = current->nsproxy->net_ns;
mutex_lock(&flow_dissector_mutex);
attached = rcu_dereference_protected(net->flow_dissector_prog,
lockdep_is_held(&flow_dissector_mutex));
@@ -179,6 +177,24 @@ int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
return 0;
}
+int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
+{
+ return flow_dissector_bpf_prog_detach(current->nsproxy->net_ns);
+}
+
+static void __net_exit flow_dissector_pernet_pre_exit(struct net *net)
+{
+ /* We're not racing with attach/detach because there are no
+ * references to netns left when pre_exit gets called.
+ */
+ if (rcu_access_pointer(net->flow_dissector_prog))
+ flow_dissector_bpf_prog_detach(net);
+}
+
+static struct pernet_operations flow_dissector_pernet_ops __net_initdata = {
+ .pre_exit = flow_dissector_pernet_pre_exit,
+};
+
/**
* __skb_flow_get_ports - extract the upper layer ports and return them
* @skb: sk_buff to extract the ports from
@@ -1836,7 +1852,7 @@ static int __init init_default_flow_dissectors(void)
skb_flow_dissector_init(&flow_keys_basic_dissector,
flow_keys_basic_dissector_keys,
ARRAY_SIZE(flow_keys_basic_dissector_keys));
- return 0;
-}
+ return register_pernet_subsys(&flow_dissector_pernet_ops);
+}
core_initcall(init_default_flow_dissectors);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 39d37d0ef575..116139233d57 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1956,6 +1956,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
}
+ if (protocol)
+ neigh->protocol = protocol;
+
if (ndm->ndm_flags & NTF_EXT_LEARNED)
flags |= NEIGH_UPDATE_F_EXT_LEARNED;
@@ -1969,9 +1972,6 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
NETLINK_CB(skb).portid, extack);
- if (protocol)
- neigh->protocol = protocol;
-
neigh_release(neigh);
out:
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index cf0215734ceb..4773ad6ec111 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -80,7 +80,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
struct net_device *netdev = to_net_dev(dev);
struct net *net = dev_net(netdev);
unsigned long new;
- int ret = -EINVAL;
+ int ret;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index b4c87fe31be2..41b24cd31562 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -127,10 +127,8 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
cs->classid = (u32)value;
css_task_iter_start(css, 0, &it);
- while ((p = css_task_iter_next(&it))) {
+ while ((p = css_task_iter_next(&it)))
update_classid_task(p, cs->classid);
- cond_resched();
- }
css_task_iter_end(&it);
return 0;
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 8881dd943dd0..9bd4cab7d510 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -236,6 +236,8 @@ static void net_prio_attach(struct cgroup_taskset *tset)
struct task_struct *p;
struct cgroup_subsys_state *css;
+ cgroup_sk_alloc_disable();
+
cgroup_taskset_for_each(p, css, tset) {
void *v = (void *)(unsigned long)css->id;
diff --git a/net/core/sock.c b/net/core/sock.c
index ce1d8dce9b7a..b714162213ae 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1872,7 +1872,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
* as not suitable for copying when cloning.
*/
if (sk_user_data_is_nocopy(newsk))
- RCU_INIT_POINTER(newsk->sk_user_data, NULL);
+ newsk->sk_user_data = NULL;
newsk->sk_err = 0;
newsk->sk_err_soft = 0;
@@ -2364,7 +2364,6 @@ static void sk_leave_memory_pressure(struct sock *sk)
}
}
-/* On 32bit arches, an skb frag is limited to 2^15 */
#define SKB_FRAG_PAGE_ORDER get_order(32768)
DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 9a271a58a41d..d90665b465b8 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -459,7 +459,7 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
list_for_each_entry(dp, &dst->ports, list) {
err = dsa_port_setup(dp);
if (err)
- goto teardown;
+ continue;
}
return 0;
diff --git a/net/dsa/master.c b/net/dsa/master.c
index b5c535af63a3..a621367c6e8c 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -289,7 +289,8 @@ static void dsa_master_ndo_teardown(struct net_device *dev)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
- dev->netdev_ops = cpu_dp->orig_ndo_ops;
+ if (cpu_dp->orig_ndo_ops)
+ dev->netdev_ops = cpu_dp->orig_ndo_ops;
cpu_dp->orig_ndo_ops = NULL;
}
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 231b2d494f1c..a58fdd362574 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -670,11 +670,16 @@ int dsa_port_link_register_of(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
struct device_node *phy_np;
+ int port = dp->index;
if (!ds->ops->adjust_link) {
phy_np = of_parse_phandle(dp->dn, "phy-handle", 0);
- if (of_phy_is_fixed_link(dp->dn) || phy_np)
+ if (of_phy_is_fixed_link(dp->dn) || phy_np) {
+ if (ds->ops->phylink_mac_link_down)
+ ds->ops->phylink_mac_link_down(ds, port,
+ MLO_AN_FIXED, PHY_INTERFACE_MODE_NA);
return dsa_port_phylink_register(dp);
+ }
return 0;
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index e94eb1aac602..62f4ee3da172 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -856,20 +856,18 @@ dsa_slave_add_cls_matchall_mirred(struct net_device *dev,
struct dsa_port *to_dp;
int err;
- act = &cls->rule->action.entries[0];
-
if (!ds->ops->port_mirror_add)
return -EOPNOTSUPP;
- if (!act->dev)
- return -EINVAL;
-
if (!flow_action_basic_hw_stats_check(&cls->rule->action,
cls->common.extack))
return -EOPNOTSUPP;
act = &cls->rule->action.entries[0];
+ if (!act->dev)
+ return -EINVAL;
+
if (!dsa_slave_dev_check(act->dev))
return -EOPNOTSUPP;
@@ -1770,11 +1768,9 @@ int dsa_slave_create(struct dsa_port *port)
rtnl_lock();
ret = dsa_slave_change_mtu(slave_dev, ETH_DATA_LEN);
rtnl_unlock();
- if (ret && ret != -EOPNOTSUPP) {
- dev_err(ds->dev, "error %d setting MTU on port %d\n",
- ret, port->index);
- goto out_free;
- }
+ if (ret)
+ dev_warn(ds->dev, "nonfatal error %d setting MTU on port %d\n",
+ ret, port->index);
netif_carrier_off(slave_dev);
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index b5705cba8318..d6619edd53e5 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -15,6 +15,7 @@
#define MTK_HDR_XMIT_TAGGED_TPID_8100 1
#define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0)
#define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0)
+#define MTK_HDR_XMIT_SA_DIS BIT(6)
static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
struct net_device *dev)
@@ -22,6 +23,9 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
struct dsa_port *dp = dsa_slave_to_port(dev);
u8 *mtk_tag;
bool is_vlan_skb = true;
+ unsigned char *dest = eth_hdr(skb)->h_dest;
+ bool is_multicast_skb = is_multicast_ether_addr(dest) &&
+ !is_broadcast_ether_addr(dest);
/* Build the special tag after the MAC Source Address. If VLAN header
* is present, it's required that VLAN header and special tag is
@@ -47,6 +51,10 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
MTK_HDR_XMIT_UNTAGGED;
mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
+ /* Disable SA learning for multicast frames */
+ if (unlikely(is_multicast_skb))
+ mtk_tag[1] |= MTK_HDR_XMIT_SA_DIS;
+
/* Tag control information is kept for 802.1Q */
if (!is_vlan_skb) {
mtk_tag[2] = 0;
@@ -61,6 +69,9 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
{
int port;
__be16 *phdr, hdr;
+ unsigned char *dest = eth_hdr(skb)->h_dest;
+ bool is_multicast_skb = is_multicast_ether_addr(dest) &&
+ !is_broadcast_ether_addr(dest);
if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN)))
return NULL;
@@ -86,6 +97,10 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
if (!skb->dev)
return NULL;
+ /* Only unicast or broadcast frames are offloaded */
+ if (likely(!is_multicast_skb))
+ skb->offload_fwd_mark = 1;
+
return skb;
}
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 0c772318c023..ed5357210193 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -342,7 +342,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
ret = ops->reply_size(req_info, reply_data);
if (ret < 0)
goto err_cleanup;
- reply_len = ret;
+ reply_len = ret + ethnl_reply_header_size();
ret = -ENOMEM;
rskb = ethnl_reply_init(reply_len, req_info->dev, ops->reply_cmd,
ops->hdr_attr, info, &reply_payload);
@@ -588,7 +588,7 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd,
ret = ops->reply_size(req_info, reply_data);
if (ret < 0)
goto err_cleanup;
- reply_len = ret;
+ reply_len = ret + ethnl_reply_header_size();
ret = -ENOMEM;
skb = genlmsg_new(reply_len, GFP_KERNEL);
if (!skb)
diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
index 95eae5c68a52..0eed4e4909ab 100644
--- a/net/ethtool/strset.c
+++ b/net/ethtool/strset.c
@@ -324,7 +324,6 @@ static int strset_reply_size(const struct ethnl_req_info *req_base,
int len = 0;
int ret;
- len += ethnl_reply_header_size();
for (i = 0; i < ETH_SS_COUNT; i++) {
const struct strset_info *set_info = &data->sets[i];
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 5465a395da04..1decb25f6764 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -69,10 +69,16 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
else
multicast_spec = nla_get_u8(data[IFLA_HSR_MULTICAST_SPEC]);
- if (!data[IFLA_HSR_VERSION])
+ if (!data[IFLA_HSR_VERSION]) {
hsr_version = 0;
- else
+ } else {
hsr_version = nla_get_u8(data[IFLA_HSR_VERSION]);
+ if (hsr_version > 1) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Only versions 0..1 are supported");
+ return -EINVAL;
+ }
+ }
return hsr_dev_finalize(dev, link, multicast_spec, hsr_version, extack);
}
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index f4b9f7a3ce51..25b6ffba26cd 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -18,7 +18,7 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
{
struct sk_buff *skb = *pskb;
struct hsr_port *port;
- u16 protocol;
+ __be16 protocol;
if (!skb_mac_header_was_set(skb)) {
WARN_ONCE(1, "%s: skb invalid", __func__);
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 0bd10a1f477f..a23094b050f8 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1258,7 +1258,8 @@ static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def,
return ret_val;
}
- secattr->flags |= NETLBL_SECATTR_MLS_CAT;
+ if (secattr->attr.mls.cat)
+ secattr->flags |= NETLBL_SECATTR_MLS_CAT;
}
return 0;
@@ -1439,7 +1440,8 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def,
return ret_val;
}
- secattr->flags |= NETLBL_SECATTR_MLS_CAT;
+ if (secattr->attr.mls.cat)
+ secattr->flags |= NETLBL_SECATTR_MLS_CAT;
}
return 0;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 30fa42f5997d..c0dd561aa190 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -614,12 +614,15 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
return NULL;
}
-static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
+static int ip_mc_autojoin_config(struct net *net, bool join,
+ const struct in_ifaddr *ifa)
{
+#if defined(CONFIG_IP_MULTICAST)
struct ip_mreqn mreq = {
.imr_multiaddr.s_addr = ifa->ifa_address,
.imr_ifindex = ifa->ifa_dev->dev->ifindex,
};
+ struct sock *sk = net->ipv4.mc_autojoin_sk;
int ret;
ASSERT_RTNL();
@@ -632,6 +635,9 @@ static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
release_sock(sk);
return ret;
+#else
+ return -EOPNOTSUPP;
+#endif
}
static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -675,7 +681,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
continue;
if (ipv4_is_multicast(ifa->ifa_address))
- ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
+ ip_mc_autojoin_config(net, false, ifa);
__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
return 0;
}
@@ -940,8 +946,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
*/
set_ifa_lifetime(ifa, valid_lft, prefered_lft);
if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
- int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
- true, ifa);
+ int ret = ip_mc_autojoin_config(net, true, ifa);
if (ret < 0) {
inet_free_ifa(ifa);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 213be9c050ad..1bf9da3a75f9 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -918,7 +918,6 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
else
filter->dump_exceptions = false;
- filter->dump_all_families = (rtm->rtm_family == AF_UNSPEC);
filter->flags = rtm->rtm_flags;
filter->protocol = rtm->rtm_protocol;
filter->rt_type = rtm->rtm_type;
@@ -990,7 +989,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
if (filter.table_id) {
tb = fib_get_table(net, filter.table_id);
if (!tb) {
- if (filter.dump_all_families)
+ if (rtnl_msg_family(cb->nlh) != PF_INET)
return skb->len;
NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist");
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 6ed8c9317179..55ca2e521828 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -2014,7 +2014,7 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
struct fib_info *next_fi = fa->fa_info;
- struct fib_nh *nh;
+ struct fib_nh_common *nhc;
if (fa->fa_slen != slen)
continue;
@@ -2037,8 +2037,8 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
fa->fa_type != RTN_UNICAST)
continue;
- nh = fib_info_nh(next_fi, 0);
- if (!nh->fib_nh_gw4 || nh->fib_nh_scope != RT_SCOPE_LINK)
+ nhc = fib_info_nhc(next_fi, 0);
+ if (!nhc->nhc_gw_family || nhc->nhc_scope != RT_SCOPE_LINK)
continue;
fib_alias_accessed(fa);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 5f34eb951627..65c29f2bd89f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -24,17 +24,19 @@
#include <net/addrconf.h>
#if IS_ENABLED(CONFIG_IPV6)
-/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
- * only, and any IPv4 addresses if not IPv6 only
- * match_wildcard == false: addresses must be exactly the same, i.e.
- * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
- * and 0.0.0.0 equals to 0.0.0.0 only
+/* match_sk*_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses
+ * if IPv6 only, and any IPv4 addresses
+ * if not IPv6 only
+ * match_sk*_wildcard == false: addresses must be exactly the same, i.e.
+ * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
+ * and 0.0.0.0 equals to 0.0.0.0 only
*/
static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
const struct in6_addr *sk2_rcv_saddr6,
__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
bool sk1_ipv6only, bool sk2_ipv6only,
- bool match_wildcard)
+ bool match_sk1_wildcard,
+ bool match_sk2_wildcard)
{
int addr_type = ipv6_addr_type(sk1_rcv_saddr6);
int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
@@ -44,8 +46,8 @@ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
if (!sk2_ipv6only) {
if (sk1_rcv_saddr == sk2_rcv_saddr)
return true;
- if (!sk1_rcv_saddr || !sk2_rcv_saddr)
- return match_wildcard;
+ return (match_sk1_wildcard && !sk1_rcv_saddr) ||
+ (match_sk2_wildcard && !sk2_rcv_saddr);
}
return false;
}
@@ -53,11 +55,11 @@ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
return true;
- if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
+ if (addr_type2 == IPV6_ADDR_ANY && match_sk2_wildcard &&
!(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
return true;
- if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
+ if (addr_type == IPV6_ADDR_ANY && match_sk1_wildcard &&
!(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
return true;
@@ -69,18 +71,19 @@ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
}
#endif
-/* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses
- * match_wildcard == false: addresses must be exactly the same, i.e.
- * 0.0.0.0 only equals to 0.0.0.0
+/* match_sk*_wildcard == true: 0.0.0.0 equals to any IPv4 addresses
+ * match_sk*_wildcard == false: addresses must be exactly the same, i.e.
+ * 0.0.0.0 only equals to 0.0.0.0
*/
static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
- bool sk2_ipv6only, bool match_wildcard)
+ bool sk2_ipv6only, bool match_sk1_wildcard,
+ bool match_sk2_wildcard)
{
if (!sk2_ipv6only) {
if (sk1_rcv_saddr == sk2_rcv_saddr)
return true;
- if (!sk1_rcv_saddr || !sk2_rcv_saddr)
- return match_wildcard;
+ return (match_sk1_wildcard && !sk1_rcv_saddr) ||
+ (match_sk2_wildcard && !sk2_rcv_saddr);
}
return false;
}
@@ -96,10 +99,12 @@ bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
sk2->sk_rcv_saddr,
ipv6_only_sock(sk),
ipv6_only_sock(sk2),
+ match_wildcard,
match_wildcard);
#endif
return ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr,
- ipv6_only_sock(sk2), match_wildcard);
+ ipv6_only_sock(sk2), match_wildcard,
+ match_wildcard);
}
EXPORT_SYMBOL(inet_rcv_saddr_equal);
@@ -285,10 +290,10 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
tb->fast_rcv_saddr,
sk->sk_rcv_saddr,
tb->fast_ipv6_only,
- ipv6_only_sock(sk), true);
+ ipv6_only_sock(sk), true, false);
#endif
return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr,
- ipv6_only_sock(sk), true);
+ ipv6_only_sock(sk), true, false);
}
/* Obtain a reference to a local port for the given sock,
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 2f01cf6fa0de..678575adaf3b 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -698,7 +698,7 @@ out:
rtnl_link_failed:
#if IS_ENABLED(CONFIG_MPLS)
- xfrm4_tunnel_deregister(&mplsip_handler, AF_INET);
+ xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS);
xfrm_tunnel_mplsip_failed:
#endif
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9cf83cc85e4a..b2363b82b48d 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -109,8 +109,10 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags);
static void ipmr_expire_process(struct timer_list *t);
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
-#define ipmr_for_each_table(mrt, net) \
- list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
+#define ipmr_for_each_table(mrt, net) \
+ list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list, \
+ lockdep_rtnl_is_held() || \
+ list_empty(&net->ipv4.mr_tables))
static struct mr_table *ipmr_mr_table_iter(struct net *net,
struct mr_table *mrt)
@@ -2611,7 +2613,7 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id);
if (!mrt) {
- if (filter.dump_all_families)
+ if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR)
return skb->len;
NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist");
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index fdfca534d094..715e14475220 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -276,6 +276,7 @@ out:
return 0;
nla_put_failure:
+ nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
@@ -433,7 +434,7 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
if (!valid_group_nh(nh, len, extack))
return -EINVAL;
}
- for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) {
+ for (i = NHA_GROUP_TYPE + 1; i < __NHA_MAX; ++i) {
if (!tb[i])
continue;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 788c69d9bfe0..b73f540fa19b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -491,18 +491,16 @@ u32 ip_idents_reserve(u32 hash, int segs)
atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
u32 old = READ_ONCE(*p_tstamp);
u32 now = (u32)jiffies;
- u32 new, delta = 0;
+ u32 delta = 0;
if (old != now && cmpxchg(p_tstamp, old, now) == old)
delta = prandom_u32_max(now - old);
- /* Do not use atomic_add_return() as it makes UBSAN unhappy */
- do {
- old = (u32)atomic_read(p_id);
- new = old + delta + segs;
- } while (atomic_cmpxchg(p_id, old, new) != old);
-
- return new - segs;
+ /* If UBSAN reports an error there, please make sure your compiler
+ * supports -fno-strict-overflow before reporting it that was a bug
+ * in UBSAN, and it has been fixed in GCC-8.
+ */
+ return atomic_add_return(segs + delta, p_id) - segs;
}
EXPORT_SYMBOL(ip_idents_reserve);
@@ -915,7 +913,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
/* Check for load limit; set rate_last to the latest sent
* redirect.
*/
- if (peer->rate_tokens == 0 ||
+ if (peer->n_redirects == 0 ||
time_after(jiffies,
(peer->rate_last +
(ip_rt_redirect_load << peer->n_redirects)))) {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6d87de434377..dd401757eea1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -476,9 +476,17 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
int target, struct sock *sk)
{
- return (READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq) >= target) ||
- (sk->sk_prot->stream_memory_read ?
- sk->sk_prot->stream_memory_read(sk) : false);
+ int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq);
+
+ if (avail > 0) {
+ if (avail >= target)
+ return true;
+ if (tcp_rmem_pressure(sk))
+ return true;
+ }
+ if (sk->sk_prot->stream_memory_read)
+ return sk->sk_prot->stream_memory_read(sk);
+ return false;
}
/*
@@ -1756,10 +1764,11 @@ static int tcp_zerocopy_receive(struct sock *sk,
down_read(&current->mm->mmap_sem);
- ret = -EINVAL;
vma = find_vma(current->mm, address);
- if (!vma || vma->vm_start > address || vma->vm_ops != &tcp_vm_ops)
- goto out;
+ if (!vma || vma->vm_start > address || vma->vm_ops != &tcp_vm_ops) {
+ up_read(&current->mm->mmap_sem);
+ return -EINVAL;
+ }
zc->length = min_t(unsigned long, zc->length, vma->vm_end - address);
tp = tcp_sk(sk);
@@ -2154,13 +2163,15 @@ skip_copy:
tp->urg_data = 0;
tcp_fast_path_check(sk);
}
- if (used + offset < skb->len)
- continue;
if (TCP_SKB_CB(skb)->has_rxtstamp) {
tcp_update_recv_tstamps(skb, &tss);
cmsg_flags |= 2;
}
+
+ if (used + offset < skb->len)
+ continue;
+
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto found_fin_ok;
if (!(flags & MSG_PEEK))
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 5a05327f97c1..629aaa9a1eb9 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -125,7 +125,6 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
if (!ret) {
msg->sg.start = i;
- msg->sg.size -= apply_bytes;
sk_psock_queue_msg(psock, tmp);
sk_psock_data_ready(sk, psock);
} else {
@@ -262,14 +261,17 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
struct sk_psock *psock;
int copied, ret;
+ if (unlikely(flags & MSG_ERRQUEUE))
+ return inet_recv_error(sk, msg, len, addr_len);
+
psock = sk_psock_get(sk);
if (unlikely(!psock))
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
- if (unlikely(flags & MSG_ERRQUEUE))
- return inet_recv_error(sk, msg, len, addr_len);
if (!skb_queue_empty(&sk->sk_receive_queue) &&
- sk_psock_queue_empty(psock))
+ sk_psock_queue_empty(psock)) {
+ sk_psock_put(sk, psock);
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ }
lock_sock(sk);
msg_bytes_ready:
copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bf4ced9273e8..29c6fc8c7716 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3926,10 +3926,6 @@ void tcp_parse_options(const struct net *net,
*/
break;
#endif
- case TCPOPT_MPTCP:
- mptcp_parse_option(skb, ptr, opsize, opt_rx);
- break;
-
case TCPOPT_FASTOPEN:
tcp_parse_fastopen_option(
opsize - TCPOLEN_FASTOPEN_BASE,
@@ -4761,7 +4757,8 @@ void tcp_data_ready(struct sock *sk)
const struct tcp_sock *tp = tcp_sk(sk);
int avail = tp->rcv_nxt - tp->copied_seq;
- if (avail < sk->sk_rcvlowat && !sock_flag(sk, SOCK_DONE))
+ if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
+ !sock_flag(sk, SOCK_DONE))
return;
sk->sk_data_ready(sk);
@@ -5990,9 +5987,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
- if (sk_is_mptcp(sk))
- mptcp_rcv_synsent(sk);
-
/* Remember, tcp_poll() does not lock socket!
* Change state from SYN-SENT only after copied_seq
* is initialized. */
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 89ba7c87de5d..30ddb9dc9398 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -58,9 +58,7 @@ int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb)
{
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-#ifdef CONFIG_NETFILTER
IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
-#endif
return xfrm_output(sk, skb);
}
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 221c81f85cbf..8d3f66c310db 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -1047,7 +1047,8 @@ static int calipso_opt_getattr(const unsigned char *calipso,
goto getattr_return;
}
- secattr->flags |= NETLBL_SECATTR_MLS_CAT;
+ if (secattr->attr.mls.cat)
+ secattr->flags |= NETLBL_SECATTR_MLS_CAT;
}
secattr->type = NETLBL_NLTYPE_CALIPSO;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 2688f3e82165..fc5000370030 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -229,6 +229,25 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
return res;
}
+static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
+ struct flowi6 *fl6)
+{
+ struct net *net = sock_net(sk);
+ struct dst_entry *dst;
+ bool res = false;
+
+ dst = ip6_route_output(net, sk, fl6);
+ if (!dst->error) {
+ struct rt6_info *rt = (struct rt6_info *)dst;
+ struct in6_addr prefsrc;
+
+ rt6_get_prefsrc(rt, &prefsrc);
+ res = !ipv6_addr_any(&prefsrc);
+ }
+ dst_release(dst);
+ return res;
+}
+
/*
* an inline helper for the "simple" if statement below
* checks if parameter problem report is caused by an
@@ -527,7 +546,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
saddr = force_saddr;
if (saddr) {
fl6.saddr = *saddr;
- } else {
+ } else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
/* select a more meaningful saddr from input if */
struct net_device *in_netdev;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 46ed56719476..20314895509c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -664,7 +664,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
if (arg.filter.table_id) {
tb = fib6_get_table(net, arg.filter.table_id);
if (!tb) {
- if (arg.filter.dump_all_families)
+ if (rtnl_msg_family(cb->nlh) != PF_INET6)
goto out;
NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 65a54d74acc1..1f4d20e97c07 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -98,7 +98,8 @@ static void ipmr_expire_process(struct timer_list *t);
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \
- lockdep_rtnl_is_held())
+ lockdep_rtnl_is_held() || \
+ list_empty(&net->ipv6.mr6_tables))
static struct mr_table *ip6mr_mr_table_iter(struct net *net,
struct mr_table *mrt)
@@ -2502,7 +2503,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
if (!mrt) {
- if (filter.dump_all_families)
+ if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
return skb->len;
NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index debdaeba5d8c..18d05403d3b5 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -183,15 +183,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
retv = -EBUSY;
break;
}
- } else if (sk->sk_protocol == IPPROTO_TCP) {
- if (sk->sk_prot != &tcpv6_prot) {
- retv = -EBUSY;
- break;
- }
- break;
- } else {
+ }
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ sk->sk_prot != &tcpv6_prot) {
+ retv = -EBUSY;
break;
}
+ if (sk->sk_protocol != IPPROTO_TCP)
+ break;
if (sk->sk_state != TCP_ESTABLISHED) {
retv = -ENOTCONN;
break;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 310cbddaa533..ff847a324220 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1385,9 +1385,18 @@ static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
}
ip6_rt_copy_init(pcpu_rt, res);
pcpu_rt->rt6i_flags |= RTF_PCPU;
+
+ if (f6i->nh)
+ pcpu_rt->sernum = rt_genid_ipv6(dev_net(dev));
+
return pcpu_rt;
}
+static bool rt6_is_valid(const struct rt6_info *rt6)
+{
+ return rt6->sernum == rt_genid_ipv6(dev_net(rt6->dst.dev));
+}
+
/* It should be called with rcu_read_lock() acquired */
static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
{
@@ -1395,6 +1404,19 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
+ if (pcpu_rt && pcpu_rt->sernum && !rt6_is_valid(pcpu_rt)) {
+ struct rt6_info *prev, **p;
+
+ p = this_cpu_ptr(res->nh->rt6i_pcpu);
+ prev = xchg(p, NULL);
+ if (prev) {
+ dst_dev_put(&prev->dst);
+ dst_release(&prev->dst);
+ }
+
+ pcpu_rt = NULL;
+ }
+
return pcpu_rt;
}
@@ -2593,6 +2615,9 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
rt = container_of(dst, struct rt6_info, dst);
+ if (rt->sernum)
+ return rt6_is_valid(rt) ? dst : NULL;
+
rcu_read_lock();
/* All IPV6 dsts are created with ->obsolete set to the value
@@ -2697,8 +2722,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
const struct in6_addr *daddr, *saddr;
struct rt6_info *rt6 = (struct rt6_info *)dst;
- if (dst_metric_locked(dst, RTAX_MTU))
- return;
+ /* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU)
+ * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it.
+ * [see also comment in rt6_mtu_change_route()]
+ */
if (iph) {
daddr = &iph->daddr;
diff --git a/net/ipv6/rpl.c b/net/ipv6/rpl.c
index d38b476fc7f2..307f336b5353 100644
--- a/net/ipv6/rpl.c
+++ b/net/ipv6/rpl.c
@@ -8,6 +8,7 @@
#include <net/rpl.h>
#define IPV6_PFXTAIL_LEN(x) (sizeof(struct in6_addr) - (x))
+#define IPV6_RPL_BEST_ADDR_COMPRESSION 15
static void ipv6_rpl_addr_decompress(struct in6_addr *dst,
const struct in6_addr *daddr,
@@ -73,7 +74,7 @@ static unsigned char ipv6_rpl_srh_calc_cmpri(const struct ipv6_rpl_sr_hdr *inhdr
}
}
- return plen;
+ return IPV6_RPL_BEST_ADDR_COMPRESSION;
}
static unsigned char ipv6_rpl_srh_calc_cmpre(const struct in6_addr *daddr,
@@ -83,10 +84,10 @@ static unsigned char ipv6_rpl_srh_calc_cmpre(const struct in6_addr *daddr,
for (plen = 0; plen < sizeof(*daddr); plen++) {
if (daddr->s6_addr[plen] != last_segment->s6_addr[plen])
- break;
+ return plen;
}
- return plen;
+ return IPV6_RPL_BEST_ADDR_COMPRESSION;
}
void ipv6_rpl_srh_compress(struct ipv6_rpl_sr_hdr *outhdr,
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 75421a472d25..37b434293bda 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -27,8 +27,9 @@
bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len)
{
- int trailing;
unsigned int tlv_offset;
+ int max_last_entry;
+ int trailing;
if (srh->type != IPV6_SRCRT_TYPE_4)
return false;
@@ -36,7 +37,12 @@ bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len)
if (((srh->hdrlen + 1) << 3) != len)
return false;
- if (srh->segments_left > srh->first_segment)
+ max_last_entry = (srh->hdrlen / 2) - 1;
+
+ if (srh->first_segment > max_last_entry)
+ return false;
+
+ if (srh->segments_left > srh->first_segment + 1)
return false;
tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4);
@@ -434,7 +440,7 @@ static struct genl_family seg6_genl_family __ro_after_init = {
int __init seg6_init(void)
{
- int err = -ENOMEM;
+ int err;
err = genl_register_family(&seg6_genl_family);
if (err)
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index fbe51d40bd7e..e34167f790e6 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -111,9 +111,7 @@ int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb)
{
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
-#ifdef CONFIG_NETFILTER
IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
-#endif
return xfrm_output(sk, skb);
}
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index f5a9bdc4980c..ebb381c3f1b9 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -920,51 +920,51 @@ static const struct genl_ops l2tp_nl_ops[] = {
.cmd = L2TP_CMD_TUNNEL_CREATE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = l2tp_nl_cmd_tunnel_create,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
},
{
.cmd = L2TP_CMD_TUNNEL_DELETE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = l2tp_nl_cmd_tunnel_delete,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
},
{
.cmd = L2TP_CMD_TUNNEL_MODIFY,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = l2tp_nl_cmd_tunnel_modify,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
},
{
.cmd = L2TP_CMD_TUNNEL_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = l2tp_nl_cmd_tunnel_get,
.dumpit = l2tp_nl_cmd_tunnel_dump,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
},
{
.cmd = L2TP_CMD_SESSION_CREATE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = l2tp_nl_cmd_session_create,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
},
{
.cmd = L2TP_CMD_SESSION_DELETE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = l2tp_nl_cmd_session_delete,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
},
{
.cmd = L2TP_CMD_SESSION_MODIFY,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = l2tp_nl_cmd_session_modify,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
},
{
.cmd = L2TP_CMD_SESSION_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = l2tp_nl_cmd_session_get,
.dumpit = l2tp_nl_cmd_session_dump,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
},
};
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 8345926193de..6423173bb87e 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1069,7 +1069,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC;
if (hw->max_signal <= 0) {
result = -EINVAL;
- goto fail_wiphy_register;
+ goto fail_workqueue;
}
}
@@ -1135,7 +1135,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
result = ieee80211_init_cipher_suites(local);
if (result < 0)
- goto fail_wiphy_register;
+ goto fail_workqueue;
if (!local->ops->remain_on_channel)
local->hw.wiphy->max_remain_on_channel_duration = 5000;
@@ -1161,10 +1161,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CSA_COUNTERS_NUM;
- result = wiphy_register(local->hw.wiphy);
- if (result < 0)
- goto fail_wiphy_register;
-
/*
* We use the number of queues for feature tests (QoS, HT) internally
* so restrict them appropriately.
@@ -1187,8 +1183,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
local->tx_headroom = max_t(unsigned int , local->hw.extra_tx_headroom,
IEEE80211_TX_STATUS_HEADROOM);
- debugfs_hw_add(local);
-
/*
* if the driver doesn't specify a max listen interval we
* use 5 which should be a safe default
@@ -1217,9 +1211,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
goto fail_flows;
rtnl_lock();
-
result = ieee80211_init_rate_ctrl_alg(local,
hw->rate_control_algorithm);
+ rtnl_unlock();
if (result < 0) {
wiphy_debug(local->hw.wiphy,
"Failed to initialize rate control algorithm\n");
@@ -1273,6 +1267,15 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
local->sband_allocated |= BIT(band);
}
+ result = wiphy_register(local->hw.wiphy);
+ if (result < 0)
+ goto fail_wiphy_register;
+
+ debugfs_hw_add(local);
+ rate_control_add_debugfs(local);
+
+ rtnl_lock();
+
/* add one default STA interface if supported */
if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION) &&
!ieee80211_hw_check(hw, NO_AUTO_VIF)) {
@@ -1312,17 +1315,17 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
#if defined(CONFIG_INET) || defined(CONFIG_IPV6)
fail_ifa:
#endif
+ wiphy_unregister(local->hw.wiphy);
+ fail_wiphy_register:
rtnl_lock();
rate_control_deinitialize(local);
ieee80211_remove_interfaces(local);
- fail_rate:
rtnl_unlock();
+ fail_rate:
fail_flows:
ieee80211_led_exit(local);
destroy_workqueue(local->workqueue);
fail_workqueue:
- wiphy_unregister(local->hw.wiphy);
- fail_wiphy_register:
if (local->wiphy_ciphers_allocated)
kfree(local->hw.wiphy->cipher_suites);
kfree(local->int_scan_req);
@@ -1372,8 +1375,8 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
skb_queue_purge(&local->skb_queue_unreliable);
skb_queue_purge(&local->skb_queue_tdls_chsw);
- destroy_workqueue(local->workqueue);
wiphy_unregister(local->hw.wiphy);
+ destroy_workqueue(local->workqueue);
ieee80211_led_exit(local);
kfree(local->int_scan_req);
}
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index d09b3c789314..36978a0e5000 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1257,15 +1257,15 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
sdata->u.mesh.mshcfg.rssi_threshold < rx_status->signal)
mesh_neighbour_update(sdata, mgmt->sa, &elems,
rx_status);
+
+ if (ifmsh->csa_role != IEEE80211_MESH_CSA_ROLE_INIT &&
+ !sdata->vif.csa_active)
+ ieee80211_mesh_process_chnswitch(sdata, &elems, true);
}
if (ifmsh->sync_ops)
ifmsh->sync_ops->rx_bcn_presp(sdata,
stype, mgmt, &elems, rx_status);
-
- if (ifmsh->csa_role != IEEE80211_MESH_CSA_ROLE_INIT &&
- !sdata->vif.csa_active)
- ieee80211_mesh_process_chnswitch(sdata, &elems, true);
}
int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata)
@@ -1373,6 +1373,9 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata,
ieee802_11_parse_elems(pos, len - baselen, true, &elems,
mgmt->bssid, NULL);
+ if (!mesh_matches_local(sdata, &elems))
+ return;
+
ifmsh->chsw_ttl = elems.mesh_chansw_params_ie->mesh_ttl;
if (!--ifmsh->chsw_ttl)
fwd_csa = false;
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index a1e9fc7878aa..b051f125d3af 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -214,17 +214,16 @@ static ssize_t rcname_read(struct file *file, char __user *userbuf,
ref->ops->name, len);
}
-static const struct file_operations rcname_ops = {
+const struct file_operations rcname_ops = {
.read = rcname_read,
.open = simple_open,
.llseek = default_llseek,
};
#endif
-static struct rate_control_ref *rate_control_alloc(const char *name,
- struct ieee80211_local *local)
+static struct rate_control_ref *
+rate_control_alloc(const char *name, struct ieee80211_local *local)
{
- struct dentry *debugfsdir = NULL;
struct rate_control_ref *ref;
ref = kmalloc(sizeof(struct rate_control_ref), GFP_KERNEL);
@@ -234,13 +233,7 @@ static struct rate_control_ref *rate_control_alloc(const char *name,
if (!ref->ops)
goto free;
-#ifdef CONFIG_MAC80211_DEBUGFS
- debugfsdir = debugfs_create_dir("rc", local->hw.wiphy->debugfsdir);
- local->debugfs.rcdir = debugfsdir;
- debugfs_create_file("name", 0400, debugfsdir, ref, &rcname_ops);
-#endif
-
- ref->priv = ref->ops->alloc(&local->hw, debugfsdir);
+ ref->priv = ref->ops->alloc(&local->hw);
if (!ref->priv)
goto free;
return ref;
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 5397c6dad056..79b44d3db171 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -60,6 +60,29 @@ static inline void rate_control_add_sta_debugfs(struct sta_info *sta)
#endif
}
+extern const struct file_operations rcname_ops;
+
+static inline void rate_control_add_debugfs(struct ieee80211_local *local)
+{
+#ifdef CONFIG_MAC80211_DEBUGFS
+ struct dentry *debugfsdir;
+
+ if (!local->rate_ctrl)
+ return;
+
+ if (!local->rate_ctrl->ops->add_debugfs)
+ return;
+
+ debugfsdir = debugfs_create_dir("rc", local->hw.wiphy->debugfsdir);
+ local->debugfs.rcdir = debugfsdir;
+ debugfs_create_file("name", 0400, debugfsdir,
+ local->rate_ctrl, &rcname_ops);
+
+ local->rate_ctrl->ops->add_debugfs(&local->hw, local->rate_ctrl->priv,
+ debugfsdir);
+#endif
+}
+
void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata);
/* Get a reference to the rate control algorithm. If `name' is NULL, get the
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 694a31978a04..5dc3e5bc4e64 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -1635,7 +1635,7 @@ minstrel_ht_init_cck_rates(struct minstrel_priv *mp)
}
static void *
-minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
+minstrel_ht_alloc(struct ieee80211_hw *hw)
{
struct minstrel_priv *mp;
@@ -1673,7 +1673,17 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
mp->update_interval = HZ / 10;
mp->new_avg = true;
+ minstrel_ht_init_cck_rates(mp);
+
+ return mp;
+}
+
#ifdef CONFIG_MAC80211_DEBUGFS
+static void minstrel_ht_add_debugfs(struct ieee80211_hw *hw, void *priv,
+ struct dentry *debugfsdir)
+{
+ struct minstrel_priv *mp = priv;
+
mp->fixed_rate_idx = (u32) -1;
debugfs_create_u32("fixed_rate_idx", S_IRUGO | S_IWUGO, debugfsdir,
&mp->fixed_rate_idx);
@@ -1681,12 +1691,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
&mp->sample_switch);
debugfs_create_bool("new_avg", S_IRUGO | S_IWUSR, debugfsdir,
&mp->new_avg);
-#endif
-
- minstrel_ht_init_cck_rates(mp);
-
- return mp;
}
+#endif
static void
minstrel_ht_free(void *priv)
@@ -1725,6 +1731,7 @@ static const struct rate_control_ops mac80211_minstrel_ht = {
.alloc = minstrel_ht_alloc,
.free = minstrel_ht_free,
#ifdef CONFIG_MAC80211_DEBUGFS
+ .add_debugfs = minstrel_ht_add_debugfs,
.add_sta_debugfs = minstrel_ht_add_sta_debugfs,
#endif
.get_expected_throughput = minstrel_ht_get_expected_throughput,
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index f8d5c2515829..cd8487bc6fc2 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -231,7 +231,8 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta;
int i = 0;
- list_for_each_entry_rcu(sta, &local->sta_list, list) {
+ list_for_each_entry_rcu(sta, &local->sta_list, list,
+ lockdep_is_held(&local->sta_mtx)) {
if (sdata != sta->sdata)
continue;
if (i < idx) {
diff --git a/net/mptcp/crypto.c b/net/mptcp/crypto.c
index c151628bd416..0f5a414a9366 100644
--- a/net/mptcp/crypto.c
+++ b/net/mptcp/crypto.c
@@ -47,8 +47,6 @@ void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn)
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac)
{
u8 input[SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE];
- __be32 mptcp_hashed_key[SHA256_DIGEST_WORDS];
- __be32 *hash_out = (__force __be32 *)hmac;
struct sha256_state state;
u8 key1be[8];
u8 key2be[8];
@@ -86,11 +84,7 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac)
sha256_init(&state);
sha256_update(&state, input, SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE);
- sha256_final(&state, (u8 *)mptcp_hashed_key);
-
- /* takes only first 160 bits */
- for (i = 0; i < 5; i++)
- hash_out[i] = mptcp_hashed_key[i];
+ sha256_final(&state, (u8 *)hmac);
}
#ifdef CONFIG_MPTCP_HMAC_TEST
@@ -101,29 +95,29 @@ struct test_cast {
};
/* we can't reuse RFC 4231 test vectors, as we have constraint on the
- * input and key size, and we truncate the output.
+ * input and key size.
*/
static struct test_cast tests[] = {
{
.key = "0b0b0b0b0b0b0b0b",
.msg = "48692054",
- .result = "8385e24fb4235ac37556b6b886db106284a1da67",
+ .result = "8385e24fb4235ac37556b6b886db106284a1da671699f46db1f235ec622dcafa",
},
{
.key = "aaaaaaaaaaaaaaaa",
.msg = "dddddddd",
- .result = "2c5e219164ff1dca1c4a92318d847bb6b9d44492",
+ .result = "2c5e219164ff1dca1c4a92318d847bb6b9d44492984e1eb71aff9022f71046e9",
},
{
.key = "0102030405060708",
.msg = "cdcdcdcd",
- .result = "e73b9ba9969969cefb04aa0d6df18ec2fcc075b6",
+ .result = "e73b9ba9969969cefb04aa0d6df18ec2fcc075b6f23b4d8c4da736a5dbbc6e7d",
},
};
static int __init test_mptcp_crypto(void)
{
- char hmac[20], hmac_hex[41];
+ char hmac[32], hmac_hex[65];
u32 nonce1, nonce2;
u64 key1, key2;
u8 msg[8];
@@ -140,11 +134,11 @@ static int __init test_mptcp_crypto(void)
put_unaligned_be32(nonce2, &msg[4]);
mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
- for (j = 0; j < 20; ++j)
+ for (j = 0; j < 32; ++j)
sprintf(&hmac_hex[j << 1], "%02x", hmac[j] & 0xff);
- hmac_hex[40] = 0;
+ hmac_hex[64] = 0;
- if (memcmp(hmac_hex, tests[i].result, 40))
+ if (memcmp(hmac_hex, tests[i].result, 64))
pr_err("test %d failed, got %s expected %s", i,
hmac_hex, tests[i].result);
else
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index faf57585b892..7793b6011fa7 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -7,6 +7,7 @@
#define pr_fmt(fmt) "MPTCP: " fmt
#include <linux/kernel.h>
+#include <crypto/sha.h>
#include <net/tcp.h>
#include <net/mptcp.h>
#include "protocol.h"
@@ -16,10 +17,10 @@ static bool mptcp_cap_flag_sha256(u8 flags)
return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256;
}
-void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
- int opsize, struct tcp_options_received *opt_rx)
+static void mptcp_parse_option(const struct sk_buff *skb,
+ const unsigned char *ptr, int opsize,
+ struct mptcp_options_received *mp_opt)
{
- struct mptcp_options_received *mp_opt = &opt_rx->mptcp;
u8 subtype = *ptr >> 4;
int expected_opsize;
u8 version;
@@ -283,12 +284,20 @@ void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
}
void mptcp_get_options(const struct sk_buff *skb,
- struct tcp_options_received *opt_rx)
+ struct mptcp_options_received *mp_opt)
{
- const unsigned char *ptr;
const struct tcphdr *th = tcp_hdr(skb);
- int length = (th->doff * 4) - sizeof(struct tcphdr);
+ const unsigned char *ptr;
+ int length;
+
+ /* initialize option status */
+ mp_opt->mp_capable = 0;
+ mp_opt->mp_join = 0;
+ mp_opt->add_addr = 0;
+ mp_opt->rm_addr = 0;
+ mp_opt->dss = 0;
+ length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
while (length > 0) {
@@ -308,7 +317,7 @@ void mptcp_get_options(const struct sk_buff *skb,
if (opsize > length)
return; /* don't parse partial options */
if (opcode == TCPOPT_MPTCP)
- mptcp_parse_option(skb, ptr, opsize, opt_rx);
+ mptcp_parse_option(skb, ptr, opsize, mp_opt);
ptr += opsize - 2;
length -= opsize;
}
@@ -344,28 +353,6 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
return false;
}
-void mptcp_rcv_synsent(struct sock *sk)
-{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) {
- subflow->mp_capable = 1;
- subflow->can_ack = 1;
- subflow->remote_key = tp->rx_opt.mptcp.sndr_key;
- pr_debug("subflow=%p, remote_key=%llu", subflow,
- subflow->remote_key);
- } else if (subflow->request_join && tp->rx_opt.mptcp.mp_join) {
- subflow->mp_join = 1;
- subflow->thmac = tp->rx_opt.mptcp.thmac;
- subflow->remote_nonce = tp->rx_opt.mptcp.nonce;
- pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
- subflow->thmac, subflow->remote_nonce);
- } else if (subflow->request_mptcp) {
- tcp_sk(sk)->is_mptcp = 0;
- }
-}
-
/* MP_JOIN client subflow must wait for 4th ack before sending any data:
* TCP can't schedule delack timer before the subflow is fully established.
* MPTCP uses the delack timer to do 3rd ack retransmissions
@@ -549,7 +536,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id,
struct in_addr *addr)
{
- u8 hmac[MPTCP_ADDR_HMAC_LEN];
+ u8 hmac[SHA256_DIGEST_SIZE];
u8 msg[7];
msg[0] = addr_id;
@@ -559,14 +546,14 @@ static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id,
mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac);
- return get_unaligned_be64(hmac);
+ return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]);
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id,
struct in6_addr *addr)
{
- u8 hmac[MPTCP_ADDR_HMAC_LEN];
+ u8 hmac[SHA256_DIGEST_SIZE];
u8 msg[19];
msg[0] = addr_id;
@@ -576,7 +563,7 @@ static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id,
mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac);
- return get_unaligned_be64(hmac);
+ return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]);
}
#endif
@@ -709,7 +696,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1)
return subflow->mp_capable;
- if (mp_opt->use_ack) {
+ if (mp_opt->dss && mp_opt->use_ack) {
/* subflows are fully established as soon as we get any
* additional ack.
*/
@@ -717,8 +704,6 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
goto fully_established;
}
- WARN_ON_ONCE(subflow->can_ack);
-
/* If the first established packet does not contain MP_CAPABLE + data
* then fallback to TCP
*/
@@ -728,6 +713,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
return false;
}
+ if (unlikely(!READ_ONCE(msk->pm.server_side)))
+ pr_warn_once("bogus mpc option on established client sk");
subflow->fully_established = 1;
subflow->remote_key = mp_opt->sndr_key;
subflow->can_ack = 1;
@@ -819,41 +806,41 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
- struct mptcp_options_received *mp_opt;
+ struct mptcp_options_received mp_opt;
struct mptcp_ext *mpext;
- mp_opt = &opt_rx->mptcp;
- if (!check_fully_established(msk, sk, subflow, skb, mp_opt))
+ mptcp_get_options(skb, &mp_opt);
+ if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
return;
- if (mp_opt->add_addr && add_addr_hmac_valid(msk, mp_opt)) {
+ if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) {
struct mptcp_addr_info addr;
- addr.port = htons(mp_opt->port);
- addr.id = mp_opt->addr_id;
- if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) {
+ addr.port = htons(mp_opt.port);
+ addr.id = mp_opt.addr_id;
+ if (mp_opt.family == MPTCP_ADDR_IPVERSION_4) {
addr.family = AF_INET;
- addr.addr = mp_opt->addr;
+ addr.addr = mp_opt.addr;
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- else if (mp_opt->family == MPTCP_ADDR_IPVERSION_6) {
+ else if (mp_opt.family == MPTCP_ADDR_IPVERSION_6) {
addr.family = AF_INET6;
- addr.addr6 = mp_opt->addr6;
+ addr.addr6 = mp_opt.addr6;
}
#endif
- if (!mp_opt->echo)
+ if (!mp_opt.echo)
mptcp_pm_add_addr_received(msk, &addr);
- mp_opt->add_addr = 0;
+ mp_opt.add_addr = 0;
}
- if (!mp_opt->dss)
+ if (!mp_opt.dss)
return;
/* we can't wait for recvmsg() to update the ack_seq, otherwise
* monodirectional flows will stuck
*/
- if (mp_opt->use_ack)
- update_una(msk, mp_opt);
+ if (mp_opt.use_ack)
+ update_una(msk, &mp_opt);
mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
if (!mpext)
@@ -861,8 +848,8 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
memset(mpext, 0, sizeof(*mpext));
- if (mp_opt->use_map) {
- if (mp_opt->mpc_map) {
+ if (mp_opt.use_map) {
+ if (mp_opt.mpc_map) {
/* this is an MP_CAPABLE carrying MPTCP data
* we know this map the first chunk of data
*/
@@ -872,16 +859,16 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
mpext->subflow_seq = 1;
mpext->dsn64 = 1;
mpext->mpc_map = 1;
+ mpext->data_fin = 0;
} else {
- mpext->data_seq = mp_opt->data_seq;
- mpext->subflow_seq = mp_opt->subflow_seq;
- mpext->dsn64 = mp_opt->dsn64;
+ mpext->data_seq = mp_opt.data_seq;
+ mpext->subflow_seq = mp_opt.subflow_seq;
+ mpext->dsn64 = mp_opt.dsn64;
+ mpext->data_fin = mp_opt.data_fin;
}
- mpext->data_len = mp_opt->data_len;
+ mpext->data_len = mp_opt.data_len;
mpext->use_map = 1;
}
-
- mpext->data_fin = mp_opt->data_fin;
}
void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 86d61ab34c7c..b78edf237ba0 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -599,12 +599,14 @@ static int mptcp_nl_fill_addr(struct sk_buff *skb,
nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->ifindex))
goto nla_put_failure;
- if (addr->family == AF_INET)
- nla_put_in_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR4,
- addr->addr.s_addr);
+ if (addr->family == AF_INET &&
+ nla_put_in_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR4,
+ addr->addr.s_addr))
+ goto nla_put_failure;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- else if (addr->family == AF_INET6)
- nla_put_in6_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR6, &addr->addr6);
+ else if (addr->family == AF_INET6 &&
+ nla_put_in6_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR6, &addr->addr6))
+ goto nla_put_failure;
#endif
nla_nest_end(skb, attr);
return 0;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 939a5045181a..32ea8d35489a 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -97,12 +97,7 @@ static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk)
if (likely(!__mptcp_needs_tcp_fallback(msk)))
return NULL;
- if (msk->subflow) {
- release_sock((struct sock *)msk);
- return msk->subflow;
- }
-
- return NULL;
+ return msk->subflow;
}
static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk)
@@ -734,9 +729,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto out;
}
+fallback:
ssock = __mptcp_tcp_fallback(msk);
if (unlikely(ssock)) {
-fallback:
+ release_sock(sk);
pr_debug("fallback passthrough");
ret = sock_sendmsg(ssock, msg);
return ret >= 0 ? ret + copied : (copied ? copied : ret);
@@ -769,8 +765,14 @@ fallback:
if (ret < 0)
break;
if (ret == 0 && unlikely(__mptcp_needs_tcp_fallback(msk))) {
+ /* Can happen for passive sockets:
+ * 3WHS negotiated MPTCP, but first packet after is
+ * plain TCP (e.g. due to middlebox filtering unknown
+ * options).
+ *
+ * Fall back to TCP.
+ */
release_sock(ssk);
- ssock = __mptcp_tcp_fallback(msk);
goto fallback;
}
@@ -883,6 +885,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
ssock = __mptcp_tcp_fallback(msk);
if (unlikely(ssock)) {
fallback:
+ release_sock(sk);
pr_debug("fallback-read subflow=%p",
mptcp_subflow_ctx(ssock->sk));
copied = sock_recvmsg(ssock, msg, flags);
@@ -1313,11 +1316,12 @@ static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
static int mptcp_disconnect(struct sock *sk, int flags)
{
- lock_sock(sk);
- __mptcp_clear_xmit(sk);
- release_sock(sk);
- mptcp_cancel_work(sk);
- return tcp_disconnect(sk, flags);
+ /* Should never be called.
+ * inet_stream_connect() calls ->disconnect, but that
+ * refers to the subflow socket, not the mptcp one.
+ */
+ WARN_ON_ONCE(1);
+ return 0;
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -1329,7 +1333,9 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
}
#endif
-struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req)
+struct sock *mptcp_sk_clone(const struct sock *sk,
+ const struct mptcp_options_received *mp_opt,
+ struct request_sock *req)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
@@ -1352,26 +1358,30 @@ struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req)
msk->subflow = NULL;
if (unlikely(mptcp_token_new_accept(subflow_req->token, nsk))) {
+ nsk->sk_state = TCP_CLOSE;
bh_unlock_sock(nsk);
/* we can't call into mptcp_close() here - possible BH context
- * free the sock directly
+ * free the sock directly.
+ * sk_clone_lock() sets nsk refcnt to two, hence call sk_free()
+ * too.
*/
- nsk->sk_prot->destroy(nsk);
+ sk_common_release(nsk);
sk_free(nsk);
return NULL;
}
msk->write_seq = subflow_req->idsn + 1;
atomic64_set(&msk->snd_una, msk->write_seq);
- if (subflow_req->remote_key_valid) {
+ if (mp_opt->mp_capable) {
msk->can_ack = true;
- msk->remote_key = subflow_req->remote_key;
+ msk->remote_key = mp_opt->sndr_key;
mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
ack_seq++;
msk->ack_seq = ack_seq;
}
+ sock_reset_flag(nsk, SOCK_RCU_FREE);
/* will be fully established after successful MPC subflow creation */
inet_sk_state_store(nsk, TCP_SYN_RECV);
bh_unlock_sock(nsk);
@@ -1428,6 +1438,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
newsk = new_mptcp_sock;
mptcp_copy_inaddrs(newsk, ssk);
list_add(&subflow->node, &msk->conn_list);
+ inet_sk_state_store(newsk, TCP_ESTABLISHED);
bh_unlock_sock(new_mptcp_sock);
@@ -1467,12 +1478,11 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname,
*/
lock_sock(sk);
ssock = __mptcp_tcp_fallback(msk);
+ release_sock(sk);
if (ssock)
return tcp_setsockopt(ssock->sk, level, optname, optval,
optlen);
- release_sock(sk);
-
return -EOPNOTSUPP;
}
@@ -1492,12 +1502,11 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname,
*/
lock_sock(sk);
ssock = __mptcp_tcp_fallback(msk);
+ release_sock(sk);
if (ssock)
return tcp_getsockopt(ssock->sk, level, optname, optval,
option);
- release_sock(sk);
-
return -EOPNOTSUPP;
}
@@ -1620,6 +1629,8 @@ bool mptcp_finish_join(struct sock *sk)
ret = mptcp_pm_allow_new_subflow(msk);
if (ret) {
+ subflow->map_seq = msk->ack_seq;
+
/* active connections are already on conn_list */
spin_lock_bh(&msk->join_list_lock);
if (!WARN_ON_ONCE(!list_empty(&subflow->node)))
@@ -1774,6 +1785,8 @@ static int mptcp_listen(struct socket *sock, int backlog)
goto unlock;
}
+ sock_set_flag(sock->sk, SOCK_RCU_FREE);
+
err = ssock->ops->listen(ssock, backlog);
inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
if (!err)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 67448002a2d7..d0803dfb8108 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -81,7 +81,6 @@
/* MPTCP ADD_ADDR flags */
#define MPTCP_ADDR_ECHO BIT(0)
-#define MPTCP_ADDR_HMAC_LEN 20
#define MPTCP_ADDR_IPVERSION_4 4
#define MPTCP_ADDR_IPVERSION_6 6
@@ -91,6 +90,45 @@
#define MPTCP_WORK_RTX 2
#define MPTCP_WORK_EOF 3
+struct mptcp_options_received {
+ u64 sndr_key;
+ u64 rcvr_key;
+ u64 data_ack;
+ u64 data_seq;
+ u32 subflow_seq;
+ u16 data_len;
+ u16 mp_capable : 1,
+ mp_join : 1,
+ dss : 1,
+ add_addr : 1,
+ rm_addr : 1,
+ family : 4,
+ echo : 1,
+ backup : 1;
+ u32 token;
+ u32 nonce;
+ u64 thmac;
+ u8 hmac[20];
+ u8 join_id;
+ u8 use_map:1,
+ dsn64:1,
+ data_fin:1,
+ use_ack:1,
+ ack64:1,
+ mpc_map:1,
+ __unused:2;
+ u8 addr_id;
+ u8 rm_id;
+ union {
+ struct in_addr addr;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ struct in6_addr addr6;
+#endif
+ };
+ u64 ahmac;
+ u16 port;
+};
+
static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
{
return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) |
@@ -206,12 +244,10 @@ struct mptcp_subflow_request_sock {
struct tcp_request_sock sk;
u16 mp_capable : 1,
mp_join : 1,
- backup : 1,
- remote_key_valid : 1;
+ backup : 1;
u8 local_id;
u8 remote_id;
u64 local_key;
- u64 remote_key;
u64 idsn;
u32 token;
u32 ssn_offset;
@@ -332,9 +368,11 @@ void mptcp_proto_init(void);
int mptcp_proto_v6_init(void);
#endif
-struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req);
+struct sock *mptcp_sk_clone(const struct sock *sk,
+ const struct mptcp_options_received *mp_opt,
+ struct request_sock *req);
void mptcp_get_options(const struct sk_buff *skb,
- struct tcp_options_received *opt_rx);
+ struct mptcp_options_received *mp_opt);
void mptcp_finish_connect(struct sock *sk);
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 50a8bea987c6..8968b2c065e7 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/netdevice.h>
#include <crypto/algapi.h>
+#include <crypto/sha.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
@@ -89,7 +90,7 @@ static bool subflow_token_join_request(struct request_sock *req,
const struct sk_buff *skb)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
- u8 hmac[MPTCPOPT_HMAC_LEN];
+ u8 hmac[SHA256_DIGEST_SIZE];
struct mptcp_sock *msk;
int local_id;
@@ -124,16 +125,14 @@ static void subflow_init_req(struct request_sock *req,
{
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
- struct tcp_options_received rx_opt;
+ struct mptcp_options_received mp_opt;
pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
- memset(&rx_opt.mptcp, 0, sizeof(rx_opt.mptcp));
- mptcp_get_options(skb, &rx_opt);
+ mptcp_get_options(skb, &mp_opt);
subflow_req->mp_capable = 0;
subflow_req->mp_join = 0;
- subflow_req->remote_key_valid = 0;
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
@@ -143,16 +142,16 @@ static void subflow_init_req(struct request_sock *req,
return;
#endif
- if (rx_opt.mptcp.mp_capable) {
+ if (mp_opt.mp_capable) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
- if (rx_opt.mptcp.mp_join)
+ if (mp_opt.mp_join)
return;
- } else if (rx_opt.mptcp.mp_join) {
+ } else if (mp_opt.mp_join) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
}
- if (rx_opt.mptcp.mp_capable && listener->request_mptcp) {
+ if (mp_opt.mp_capable && listener->request_mptcp) {
int err;
err = mptcp_token_new_request(req);
@@ -160,13 +159,13 @@ static void subflow_init_req(struct request_sock *req,
subflow_req->mp_capable = 1;
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
- } else if (rx_opt.mptcp.mp_join && listener->request_mptcp) {
+ } else if (mp_opt.mp_join && listener->request_mptcp) {
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
subflow_req->mp_join = 1;
- subflow_req->backup = rx_opt.mptcp.backup;
- subflow_req->remote_id = rx_opt.mptcp.join_id;
- subflow_req->token = rx_opt.mptcp.token;
- subflow_req->remote_nonce = rx_opt.mptcp.nonce;
+ subflow_req->backup = mp_opt.backup;
+ subflow_req->remote_id = mp_opt.join_id;
+ subflow_req->token = mp_opt.token;
+ subflow_req->remote_nonce = mp_opt.nonce;
pr_debug("token=%u, remote_nonce=%u", subflow_req->token,
subflow_req->remote_nonce);
if (!subflow_token_join_request(req, skb)) {
@@ -203,7 +202,7 @@ static void subflow_v6_init_req(struct request_sock *req,
/* validate received truncated hmac and create hmac for third ACK */
static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
{
- u8 hmac[MPTCPOPT_HMAC_LEN];
+ u8 hmac[SHA256_DIGEST_SIZE];
u64 thmac;
subflow_generate_hmac(subflow->remote_key, subflow->local_key,
@@ -222,29 +221,55 @@ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_options_received mp_opt;
struct sock *parent = subflow->conn;
+ struct tcp_sock *tp = tcp_sk(sk);
subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
- if (inet_sk_state_load(parent) != TCP_ESTABLISHED) {
+ if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
inet_sk_state_store(parent, TCP_ESTABLISHED);
parent->sk_state_change(parent);
}
- if (subflow->conn_finished || !tcp_sk(sk)->is_mptcp)
+ /* be sure no special action on any packet other than syn-ack */
+ if (subflow->conn_finished)
+ return;
+
+ subflow->conn_finished = 1;
+
+ mptcp_get_options(skb, &mp_opt);
+ if (subflow->request_mptcp && mp_opt.mp_capable) {
+ subflow->mp_capable = 1;
+ subflow->can_ack = 1;
+ subflow->remote_key = mp_opt.sndr_key;
+ pr_debug("subflow=%p, remote_key=%llu", subflow,
+ subflow->remote_key);
+ } else if (subflow->request_join && mp_opt.mp_join) {
+ subflow->mp_join = 1;
+ subflow->thmac = mp_opt.thmac;
+ subflow->remote_nonce = mp_opt.nonce;
+ pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
+ subflow->thmac, subflow->remote_nonce);
+ } else if (subflow->request_mptcp) {
+ tp->is_mptcp = 0;
+ }
+
+ if (!tp->is_mptcp)
return;
if (subflow->mp_capable) {
pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
subflow->remote_key);
mptcp_finish_connect(sk);
- subflow->conn_finished = 1;
if (skb) {
pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq);
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
}
} else if (subflow->mp_join) {
+ u8 hmac[SHA256_DIGEST_SIZE];
+
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u",
subflow, subflow->thmac,
subflow->remote_nonce);
@@ -257,7 +282,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow_generate_hmac(subflow->local_key, subflow->remote_key,
subflow->local_nonce,
subflow->remote_nonce,
- subflow->hmac);
+ hmac);
+
+ memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);
if (skb)
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
@@ -265,7 +292,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (!mptcp_finish_join(sk))
goto do_reset;
- subflow->conn_finished = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
} else {
do_reset:
@@ -323,10 +349,10 @@ drop:
/* validate hmac received in third ACK */
static bool subflow_hmac_valid(const struct request_sock *req,
- const struct tcp_options_received *rx_opt)
+ const struct mptcp_options_received *mp_opt)
{
const struct mptcp_subflow_request_sock *subflow_req;
- u8 hmac[MPTCPOPT_HMAC_LEN];
+ u8 hmac[SHA256_DIGEST_SIZE];
struct mptcp_sock *msk;
bool ret;
@@ -340,13 +366,53 @@ static bool subflow_hmac_valid(const struct request_sock *req,
subflow_req->local_nonce, hmac);
ret = true;
- if (crypto_memneq(hmac, rx_opt->mptcp.hmac, sizeof(hmac)))
+ if (crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN))
ret = false;
sock_put((struct sock *)msk);
return ret;
}
+static void mptcp_sock_destruct(struct sock *sk)
+{
+ /* if new mptcp socket isn't accepted, it is free'd
+ * from the tcp listener sockets request queue, linked
+ * from req->sk. The tcp socket is released.
+ * This calls the ULP release function which will
+ * also remove the mptcp socket, via
+ * sock_put(ctx->conn).
+ *
+ * Problem is that the mptcp socket will not be in
+ * SYN_RECV state and doesn't have SOCK_DEAD flag.
+ * Both result in warnings from inet_sock_destruct.
+ */
+
+ if (sk->sk_state == TCP_SYN_RECV) {
+ sk->sk_state = TCP_CLOSE;
+ WARN_ON_ONCE(sk->sk_socket);
+ sock_orphan(sk);
+ }
+
+ inet_sock_destruct(sk);
+}
+
+static void mptcp_force_close(struct sock *sk)
+{
+ inet_sk_state_store(sk, TCP_CLOSE);
+ sk_common_release(sk);
+}
+
+static void subflow_ulp_fallback(struct sock *sk,
+ struct mptcp_subflow_context *old_ctx)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ mptcp_subflow_tcp_fallback(sk, old_ctx);
+ icsk->icsk_ulp_ops = NULL;
+ rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
+ tcp_sk(sk)->is_mptcp = 0;
+}
+
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct sk_buff *skb,
struct request_sock *req,
@@ -356,13 +422,18 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
{
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
struct mptcp_subflow_request_sock *subflow_req;
- struct tcp_options_received opt_rx;
+ struct mptcp_options_received mp_opt;
bool fallback_is_fatal = false;
struct sock *new_msk = NULL;
+ bool fallback = false;
struct sock *child;
pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
+ /* we need later a valid 'mp_capable' value even when options are not
+ * parsed
+ */
+ mp_opt.mp_capable = 0;
if (tcp_rsk(req)->is_mptcp == 0)
goto create_child;
@@ -377,26 +448,21 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
goto create_msk;
}
- opt_rx.mptcp.mp_capable = 0;
- mptcp_get_options(skb, &opt_rx);
- if (opt_rx.mptcp.mp_capable) {
- subflow_req->remote_key = opt_rx.mptcp.sndr_key;
- subflow_req->remote_key_valid = 1;
- } else {
- subflow_req->mp_capable = 0;
+ mptcp_get_options(skb, &mp_opt);
+ if (!mp_opt.mp_capable) {
+ fallback = true;
goto create_child;
}
create_msk:
- new_msk = mptcp_sk_clone(listener->conn, req);
+ new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
if (!new_msk)
- subflow_req->mp_capable = 0;
+ fallback = true;
} else if (subflow_req->mp_join) {
fallback_is_fatal = true;
- opt_rx.mptcp.mp_join = 0;
- mptcp_get_options(skb, &opt_rx);
- if (!opt_rx.mptcp.mp_join ||
- !subflow_hmac_valid(req, &opt_rx)) {
+ mptcp_get_options(skb, &mp_opt);
+ if (!mp_opt.mp_join ||
+ !subflow_hmac_valid(req, &mp_opt)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
return NULL;
}
@@ -409,12 +475,18 @@ create_child:
if (child && *own_req) {
struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);
- /* we have null ctx on TCP fallback, which is fatal on
- * MPJ handshake
+ /* we need to fallback on ctx allocation failure and on pre-reqs
+ * checking above. In the latter scenario we additionally need
+ * to reset the context to non MPTCP status.
*/
- if (!ctx) {
+ if (!ctx || fallback) {
if (fallback_is_fatal)
goto close_child;
+
+ if (ctx) {
+ subflow_ulp_fallback(child, ctx);
+ kfree_rcu(ctx, rcu);
+ }
goto out;
}
@@ -422,10 +494,17 @@ create_child:
/* new mpc subflow takes ownership of the newly
* created mptcp socket
*/
- inet_sk_state_store(new_msk, TCP_ESTABLISHED);
+ new_msk->sk_destruct = mptcp_sock_destruct;
mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
ctx->conn = new_msk;
new_msk = NULL;
+
+ /* with OoO packets we can reach here without ingress
+ * mpc option
+ */
+ ctx->remote_key = mp_opt.sndr_key;
+ ctx->fully_established = mp_opt.mp_capable;
+ ctx->can_ack = mp_opt.mp_capable;
} else if (ctx->mp_join) {
struct mptcp_sock *owner;
@@ -444,7 +523,14 @@ create_child:
out:
/* dispose of the left over mptcp master, if any */
if (unlikely(new_msk))
- sock_put(new_msk);
+ mptcp_force_close(new_msk);
+
+ /* check for expected invariant - should never trigger, just help
+ * catching eariler subtle bugs
+ */
+ WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
+ (!mptcp_subflow_ctx(child) ||
+ !mptcp_subflow_ctx(child)->conn));
return child;
close_child:
@@ -931,6 +1017,16 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
if (err)
return err;
+ /* the newly created socket really belongs to the owning MPTCP master
+ * socket, even if for additional subflows the allocation is performed
+ * by a kernel workqueue. Adjust inode references, so that the
+ * procfs/diag interaces really show this one belonging to the correct
+ * user.
+ */
+ SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;
+ SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid;
+ SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid;
+
subflow = mptcp_subflow_ctx(sf->sk);
pr_debug("subflow=%p", subflow);
@@ -1047,17 +1143,6 @@ static void subflow_ulp_release(struct sock *sk)
kfree_rcu(ctx, rcu);
}
-static void subflow_ulp_fallback(struct sock *sk,
- struct mptcp_subflow_context *old_ctx)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
-
- mptcp_subflow_tcp_fallback(sk, old_ctx);
- icsk->icsk_ulp_ops = NULL;
- rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
- tcp_sk(sk)->is_mptcp = 0;
-}
-
static void subflow_ulp_clone(const struct request_sock *req,
struct sock *newsk,
const gfp_t priority)
@@ -1091,9 +1176,6 @@ static void subflow_ulp_clone(const struct request_sock *req,
* is fully established only after we receive the remote key
*/
new_ctx->mp_capable = 1;
- new_ctx->fully_established = subflow_req->remote_key_valid;
- new_ctx->can_ack = subflow_req->remote_key_valid;
- new_ctx->remote_key = subflow_req->remote_key;
new_ctx->local_key = subflow_req->local_key;
new_ctx->token = subflow_req->token;
new_ctx->ssn_offset = subflow_req->ssn_offset;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 8dd17589217d..340cb955af25 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -86,7 +86,8 @@ find_set_type(const char *name, u8 family, u8 revision)
{
struct ip_set_type *type;
- list_for_each_entry_rcu(type, &ip_set_type_list, list)
+ list_for_each_entry_rcu(type, &ip_set_type_list, list,
+ lockdep_is_held(&ip_set_type_mutex))
if (STRNCMP(type->name, name) &&
(type->family == family ||
type->family == NFPROTO_UNSPEC) &&
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c4582eb71766..1d57b95d3481 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1519,9 +1519,9 @@ __nf_conntrack_alloc(struct net *net,
ct->status = 0;
ct->timeout = 0;
write_pnet(&ct->ct_net, net);
- memset(&ct->__nfct_init_offset[0], 0,
+ memset(&ct->__nfct_init_offset, 0,
offsetof(struct nf_conn, proto) -
- offsetof(struct nf_conn, __nfct_init_offset[0]));
+ offsetof(struct nf_conn, __nfct_init_offset));
nf_ct_zone_add(ct, zone);
@@ -2139,8 +2139,19 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
nf_conntrack_lock(lockp);
if (*bucket < nf_conntrack_htable_size) {
hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
- if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
+ if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
continue;
+ /* All nf_conn objects are added to hash table twice, one
+ * for original direction tuple, once for the reply tuple.
+ *
+ * Exception: In the IPS_NAT_CLASH case, only the reply
+ * tuple is added (the original tuple already existed for
+ * a different object).
+ *
+ * We only need to call the iterator once for each
+ * conntrack, so we just use the 'reply' direction
+ * tuple while iterating.
+ */
ct = nf_ct_tuplehash_to_ctrack(h);
if (iter(ct, data))
goto found;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index c0cb79495c35..42da6e337276 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -284,7 +284,7 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
if (nf_flow_has_expired(flow))
flow_offload_fixup_ct(flow->ct);
- else if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
+ else
flow_offload_fixup_ct_timeout(flow->ct);
flow_offload_free(flow);
@@ -361,8 +361,10 @@ static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
{
struct nf_flowtable *flow_table = data;
- if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) ||
- test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
+ if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct))
+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
+
+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
if (test_bit(NF_FLOW_HW, &flow->flags)) {
if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
nf_flow_offload_del(flow_table, flow);
@@ -421,10 +423,12 @@ void nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
down_write(&flow_table->flow_block_lock);
block_cb = flow_block_cb_lookup(block, cb, cb_priv);
- if (block_cb)
+ if (block_cb) {
list_del(&block_cb->list);
- else
+ flow_block_cb_free(block_cb);
+ } else {
WARN_ON(true);
+ }
up_write(&flow_table->flow_block_lock);
}
EXPORT_SYMBOL_GPL(nf_flow_table_offload_del_cb);
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index e3b099c14eff..2276a73ccba2 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -817,6 +817,7 @@ static void flow_offload_work_handler(struct work_struct *work)
WARN_ON_ONCE(1);
}
+ clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
kfree(offload);
}
@@ -831,9 +832,14 @@ nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
{
struct flow_offload_work *offload;
+ if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
+ return NULL;
+
offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
- if (!offload)
+ if (!offload) {
+ clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
return NULL;
+ }
offload->cmd = cmd;
offload->flow = flow;
@@ -1056,7 +1062,7 @@ static struct flow_indr_block_entry block_ing_entry = {
int nf_flow_table_offload_init(void)
{
nf_flow_offload_wq = alloc_workqueue("nf_flow_table_offload",
- WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
+ WQ_UNBOUND, 0);
if (!nf_flow_offload_wq)
return -ENOMEM;
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index 64eedc17037a..59151dc07fdc 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -68,15 +68,13 @@ static bool udp_manip_pkt(struct sk_buff *skb,
enum nf_nat_manip_type maniptype)
{
struct udphdr *hdr;
- bool do_csum;
if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct udphdr *)(skb->data + hdroff);
- do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL;
+ __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, !!hdr->check);
- __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, do_csum);
return true;
}
@@ -1035,8 +1033,8 @@ int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops)
ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops,
ARRAY_SIZE(nf_nat_ipv4_ops));
if (ret)
- nf_nat_ipv6_unregister_fn(net, ops);
-
+ nf_nat_unregister_fn(net, NFPROTO_IPV6, ops,
+ ARRAY_SIZE(nf_nat_ipv6_ops));
return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4471393da6d8..9780bd93b7e4 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3542,6 +3542,7 @@ cont:
continue;
if (!strcmp(set->name, i->name)) {
kfree(set->name);
+ set->name = NULL;
return -ENFILE;
}
}
@@ -3961,8 +3962,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
NFT_SET_INTERVAL | NFT_SET_TIMEOUT |
NFT_SET_MAP | NFT_SET_EVAL |
- NFT_SET_OBJECT))
- return -EINVAL;
+ NFT_SET_OBJECT | NFT_SET_CONCAT))
+ return -EOPNOTSUPP;
/* Only one of these operations is supported */
if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) ==
(NFT_SET_MAP | NFT_SET_OBJECT))
@@ -4000,7 +4001,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
if (objtype == NFT_OBJECT_UNSPEC ||
objtype > NFT_OBJECT_MAX)
- return -EINVAL;
+ return -EOPNOTSUPP;
} else if (flags & NFT_SET_OBJECT)
return -EINVAL;
else
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index 9f5dea0064ea..916a3c7f9eaf 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -165,12 +165,12 @@ static bool nf_osf_match_one(const struct sk_buff *skb,
static const struct tcphdr *nf_osf_hdr_ctx_init(struct nf_osf_hdr_ctx *ctx,
const struct sk_buff *skb,
const struct iphdr *ip,
- unsigned char *opts)
+ unsigned char *opts,
+ struct tcphdr *_tcph)
{
const struct tcphdr *tcp;
- struct tcphdr _tcph;
- tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph);
+ tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), _tcph);
if (!tcp)
return NULL;
@@ -205,10 +205,11 @@ nf_osf_match(const struct sk_buff *skb, u_int8_t family,
int fmatch = FMATCH_WRONG;
struct nf_osf_hdr_ctx ctx;
const struct tcphdr *tcp;
+ struct tcphdr _tcph;
memset(&ctx, 0, sizeof(ctx));
- tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts);
+ tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts, &_tcph);
if (!tcp)
return false;
@@ -265,10 +266,11 @@ bool nf_osf_find(const struct sk_buff *skb,
const struct nf_osf_finger *kf;
struct nf_osf_hdr_ctx ctx;
const struct tcphdr *tcp;
+ struct tcphdr _tcph;
memset(&ctx, 0, sizeof(ctx));
- tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts);
+ tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts, &_tcph);
if (!tcp)
return false;
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 1e70359d633c..f1363b8aabba 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -29,7 +29,7 @@ void nft_lookup_eval(const struct nft_expr *expr,
{
const struct nft_lookup *priv = nft_expr_priv(expr);
const struct nft_set *set = priv->set;
- const struct nft_set_ext *ext;
+ const struct nft_set_ext *ext = NULL;
bool found;
found = set->ops->lookup(nft_net(pkt), set, &regs->data[priv->sreg],
@@ -39,11 +39,13 @@ void nft_lookup_eval(const struct nft_expr *expr,
return;
}
- if (set->flags & NFT_SET_MAP)
- nft_data_copy(&regs->data[priv->dreg],
- nft_set_ext_data(ext), set->dlen);
+ if (ext) {
+ if (set->flags & NFT_SET_MAP)
+ nft_data_copy(&regs->data[priv->dreg],
+ nft_set_ext_data(ext), set->dlen);
- nft_set_elem_update_expr(ext, regs, pkt);
+ nft_set_elem_update_expr(ext, regs, pkt);
+ }
}
static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 32f0fc8be3a4..2a81ea421819 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -81,7 +81,6 @@ static bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
u32 idx, off;
nft_bitmap_location(set, key, &idx, &off);
- *ext = NULL;
return nft_bitmap_active(priv->bitmap, idx, off, genmask);
}
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 3a5552e14f75..62f416bc0579 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -79,6 +79,10 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
parent = rcu_dereference_raw(parent->rb_left);
continue;
}
+
+ if (nft_set_elem_expired(&rbe->ext))
+ return false;
+
if (nft_rbtree_interval_end(rbe)) {
if (nft_set_is_anonymous(set))
return false;
@@ -94,6 +98,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
+ !nft_set_elem_expired(&interval->ext) &&
nft_rbtree_interval_start(interval)) {
*ext = &interval->ext;
return true;
@@ -154,6 +159,9 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
continue;
}
+ if (nft_set_elem_expired(&rbe->ext))
+ return false;
+
if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) ||
(*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) ==
(flags & NFT_SET_ELEM_INTERVAL_END)) {
@@ -170,6 +178,7 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
+ !nft_set_elem_expired(&interval->ext) &&
((!nft_rbtree_interval_end(interval) &&
!(flags & NFT_SET_ELEM_INTERVAL_END)) ||
(nft_rbtree_interval_end(interval) &&
@@ -218,27 +227,26 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
/* Detect overlaps as we descend the tree. Set the flag in these cases:
*
- * a1. |__ _ _? >|__ _ _ (insert start after existing start)
- * a2. _ _ __>| ?_ _ __| (insert end before existing end)
- * a3. _ _ ___| ?_ _ _>| (insert end after existing end)
- * a4. >|__ _ _ _ _ __| (insert start before existing end)
+ * a1. _ _ __>| ?_ _ __| (insert end before existing end)
+ * a2. _ _ ___| ?_ _ _>| (insert end after existing end)
+ * a3. _ _ ___? >|_ _ __| (insert start before existing end)
*
* and clear it later on, as we eventually reach the points indicated by
* '?' above, in the cases described below. We'll always meet these
* later, locally, due to tree ordering, and overlaps for the intervals
* that are the closest together are always evaluated last.
*
- * b1. |__ _ _! >|__ _ _ (insert start after existing end)
- * b2. _ _ __>| !_ _ __| (insert end before existing start)
- * b3. !_____>| (insert end after existing start)
+ * b1. _ _ __>| !_ _ __| (insert end before existing start)
+ * b2. _ _ ___| !_ _ _>| (insert end after existing start)
+ * b3. _ _ ___! >|_ _ __| (insert start after existing end)
*
- * Case a4. resolves to b1.:
+ * Case a3. resolves to b3.:
* - if the inserted start element is the leftmost, because the '0'
* element in the tree serves as end element
* - otherwise, if an existing end is found. Note that end elements are
* always inserted after corresponding start elements.
*
- * For a new, rightmost pair of elements, we'll hit cases b1. and b3.,
+ * For a new, rightmost pair of elements, we'll hit cases b3. and b2.,
* in that order.
*
* The flag is also cleared in two special cases:
@@ -262,9 +270,9 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
p = &parent->rb_left;
if (nft_rbtree_interval_start(new)) {
- overlap = nft_rbtree_interval_start(rbe) &&
- nft_set_elem_active(&rbe->ext,
- genmask);
+ if (nft_rbtree_interval_end(rbe) &&
+ nft_set_elem_active(&rbe->ext, genmask))
+ overlap = false;
} else {
overlap = nft_rbtree_interval_end(rbe) &&
nft_set_elem_active(&rbe->ext,
@@ -419,6 +427,8 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
if (iter->count < iter->skip)
goto cont;
+ if (nft_set_elem_expired(&rbe->ext))
+ goto cont;
if (!nft_set_elem_active(&rbe->ext, iter->genmask))
goto cont;
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index 75bd0e5dd312..7b2f359bfce4 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -346,6 +346,9 @@ static int idletimer_tg_checkentry_v1(const struct xt_tgchk_param *par)
pr_debug("checkentry targinfo%s\n", info->label);
+ if (info->send_nl_msg)
+ return -EOPNOTSUPP;
+
ret = idletimer_tg_helper((struct idletimer_tg_info *)info);
if(ret < 0)
{
diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig
index 64280a1d3906..07b03c306f28 100644
--- a/net/netlabel/Kconfig
+++ b/net/netlabel/Kconfig
@@ -14,6 +14,6 @@ config NETLABEL
Documentation/netlabel as well as the NetLabel SourceForge project
for configuration tools and additional documentation.
- * http://netlabel.sf.net
+ * https://github.com/netlabel/netlabel_tools
If you are unsure, say N.
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 409a3ae47ce2..5e1239cef000 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -734,6 +734,12 @@ int netlbl_catmap_getlong(struct netlbl_lsm_catmap *catmap,
if ((off & (BITS_PER_LONG - 1)) != 0)
return -EINVAL;
+ /* a null catmap is equivalent to an empty one */
+ if (!catmap) {
+ *offset = (u32)-1;
+ return 0;
+ }
+
if (off < catmap->startbit) {
off = catmap->startbit;
*offset = off;
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 79f12d8c7b86..0891ee02ca4f 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -208,6 +208,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
/* refcount initialized at 1 */
spin_unlock_bh(&nr_node_list_lock);
+ nr_neigh_put(nr_neigh);
return 0;
}
nr_node_lock(nr_node);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index e726159cfcfa..4340f25fe390 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1895,7 +1895,8 @@ static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net)
struct hlist_head *head = &info->limits[i];
struct ovs_ct_limit *ct_limit;
- hlist_for_each_entry_rcu(ct_limit, head, hlist_node)
+ hlist_for_each_entry_rcu(ct_limit, head, hlist_node,
+ lockdep_ovsl_is_held())
kfree_rcu(ct_limit, rcu);
}
kfree(ovs_net->ct_limit_info->limits);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index d8ae541d22a8..94b024534987 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2466,8 +2466,10 @@ static void __net_exit ovs_exit_net(struct net *dnet)
struct net *net;
LIST_HEAD(head);
- ovs_ct_exit(dnet);
ovs_lock();
+
+ ovs_ct_exit(dnet);
+
list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
__dp_destroy(dp);
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index e22092e4a783..2d8d6131bc5f 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -854,7 +854,7 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
}
mutex_unlock(&qrtr_node_lock);
- qrtr_local_enqueue(node, skb, type, from, to);
+ qrtr_local_enqueue(NULL, skb, type, from, to);
return 0;
}
@@ -906,20 +906,21 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
node = NULL;
if (addr->sq_node == QRTR_NODE_BCAST) {
- enqueue_fn = qrtr_bcast_enqueue;
- if (addr->sq_port != QRTR_PORT_CTRL) {
+ if (addr->sq_port != QRTR_PORT_CTRL &&
+ qrtr_local_nid != QRTR_NODE_BCAST) {
release_sock(sk);
return -ENOTCONN;
}
+ enqueue_fn = qrtr_bcast_enqueue;
} else if (addr->sq_node == ipc->us.sq_node) {
enqueue_fn = qrtr_local_enqueue;
} else {
- enqueue_fn = qrtr_node_enqueue;
node = qrtr_node_lookup(addr->sq_node);
if (!node) {
release_sock(sk);
return -ECONNRESET;
}
+ enqueue_fn = qrtr_node_enqueue;
}
plen = (len + 3) & ~3;
diff --git a/net/rds/message.c b/net/rds/message.c
index 50f13f1d4ae0..071a261fdaab 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 Oracle. All rights reserved.
+ * Copyright (c) 2006, 2020 Oracle and/or its affiliates.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -162,12 +162,12 @@ static void rds_message_purge(struct rds_message *rm)
if (rm->rdma.op_active)
rds_rdma_free_op(&rm->rdma);
if (rm->rdma.op_rdma_mr)
- rds_mr_put(rm->rdma.op_rdma_mr);
+ kref_put(&rm->rdma.op_rdma_mr->r_kref, __rds_put_mr_final);
if (rm->atomic.op_active)
rds_atomic_free_op(&rm->atomic);
if (rm->atomic.op_rdma_mr)
- rds_mr_put(rm->atomic.op_rdma_mr);
+ kref_put(&rm->atomic.op_rdma_mr->r_kref, __rds_put_mr_final);
}
void rds_message_put(struct rds_message *rm)
@@ -308,26 +308,20 @@ out:
/*
* RDS ops use this to grab SG entries from the rm's sg pool.
*/
-struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents,
- int *ret)
+struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents)
{
struct scatterlist *sg_first = (struct scatterlist *) &rm[1];
struct scatterlist *sg_ret;
- if (WARN_ON(!ret))
- return NULL;
-
if (nents <= 0) {
pr_warn("rds: alloc sgs failed! nents <= 0\n");
- *ret = -EINVAL;
- return NULL;
+ return ERR_PTR(-EINVAL);
}
if (rm->m_used_sgs + nents > rm->m_total_sgs) {
pr_warn("rds: alloc sgs failed! total %d used %d nents %d\n",
rm->m_total_sgs, rm->m_used_sgs, nents);
- *ret = -ENOMEM;
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
sg_ret = &sg_first[rm->m_used_sgs];
@@ -343,7 +337,6 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
unsigned int i;
int num_sgs = DIV_ROUND_UP(total_len, PAGE_SIZE);
int extra_bytes = num_sgs * sizeof(struct scatterlist);
- int ret;
rm = rds_message_alloc(extra_bytes, GFP_NOWAIT);
if (!rm)
@@ -352,10 +345,10 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
set_bit(RDS_MSG_PAGEVEC, &rm->m_flags);
rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
rm->data.op_nents = DIV_ROUND_UP(total_len, PAGE_SIZE);
- rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs, &ret);
- if (!rm->data.op_sg) {
+ rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
+ if (IS_ERR(rm->data.op_sg)) {
rds_message_put(rm);
- return ERR_PTR(ret);
+ return ERR_CAST(rm->data.op_sg);
}
for (i = 0; i < rm->data.op_nents; ++i) {
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 585e6b3b69ce..a7ae11846cd7 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007, 2017 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2020 Oracle and/or its affiliates.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -84,7 +84,7 @@ static struct rds_mr *rds_mr_tree_walk(struct rb_root *root, u64 key,
if (insert) {
rb_link_node(&insert->r_rb_node, parent, p);
rb_insert_color(&insert->r_rb_node, root);
- refcount_inc(&insert->r_refcount);
+ kref_get(&insert->r_kref);
}
return NULL;
}
@@ -99,10 +99,7 @@ static void rds_destroy_mr(struct rds_mr *mr)
unsigned long flags;
rdsdebug("RDS: destroy mr key is %x refcnt %u\n",
- mr->r_key, refcount_read(&mr->r_refcount));
-
- if (test_and_set_bit(RDS_MR_DEAD, &mr->r_state))
- return;
+ mr->r_key, kref_read(&mr->r_kref));
spin_lock_irqsave(&rs->rs_rdma_lock, flags);
if (!RB_EMPTY_NODE(&mr->r_rb_node))
@@ -115,8 +112,10 @@ static void rds_destroy_mr(struct rds_mr *mr)
mr->r_trans->free_mr(trans_private, mr->r_invalidate);
}
-void __rds_put_mr_final(struct rds_mr *mr)
+void __rds_put_mr_final(struct kref *kref)
{
+ struct rds_mr *mr = container_of(kref, struct rds_mr, r_kref);
+
rds_destroy_mr(mr);
kfree(mr);
}
@@ -140,8 +139,7 @@ void rds_rdma_drop_keys(struct rds_sock *rs)
rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
RB_CLEAR_NODE(&mr->r_rb_node);
spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
- rds_destroy_mr(mr);
- rds_mr_put(mr);
+ kref_put(&mr->r_kref, __rds_put_mr_final);
spin_lock_irqsave(&rs->rs_rdma_lock, flags);
}
spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
@@ -242,7 +240,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
goto out;
}
- refcount_set(&mr->r_refcount, 1);
+ kref_init(&mr->r_kref);
RB_CLEAR_NODE(&mr->r_rb_node);
mr->r_trans = rs->rs_transport;
mr->r_sock = rs;
@@ -343,7 +341,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
rdsdebug("RDS: get_mr key is %x\n", mr->r_key);
if (mr_ret) {
- refcount_inc(&mr->r_refcount);
+ kref_get(&mr->r_kref);
*mr_ret = mr;
}
@@ -351,7 +349,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
out:
kfree(pages);
if (mr)
- rds_mr_put(mr);
+ kref_put(&mr->r_kref, __rds_put_mr_final);
return ret;
}
@@ -434,13 +432,7 @@ int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen)
if (!mr)
return -EINVAL;
- /*
- * call rds_destroy_mr() ourselves so that we're sure it's done by the time
- * we return. If we let rds_mr_put() do it it might not happen until
- * someone else drops their ref.
- */
- rds_destroy_mr(mr);
- rds_mr_put(mr);
+ kref_put(&mr->r_kref, __rds_put_mr_final);
return 0;
}
@@ -464,6 +456,14 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force)
return;
}
+ /* Get a reference so that the MR won't go away before calling
+ * sync_mr() below.
+ */
+ kref_get(&mr->r_kref);
+
+ /* If it is going to be freed, remove it from the tree now so
+ * that no other thread can find it and free it.
+ */
if (mr->r_use_once || force) {
rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
RB_CLEAR_NODE(&mr->r_rb_node);
@@ -477,12 +477,13 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force)
if (mr->r_trans->sync_mr)
mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE);
+ /* Release the reference held above. */
+ kref_put(&mr->r_kref, __rds_put_mr_final);
+
/* If the MR was marked as invalidate, this will
* trigger an async flush. */
- if (zot_me) {
- rds_destroy_mr(mr);
- rds_mr_put(mr);
- }
+ if (zot_me)
+ kref_put(&mr->r_kref, __rds_put_mr_final);
}
void rds_rdma_free_op(struct rm_rdma_op *ro)
@@ -490,7 +491,7 @@ void rds_rdma_free_op(struct rm_rdma_op *ro)
unsigned int i;
if (ro->op_odp_mr) {
- rds_mr_put(ro->op_odp_mr);
+ kref_put(&ro->op_odp_mr->r_kref, __rds_put_mr_final);
} else {
for (i = 0; i < ro->op_nents; i++) {
struct page *page = sg_page(&ro->op_sg[i]);
@@ -664,9 +665,11 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
op->op_odp_mr = NULL;
WARN_ON(!nr_pages);
- op->op_sg = rds_message_alloc_sgs(rm, nr_pages, &ret);
- if (!op->op_sg)
+ op->op_sg = rds_message_alloc_sgs(rm, nr_pages);
+ if (IS_ERR(op->op_sg)) {
+ ret = PTR_ERR(op->op_sg);
goto out_pages;
+ }
if (op->op_notify || op->op_recverr) {
/* We allocate an uninitialized notifier here, because
@@ -730,7 +733,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
goto out_pages;
}
RB_CLEAR_NODE(&local_odp_mr->r_rb_node);
- refcount_set(&local_odp_mr->r_refcount, 1);
+ kref_init(&local_odp_mr->r_kref);
local_odp_mr->r_trans = rs->rs_transport;
local_odp_mr->r_sock = rs;
local_odp_mr->r_trans_private =
@@ -827,7 +830,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
if (!mr)
err = -EINVAL; /* invalid r_key */
else
- refcount_inc(&mr->r_refcount);
+ kref_get(&mr->r_kref);
spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
if (mr) {
@@ -905,9 +908,11 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
rm->atomic.op_silent = !!(args->flags & RDS_RDMA_SILENT);
rm->atomic.op_active = 1;
rm->atomic.op_recverr = rs->rs_recverr;
- rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1, &ret);
- if (!rm->atomic.op_sg)
+ rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1);
+ if (IS_ERR(rm->atomic.op_sg)) {
+ ret = PTR_ERR(rm->atomic.op_sg);
goto err;
+ }
/* verify 8 byte-aligned */
if (args->local_addr & 0x7) {
diff --git a/net/rds/rds.h b/net/rds/rds.h
index e4a603523083..6019b0c004a9 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -291,7 +291,7 @@ struct rds_incoming {
struct rds_mr {
struct rb_node r_rb_node;
- refcount_t r_refcount;
+ struct kref r_kref;
u32 r_key;
/* A copy of the creation flags */
@@ -299,19 +299,11 @@ struct rds_mr {
unsigned int r_invalidate:1;
unsigned int r_write:1;
- /* This is for RDS_MR_DEAD.
- * It would be nice & consistent to make this part of the above
- * bit field here, but we need to use test_and_set_bit.
- */
- unsigned long r_state;
struct rds_sock *r_sock; /* back pointer to the socket that owns us */
struct rds_transport *r_trans;
void *r_trans_private;
};
-/* Flags for mr->r_state */
-#define RDS_MR_DEAD 0
-
static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset)
{
return r_key | (((u64) offset) << 32);
@@ -852,8 +844,7 @@ rds_conn_connecting(struct rds_connection *conn)
/* message.c */
struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
-struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents,
- int *ret);
+struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
bool zcopy);
struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
@@ -946,12 +937,7 @@ void rds_atomic_send_complete(struct rds_message *rm, int wc_status);
int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
struct cmsghdr *cmsg);
-void __rds_put_mr_final(struct rds_mr *mr);
-static inline void rds_mr_put(struct rds_mr *mr)
-{
- if (refcount_dec_and_test(&mr->r_refcount))
- __rds_put_mr_final(mr);
-}
+void __rds_put_mr_final(struct kref *kref);
static inline bool rds_destroy_pending(struct rds_connection *conn)
{
diff --git a/net/rds/send.c b/net/rds/send.c
index 82dcd8b84fe7..68e2bdb08fd0 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1274,9 +1274,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
/* Attach data to the rm */
if (payload_len) {
- rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs, &ret);
- if (!rm->data.op_sg)
+ rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
+ if (IS_ERR(rm->data.op_sg)) {
+ ret = PTR_ERR(rm->data.op_sg);
goto out;
+ }
ret = rds_message_copy_from_user(rm, &msg->msg_iter, zcopy);
if (ret)
goto out;
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index 6ffb7e9887ce..ddd0f95713a9 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -25,6 +25,7 @@ rxrpc-y := \
peer_event.o \
peer_object.o \
recvmsg.o \
+ rtt.o \
security.o \
sendmsg.o \
skbuff.o \
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 3eb1ab40ca5c..9fe264bec70c 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -7,6 +7,7 @@
#include <linux/atomic.h>
#include <linux/seqlock.h>
+#include <linux/win_minmax.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/sock.h>
@@ -311,11 +312,14 @@ struct rxrpc_peer {
#define RXRPC_RTT_CACHE_SIZE 32
spinlock_t rtt_input_lock; /* RTT lock for input routine */
ktime_t rtt_last_req; /* Time of last RTT request */
- u64 rtt; /* Current RTT estimate (in nS) */
- u64 rtt_sum; /* Sum of cache contents */
- u64 rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */
- u8 rtt_cursor; /* next entry at which to insert */
- u8 rtt_usage; /* amount of cache actually used */
+ unsigned int rtt_count; /* Number of samples we've got */
+
+ u32 srtt_us; /* smoothed round trip time << 3 in usecs */
+ u32 mdev_us; /* medium deviation */
+ u32 mdev_max_us; /* maximal mdev for the last rtt period */
+ u32 rttvar_us; /* smoothed mdev_max */
+ u32 rto_j; /* Retransmission timeout in jiffies */
+ u8 backoff; /* Backoff timeout */
u8 cong_cwnd; /* Congestion window size */
};
@@ -1041,7 +1045,6 @@ extern unsigned long rxrpc_idle_ack_delay;
extern unsigned int rxrpc_rx_window_size;
extern unsigned int rxrpc_rx_mtu;
extern unsigned int rxrpc_rx_jumbo_max;
-extern unsigned long rxrpc_resend_timeout;
extern const s8 rxrpc_ack_priority[];
@@ -1069,8 +1072,6 @@ void rxrpc_send_keepalive(struct rxrpc_peer *);
* peer_event.c
*/
void rxrpc_error_report(struct sock *);
-void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace,
- rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t);
void rxrpc_peer_keepalive_worker(struct work_struct *);
/*
@@ -1103,6 +1104,14 @@ void rxrpc_notify_socket(struct rxrpc_call *);
int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int);
/*
+ * rtt.c
+ */
+void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace,
+ rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t);
+unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *, bool);
+void rxrpc_peer_init_rtt(struct rxrpc_peer *);
+
+/*
* rxkad.c
*/
#ifdef CONFIG_RXKAD
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 70e44abf106c..b7611cc159e5 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -248,7 +248,7 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb)
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
ktime_t now = skb->tstamp;
- if (call->peer->rtt_usage < 3 ||
+ if (call->peer->rtt_count < 3 ||
ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now))
rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial,
true, true,
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index cedbbb3a7c2e..2a65ac41055f 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -111,8 +111,8 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
} else {
unsigned long now = jiffies, ack_at;
- if (call->peer->rtt_usage > 0)
- ack_at = nsecs_to_jiffies(call->peer->rtt);
+ if (call->peer->srtt_us != 0)
+ ack_at = usecs_to_jiffies(call->peer->srtt_us >> 3);
else
ack_at = expiry;
@@ -157,24 +157,18 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
{
struct sk_buff *skb;
- unsigned long resend_at;
+ unsigned long resend_at, rto_j;
rxrpc_seq_t cursor, seq, top;
- ktime_t now, max_age, oldest, ack_ts, timeout, min_timeo;
+ ktime_t now, max_age, oldest, ack_ts;
int ix;
u8 annotation, anno_type, retrans = 0, unacked = 0;
_enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
- if (call->peer->rtt_usage > 1)
- timeout = ns_to_ktime(call->peer->rtt * 3 / 2);
- else
- timeout = ms_to_ktime(rxrpc_resend_timeout);
- min_timeo = ns_to_ktime((1000000000 / HZ) * 4);
- if (ktime_before(timeout, min_timeo))
- timeout = min_timeo;
+ rto_j = call->peer->rto_j;
now = ktime_get_real();
- max_age = ktime_sub(now, timeout);
+ max_age = ktime_sub(now, jiffies_to_usecs(rto_j));
spin_lock_bh(&call->lock);
@@ -219,7 +213,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
}
resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest)));
- resend_at += jiffies + rxrpc_resend_timeout;
+ resend_at += jiffies + rto_j;
WRITE_ONCE(call->resend_at, resend_at);
if (unacked)
@@ -234,7 +228,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
rxrpc_timer_set_for_resend);
spin_unlock_bh(&call->lock);
ack_ts = ktime_sub(now, call->acks_latest_ts);
- if (ktime_to_ns(ack_ts) < call->peer->rtt)
+ if (ktime_to_us(ack_ts) < (call->peer->srtt_us >> 3))
goto out;
rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false,
rxrpc_propose_ack_ping_for_lost_ack);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 69e09d69c896..3be4177baf70 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -91,11 +91,11 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
/* We analyse the number of packets that get ACK'd per RTT
* period and increase the window if we managed to fill it.
*/
- if (call->peer->rtt_usage == 0)
+ if (call->peer->rtt_count == 0)
goto out;
if (ktime_before(skb->tstamp,
- ktime_add_ns(call->cong_tstamp,
- call->peer->rtt)))
+ ktime_add_us(call->cong_tstamp,
+ call->peer->srtt_us >> 3)))
goto out_no_clear_ca;
change = rxrpc_cong_rtt_window_end;
call->cong_tstamp = skb->tstamp;
@@ -803,6 +803,30 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
}
/*
+ * Return true if the ACK is valid - ie. it doesn't appear to have regressed
+ * with respect to the ack state conveyed by preceding ACKs.
+ */
+static bool rxrpc_is_ack_valid(struct rxrpc_call *call,
+ rxrpc_seq_t first_pkt, rxrpc_seq_t prev_pkt)
+{
+ rxrpc_seq_t base = READ_ONCE(call->ackr_first_seq);
+
+ if (after(first_pkt, base))
+ return true; /* The window advanced */
+
+ if (before(first_pkt, base))
+ return false; /* firstPacket regressed */
+
+ if (after_eq(prev_pkt, call->ackr_prev_seq))
+ return true; /* previousPacket hasn't regressed. */
+
+ /* Some rx implementations put a serial number in previousPacket. */
+ if (after_eq(prev_pkt, base + call->tx_winsize))
+ return false;
+ return true;
+}
+
+/*
* Process an ACK packet.
*
* ack.firstPacket is the sequence number of the first soft-ACK'd/NAK'd packet
@@ -865,9 +889,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
}
/* Discard any out-of-order or duplicate ACKs (outside lock). */
- if (before(first_soft_ack, call->ackr_first_seq) ||
- before(prev_pkt, call->ackr_prev_seq))
+ if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) {
+ trace_rxrpc_rx_discard_ack(call->debug_id, sp->hdr.serial,
+ first_soft_ack, call->ackr_first_seq,
+ prev_pkt, call->ackr_prev_seq);
return;
+ }
buf.info.rxMTU = 0;
ioffset = offset + nr_acks + 3;
@@ -878,9 +905,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
spin_lock(&call->input_lock);
/* Discard any out-of-order or duplicate ACKs (inside lock). */
- if (before(first_soft_ack, call->ackr_first_seq) ||
- before(prev_pkt, call->ackr_prev_seq))
+ if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) {
+ trace_rxrpc_rx_discard_ack(call->debug_id, sp->hdr.serial,
+ first_soft_ack, call->ackr_first_seq,
+ prev_pkt, call->ackr_prev_seq);
goto out;
+ }
call->acks_latest_ts = skb->tstamp;
call->ackr_first_seq = first_soft_ack;
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index a6c1349e965d..01135e54d95d 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -165,15 +165,6 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
goto error;
}
- /* we want to set the don't fragment bit */
- opt = IPV6_PMTUDISC_DO;
- ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_MTU_DISCOVER,
- (char *) &opt, sizeof(opt));
- if (ret < 0) {
- _debug("setsockopt failed");
- goto error;
- }
-
/* Fall through and set IPv4 options too otherwise we don't get
* errors from IPv4 packets sent through the IPv6 socket.
*/
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index 214405f75346..d4144fd86f84 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -63,11 +63,6 @@ unsigned int rxrpc_rx_mtu = 5692;
*/
unsigned int rxrpc_rx_jumbo_max = 4;
-/*
- * Time till packet resend (in milliseconds).
- */
-unsigned long rxrpc_resend_timeout = 4 * HZ;
-
const s8 rxrpc_ack_priority[] = {
[0] = 0,
[RXRPC_ACK_DELAY] = 1,
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index bad3d2420344..f8b632a5c619 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -369,7 +369,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
(test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) ||
retrans ||
call->cong_mode == RXRPC_CALL_SLOW_START ||
- (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) ||
+ (call->peer->rtt_count < 3 && sp->hdr.seq & 1) ||
ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
ktime_get_real())))
whdr.flags |= RXRPC_REQUEST_ACK;
@@ -423,13 +423,10 @@ done:
if (whdr.flags & RXRPC_REQUEST_ACK) {
call->peer->rtt_last_req = skb->tstamp;
trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial);
- if (call->peer->rtt_usage > 1) {
+ if (call->peer->rtt_count > 1) {
unsigned long nowj = jiffies, ack_lost_at;
- ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt);
- if (ack_lost_at < 1)
- ack_lost_at = 1;
-
+ ack_lost_at = rxrpc_get_rto_backoff(call->peer, retrans);
ack_lost_at += nowj;
WRITE_ONCE(call->ack_lost_at, ack_lost_at);
rxrpc_reduce_call_timer(call, ack_lost_at, nowj,
@@ -474,41 +471,21 @@ send_fragmentable:
skb->tstamp = ktime_get_real();
switch (conn->params.local->srx.transport.family) {
+ case AF_INET6:
case AF_INET:
opt = IP_PMTUDISC_DONT;
- ret = kernel_setsockopt(conn->params.local->socket,
- SOL_IP, IP_MTU_DISCOVER,
- (char *)&opt, sizeof(opt));
- if (ret == 0) {
- ret = kernel_sendmsg(conn->params.local->socket, &msg,
- iov, 2, len);
- conn->params.peer->last_tx_at = ktime_get_seconds();
-
- opt = IP_PMTUDISC_DO;
- kernel_setsockopt(conn->params.local->socket, SOL_IP,
- IP_MTU_DISCOVER,
- (char *)&opt, sizeof(opt));
- }
- break;
-
-#ifdef CONFIG_AF_RXRPC_IPV6
- case AF_INET6:
- opt = IPV6_PMTUDISC_DONT;
- ret = kernel_setsockopt(conn->params.local->socket,
- SOL_IPV6, IPV6_MTU_DISCOVER,
- (char *)&opt, sizeof(opt));
- if (ret == 0) {
- ret = kernel_sendmsg(conn->params.local->socket, &msg,
- iov, 2, len);
- conn->params.peer->last_tx_at = ktime_get_seconds();
-
- opt = IPV6_PMTUDISC_DO;
- kernel_setsockopt(conn->params.local->socket,
- SOL_IPV6, IPV6_MTU_DISCOVER,
- (char *)&opt, sizeof(opt));
- }
+ kernel_setsockopt(conn->params.local->socket,
+ SOL_IP, IP_MTU_DISCOVER,
+ (char *)&opt, sizeof(opt));
+ ret = kernel_sendmsg(conn->params.local->socket, &msg,
+ iov, 2, len);
+ conn->params.peer->last_tx_at = ktime_get_seconds();
+
+ opt = IP_PMTUDISC_DO;
+ kernel_setsockopt(conn->params.local->socket,
+ SOL_IP, IP_MTU_DISCOVER,
+ (char *)&opt, sizeof(opt));
break;
-#endif
default:
BUG();
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 923b263c401b..b1449d971883 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -296,52 +296,6 @@ static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error,
}
/*
- * Add RTT information to cache. This is called in softirq mode and has
- * exclusive access to the peer RTT data.
- */
-void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
- rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
- ktime_t send_time, ktime_t resp_time)
-{
- struct rxrpc_peer *peer = call->peer;
- s64 rtt;
- u64 sum = peer->rtt_sum, avg;
- u8 cursor = peer->rtt_cursor, usage = peer->rtt_usage;
-
- rtt = ktime_to_ns(ktime_sub(resp_time, send_time));
- if (rtt < 0)
- return;
-
- spin_lock(&peer->rtt_input_lock);
-
- /* Replace the oldest datum in the RTT buffer */
- sum -= peer->rtt_cache[cursor];
- sum += rtt;
- peer->rtt_cache[cursor] = rtt;
- peer->rtt_cursor = (cursor + 1) & (RXRPC_RTT_CACHE_SIZE - 1);
- peer->rtt_sum = sum;
- if (usage < RXRPC_RTT_CACHE_SIZE) {
- usage++;
- peer->rtt_usage = usage;
- }
-
- spin_unlock(&peer->rtt_input_lock);
-
- /* Now recalculate the average */
- if (usage == RXRPC_RTT_CACHE_SIZE) {
- avg = sum / RXRPC_RTT_CACHE_SIZE;
- } else {
- avg = sum;
- do_div(avg, usage);
- }
-
- /* Don't need to update this under lock */
- peer->rtt = avg;
- trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt,
- usage, avg);
-}
-
-/*
* Perform keep-alive pings.
*/
static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 452163eadb98..ca29976bb193 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -225,6 +225,8 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
spin_lock_init(&peer->rtt_input_lock);
peer->debug_id = atomic_inc_return(&rxrpc_debug_id);
+ rxrpc_peer_init_rtt(peer);
+
if (RXRPC_TX_SMSS > 2190)
peer->cong_cwnd = 2;
else if (RXRPC_TX_SMSS > 1095)
@@ -497,14 +499,14 @@ void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call,
EXPORT_SYMBOL(rxrpc_kernel_get_peer);
/**
- * rxrpc_kernel_get_rtt - Get a call's peer RTT
+ * rxrpc_kernel_get_srtt - Get a call's peer smoothed RTT
* @sock: The socket on which the call is in progress.
* @call: The call to query
*
- * Get the call's peer RTT.
+ * Get the call's peer smoothed RTT.
*/
-u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call)
+u32 rxrpc_kernel_get_srtt(struct socket *sock, struct rxrpc_call *call)
{
- return call->peer->rtt;
+ return call->peer->srtt_us >> 3;
}
-EXPORT_SYMBOL(rxrpc_kernel_get_rtt);
+EXPORT_SYMBOL(rxrpc_kernel_get_srtt);
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index b9d053e42821..8b179e3c802a 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -222,7 +222,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
seq_puts(seq,
"Proto Local "
" Remote "
- " Use CW MTU LastUse RTT Rc\n"
+ " Use CW MTU LastUse RTT RTO\n"
);
return 0;
}
@@ -236,15 +236,15 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
now = ktime_get_seconds();
seq_printf(seq,
"UDP %-47.47s %-47.47s %3u"
- " %3u %5u %6llus %12llu %2u\n",
+ " %3u %5u %6llus %8u %8u\n",
lbuff,
rbuff,
atomic_read(&peer->usage),
peer->cong_cwnd,
peer->mtu,
now - peer->last_tx_at,
- peer->rtt,
- peer->rtt_cursor);
+ peer->srtt_us >> 3,
+ jiffies_to_usecs(peer->rto_j));
return 0;
}
diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c
new file mode 100644
index 000000000000..928d8b34a3ee
--- /dev/null
+++ b/net/rxrpc/rtt.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0
+/* RTT/RTO calculation.
+ *
+ * Adapted from TCP for AF_RXRPC by David Howells (dhowells@redhat.com)
+ *
+ * https://tools.ietf.org/html/rfc6298
+ * https://tools.ietf.org/html/rfc1122#section-4.2.3.1
+ * http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf
+ */
+
+#include <linux/net.h>
+#include "ar-internal.h"
+
+#define RXRPC_RTO_MAX ((unsigned)(120 * HZ))
+#define RXRPC_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */
+#define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */
+#define rxrpc_min_rtt_wlen 300 /* As sysctl_tcp_min_rtt_wlen */
+
+static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer)
+{
+ return 200;
+}
+
+static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer)
+{
+ return _usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us);
+}
+
+static u32 rxrpc_bound_rto(u32 rto)
+{
+ return min(rto, RXRPC_RTO_MAX);
+}
+
+/*
+ * Called to compute a smoothed rtt estimate. The data fed to this
+ * routine either comes from timestamps, or from segments that were
+ * known _not_ to have been retransmitted [see Karn/Partridge
+ * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
+ * piece by Van Jacobson.
+ * NOTE: the next three routines used to be one big routine.
+ * To save cycles in the RFC 1323 implementation it was better to break
+ * it up into three procedures. -- erics
+ */
+static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us)
+{
+ long m = sample_rtt_us; /* RTT */
+ u32 srtt = peer->srtt_us;
+
+ /* The following amusing code comes from Jacobson's
+ * article in SIGCOMM '88. Note that rtt and mdev
+ * are scaled versions of rtt and mean deviation.
+ * This is designed to be as fast as possible
+ * m stands for "measurement".
+ *
+ * On a 1990 paper the rto value is changed to:
+ * RTO = rtt + 4 * mdev
+ *
+ * Funny. This algorithm seems to be very broken.
+ * These formulae increase RTO, when it should be decreased, increase
+ * too slowly, when it should be increased quickly, decrease too quickly
+ * etc. I guess in BSD RTO takes ONE value, so that it is absolutely
+ * does not matter how to _calculate_ it. Seems, it was trap
+ * that VJ failed to avoid. 8)
+ */
+ if (srtt != 0) {
+ m -= (srtt >> 3); /* m is now error in rtt est */
+ srtt += m; /* rtt = 7/8 rtt + 1/8 new */
+ if (m < 0) {
+ m = -m; /* m is now abs(error) */
+ m -= (peer->mdev_us >> 2); /* similar update on mdev */
+ /* This is similar to one of Eifel findings.
+ * Eifel blocks mdev updates when rtt decreases.
+ * This solution is a bit different: we use finer gain
+ * for mdev in this case (alpha*beta).
+ * Like Eifel it also prevents growth of rto,
+ * but also it limits too fast rto decreases,
+ * happening in pure Eifel.
+ */
+ if (m > 0)
+ m >>= 3;
+ } else {
+ m -= (peer->mdev_us >> 2); /* similar update on mdev */
+ }
+
+ peer->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */
+ if (peer->mdev_us > peer->mdev_max_us) {
+ peer->mdev_max_us = peer->mdev_us;
+ if (peer->mdev_max_us > peer->rttvar_us)
+ peer->rttvar_us = peer->mdev_max_us;
+ }
+ } else {
+ /* no previous measure. */
+ srtt = m << 3; /* take the measured time to be rtt */
+ peer->mdev_us = m << 1; /* make sure rto = 3*rtt */
+ peer->rttvar_us = max(peer->mdev_us, rxrpc_rto_min_us(peer));
+ peer->mdev_max_us = peer->rttvar_us;
+ }
+
+ peer->srtt_us = max(1U, srtt);
+}
+
+/*
+ * Calculate rto without backoff. This is the second half of Van Jacobson's
+ * routine referred to above.
+ */
+static void rxrpc_set_rto(struct rxrpc_peer *peer)
+{
+ u32 rto;
+
+ /* 1. If rtt variance happened to be less 50msec, it is hallucination.
+ * It cannot be less due to utterly erratic ACK generation made
+ * at least by solaris and freebsd. "Erratic ACKs" has _nothing_
+ * to do with delayed acks, because at cwnd>2 true delack timeout
+ * is invisible. Actually, Linux-2.4 also generates erratic
+ * ACKs in some circumstances.
+ */
+ rto = __rxrpc_set_rto(peer);
+
+ /* 2. Fixups made earlier cannot be right.
+ * If we do not estimate RTO correctly without them,
+ * all the algo is pure shit and should be replaced
+ * with correct one. It is exactly, which we pretend to do.
+ */
+
+ /* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo
+ * guarantees that rto is higher.
+ */
+ peer->rto_j = rxrpc_bound_rto(rto);
+}
+
+static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us)
+{
+ if (rtt_us < 0)
+ return;
+
+ //rxrpc_update_rtt_min(peer, rtt_us);
+ rxrpc_rtt_estimator(peer, rtt_us);
+ rxrpc_set_rto(peer);
+
+ /* RFC6298: only reset backoff on valid RTT measurement. */
+ peer->backoff = 0;
+}
+
+/*
+ * Add RTT information to cache. This is called in softirq mode and has
+ * exclusive access to the peer RTT data.
+ */
+void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
+ rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
+ ktime_t send_time, ktime_t resp_time)
+{
+ struct rxrpc_peer *peer = call->peer;
+ s64 rtt_us;
+
+ rtt_us = ktime_to_us(ktime_sub(resp_time, send_time));
+ if (rtt_us < 0)
+ return;
+
+ spin_lock(&peer->rtt_input_lock);
+ rxrpc_ack_update_rtt(peer, rtt_us);
+ if (peer->rtt_count < 3)
+ peer->rtt_count++;
+ spin_unlock(&peer->rtt_input_lock);
+
+ trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial,
+ peer->srtt_us >> 3, peer->rto_j);
+}
+
+/*
+ * Get the retransmission timeout to set in jiffies, backing it off each time
+ * we retransmit.
+ */
+unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans)
+{
+ u64 timo_j;
+ u8 backoff = READ_ONCE(peer->backoff);
+
+ timo_j = peer->rto_j;
+ timo_j <<= backoff;
+ if (retrans && timo_j * 2 <= RXRPC_RTO_MAX)
+ WRITE_ONCE(peer->backoff, backoff + 1);
+
+ if (timo_j < 1)
+ timo_j = 1;
+
+ return timo_j;
+}
+
+void rxrpc_peer_init_rtt(struct rxrpc_peer *peer)
+{
+ peer->rto_j = RXRPC_TIMEOUT_INIT;
+ peer->mdev_us = jiffies_to_usecs(RXRPC_TIMEOUT_INIT);
+ peer->backoff = 0;
+ //minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U);
+}
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 098f1f9ec53b..52a24d4ef5d8 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -1148,7 +1148,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
ret = rxkad_decrypt_ticket(conn, skb, ticket, ticket_len, &session_key,
&expiry, _abort_code);
if (ret < 0)
- goto temporary_error_free_resp;
+ goto temporary_error_free_ticket;
/* use the session key from inside the ticket to decrypt the
* response */
@@ -1230,7 +1230,6 @@ protocol_error:
temporary_error_free_ticket:
kfree(ticket);
-temporary_error_free_resp:
kfree(response);
temporary_error:
/* Ignore the response packet if we got a temporary error such as
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 0fcf157aa09f..5e9c43d4a314 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -66,15 +66,14 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx,
struct rxrpc_call *call)
{
rxrpc_seq_t tx_start, tx_win;
- signed long rtt2, timeout;
- u64 rtt;
+ signed long rtt, timeout;
- rtt = READ_ONCE(call->peer->rtt);
- rtt2 = nsecs_to_jiffies64(rtt) * 2;
- if (rtt2 < 2)
- rtt2 = 2;
+ rtt = READ_ONCE(call->peer->srtt_us) >> 3;
+ rtt = usecs_to_jiffies(rtt) * 2;
+ if (rtt < 2)
+ rtt = 2;
- timeout = rtt2;
+ timeout = rtt;
tx_start = READ_ONCE(call->tx_hard_ack);
for (;;) {
@@ -92,7 +91,7 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx,
return -EINTR;
if (tx_win != tx_start) {
- timeout = rtt2;
+ timeout = rtt;
tx_start = tx_win;
}
@@ -271,16 +270,9 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
_debug("need instant resend %d", ret);
rxrpc_instant_resend(call, ix);
} else {
- unsigned long now = jiffies, resend_at;
+ unsigned long now = jiffies;
+ unsigned long resend_at = now + call->peer->rto_j;
- if (call->peer->rtt_usage > 1)
- resend_at = nsecs_to_jiffies(call->peer->rtt * 3 / 2);
- else
- resend_at = rxrpc_resend_timeout;
- if (resend_at < 1)
- resend_at = 1;
-
- resend_at += now;
WRITE_ONCE(call->resend_at, resend_at);
rxrpc_reduce_call_timer(call, resend_at, now,
rxrpc_timer_set_for_send);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 2bbb38161851..18dade4e6f9a 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -71,15 +71,6 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.extra1 = (void *)&one_jiffy,
.extra2 = (void *)&max_jiffies,
},
- {
- .procname = "resend_timeout",
- .data = &rxrpc_resend_timeout,
- .maxlen = sizeof(unsigned long),
- .mode = 0644,
- .proc_handler = proc_doulongvec_ms_jiffies_minmax,
- .extra1 = (void *)&one_jiffy,
- .extra2 = (void *)&max_jiffies,
- },
/* Non-time values */
{
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index f6a3b969ead0..0a7ecc292bd3 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1667,6 +1667,7 @@ int tcf_classify_ingress(struct sk_buff *skb,
skb_ext_del(skb, TC_SKB_EXT);
tp = rcu_dereference_bh(fchain->filter_chain);
+ last_executed_chain = fchain->index;
}
ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode,
@@ -2069,6 +2070,7 @@ replay:
err = PTR_ERR(block);
goto errout;
}
+ block->classid = parent;
chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
if (chain_index > TC_ACT_EXT_VAL_MASK) {
@@ -2611,12 +2613,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
parent = tcm->tcm_parent;
- if (!parent) {
+ if (!parent)
q = dev->qdisc;
- parent = q->handle;
- } else {
+ else
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
- }
if (!q)
goto out;
cops = q->ops->cl_ops;
@@ -2632,6 +2632,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
block = cops->tcf_block(q, cl, NULL);
if (!block)
goto out;
+ parent = block->classid;
if (tcf_block_shared(block))
q = NULL;
}
@@ -3522,6 +3523,16 @@ static void tcf_sample_get_group(struct flow_action_entry *entry,
#endif
}
+static enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats)
+{
+ if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY))
+ return FLOW_ACTION_HW_STATS_DONT_CARE;
+ else if (!hw_stats)
+ return FLOW_ACTION_HW_STATS_DISABLED;
+
+ return hw_stats;
+}
+
int tc_setup_flow_action(struct flow_action *flow_action,
const struct tcf_exts *exts)
{
@@ -3545,7 +3556,7 @@ int tc_setup_flow_action(struct flow_action *flow_action,
if (err)
goto err_out_locked;
- entry->hw_stats = act->hw_stats;
+ entry->hw_stats = tc_act_hw_stats(act->hw_stats);
if (is_tcf_gact_ok(act)) {
entry->id = FLOW_ACTION_ACCEPT;
@@ -3613,7 +3624,7 @@ int tc_setup_flow_action(struct flow_action *flow_action,
entry->mangle.mask = tcf_pedit_mask(act, k);
entry->mangle.val = tcf_pedit_val(act, k);
entry->mangle.offset = tcf_pedit_offset(act, k);
- entry->hw_stats = act->hw_stats;
+ entry->hw_stats = tc_act_hw_stats(act->hw_stats);
entry = &flow_action->entries[++j];
}
} else if (is_tcf_csum(act)) {
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index a36974e9c601..1bcf8fbfd40e 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -323,7 +323,8 @@ static void choke_reset(struct Qdisc *sch)
sch->q.qlen = 0;
sch->qstats.backlog = 0;
- memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
+ if (q->tab)
+ memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
q->head = q->tail = 0;
red_restart(&q->vars);
}
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index b1da5589a0c6..c48f91075b5c 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -82,7 +82,7 @@ static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
if (q->skip_sock_check)
goto skip;
- if (!sk)
+ if (!sk || !sk_fullsock(sk))
return false;
if (!sock_flag(sk, SOCK_TXTIME))
@@ -137,8 +137,9 @@ static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
struct sock_exterr_skb *serr;
struct sk_buff *clone;
ktime_t txtime = skb->tstamp;
+ struct sock *sk = skb->sk;
- if (!skb->sk || !(skb->sk->sk_txtime_report_errors))
+ if (!sk || !sk_fullsock(sk) || !(sk->sk_txtime_report_errors))
return;
clone = skb_clone(skb, GFP_ATOMIC);
@@ -154,7 +155,7 @@ static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
serr->ee.ee_info = txtime; /* low part of tstamp */
- if (sock_queue_err_skb(skb->sk, clone))
+ if (sock_queue_err_skb(sk, clone))
kfree_skb(clone);
}
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 968519ff36e9..436160be9c18 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -416,7 +416,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])
- q->drop_batch_size = min(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
+ q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
if (tb[TCA_FQ_CODEL_MEMORY_LIMIT])
q->memory_limit = min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT]));
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index c787d4d46017..5a6def5e4e6d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -637,6 +637,15 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
if (ctl->divisor &&
(!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
return -EINVAL;
+
+ /* slot->allot is a short, make sure quantum is not too big. */
+ if (ctl->quantum) {
+ unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum);
+
+ if (scaled <= 0 || scaled > SHRT_MAX)
+ return -EINVAL;
+ }
+
if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
ctl_v1->Wlog))
return -EINVAL;
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
index 0fb10abf7579..7a5e4c454715 100644
--- a/net/sched/sch_skbprio.c
+++ b/net/sched/sch_skbprio.c
@@ -169,6 +169,9 @@ static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
{
struct tc_skbprio_qopt *ctl = nla_data(opt);
+ if (opt->nla_len != nla_attr_size(sizeof(*ctl)))
+ return -EINVAL;
+
sch->limit = ctl->limit;
return 0;
}
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 09050c1d5517..f7cb0b7faec2 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -858,7 +858,11 @@ struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc,
struct sctp_chunk *retval;
__u32 ctsn;
- ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
+ if (chunk && chunk->asoc)
+ ctsn = sctp_tsnmap_get_ctsn(&chunk->asoc->peer.tsn_map);
+ else
+ ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
+
shut.cum_tsn_ack = htonl(ctsn);
retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0,
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 2bc29463e1dc..9f36fe911d08 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1523,9 +1523,17 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type,
timeout = asoc->timeouts[cmd->obj.to];
BUG_ON(!timeout);
- timer->expires = jiffies + timeout;
- sctp_association_hold(asoc);
- add_timer(timer);
+ /*
+ * SCTP has a hard time with timer starts. Because we process
+ * timer starts as side effects, it can be hard to tell if we
+ * have already started a timer or not, which leads to BUG
+ * halts when we call add_timer. So here, instead of just starting
+ * a timer, if the timer is already started, and just mod
+ * the timer with the shorter of the two expiration times
+ */
+ if (!timer_pending(timer))
+ sctp_association_hold(asoc);
+ timer_reduce(timer, jiffies + timeout);
break;
case SCTP_CMD_TIMER_RESTART:
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 6a16af4b1ef6..e86620fbd90f 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1856,16 +1856,17 @@ static enum sctp_disposition sctp_sf_do_dupcook_a(
/* Update the content of current association. */
sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
- if (sctp_state(asoc, SHUTDOWN_PENDING) &&
+ if ((sctp_state(asoc, SHUTDOWN_PENDING) ||
+ sctp_state(asoc, SHUTDOWN_SENT)) &&
(sctp_sstate(asoc->base.sk, CLOSING) ||
sock_flag(asoc->base.sk, SOCK_DEAD))) {
- /* if were currently in SHUTDOWN_PENDING, but the socket
- * has been closed by user, don't transition to ESTABLISHED.
- * Instead trigger SHUTDOWN bundled with COOKIE_ACK.
+ /* If the socket has been closed by user, don't
+ * transition to ESTABLISHED. Instead trigger SHUTDOWN
+ * bundled with COOKIE_ACK.
*/
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
return sctp_sf_do_9_2_start_shutdown(net, ep, asoc,
- SCTP_ST_CHUNK(0), NULL,
+ SCTP_ST_CHUNK(0), repl,
commands);
} else {
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
@@ -5470,7 +5471,7 @@ enum sctp_disposition sctp_sf_do_9_2_start_shutdown(
* in the Cumulative TSN Ack field the last sequential TSN it
* has received from the peer.
*/
- reply = sctp_make_shutdown(asoc, NULL);
+ reply = sctp_make_shutdown(asoc, arg);
if (!reply)
goto nomem;
@@ -6068,7 +6069,7 @@ enum sctp_disposition sctp_sf_autoclose_timer_expire(
disposition = SCTP_DISPOSITION_CONSUME;
if (sctp_outq_is_empty(&asoc->outqueue)) {
disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type,
- arg, commands);
+ NULL, commands);
}
return disposition;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 25fbd8d9de74..ac5cac0dd24b 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -2032,7 +2032,6 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
struct kvec *head = rqstp->rq_rcv_buf.head;
struct rpc_auth *auth = cred->cr_auth;
- unsigned int savedlen = rcv_buf->len;
u32 offset, opaque_len, maj_stat;
__be32 *p;
@@ -2043,9 +2042,9 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
offset = (u8 *)(p) - (u8 *)head->iov_base;
if (offset + opaque_len > rcv_buf->len)
goto unwrap_failed;
- rcv_buf->len = offset + opaque_len;
- maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf);
+ maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset,
+ offset + opaque_len, rcv_buf);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
if (maj_stat != GSS_S_COMPLETE)
@@ -2059,10 +2058,9 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
*/
xdr_init_decode(xdr, rcv_buf, p, rqstp);
- auth->au_rslack = auth->au_verfsize + 2 +
- XDR_QUADLEN(savedlen - rcv_buf->len);
- auth->au_ralign = auth->au_verfsize + 2 +
- XDR_QUADLEN(savedlen - rcv_buf->len);
+ auth->au_rslack = auth->au_verfsize + 2 + ctx->gc_gss_ctx->slack;
+ auth->au_ralign = auth->au_verfsize + 2 + ctx->gc_gss_ctx->align;
+
return 0;
unwrap_failed:
trace_rpcgss_unwrap_failed(task);
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 6f2d30d7b766..e7180da1fc6a 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -851,8 +851,8 @@ out_err:
}
u32
-gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
- u32 *headskip, u32 *tailskip)
+gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len,
+ struct xdr_buf *buf, u32 *headskip, u32 *tailskip)
{
struct xdr_buf subbuf;
u32 ret = 0;
@@ -881,7 +881,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
/* create a segment skipping the header and leaving out the checksum */
xdr_buf_subsegment(buf, &subbuf, offset + GSS_KRB5_TOK_HDR_LEN,
- (buf->len - offset - GSS_KRB5_TOK_HDR_LEN -
+ (len - offset - GSS_KRB5_TOK_HDR_LEN -
kctx->gk5e->cksumlength));
nblocks = (subbuf.len + blocksize - 1) / blocksize;
@@ -926,7 +926,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
goto out_err;
/* Get the packet's hmac value */
- ret = read_bytes_from_xdr_buf(buf, buf->len - kctx->gk5e->cksumlength,
+ ret = read_bytes_from_xdr_buf(buf, len - kctx->gk5e->cksumlength,
pkt_hmac, kctx->gk5e->cksumlength);
if (ret)
goto out_err;
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 6c1920eed771..cf0fd170ac18 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -261,7 +261,9 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
}
static u32
-gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
+gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, int len,
+ struct xdr_buf *buf, unsigned int *slack,
+ unsigned int *align)
{
int signalg;
int sealalg;
@@ -279,12 +281,13 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
u32 conflen = kctx->gk5e->conflen;
int crypt_offset;
u8 *cksumkey;
+ unsigned int saved_len = buf->len;
dprintk("RPC: gss_unwrap_kerberos\n");
ptr = (u8 *)buf->head[0].iov_base + offset;
if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr,
- buf->len - offset))
+ len - offset))
return GSS_S_DEFECTIVE_TOKEN;
if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) ||
@@ -324,6 +327,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
(!kctx->initiate && direction != 0))
return GSS_S_BAD_SIG;
+ buf->len = len;
if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
struct crypto_sync_skcipher *cipher;
int err;
@@ -376,11 +380,15 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
memmove(orig_start, data_start, data_len);
buf->head[0].iov_len -= (data_start - orig_start);
- buf->len -= (data_start - orig_start);
+ buf->len = len - (data_start - orig_start);
if (gss_krb5_remove_padding(buf, blocksize))
return GSS_S_DEFECTIVE_TOKEN;
+ /* slack must include room for krb5 padding */
+ *slack = XDR_QUADLEN(saved_len - buf->len);
+ /* The GSS blob always precedes the RPC message payload */
+ *align = *slack;
return GSS_S_COMPLETE;
}
@@ -486,7 +494,9 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
}
static u32
-gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
+gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len,
+ struct xdr_buf *buf, unsigned int *slack,
+ unsigned int *align)
{
time64_t now;
u8 *ptr;
@@ -532,7 +542,7 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
if (rrc != 0)
rotate_left(offset + 16, buf, rrc);
- err = (*kctx->gk5e->decrypt_v2)(kctx, offset, buf,
+ err = (*kctx->gk5e->decrypt_v2)(kctx, offset, len, buf,
&headskip, &tailskip);
if (err)
return GSS_S_FAILURE;
@@ -542,7 +552,7 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
* it against the original
*/
err = read_bytes_from_xdr_buf(buf,
- buf->len - GSS_KRB5_TOK_HDR_LEN - tailskip,
+ len - GSS_KRB5_TOK_HDR_LEN - tailskip,
decrypted_hdr, GSS_KRB5_TOK_HDR_LEN);
if (err) {
dprintk("%s: error %u getting decrypted_hdr\n", __func__, err);
@@ -568,18 +578,19 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
* Note that buf->head[0].iov_len may indicate the available
* head buffer space rather than that actually occupied.
*/
- movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len);
+ movelen = min_t(unsigned int, buf->head[0].iov_len, len);
movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip;
- if (offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
- buf->head[0].iov_len)
- return GSS_S_FAILURE;
+ BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
+ buf->head[0].iov_len);
memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen);
buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip;
- buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip;
+ buf->len = len - GSS_KRB5_TOK_HDR_LEN + headskip;
/* Trim off the trailing "extra count" and checksum blob */
- buf->len -= ec + GSS_KRB5_TOK_HDR_LEN + tailskip;
+ xdr_buf_trim(buf, ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
+ *align = XDR_QUADLEN(GSS_KRB5_TOK_HDR_LEN + headskip);
+ *slack = *align + XDR_QUADLEN(ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
return GSS_S_COMPLETE;
}
@@ -603,7 +614,8 @@ gss_wrap_kerberos(struct gss_ctx *gctx, int offset,
}
u32
-gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf)
+gss_unwrap_kerberos(struct gss_ctx *gctx, int offset,
+ int len, struct xdr_buf *buf)
{
struct krb5_ctx *kctx = gctx->internal_ctx_id;
@@ -613,9 +625,11 @@ gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf)
case ENCTYPE_DES_CBC_RAW:
case ENCTYPE_DES3_CBC_RAW:
case ENCTYPE_ARCFOUR_HMAC:
- return gss_unwrap_kerberos_v1(kctx, offset, buf);
+ return gss_unwrap_kerberos_v1(kctx, offset, len, buf,
+ &gctx->slack, &gctx->align);
case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
- return gss_unwrap_kerberos_v2(kctx, offset, buf);
+ return gss_unwrap_kerberos_v2(kctx, offset, len, buf,
+ &gctx->slack, &gctx->align);
}
}
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index db550bfc2642..69316ab1b9fa 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -411,10 +411,11 @@ gss_wrap(struct gss_ctx *ctx_id,
u32
gss_unwrap(struct gss_ctx *ctx_id,
int offset,
+ int len,
struct xdr_buf *buf)
{
return ctx_id->mech_type->gm_ops
- ->gss_unwrap(ctx_id, offset, buf);
+ ->gss_unwrap(ctx_id, offset, len, buf);
}
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 54ae5be62f6a..50d93c49ef1a 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -906,7 +906,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
if (svc_getnl(&buf->head[0]) != seq)
goto out;
/* trim off the mic and padding at the end before returning */
- buf->len -= 4 + round_up_to_quad(mic.len);
+ xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4);
stat = 0;
out:
kfree(mic.data);
@@ -934,7 +934,7 @@ static int
unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
{
u32 priv_len, maj_stat;
- int pad, saved_len, remaining_len, offset;
+ int pad, remaining_len, offset;
clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
@@ -954,12 +954,8 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
buf->len -= pad;
fix_priv_head(buf, pad);
- /* Maybe it would be better to give gss_unwrap a length parameter: */
- saved_len = buf->len;
- buf->len = priv_len;
- maj_stat = gss_unwrap(ctx, 0, buf);
+ maj_stat = gss_unwrap(ctx, 0, priv_len, buf);
pad = priv_len - buf->len;
- buf->len = saved_len;
buf->len -= pad;
/* The upper layers assume the buffer is aligned on 4-byte boundaries.
* In the krb5p case, at least, the data ends up offset, so we need to
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index af0ddd28b081..baef5ee43dbb 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -529,7 +529,6 @@ void cache_purge(struct cache_detail *detail)
{
struct cache_head *ch = NULL;
struct hlist_head *head = NULL;
- struct hlist_node *tmp = NULL;
int i = 0;
spin_lock(&detail->hash_lock);
@@ -541,7 +540,9 @@ void cache_purge(struct cache_detail *detail)
dprintk("RPC: %d entries in %s cache\n", detail->entries, detail->name);
for (i = 0; i < detail->hash_size; i++) {
head = &detail->hash_table[i];
- hlist_for_each_entry_safe(ch, tmp, head, cache_list) {
+ while (!hlist_empty(head)) {
+ ch = hlist_entry(head->first, struct cache_head,
+ cache_list);
sunrpc_begin_cache_remove_entry(ch, detail);
spin_unlock(&detail->hash_lock);
sunrpc_end_cache_remove_entry(ch, detail);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 325a0858700f..61b21dafd7c0 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -880,6 +880,22 @@ EXPORT_SYMBOL_GPL(rpc_shutdown_client);
/*
* Free an RPC client
*/
+static void rpc_free_client_work(struct work_struct *work)
+{
+ struct rpc_clnt *clnt = container_of(work, struct rpc_clnt, cl_work);
+
+ /* These might block on processes that might allocate memory,
+ * so they cannot be called in rpciod, so they are handled separately
+ * here.
+ */
+ rpc_clnt_debugfs_unregister(clnt);
+ rpc_free_clid(clnt);
+ rpc_clnt_remove_pipedir(clnt);
+ xprt_put(rcu_dereference_raw(clnt->cl_xprt));
+
+ kfree(clnt);
+ rpciod_down();
+}
static struct rpc_clnt *
rpc_free_client(struct rpc_clnt *clnt)
{
@@ -890,17 +906,14 @@ rpc_free_client(struct rpc_clnt *clnt)
rcu_dereference(clnt->cl_xprt)->servername);
if (clnt->cl_parent != clnt)
parent = clnt->cl_parent;
- rpc_clnt_debugfs_unregister(clnt);
- rpc_clnt_remove_pipedir(clnt);
rpc_unregister_client(clnt);
rpc_free_iostats(clnt->cl_metrics);
clnt->cl_metrics = NULL;
- xprt_put(rcu_dereference_raw(clnt->cl_xprt));
xprt_iter_destroy(&clnt->cl_xpi);
- rpciod_down();
put_cred(clnt->cl_cred);
- rpc_free_clid(clnt);
- kfree(clnt);
+
+ INIT_WORK(&clnt->cl_work, rpc_free_client_work);
+ schedule_work(&clnt->cl_work);
return parent;
}
@@ -2420,6 +2433,11 @@ rpc_check_timeout(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
+ if (RPC_SIGNALLED(task)) {
+ rpc_call_rpcerror(task, -ERESTARTSYS);
+ return;
+ }
+
if (xprt_adjust_timeout(task->tk_rqstp) == 0)
return;
@@ -2808,8 +2826,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
task = rpc_call_null_helper(clnt, xprt, NULL,
RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC|RPC_TASK_NULLCREDS,
&rpc_cb_add_xprt_call_ops, data);
- if (IS_ERR(task))
- return PTR_ERR(task);
+
rpc_put_task(task);
success:
return 1;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index e27e3532ec75..2284ff038dad 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -908,9 +908,6 @@ int svc_send(struct svc_rqst *rqstp)
if (!xprt)
goto out;
- /* release the receive skb before sending the reply */
- xprt->xpt_ops->xpo_release_rqst(rqstp);
-
/* calculate over-all length */
xb = &rqstp->rq_res;
xb->len = xb->head[0].iov_len +
@@ -1040,6 +1037,8 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
dprintk("svc: svc_delete_xprt(%p)\n", xprt);
xprt->xpt_ops->xpo_detach(xprt);
+ if (xprt->xpt_bc_xprt)
+ xprt->xpt_bc_xprt->ops->close(xprt->xpt_bc_xprt);
spin_lock_bh(&serv->sv_lock);
list_del_init(&xprt->xpt_list);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 519cf9c4f8fd..023514e392b3 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -527,6 +527,8 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
unsigned int uninitialized_var(sent);
int err;
+ svc_release_udp_skb(rqstp);
+
svc_set_cmsg_data(rqstp, cmh);
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
@@ -1076,6 +1078,8 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
unsigned int uninitialized_var(sent);
int err;
+ svc_release_skb(rqstp);
+
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent);
xdr_free_bvec(xdr);
if (err < 0 || sent != (xdr->len + sizeof(marker)))
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 15b58c5144f9..6f7d82fb1eb0 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1150,6 +1150,47 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
}
EXPORT_SYMBOL_GPL(xdr_buf_subsegment);
+/**
+ * xdr_buf_trim - lop at most "len" bytes off the end of "buf"
+ * @buf: buf to be trimmed
+ * @len: number of bytes to reduce "buf" by
+ *
+ * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note
+ * that it's possible that we'll trim less than that amount if the xdr_buf is
+ * too small, or if (for instance) it's all in the head and the parser has
+ * already read too far into it.
+ */
+void xdr_buf_trim(struct xdr_buf *buf, unsigned int len)
+{
+ size_t cur;
+ unsigned int trim = len;
+
+ if (buf->tail[0].iov_len) {
+ cur = min_t(size_t, buf->tail[0].iov_len, trim);
+ buf->tail[0].iov_len -= cur;
+ trim -= cur;
+ if (!trim)
+ goto fix_len;
+ }
+
+ if (buf->page_len) {
+ cur = min_t(unsigned int, buf->page_len, trim);
+ buf->page_len -= cur;
+ trim -= cur;
+ if (!trim)
+ goto fix_len;
+ }
+
+ if (buf->head[0].iov_len) {
+ cur = min_t(size_t, buf->head[0].iov_len, trim);
+ buf->head[0].iov_len -= cur;
+ trim -= cur;
+ }
+fix_len:
+ buf->len -= (len - trim);
+}
+EXPORT_SYMBOL_GPL(xdr_buf_trim);
+
static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
{
unsigned int this_len;
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 4a81e6995d3e..3c627dc685cc 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -388,7 +388,9 @@ static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
} while (nsegs);
done:
- return xdr_stream_encode_item_absent(xdr);
+ if (xdr_stream_encode_item_absent(xdr) < 0)
+ return -EMSGSIZE;
+ return 0;
}
/* Register and XDR encode the Write list. Supports encoding a list
@@ -454,7 +456,9 @@ static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt,
*segcount = cpu_to_be32(nchunks);
done:
- return xdr_stream_encode_item_absent(xdr);
+ if (xdr_stream_encode_item_absent(xdr) < 0)
+ return -EMSGSIZE;
+ return 0;
}
/* Register and XDR encode the Reply chunk. Supports encoding an array
@@ -480,8 +484,11 @@ static int rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
int nsegs, nchunks;
__be32 *segcount;
- if (wtype != rpcrdma_replych)
- return xdr_stream_encode_item_absent(xdr);
+ if (wtype != rpcrdma_replych) {
+ if (xdr_stream_encode_item_absent(xdr) < 0)
+ return -EMSGSIZE;
+ return 0;
+ }
seg = req->rl_segments;
nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index d510a3a15d4b..af7eb8d202ae 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -244,6 +244,8 @@ static void
xprt_rdma_bc_close(struct rpc_xprt *xprt)
{
dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
+
+ xprt_disconnect_done(xprt);
xprt->cwnd = RPC_CWNDSHIFT;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 54469b72b25f..efa5fcb5793f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -223,6 +223,26 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
svc_rdma_recv_ctxt_destroy(rdma, ctxt);
}
+/**
+ * svc_rdma_release_rqst - Release transport-specific per-rqst resources
+ * @rqstp: svc_rqst being released
+ *
+ * Ensure that the recv_ctxt is released whether or not a Reply
+ * was sent. For example, the client could close the connection,
+ * or svc_process could drop an RPC, before the Reply is sent.
+ */
+void svc_rdma_release_rqst(struct svc_rqst *rqstp)
+{
+ struct svc_rdma_recv_ctxt *ctxt = rqstp->rq_xprt_ctxt;
+ struct svc_xprt *xprt = rqstp->rq_xprt;
+ struct svcxprt_rdma *rdma =
+ container_of(xprt, struct svcxprt_rdma, sc_xprt);
+
+ rqstp->rq_xprt_ctxt = NULL;
+ if (ctxt)
+ svc_rdma_recv_ctxt_put(rdma, ctxt);
+}
+
static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *ctxt)
{
@@ -820,6 +840,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
__be32 *p;
int ret;
+ rqstp->rq_xprt_ctxt = NULL;
+
spin_lock(&rdma_xprt->sc_rq_dto_lock);
ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q);
if (ctxt) {
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index bd7c195d872e..23c2d3ce0dc9 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -323,8 +323,6 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
if (atomic_sub_return(cc->cc_sqecount,
&rdma->sc_sq_avail) > 0) {
ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
- trace_svcrdma_post_rw(&cc->cc_cqe,
- cc->cc_sqecount, ret);
if (ret)
break;
return 0;
@@ -337,6 +335,7 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
trace_svcrdma_sq_retry(rdma);
} while (1);
+ trace_svcrdma_sq_post_err(rdma, ret);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
/* If even one was posted, there will be a completion. */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 90cba3058f04..b6c8643867f2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -322,15 +322,17 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr)
}
svc_xprt_get(&rdma->sc_xprt);
+ trace_svcrdma_post_send(wr);
ret = ib_post_send(rdma->sc_qp, wr, NULL);
- trace_svcrdma_post_send(wr, ret);
- if (ret) {
- set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
- svc_xprt_put(&rdma->sc_xprt);
- wake_up(&rdma->sc_send_wait);
- }
- break;
+ if (ret)
+ break;
+ return 0;
}
+
+ trace_svcrdma_sq_post_err(rdma, ret);
+ set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+ svc_xprt_put(&rdma->sc_xprt);
+ wake_up(&rdma->sc_send_wait);
return ret;
}
@@ -924,12 +926,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp);
if (ret < 0)
goto err1;
- ret = 0;
-
-out:
- rqstp->rq_xprt_ctxt = NULL;
- svc_rdma_recv_ctxt_put(rdma, rctxt);
- return ret;
+ return 0;
err2:
if (ret != -E2BIG && ret != -EINVAL)
@@ -938,16 +935,14 @@ out:
ret = svc_rdma_send_error_msg(rdma, sctxt, rqstp);
if (ret < 0)
goto err1;
- ret = 0;
- goto out;
+ return 0;
err1:
svc_rdma_send_ctxt_put(rdma, sctxt);
err0:
trace_svcrdma_send_failed(rqstp, ret);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
- ret = -ENOTCONN;
- goto out;
+ return -ENOTCONN;
}
/**
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 8bb99980ae85..ea54785db4f8 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -71,7 +71,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
struct sockaddr *sa, int salen,
int flags);
static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
-static void svc_rdma_release_rqst(struct svc_rqst *);
static void svc_rdma_detach(struct svc_xprt *xprt);
static void svc_rdma_free(struct svc_xprt *xprt);
static int svc_rdma_has_wspace(struct svc_xprt *xprt);
@@ -552,10 +551,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
return NULL;
}
-static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
-{
-}
-
/*
* When connected, an svc_xprt has at least two references:
*
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index cdd84c09df10..05c4d3a9cda2 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -289,6 +289,7 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_DISCONNECTED:
ep->re_connect_status = -ECONNABORTED;
disconnected:
+ xprt_force_disconnect(xprt);
return rpcrdma_ep_destroy(ep);
default:
break;
@@ -1355,8 +1356,8 @@ int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
--ep->re_send_count;
}
+ trace_xprtrdma_post_send(req);
rc = frwr_send(r_xprt, req);
- trace_xprtrdma_post_send(req, rc);
if (rc)
return -ENOTCONN;
return 0;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 0bda8a73e8a8..845d0be805ec 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2584,6 +2584,7 @@ static int bc_send_request(struct rpc_rqst *req)
static void bc_close(struct rpc_xprt *xprt)
{
+ xprt_disconnect_done(xprt);
}
/*
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index c8c47fc72653..8c47ded2edb6 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -1712,6 +1712,7 @@ exit:
case -EBUSY:
this_cpu_inc(stats->stat[STAT_ASYNC]);
*skb = NULL;
+ tipc_aead_put(aead);
return rc;
default:
this_cpu_inc(stats->stat[STAT_NOK]);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 467c53a1fb5c..d4675e922a8f 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1065,7 +1065,7 @@ static void tipc_link_update_cwin(struct tipc_link *l, int released,
/* Enter fast recovery */
if (unlikely(retransmitted)) {
l->ssthresh = max_t(u16, l->window / 2, 300);
- l->window = l->ssthresh;
+ l->window = min_t(u16, l->ssthresh, l->window);
return;
}
/* Enter slow start */
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 10292c942384..803a3a6d0f50 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -2038,6 +2038,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
n = tipc_node_find_by_id(net, ehdr->id);
}
tipc_crypto_rcv(net, (n) ? n->crypto_rx : NULL, &skb, b);
+ tipc_node_put(n);
if (!skb)
return;
@@ -2090,7 +2091,7 @@ rcv:
/* Check/update node state before receiving */
if (unlikely(skb)) {
if (unlikely(skb_linearize(skb)))
- goto discard;
+ goto out_node_put;
tipc_node_write_lock(n);
if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) {
if (le->link) {
@@ -2119,6 +2120,7 @@ rcv:
if (!skb_queue_empty(&xmitq))
tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n);
+out_node_put:
tipc_node_put(n);
discard:
kfree_skb(skb);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 87466607097f..e370ad0edd76 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1739,22 +1739,21 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct sk_buff *skb,
return 0;
}
-static void tipc_sk_send_ack(struct tipc_sock *tsk)
+static struct sk_buff *tipc_sk_build_ack(struct tipc_sock *tsk)
{
struct sock *sk = &tsk->sk;
- struct net *net = sock_net(sk);
struct sk_buff *skb = NULL;
struct tipc_msg *msg;
u32 peer_port = tsk_peer_port(tsk);
u32 dnode = tsk_peer_node(tsk);
if (!tipc_sk_connected(sk))
- return;
+ return NULL;
skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
dnode, tsk_own_node(tsk), peer_port,
tsk->portid, TIPC_OK);
if (!skb)
- return;
+ return NULL;
msg = buf_msg(skb);
msg_set_conn_ack(msg, tsk->rcv_unacked);
tsk->rcv_unacked = 0;
@@ -1764,7 +1763,19 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk)
tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
msg_set_adv_win(msg, tsk->rcv_win);
}
- tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
+ return skb;
+}
+
+static void tipc_sk_send_ack(struct tipc_sock *tsk)
+{
+ struct sk_buff *skb;
+
+ skb = tipc_sk_build_ack(tsk);
+ if (!skb)
+ return;
+
+ tipc_node_xmit_skb(sock_net(&tsk->sk), skb, tsk_peer_node(tsk),
+ msg_link_selector(buf_msg(skb)));
}
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
@@ -1938,7 +1949,6 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
bool peek = flags & MSG_PEEK;
int offset, required, copy, copied = 0;
int hlen, dlen, err, rc;
- bool ack = false;
long timeout;
/* Catch invalid receive attempts */
@@ -1983,7 +1993,6 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
/* Copy data if msg ok, otherwise return error/partial data */
if (likely(!err)) {
- ack = msg_ack_required(hdr);
offset = skb_cb->bytes_read;
copy = min_t(int, dlen - offset, buflen - copied);
rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
@@ -2011,7 +2020,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
/* Send connection flow control advertisement when applicable */
tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
- if (ack || tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
+ if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
tipc_sk_send_ack(tsk);
/* Exit if all requested data or FIN/error received */
@@ -2105,9 +2114,11 @@ static void tipc_sk_proto_rcv(struct sock *sk,
* tipc_sk_filter_connect - check incoming message for a connection-based socket
* @tsk: TIPC socket
* @skb: pointer to message buffer.
+ * @xmitq: for Nagle ACK if any
* Returns true if message should be added to receive queue, false otherwise
*/
-static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
+static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
{
struct sock *sk = &tsk->sk;
struct net *net = sock_net(sk);
@@ -2171,8 +2182,17 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
if (!skb_queue_empty(&sk->sk_write_queue))
tipc_sk_push_backlog(tsk);
/* Accept only connection-based messages sent by peer */
- if (likely(con_msg && !err && pport == oport && pnode == onode))
+ if (likely(con_msg && !err && pport == oport &&
+ pnode == onode)) {
+ if (msg_ack_required(hdr)) {
+ struct sk_buff *skb;
+
+ skb = tipc_sk_build_ack(tsk);
+ if (skb)
+ __skb_queue_tail(xmitq, skb);
+ }
return true;
+ }
if (!tsk_peer_msg(tsk, hdr))
return false;
if (!err)
@@ -2267,7 +2287,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
while ((skb = __skb_dequeue(&inputq))) {
hdr = buf_msg(skb);
limit = rcvbuf_limit(sk, skb);
- if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) ||
+ if ((sk_conn && !tipc_sk_filter_connect(tsk, skb, xmitq)) ||
(!sk_conn && msg_connected(hdr)) ||
(!grp && msg_in_group(hdr)))
err = TIPC_ERR_NO_PORT;
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index aa015c233898..6ebbec1bedd1 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -96,6 +96,16 @@ void tipc_sub_get(struct tipc_subscription *subscription);
(swap_ ? swab32(val__) : val__); \
})
+/* tipc_sub_write - write val_ to field_ of struct sub_ in user endian format
+ */
+#define tipc_sub_write(sub_, field_, val_) \
+ ({ \
+ struct tipc_subscr *sub__ = sub_; \
+ u32 val__ = val_; \
+ int swap_ = !((sub__)->filter & TIPC_FILTER_MASK); \
+ (sub__)->field_ = swap_ ? swab32(val__) : val__; \
+ })
+
/* tipc_evt_write - write val_ to field_ of struct evt_ in user endian format
*/
#define tipc_evt_write(evt_, field_, val_) \
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 3a12fc18239b..446af7bbd13e 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -237,8 +237,8 @@ static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s)
if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) {
tipc_sub_unsubscribe(sub);
atomic_dec(&tn->subscription_count);
- } else if (s) {
- break;
+ if (s)
+ break;
}
}
spin_unlock_bh(&con->sub_lock);
@@ -362,9 +362,10 @@ static int tipc_conn_rcv_sub(struct tipc_topsrv *srv,
{
struct tipc_net *tn = tipc_net(srv->net);
struct tipc_subscription *sub;
+ u32 s_filter = tipc_sub_read(s, filter);
- if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) {
- s->filter &= __constant_ntohl(~TIPC_SUB_CANCEL);
+ if (s_filter & TIPC_SUB_CANCEL) {
+ tipc_sub_write(s, filter, s_filter & ~TIPC_SUB_CANCEL);
tipc_conn_delete_sub(con, s);
return 0;
}
@@ -400,12 +401,15 @@ static int tipc_conn_rcv_from_sock(struct tipc_conn *con)
return -EWOULDBLOCK;
if (ret == sizeof(s)) {
read_lock_bh(&sk->sk_callback_lock);
- ret = tipc_conn_rcv_sub(srv, con, &s);
+ /* RACE: the connection can be closed in the meantime */
+ if (likely(connected(con)))
+ ret = tipc_conn_rcv_sub(srv, con, &s);
read_unlock_bh(&sk->sk_callback_lock);
+ if (!ret)
+ return 0;
}
- if (ret < 0)
- tipc_conn_close(con);
+ tipc_conn_close(con);
return ret;
}
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index d6620ad53546..28a283f26a8d 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -161,9 +161,11 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
struct udp_bearer *ub, struct udp_media_addr *src,
struct udp_media_addr *dst, struct dst_cache *cache)
{
- struct dst_entry *ndst = dst_cache_get(cache);
+ struct dst_entry *ndst;
int ttl, err = 0;
+ local_bh_disable();
+ ndst = dst_cache_get(cache);
if (dst->proto == htons(ETH_P_IP)) {
struct rtable *rt = (struct rtable *)ndst;
@@ -210,9 +212,11 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
src->port, dst->port, false);
#endif
}
+ local_bh_enable();
return err;
tx_error:
+ local_bh_enable();
kfree_skb(skb);
return err;
}
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 156efce50dbd..0e989005bdc2 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -56,9 +56,9 @@ enum {
TLS_NUM_PROTS,
};
-static struct proto *saved_tcpv6_prot;
+static const struct proto *saved_tcpv6_prot;
static DEFINE_MUTEX(tcpv6_prot_mutex);
-static struct proto *saved_tcpv4_prot;
+static const struct proto *saved_tcpv4_prot;
static DEFINE_MUTEX(tcpv4_prot_mutex);
static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
static struct proto_ops tls_sw_proto_ops;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index c98e602a1a2d..2d399b6c4075 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -780,7 +780,7 @@ static int tls_push_record(struct sock *sk, int flags,
static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
bool full_record, u8 record_type,
- size_t *copied, int flags)
+ ssize_t *copied, int flags)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
@@ -796,10 +796,13 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
psock = sk_psock_get(sk);
if (!psock || !policy) {
err = tls_push_record(sk, flags, record_type);
- if (err && err != -EINPROGRESS) {
+ if (err && sk->sk_err == EBADMSG) {
*copied -= sk_msg_free(sk, msg);
tls_free_open_rec(sk);
+ err = -sk->sk_err;
}
+ if (psock)
+ sk_psock_put(sk, psock);
return err;
}
more_data:
@@ -822,9 +825,10 @@ more_data:
switch (psock->eval) {
case __SK_PASS:
err = tls_push_record(sk, flags, record_type);
- if (err && err != -EINPROGRESS) {
+ if (err && sk->sk_err == EBADMSG) {
*copied -= sk_msg_free(sk, msg);
tls_free_open_rec(sk);
+ err = -sk->sk_err;
goto out_err;
}
break;
@@ -914,7 +918,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
unsigned char record_type = TLS_RECORD_TYPE_DATA;
bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
bool eor = !(msg->msg_flags & MSG_MORE);
- size_t try_to_copy, copied = 0;
+ size_t try_to_copy;
+ ssize_t copied = 0;
struct sk_msg *msg_pl, *msg_en;
struct tls_rec *rec;
int required_size;
@@ -1116,7 +1121,7 @@ send_end:
release_sock(sk);
mutex_unlock(&tls_ctx->tx_lock);
- return copied ? copied : ret;
+ return copied > 0 ? copied : ret;
}
static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
@@ -1130,7 +1135,7 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
struct sk_msg *msg_pl;
struct tls_rec *rec;
int num_async = 0;
- size_t copied = 0;
+ ssize_t copied = 0;
bool full_record;
int record_room;
int ret = 0;
@@ -1232,7 +1237,7 @@ wait_for_memory:
}
sendpage_end:
ret = sk_stream_error(sk, flags, ret);
- return copied ? copied : ret;
+ return copied > 0 ? copied : ret;
}
int tls_sw_sendpage_locked(struct sock *sk, struct page *page,
@@ -2081,8 +2086,9 @@ static void tls_data_ready(struct sock *sk)
strp_data_ready(&ctx->strp);
psock = sk_psock_get(sk);
- if (psock && !list_empty(&psock->ingress_msg)) {
- ctx->saved_data_ready(sk);
+ if (psock) {
+ if (!list_empty(&psock->ingress_msg))
+ ctx->saved_data_ready(sk);
sk_psock_put(sk, psock);
}
}
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 709038a4783e..69efc891885f 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -157,7 +157,11 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt)
{
+ if (pkt->tap_delivered)
+ return;
+
vsock_deliver_tap(virtio_transport_build_skb, pkt);
+ pkt->tap_delivered = true;
}
EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5fa402144cda..692bcd35f809 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -644,10 +644,8 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_HE_CAPABILITY] = { .type = NLA_BINARY,
.len = NL80211_HE_MAX_CAPABILITY_LEN },
- [NL80211_ATTR_FTM_RESPONDER] = {
- .type = NLA_NESTED,
- .validation_data = nl80211_ftm_responder_policy,
- },
+ [NL80211_ATTR_FTM_RESPONDER] =
+ NLA_POLICY_NESTED(nl80211_ftm_responder_policy),
[NL80211_ATTR_TIMEOUT] = NLA_POLICY_MIN(NLA_U32, 1),
[NL80211_ATTR_PEER_MEASUREMENTS] =
NLA_POLICY_NESTED(nl80211_pmsr_attr_policy),
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index 00e782335cb0..25bf72ee6cad 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -115,8 +115,10 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev,
goto drop;
}
- if (!pskb_may_pull(skb, 1))
+ if (!pskb_may_pull(skb, 1)) {
+ x25_neigh_put(nb);
return 0;
+ }
switch (skb->data[0]) {
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
index 8aa415a38814..0285aaa1e93c 100644
--- a/net/x25/x25_subr.c
+++ b/net/x25/x25_subr.c
@@ -357,6 +357,12 @@ void x25_disconnect(struct sock *sk, int reason, unsigned char cause,
sk->sk_state_change(sk);
sock_set_flag(sk, SOCK_DEAD);
}
+ if (x25->neighbour) {
+ read_lock_bh(&x25_list_lock);
+ x25_neigh_put(x25->neighbour);
+ x25->neighbour = NULL;
+ read_unlock_bh(&x25_list_lock);
+ }
}
/*
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index fa7bb5e060d0..ed7a6060f73c 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -343,7 +343,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
unsigned int chunks, chunks_per_page;
u64 addr = mr->addr, size = mr->len;
- int size_chk, err;
+ int err;
if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
/* Strictly speaking we could support this, if:
@@ -382,8 +382,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
return -EINVAL;
}
- size_chk = chunk_size - headroom - XDP_PACKET_HEADROOM;
- if (size_chk < 0)
+ if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
return -EINVAL;
umem->address = (unsigned long)addr;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 356f90e4522b..c350108aa38d 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -131,8 +131,9 @@ static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf,
u64 page_start = addr & ~(PAGE_SIZE - 1);
u64 first_len = PAGE_SIZE - (addr - page_start);
- memcpy(to_buf, from_buf, first_len + metalen);
- memcpy(next_pg_addr, from_buf + first_len, len - first_len);
+ memcpy(to_buf, from_buf, first_len);
+ memcpy(next_pg_addr, from_buf + first_len,
+ len + metalen - first_len);
return;
}